Browse Source

initial commit

fs
yggverse 2 months ago
parent
commit
a09f941a55
  1. 4
      .gitignore
  2. 33
      README.md
  3. 19
      composer.json
  4. 23
      example/config.json
  5. 132
      src/crawler.php

4
.gitignore vendored

@ -0,0 +1,4 @@ @@ -0,0 +1,4 @@
/composer.lock
/config.json
/data/
/vendor/

33
README.md

@ -1,2 +1,33 @@ @@ -1,2 +1,33 @@
# Pulsar
RSS Aggregator for Gemini Protocol
RSS Aggregator for [Gemini Protocol](https://geminiprotocol.net)
Simple RSS feed converter to static Gemtext format, useful for news portals or localhost usage.
## Usage
1. `git clone https://github.com/YGGverse/Pulsar.git`
2. `cp example/config.json config.json` - set up your feeds there!
3. `php src/crawler.php` - crontab schedule
## Config
Configuration file supports multiple feed channels with custom configurations:
* `source` - string, filepath or URL to the valid RSS feed
* `target` - string, relative or absolute path to Gemtext dumps
* `item`
* `limit` - integer, how many items to display
* `template` - string, custom pattern for a feed item that supports the following macros:
* `{nl}` - new line separator
* `{link}` - item link
* `{guid}` - item guid
* `{pubDate}` - item pubDate, soon with custom time format e.g. `{pubDate:Y-m-d H:i}`
* `{title}` - item title
* `{description}` - item description
Resulting files can be generated in any folder for personal reading on localhost, or shared with others using [gmid](https://github.com/omar-polo/gmid), [twins](https://code.rocket9labs.com/tslocum/twins) or any other [Gemini server](https://github.com/kr1sp1n/awesome-gemini#servers).
## Instances
Coming soon!

19
composer.json

@ -0,0 +1,19 @@ @@ -0,0 +1,19 @@
{
"name": "yggverse/pulsar",
"description": "RSS Aggregator for Gemini Protocol",
"keywords": [ "yggverse", "gemini", "gemini-protocol", "gemtext", "gmi", "rss", "feed", "converter", "aggregator" ],
"homepage": "https://github.com/yggverse/pulsar",
"type": "project",
"license": "MIT",
"autoload": {
"psr-4": {
"Yggverse\\Pulsar\\": "src/"
}
},
"authors": [
{
"name": "YGGverse"
}
],
"require": {}
}

23
example/config.json

@ -0,0 +1,23 @@ @@ -0,0 +1,23 @@
{
"feed":
[
{
"source":"https://www.omglinux.com/feed",
"target":"data/omglinux.com/feed.gmi",
"item":
{
"template":"=> {link} {title}{nl}{nl}{description}",
"limit":20
}
},
{
"source":"https://omgubuntu.co.uk/feed",
"target":"data/omgubuntu.co.uk/feed.gmi",
"item":
{
"template":"=> {link} {title}{nl}{nl}{description}",
"limit":20
}
}
]
}

132
src/crawler.php

@ -0,0 +1,132 @@ @@ -0,0 +1,132 @@
<?php

declare(strict_types=1);

// Pulsar crawler: reads ../config.json and renders every configured RSS feed
// into a Gemtext file (channel title, optional description, then the items).
// Meant to be started from the command line, e.g. by a crontab schedule.

// Report a problem on the standard error stream without stopping the run
$report = static function (string $message): void
{
    file_put_contents(
        'php://stderr',
        $message . PHP_EOL
    );
};

// Reduce a feed value to trimmed plain text (entities decoded, tags removed)
$plain = static fn (string $markup): string => trim(
    strip_tags(
        html_entity_decode(
            $markup
        )
    )
);

// Prevent multi-thread execution: the semaphore key is derived from this
// directory and allows a single holder, the acquire call is non-blocking
$semaphore = sem_get(
    crc32(
        __DIR__
    ), 1
);

if (false === $semaphore)
{
    $report(
        'Could not create the semaphore'
    );

    exit(1);
}

if (false === sem_acquire($semaphore, true))
{
    // Another instance is still running
    exit;
}

// Init config
$configPath = __DIR__ . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . 'config.json';

$configJson = is_readable($configPath) ? file_get_contents($configPath) : false;

if (false === $configJson)
{
    $report(
        sprintf(
            'Could not read config file: %s',
            $configPath
        )
    );

    exit(1);
}

$config = json_decode(
    $configJson
);

if (!is_object($config) || !isset($config->feed) || !is_array($config->feed))
{
    $report(
        sprintf(
            'Config file has no valid "feed" list: %s',
            $configPath
        )
    );

    exit(1);
}

// Update feeds
foreach ($config->feed as $feed)
{
    // Skip entries that cannot be processed at all
    if (!is_object($feed) || !is_string($feed->source ?? null) || !is_string($feed->target ?? null))
    {
        $report(
            'Skipped feed entry without string "source" and "target"'
        );

        continue;
    }

    // Init feed location: targets starting with the directory separator are
    // used as is, anything else is resolved against the project root
    $filename = str_starts_with(
        $feed->target,
        DIRECTORY_SEPARATOR
    ) ? $feed->target : __DIR__ . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . $feed->target;

    // Get feed data, simplexml_load_file returns false on unreadable or malformed XML
    $xml = simplexml_load_file(
        $feed->source
    );

    if (false === $xml)
    {
        $report(
            sprintf(
                'Could not load feed: %s',
                $feed->source
            )
        );

        continue;
    }

    $channel = $xml->channel;

    if (!$channel)
    {
        continue;
    }

    // Init title, fall back to the source host when the channel has none
    $title = $plain(
        (string) $channel->title
    );

    if ('' === $title)
    {
        $title = (string) parse_url(
            $feed->source,
            PHP_URL_HOST
        );
    }

    // The whole document is built in memory and written once, so readers
    // never observe a truncated or half-written file
    $content = sprintf(
        '# %s',
        $title
    ) . PHP_EOL;

    // Append description
    $description = $plain(
        (string) $channel->description
    );

    if ('' !== $description)
    {
        $content .= PHP_EOL . $description . PHP_EOL;
    }

    // Init item settings: a missing limit means no limit, a missing template renders empty items
    $limit = is_numeric($feed->item->limit ?? null) ? (int) $feed->item->limit : PHP_INT_MAX;

    $template = is_string($feed->item->template ?? null) ? $feed->item->template : '';

    // Append items
    $i = 1; foreach ($channel->item as $item)
    {
        // Apply items limit
        if ($i > $limit)
        {
            break;
        }

        // Format item: strtr replaces all macros in a single pass, so macro-like
        // text inside the feed content itself is never expanded again
        $rendered = strtr(
            $template,
            [
                '{nl}'          => PHP_EOL,
                '{link}'        => trim((string) $item->link),
                '{guid}'        => trim((string) $item->guid),
                '{pubDate}'     => trim((string) $item->pubDate),
                '{title}'       => $plain((string) $item->title),
                '{description}' => $plain((string) $item->description),
            ]
        ) . PHP_EOL;

        // Collapse runs of 3+ whitespace characters into one empty line;
        // preg_replace returns null on invalid UTF-8, keep the item as rendered then
        $collapsed = preg_replace(
            '/[\s]{3,}/ui',
            PHP_EOL . PHP_EOL,
            $rendered
        );

        $content .= PHP_EOL . trim(
            $collapsed ?? $rendered
        ) . PHP_EOL;

        $i++;
    }

    // Init destination storage
    $directory = dirname(
        $filename
    );

    if (!is_dir($directory) && !mkdir($directory, 0755, true) && !is_dir($directory))
    {
        $report(
            sprintf(
                'Could not create directory: %s',
                $directory
            )
        );

        continue;
    }

    // Save feed
    if (false === file_put_contents($filename, $content, LOCK_EX))
    {
        $report(
            sprintf(
                'Could not write feed file: %s',
                $filename
            )
        );
    }
}
Loading…
Cancel
Save