From a09f941a552a10d5da9658826a3aa64d3cace6c6 Mon Sep 17 00:00:00 2001 From: yggverse Date: Fri, 19 Apr 2024 16:53:12 +0300 Subject: [PATCH] initial commit --- .gitignore | 4 ++ README.md | 33 ++++++++++- composer.json | 19 +++++++ example/config.json | 23 ++++++++ src/crawler.php | 132 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 210 insertions(+), 1 deletion(-) create mode 100644 .gitignore create mode 100644 composer.json create mode 100644 example/config.json create mode 100644 src/crawler.php diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f9e733a --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +/composer.lock +/config.json +/data/ +/vendor/ diff --git a/README.md b/README.md index f709247..aaaea0c 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,33 @@ # Pulsar -RSS Aggregator for Gemini Protocol + +RSS Aggregator for [Gemini Protocol](https://geminiprotocol.net) + +Simple RSS feed converter to static Gemtext format, useful for news portals or localhost usage. + +## Usage + +1. `git clone https://github.com/YGGverse/Pulsar.git` +2. `cp example/config.json config.json` - set up your feeds there! +3. `php src/crawler.php` - crontab schedule + +## Config + +Configuration file supports multiple feed channels with custom configurations: + +* `source` - string, filepath or URL to a valid RSS feed +* `target` - string, relative or absolute path to Gemtext dumps +* `item` + * `limit` - integer, how many items to display + * `template` - string, custom pattern for feed item, that supports the following macros + * `{nl}` - new line separator + * `{link}` - item link + * `{guid}` - item guid + * `{pubDate}` - item pubDate, soon with custom time format e.g.
`{pubDate:Y-m-d H:i}` + * `{title}` - item title + * `{description}` - item description + +Resulting files can be generated into any folder for personal reading on localhost, or shared with others using [gmid](https://github.com/omar-polo/gmid), [twins](https://code.rocket9labs.com/tslocum/twins) or any other [Gemini server](https://github.com/kr1sp1n/awesome-gemini#servers). + +## Instances + +Coming soon! \ No newline at end of file diff --git a/composer.json b/composer.json new file mode 100644 index 0000000..1d19097 --- /dev/null +++ b/composer.json @@ -0,0 +1,19 @@ +{ + "name": "yggverse/pulsar", + "description": "RSS Aggregator for Gemini Protocol", + "keywords": [ "yggverse", "gemini", "gemini-protocol", "gemtext", "gmi", "rss", "feed", "converter", "aggregator" ], + "homepage": "https://github.com/yggverse/pulsar", + "type": "project", + "license": "MIT", + "autoload": { + "psr-4": { + "Yggverse\\Pulsar\\": "src/" + } + }, + "authors": [ + { + "name": "YGGverse" + } + ], + "require": {} +} diff --git a/example/config.json b/example/config.json new file mode 100644 index 0000000..210511c --- /dev/null +++ b/example/config.json @@ -0,0 +1,23 @@ +{ + "feed": + [ + { + "source":"https://www.omglinux.com/feed", + "target":"data/omglinux.com/feed.gmi", + "item": + { + "template":"=> {link} {title}{nl}{nl}{description}", + "limit":20 + } + }, + { + "source":"https://omgubuntu.co.uk/feed", + "target":"data/omgubuntu.co.uk/feed.gmi", + "item": + { + "template":"=> {link} {title}{nl}{nl}{description}", + "limit":20 + } + } + ] +} \ No newline at end of file diff --git a/src/crawler.php b/src/crawler.php new file mode 100644 index 0000000..5222575 --- /dev/null +++ b/src/crawler.php @@ -0,0 +1,132 @@ +feed as $feed) +{ + // Init feed location + $filename = str_starts_with( + $feed->target, + DIRECTORY_SEPARATOR + ) ? $feed->target : __DIR__ . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR .
$feed->target; + + // Init destination storage + @mkdir( + dirname( + $filename + ), + 0755, + true + ); + + // Get feed data + if (!$channel = simplexml_load_file($feed->source)->channel) + { + continue; + } + + // Update title + if (!empty($channel->title)) + { + $title = trim( + strip_tags( + html_entity_decode( + $channel->title + ) + ) + ); + } + + else + { + $title = parse_url( + $feed->source, + PHP_URL_HOST + ); + } + + file_put_contents( + $filename, + sprintf( + '# %s', + $title + ) . PHP_EOL + ); + + // Append description + if (!empty($channel->description)) + { + file_put_contents( + $filename, + PHP_EOL . trim( + strip_tags( + html_entity_decode( + $channel->description + ) + ) + ) . PHP_EOL, + FILE_APPEND | LOCK_EX + ); + } + + // Append items + $i = 1; foreach ($channel->item as $item) + { + // Apply items limit + if ($i > $feed->item->limit) + { + break; + } + + // Format item + file_put_contents( + $filename, + PHP_EOL . trim( + preg_replace( + '/[\s]{3,}/ui', + PHP_EOL . PHP_EOL, + str_replace( + [ + '{nl}', + '{link}', + '{guid}', + '{pubDate}', + '{title}', + '{description}' + ], + [ + PHP_EOL, + !empty($item->link) ? trim($item->link) : '', + !empty($item->guid) ? trim($item->guid) : '', + !empty($item->pubDate) ? trim($item->pubDate) : '', + !empty($item->title) ? trim(strip_tags(html_entity_decode($item->title))) : '', + !empty($item->description) ? trim(strip_tags(html_entity_decode($item->description))) : '' + ], + $feed->item->template + ) . PHP_EOL + ) + ) . PHP_EOL, + FILE_APPEND | LOCK_EX + ); + + $i++; + } +} \ No newline at end of file