diff --git a/.gitignore b/.gitignore index 3accf68..afc499a 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ README.html *.pyc *.swp *~ +*.conf diff --git a/README.md b/README.md index afd64e4..c22d281 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,17 @@ -Feed2twister is a simple script to post items from RSS/ATOM feeds to [Twister](http://twister.net.co). +Feed2twister is a simple script to post items from RSS/ATOM feeds to +[Twister](http://twister.net.co). + +---- + +**Note:** If you upgrade an old installation where you don't have +`feed2twister.conf` yet, use a copy of `feed2twister.conf.example` and +edit it so that it contains similar settings to those you had in `conf.py`. + +---- ### Prerequisites +* Python 2 * [Twister](http://twister.net.co/) (of course) * [python-bitcoinrpc](https://pypi.python.org/pypi/python-bitcoinrpc/) * [feedparser](https://pypi.python.org/pypi/feedparser/) @@ -9,22 +19,47 @@ Feed2twister is a simple script to post items from RSS/ATOM feeds to [Twister](h ### Installing - * run `git submodule update --init` - (to install a [patched version](https://github.com/thedod/python-bitcoinrpc/tree/unicode-fix-for-twister) - of bitcoin-rpc (a twister-related unicode fix). - If you don't have git(?) you can [download the zip](https://github.com/thedod/python-bitcoinrpc/archive/unicode-fix-for-twister.zip), - and copy the bitcoinrpc directory into this directory (overwrite whatever you have ther now. probably an empty folder). + * run `git submodule update --init` (to install a [patched + version](https://github.com/thedod/python-bitcoinrpc/tree/unicode-fix-for-twister) + of bitcoin-rpc (a twister-related unicode fix)). If you don't have + git(?) you can [download the + zip](https://github.com/thedod/python-bitcoinrpc/archive/unicode-fix-for-twister.zip), + and copy the bitcoinrpc directory into this directory (overwrite + whatever you have there now; probably an empty folder). 
- * Copy `config-example.py` to `config.py` and edit it to taste. + * Copy `feed2twister.conf.example` to `feed2twister.conf` and edit it + to taste. ### Running -Normally, you would run this as a cron task: `cd /path/to/this ; python feed2twister.py` [`N`] +Normally, you would run this as a cron task: +`/path/to/this/feed2twister.py [-c CONFIGFILE] [N]` + +If [optional] `N` is supplied, it's used as the maximum number of items to post +(per feed). Default is (in order of precedence) `max_new_items_per_feed` from +the conf file, or 0. + +If there are more than `N` new items in a feed, "over quota" items get +marked as if they were posted (this can be handy when you add a new feed +with a long history). + +Specifically, `python feed2twister.py 0` would make all feeds "catch up" +without posting anything. + +If [optional] `CONFIGFILE` is supplied, it is used as a custom config +file, instead of the default files, which are all read in the following order (settings in later files override earlier ones): +`feed2twister.conf` in the script's directory, `~/.config/feed2twister.conf`, +`~/.feed2twister.conf`. + -if [optional] `N` is supplied, it's used as the maximum items to post (per feed). Default is `conf.MAX_NEW_ITEMS_PER_FEED`. +### Hidden configuration -If there are more than `N` new items in a feed, "over quota" items get marked as if they were posted -(this can be handy when you add a new feed with a long history). +Some more options are available: -Specifically, `python feed2twister.py 0` would make all feeds "catch up" without posting anything. +* `do_not_include_link` (boolean, default False): if True, feed2twister + will NOT prepend the feed item title with the item link before posting + it to twister. +* `skip_first_title_char` (int, default None): if set, feed2twister will + strip this number of characters from the beginning of the feed item + title (only applied when `do_not_include_link` is True). Useful to skip your login from an app.net feed, for example. 
diff --git a/conf-example.py b/conf-example.py deleted file mode 100644 index 4f0f73e..0000000 --- a/conf-example.py +++ /dev/null @@ -1,14 +0,0 @@ -import logging -logging.basicConfig(level=logging.INFO) -#logging.basicConfig(level=logging.ERROR) # For deployment. It's on a don't wanna know basis :) -USERNAME = 'MYTWISTERUSERNAME' # e.g 'thedod' -RPC_URL = 'http://MYRPCUSER:MYRPCPASSWORD@127.0.0.1:28332' # change to rpcuser and rpcpassword from ~/.twister/twister.conf -DB_FILENAME = 'items.db' # db is mainly there to keep track of "what not to post again" :) (debugging too, I guess) -MAX_URL_LENGTH = 100 # this leaves 36 characters and a ... to get to 140. If we don't have that, we skip the item :( -MAX_NEW_ITEMS_PER_FEED = 3 # we don't want to flood more than that in a single run. -USE_SHORTENER = False # to enable this, you need gdshortener: https://github.com/torre76/gd_shortener/ -SHORTENER_STATS = True # tell is.gd to publicly show statistics for the shortened url -FEEDS = [ # Use your own feeds, of course :) - 'https://swatwt.com/favs/rss/en', - 'https://github.com/thedod.atom' -] diff --git a/feed2twister.conf.example b/feed2twister.conf.example new file mode 100644 index 0000000..b434b44 --- /dev/null +++ b/feed2twister.conf.example @@ -0,0 +1,26 @@ +[DEFAULT] +# e.g 'thedod' +username = MYTWISTERUSERNAME +# change to rpcuser and rpcpassword from ~/.twister/twister.conf +rpc_url = http://MYRPCUSER:MYRPCPASSWORD@127.0.0.1:28332 +# db is mainly there to keep track of "what not to post again" :) (debugging too, I guess) +db_filename = ~/.feed2twister.db +# this leaves 36 characters and a ... to get to 140. If we don't have that, we skip the item :( +max_url_length = 100 +# Use your own feeds, of course :) +feeds = https://swatwt.com/favs/rss/en + https://github.com/thedod.atom + https://github.com/milouse.atom + + +# All the following options are optional + +# For deployment. 
It's on a don't wanna know basis :) +# logging_level = error +logging_level = debug +# we don't want to flood more than that in a single run. +max_new_items_per_feed = 3 +# to enable this, you need gdshortener: https://github.com/torre76/gd_shortener/ +use_shortener = False +# tell is.gd to publicly show statistics for the shortened url +shortener_stats = True diff --git a/feed2twister.py b/feed2twister.py old mode 100644 new mode 100755 index 13eaaab..bf40c02 --- a/feed2twister.py +++ b/feed2twister.py @@ -1,13 +1,52 @@ -from conf import * -import feedparser,anydbm,sys +#!/usr/bin/env python2 +# -*- coding: utf-8 -*- + +import os,feedparser,anydbm,argparse,ConfigParser from bitcoinrpc.authproxy import AuthServiceProxy -if USE_SHORTENER: +SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__)) + +arg_parser = argparse.ArgumentParser(description='Feed2twister is a simple script to post items from RSS/ATOM feeds to Twister.') +arg_parser.add_argument('--config', '-c', help='Alternate config file. Default is {0}.'.format(os.path.join(SCRIPT_PATH, 'feed2twister.conf'))) +arg_parser.add_argument('maxitems', metavar='N', type=int, nargs='?', default=None, + help="""Maximum items to post (per feed). Default is 0. +If there are more than N new items in a feed, "over quota" items get marked as if they were posted (this can be handy when you add a new feed with a long history). 
Specifically, %(prog)s 0 would make all feeds "catch up" without posting anything.""") +args = arg_parser.parse_args() + +from xdg.BaseDirectory import xdg_config_home +main_config_file = ConfigParser.ConfigParser() +if args.config: + main_config_file.read([os.path.expanduser(args.config)]) +else: + main_config_file.read([os.path.join(SCRIPT_PATH, 'feed2twister.conf'), os.path.join(xdg_config_home, 'feed2twister.conf'), os.path.expanduser('~/.feed2twister.conf')]) +main_config = main_config_file.defaults() + +def get_bool_conf_option(option): + if option in main_config and main_config[option]: + v = main_config[option] + return str(v).lower() in ('yes', 'true', 't', '1') + return False + +def get_array_conf_option(option): + if option in main_config and main_config[option]: + return main_config[option].split("\n") + return [] + +import logging +log_level = logging.ERROR +if 'logging_level' in main_config and main_config['logging_level']: + log_level = main_config['logging_level'] + log_level = getattr(logging, log_level.upper()) + +logging.basicConfig(level=log_level) + +if get_bool_conf_option('use_shortener'): try: import gdshortener except ImportError: USE_SHORTENER = False + ### truncated_utf8() is based on http://stackoverflow.com/a/13738452 def _is_utf8_lead_byte(b): '''A UTF-8 intermediate byte starts with the bits 10xxxxxx.''' @@ -31,53 +70,73 @@ def get_next_k(twister,username): return 0 def main(max_items): - db = anydbm.open(DB_FILENAME,'c') - twister = AuthServiceProxy(RPC_URL) - for feed_url in FEEDS: + db = anydbm.open(os.path.expanduser(main_config['db_filename']),'c') + twister = AuthServiceProxy(main_config['rpc_url']) + + for feed_url in get_array_conf_option('feeds'): logging.info(feed_url) feed = feedparser.parse(feed_url) n_items = 0 + for e in feed.entries: eid = '{0}|{1}'.format(feed_url,e.id) + if db.has_key(eid): # been there, done that (or not - for a reason) logging.debug('Skipping duplicate {0}'.format(eid)) + else: # format as a <=140 
character string - # Construct the link, possibly with shortener - entry_url = gdshortener.ISGDShortener().shorten(url=e.link, log_stat=SHORTENER_STATS)[0] if USE_SHORTENER else e.link - if len(entry_url)<=MAX_URL_LENGTH: - msg = u'{0} {1}'.format(entry_url,e.title) - if len(msg)>140: # Truncate (and hope it's still meaningful) - msg = msg[:137]+u'...' - else: # Link too long. Not enough space left for text :( - msg = '' + if not get_bool_conf_option('do_not_include_link'): + # Construct the link, possibly with shortener + entry_url = gdshortener.ISGDShortener().shorten(url=e.link, log_stat=get_bool_conf_option('shortener_stats'))[0] if get_bool_conf_option('use_shortener') else e.link + + if len(entry_url) <= int(main_config['max_url_length']): + msg = u'{0} {1}'.format(entry_url,e.title) + + else: # Link too long. Not enough space left for text :( + msg = '' + + else: + entry_title = e.title + if 'skip_first_title_char' in main_config and main_config['skip_first_title_char']: + entry_title = entry_title[int(main_config['skip_first_title_char']):] + + msg = u'{0}'.format(entry_title) + + + if len(msg)>140: # Truncate (and hope it's still meaningful) + msg = msg[:137]+u'...' + + utfmsg = truncated_utf8(msg,140)# limit is 140 utf-8 bytes (not chars) msg = unicode(utfmsg,'utf-8') # AuthServiceProxy needs unicode [we just needed to know where to truncate, and that's utf-8] db[eid] = utfmsg # anydbm, on the other hand, can't handle unicode, so it's a good thing we've also kept the utf-8 :) if not msg: # We've marked it as "posted", but no sense really posting it. 
logging.warn(u'Link too long at {0}'.format(eid)) continue - if n_items>=max_items: # Avoid accidental flooding + + if n_items >= max_items: # Avoid accidental flooding logging.warn(u'Skipping "over quota" item: {0}'.format(msg)) continue + logging.info(u'posting {0}'.format(msg)) + try: - twister.newpostmsg(USERNAME,get_next_k(twister,USERNAME),msg) + twister.newpostmsg(main_config['username'], + get_next_k(twister, + main_config['username']), + msg) except Exception,e: logging.error(`e`) # usually not very informative :( + n_items+=1 + + if __name__=='__main__': - if len(sys.argv)>1: - if len(sys.argv)>2 or not sys.argv[1].isdigit(): - sys.stderr.write("""Usage: {cmd} [N] -if [optional] N is supplied, it's used as the maximum items to post (per feed). Default is {n}. -If there are more than N new items in a feed, "over quota" items get marked as if they were posted -(this can be handy when you add a new feed with a long history). -Specifically, {cmd} 0 would make all feeds "catch up" without posting anything. -""".format(cmd=sys.argv[0],n=MAX_NEW_ITEMS_PER_FEED)) - sys.exit(-1) - else: - n = int(sys.argv[1]) + if args.maxitems != None: + n = args.maxitems + elif 'max_new_items_per_feed' in main_config and main_config['max_new_items_per_feed']: + n = int(main_config['max_new_items_per_feed']) else: - n = MAX_NEW_ITEMS_PER_FEED + n = 0 main(n)