Browse Source

Big refactoring in order to use argparse and ConfigParser module instead of using python import. Add possibilities to use custom config files

master
Étienne Deparis 10 years ago
parent
commit
c2c40b144e
  1. 8
      README.md
  2. 14
      conf-example.py
  3. 22
      feed2twister.conf.example
  4. 91
      feed2twister.py

8
README.md

@ -15,15 +15,17 @@ Feed2twister is a simple script to post items from RSS/ATOM feeds to [Twister](h @@ -15,15 +15,17 @@ Feed2twister is a simple script to post items from RSS/ATOM feeds to [Twister](h
If you don't have git(?) you can [download the zip](https://github.com/thedod/python-bitcoinrpc/archive/unicode-fix-for-twister.zip),
and copy the `bitcoinrpc` directory into this directory (overwrite whatever you have there now, probably an empty folder).
* Copy `conf-example.py` to `conf.py` and edit it to taste.
* Copy `feed2twister.conf.example` to `feed2twister.conf` and edit it to taste.
### Running
Normally, you would run this as a cron task: `cd /path/to/this ; python feed2twister.py` [`N`]
Normally, you would run this as a cron task: `cd /path/to/this ; python feed2twister.py` [`-n N`] [`-c CONFIGFILE`]
if [optional] `N` is supplied, it's used as the maximum items to post (per feed). Default is `conf.MAX_NEW_ITEMS_PER_FEED`.
If [optional] `N` is supplied, it's used as the maximum items to post (per feed). Default is the `max_new_items_per_feed` option of the config file, or `0` if that option is not set.
If there are more than `N` new items in a feed, "over quota" items get marked as if they were posted
(this can be handy when you add a new feed with a long history).
Specifically, `python feed2twister.py -n 0` would make all feeds "catch up" without posting anything.
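If [optional] `CONFIGFILE` is supplied, it is used as the only config file. Otherwise the following files are read in this order, and every file that exists is loaded, with values from later files overriding earlier ones: `feed2twister.conf` in the script's directory, `~/.config/feed2twister.conf` (more precisely `$XDG_CONFIG_HOME/feed2twister.conf`), `~/.feed2twister.conf`.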

14
conf-example.py

@ -1,14 +0,0 @@ @@ -1,14 +0,0 @@
import logging
logging.basicConfig(level=logging.INFO)
#logging.basicConfig(level=logging.ERROR) # For deployment. It's on a don't wanna know basis :)
USERNAME = 'MYTWISTERUSERNAME' # e.g 'thedod'
RPC_URL = 'http://MYRPCUSER:MYRPCPASSWORD@127.0.0.1:28332' # change to rpcuser and rpcpassword from ~/.twister/twister.conf
DB_FILENAME = 'items.db' # db is mainly there to keep track of "what not to post again" :) (debugging too, I guess)
MAX_URL_LENGTH = 100 # this leaves 36 characters and a ... to get to 140. If we don't have that, we skip the item :(
MAX_NEW_ITEMS_PER_FEED = 3 # we don't want to flood more than that in a single run.
USE_SHORTENER = False # to enable this, you need gdshortener: https://github.com/torre76/gd_shortener/
SHORTENER_STATS = True # tell is.gd to publicly show statistics for the shortened url
FEEDS = [ # Use your own feeds, of course :)
'https://swatwt.com/favs/rss/en',
'https://github.com/thedod.atom'
]

22
feed2twister.conf.example

@ -0,0 +1,22 @@ @@ -0,0 +1,22 @@
[DEFAULT]
# For deployment. It's on a don't wanna know basis :)
# logging_level = error
logging_level = debug
# e.g 'thedod'
username = MYTWISTERUSERNAME
# change to rpcuser and rpcpassword from ~/.twister/twister.conf
rpc_url = http://MYRPCUSER:MYRPCPASSWORD@127.0.0.1:28332
# db is mainly there to keep track of "what not to post again" :) (debugging too, I guess)
db_filename = items.db
# this leaves 36 characters and a ... to get to 140. If we don't have that, we skip the item :(
max_url_length = 100
# we don't want to flood more than that in a single run.
max_new_items_per_feed = 3
# to enable this, you need gdshortener: https://github.com/torre76/gd_shortener/
use_shortener = False
# tell is.gd to publicly show statistics for the shortened url
shortener_stats = True
# Use your own feeds, of course :)
feeds = https://swatwt.com/favs/rss/en
    https://github.com/thedod.atom
    https://github.com/milouse.atom

91
feed2twister.py

@ -1,13 +1,49 @@ @@ -1,13 +1,49 @@
from conf import *
import feedparser,anydbm,sys
import os,feedparser,anydbm,argparse,ConfigParser
from bitcoinrpc.authproxy import AuthServiceProxy
if USE_SHORTENER:
# Directory holding this script; the bundled feed2twister.conf is looked up here.
SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))

arg_parser = argparse.ArgumentParser(description='Feed2twister is a simple script to post items from RSS/ATOM feeds to Twister.')
arg_parser.add_argument('--config', '-c',
                        help='Alternate config file. By default {0}, $XDG_CONFIG_HOME/feed2twister.conf and ~/.feed2twister.conf are read, in that order (later files override earlier ones).'.format(os.path.join(SCRIPT_PATH, 'feed2twister.conf')))
arg_parser.add_argument('--maxitems', '-n', type=int, metavar='N',
                        help="""Maximum items to post (per feed). Default is the max_new_items_per_feed config option, or 0 if it is not set.
If there are more than N new items in a feed, "over quota" items get marked as if they were posted (this can be handy when you add a new feed with a long history). Specifically, %(prog)s -n 0 would make all feeds "catch up" without posting anything.""")
args = arg_parser.parse_args()

# Imported after argument parsing, as before, so that --help works even when
# the xdg package is not installed.
from xdg.BaseDirectory import xdg_config_home

main_config_file = ConfigParser.ConfigParser()
if args.config:
    # An explicit --config replaces the default search list entirely.
    config_candidates = [os.path.expanduser(args.config)]
else:
    config_candidates = [
        os.path.join(SCRIPT_PATH, 'feed2twister.conf'),
        os.path.join(xdg_config_home, 'feed2twister.conf'),
        os.path.expanduser('~/.feed2twister.conf'),
    ]

# read() silently skips files it cannot open and returns the list of files it
# actually parsed; an empty list means there is no configuration at all, so
# stop here with a clear message instead of failing later on a missing key.
if not main_config_file.read(config_candidates):
    arg_parser.error('No config file found (tried: {0})'.format(', '.join(config_candidates)))

# Every option lives in the [DEFAULT] section; expose it as a plain mapping.
main_config = main_config_file.defaults()
def get_bool_conf_option(option):
    """Return the boolean value of ``option`` read from ``main_config``.

    The values 'yes', 'true', 't' and '1' (case-insensitive) are true.
    Any other value, an empty value, or an option that is not present in
    the configuration yields False.
    """
    # .get() instead of indexing: a missing option means "off", not KeyError.
    value = main_config.get(option)
    if not value:
        return False
    return str(value).strip().lower() in ('yes', 'true', 't', '1')
def get_array_conf_option(option):
    """Return the multi-line value of ``option`` from ``main_config`` as a list.

    The value is split on line breaks; surrounding whitespace is removed
    from each entry and blank entries are dropped. An empty value or an
    option that is not present in the configuration yields an empty list.
    """
    # .get() instead of indexing: a missing option means "no entries".
    raw_value = main_config.get(option)
    if not raw_value:
        return []
    entries = (line.strip() for line in raw_value.splitlines())
    return [entry for entry in entries if entry]
import logging

# Logging defaults to ERROR; the optional 'logging_level' config option may
# name any standard level (debug, info, warning, error, critical).
log_level = logging.ERROR
configured_level = main_config.get('logging_level')
invalid_level = None
if configured_level:
    # Level constants are integer attributes of the logging module; anything
    # else (unknown name, or a name that resolves to a function) is rejected.
    candidate = getattr(logging, configured_level.strip().upper(), None)
    if isinstance(candidate, int):
        log_level = candidate
    else:
        invalid_level = configured_level
logging.basicConfig(level=log_level)
if invalid_level is not None:
    # Reported at error level so it is visible with the ERROR fallback.
    logging.error('Unknown logging_level {0!r} in config; using ERROR'.format(invalid_level))
# gdshortener is optional: it is only imported when the config asks for it.
if get_bool_conf_option('use_shortener'):
    try:
        import gdshortener
    except ImportError:
        # The posting code decides whether to shorten by re-reading the
        # 'use_shortener' option, so the option itself has to be switched
        # off here; otherwise it would reference the missing module.
        logging.warning('use_shortener is enabled but gdshortener is not installed; links will not be shortened')
        main_config['use_shortener'] = 'False'
        USE_SHORTENER = False  # kept for code that still reads the old flag
### truncated_utf8() is based on http://stackoverflow.com/a/13738452
def _is_utf8_lead_byte(b):
'''A UTF-8 intermediate byte starts with the bits 10xxxxxx.'''
@ -31,53 +67,64 @@ def get_next_k(twister,username): @@ -31,53 +67,64 @@ def get_next_k(twister,username):
return 0
# main(max_items): for every configured feed, parse it with feedparser and post
# each entry that is not yet recorded in the anydbm database to Twister through
# the JSON-RPC proxy. n_items is reset to 0 for each feed and compared against
# max_items before posting.
# NOTE(review): this span comes from a diff rendered without +/- markers and
# without indentation, so several statements appear twice: first in the older
# form using module constants (DB_FILENAME, RPC_URL, FEEDS, USE_SHORTENER,
# MAX_URL_LENGTH, USERNAME), then in the newer form reading main_config.
# Confirm nesting and which lines are live against the real file before editing.
def main(max_items):
db = anydbm.open(DB_FILENAME,'c')
twister = AuthServiceProxy(RPC_URL)
for feed_url in FEEDS:
db = anydbm.open(main_config['db_filename'],'c')
twister = AuthServiceProxy(main_config['rpc_url'])
for feed_url in get_array_conf_option('feeds'):
logging.info(feed_url)
feed = feedparser.parse(feed_url)
n_items = 0
for e in feed.entries:
# Database key: feed URL and entry id joined by '|'.
eid = '{0}|{1}'.format(feed_url,e.id)
if db.has_key(eid): # been there, done that (or not - for a reason)
logging.debug('Skipping duplicate {0}'.format(eid))
else: # format as a <=140 character string
# Construct the link, possibly with shortener
entry_url = gdshortener.ISGDShortener().shorten(url=e.link, log_stat=SHORTENER_STATS)[0] if USE_SHORTENER else e.link
if len(entry_url)<=MAX_URL_LENGTH:
entry_url = gdshortener.ISGDShortener().shorten(url=e.link, log_stat=get_bool_conf_option('shortener_stats'))[0] if get_bool_conf_option('use_shortener') else e.link
if len(entry_url) <= int(main_config['max_url_length']):
msg = u'{0} {1}'.format(entry_url,e.title)
if len(msg)>140: # Truncate (and hope it's still meaningful)
msg = msg[:137]+u'...'
else: # Link too long. Not enough space left for text :(
msg = ''
utfmsg = truncated_utf8(msg,140)# limit is 140 utf-8 bytes (not chars)
msg = unicode(utfmsg,'utf-8') # AuthServiceProxy needs unicode [we just needed to know where to truncate, and that's utf-8]
# The entry is recorded before the empty-message and quota checks below, so
# entries skipped by either check are still stored and will be treated as
# duplicates on later runs.
db[eid] = utfmsg # anydbm, on the other hand, can't handle unicode, so it's a good thing we've also kept the utf-8 :)
if not msg: # We've marked it as "posted", but no sense really posting it.
logging.warn(u'Link too long at {0}'.format(eid))
continue
if n_items>=max_items: # Avoid accidental flooding
if n_items >= max_items: # Avoid accidental flooding
logging.warn(u'Skipping "over quota" item: {0}'.format(msg))
continue
logging.info(u'posting {0}'.format(msg))
try:
twister.newpostmsg(USERNAME,get_next_k(twister,USERNAME),msg)
twister.newpostmsg(main_config['username'],
get_next_k(twister,
main_config['username']),
msg)
# NOTE(review): the Python 2 form "except Exception,e" rebinds e, the same name
# as the loop variable of "for e in feed.entries"; posting errors are logged
# and not re-raised.
except Exception,e:
logging.error(`e`) # usually not very informative :(
n_items+=1
if __name__ == '__main__':
    # Per-feed posting limit, by precedence:
    #   1. the -n/--maxitems command line option,
    #   2. the max_new_items_per_feed config option,
    #   3. 0 (mark everything as seen, post nothing).
    # The command line value is compared against None rather than tested for
    # truthiness, so an explicit "-n 0" is honoured instead of silently
    # falling through to the config value.
    if args.maxitems is not None:
        n = args.maxitems
    elif main_config.get('max_new_items_per_feed'):
        n = int(main_config['max_new_items_per_feed'])
    else:
        n = 0
    main(n)

Loading…
Cancel
Save