mirror of
https://github.com/twisterarmy/feed2twister.git
synced 2025-02-01 01:24:21 +00:00
Big refactoring in order to use argparse and ConfigParser module instead of using python import. Add possibilities to use custom config files
This commit is contained in:
parent
823d8d36fa
commit
c2c40b144e
@ -15,15 +15,17 @@ Feed2twister is a simple script to post items from RSS/ATOM feeds to [Twister](h
|
||||
If you don't have git(?) you can [download the zip](https://github.com/thedod/python-bitcoinrpc/archive/unicode-fix-for-twister.zip),
|
||||
and copy the bitcoinrpc directory into this directory (overwrite whatever you have there now, probably an empty folder).
|
||||
|
||||
* Copy `conf-example.py` to `conf.py` and edit it to taste.
|
||||
* Copy `feed2twister.conf.example` to `feed2twister.conf` and edit it to taste.
|
||||
|
||||
### Running
|
||||
|
||||
Normally, you would run this as a cron task: `cd /path/to/this ; python feed2twister.py` [`N`]
|
||||
Normally, you would run this as a cron task: `cd /path/to/this ; python feed2twister.py` [`-n N`] [`-c CONFIGFILE`]
|
||||
|
||||
if [optional] `N` is supplied, it's used as the maximum items to post (per feed). Default is `conf.MAX_NEW_ITEMS_PER_FEED`.
|
||||
If [optional] `N` is supplied, it's used as the maximum items to post (per feed). Default is the `max_new_items_per_feed` value from the config file.
|
||||
|
||||
If there are more than `N` new items in a feed, "over quota" items get marked as if they were posted
|
||||
(this can be handy when you add a new feed with a long history).
|
||||
|
||||
Specifically, `python feed2twister.py -n 0` would make all feeds "catch up" without posting anything.
|
||||
|
||||
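If [optional] `CONFIGFILE` is supplied, it is used as a custom config file instead of the default ones. The defaults are read in this order, and values from a later file override those from an earlier one: `feed2twister.conf` in the script's own directory, `~/.config/feed2twister.conf` (the XDG config directory), `~/.feed2twister.conf`.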
|
@ -1,14 +0,0 @@
|
||||
import logging
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
#logging.basicConfig(level=logging.ERROR) # For deployment. It's on a don't wanna know basis :)
|
||||
USERNAME = 'MYTWISTERUSERNAME' # e.g 'thedod'
|
||||
RPC_URL = 'http://MYRPCUSER:MYRPCPASSWORD@127.0.0.1:28332' # change to rpcuser and rpcpassword from ~/.twister/twister.conf
|
||||
DB_FILENAME = 'items.db' # db is mainly there to keep track of "what not to post again" :) (debugging too, I guess)
|
||||
MAX_URL_LENGTH = 100 # this leaves 36 characters and a ... to get to 140. If we don't have that, we skip the item :(
|
||||
MAX_NEW_ITEMS_PER_FEED = 3 # we don't want to flood more than that in a single run.
|
||||
USE_SHORTENER = False # to enable this, you need gdshortener: https://github.com/torre76/gd_shortener/
|
||||
SHORTENER_STATS = True # tell is.gd to publicly show statistics for the shortened url
|
||||
FEEDS = [ # Use your own feeds, of course :)
|
||||
'https://swatwt.com/favs/rss/en',
|
||||
'https://github.com/thedod.atom'
|
||||
]
|
22
feed2twister.conf.example
Normal file
22
feed2twister.conf.example
Normal file
@ -0,0 +1,22 @@
|
||||
[DEFAULT]
|
||||
# For deployment. It's on a don't wanna know basis :)
|
||||
# logging_level = error
|
||||
logging_level = debug
|
||||
# e.g 'thedod'
|
||||
username = MYTWISTERUSERNAME
|
||||
# change to rpcuser and rpcpassword from ~/.twister/twister.conf
|
||||
rpc_url = http://MYRPCUSER:MYRPCPASSWORD@127.0.0.1:28332
|
||||
# db is mainly there to keep track of "what not to post again" :) (debugging too, I guess)
|
||||
db_filename = items.db
|
||||
# this leaves 36 characters and a ... to get to 140. If we don't have that, we skip the item :(
|
||||
max_url_length = 100
|
||||
# we don't want to flood more than that in a single run.
|
||||
max_new_items_per_feed = 3
|
||||
# to enable this, you need gdshortener: https://github.com/torre76/gd_shortener/
|
||||
use_shortener = False
|
||||
# tell is.gd to publicly show statistics for the shortened url
|
||||
shortener_stats = True
|
||||
# Use your own feeds, of course :)
|
||||
feeds = https://swatwt.com/favs/rss/en
|
||||
https://github.com/thedod.atom
|
||||
https://github.com/milouse.atom
|
@ -1,13 +1,49 @@
|
||||
from conf import *
|
||||
import feedparser,anydbm,sys
|
||||
import os,feedparser,anydbm,argparse,ConfigParser
|
||||
from bitcoinrpc.authproxy import AuthServiceProxy
|
||||
|
||||
if USE_SHORTENER:
|
||||
from xdg.BaseDirectory import xdg_config_home

# Directory holding this script; the first default config file is looked up here.
SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))

# Default config files, in the order they are read. ConfigParser.read() loads
# every file in the list that exists, so values from a later file override
# values from an earlier one.
DEFAULT_CONFIG_FILES = [
    os.path.join(SCRIPT_PATH, 'feed2twister.conf'),
    os.path.join(xdg_config_home, 'feed2twister.conf'),
    os.path.expanduser('~/.feed2twister.conf'),
]

arg_parser = argparse.ArgumentParser(description='Feed2twister is a simple script to post items from RSS/ATOM feeds to Twister.')
arg_parser.add_argument('--config', '-c',
                        help='Alternate config file. Default is to read, in order: {0}.'.format(', '.join(DEFAULT_CONFIG_FILES)))
arg_parser.add_argument('--maxitems', '-n', type=int, metavar='N',
                        help="""Maximum items to post (per feed). Default is the max_new_items_per_feed value from the config file.
If there are more than N new items in a feed, "over quota" items get marked as if they were posted (this can be handy when you add a new feed with a long history). Specifically, %(prog)s -n 0 would make all feeds "catch up" without posting anything.""")
args = arg_parser.parse_args()

main_config_file = ConfigParser.ConfigParser()
if args.config:
    config_candidates = [os.path.expanduser(args.config)]
else:
    config_candidates = DEFAULT_CONFIG_FILES

# read() returns the list of files it managed to parse. Without any config
# there is no rpc_url/username to work with, so stop with a clear message
# instead of failing later on a missing key.
if not main_config_file.read(config_candidates):
    arg_parser.error('No readable config file found (tried: {0})'.format(', '.join(config_candidates)))

# All options live in the [DEFAULT] section; expose them as a plain mapping.
main_config = main_config_file.defaults()
|
||||
|
||||
def get_bool_conf_option(option):
    """Return the boolean value of `option` from the module-level `main_config`.

    The value is true when its text, ignoring case and surrounding
    whitespace, is one of 'yes', 'true', 't', 'on' or '1'. A missing or empty
    option, or any other text, yields False.
    """
    # .get() rather than indexing: a missing option must fall through to
    # False instead of raising KeyError.
    value = main_config.get(option)
    if not value:
        return False
    return str(value).strip().lower() in ('yes', 'true', 't', 'on', '1')
|
||||
|
||||
def get_array_conf_option(option):
    """Return the multi-line value of `option` as a list of strings.

    The value is read from the module-level `main_config` and split on line
    breaks. Each entry is stripped of surrounding whitespace and blank lines
    are dropped. A missing or empty option yields an empty list.
    """
    # .get() rather than indexing: a missing option must yield [] instead of
    # raising KeyError.
    value = main_config.get(option)
    if not value:
        return []
    stripped = (line.strip() for line in value.splitlines())
    return [line for line in stripped if line]
|
||||
|
||||
import logging

# Translate the configured level name (e.g. "debug") into a logging constant.
# Stay at ERROR when the option is missing, empty, or not a known level name.
log_level = logging.ERROR
_level_name = main_config.get('logging_level')
_bad_level_name = None
if _level_name:
    _level = getattr(logging, _level_name.strip().upper(), None)
    if isinstance(_level, int):
        log_level = _level
    else:
        _bad_level_name = _level_name

logging.basicConfig(level=log_level)

if _bad_level_name is not None:
    logging.error('Unknown logging_level {0!r}, using ERROR'.format(_bad_level_name))

if get_bool_conf_option('use_shortener'):
    try:
        import gdshortener
    except ImportError:
        logging.error('use_shortener is enabled but gdshortener is not installed; links will not be shortened')
        # main() decides whether to shorten by re-reading this option, so the
        # option itself has to be switched off, not just the legacy constant.
        main_config['use_shortener'] = 'false'
        USE_SHORTENER = False
|
||||
|
||||
|
||||
### truncated_utf8() is based on http://stackoverflow.com/a/13738452
|
||||
def _is_utf8_lead_byte(b):
|
||||
'''A UTF-8 intermediate byte starts with the bits 10xxxxxx.'''
|
||||
@ -31,53 +67,64 @@ def get_next_k(twister,username):
|
||||
return 0
|
||||
|
||||
def main(max_items):
    """Post new items from every configured feed to Twister.

    For each feed listed in the `feeds` option, every entry not yet recorded
    in the db is formatted as "<link> <title>", truncated to 140 characters
    and then to 140 UTF-8 bytes, and recorded in the db under
    "<feed_url>|<entry id>". Entries whose link is longer than
    `max_url_length` are recorded with an empty message and not posted. Once
    `max_items` entries of a feed have been handled, the remaining new
    entries are recorded but not posted.

    max_items -- maximum number of items to post per feed.
    """
    # Settings that do not change between entries are read once up front.
    username = main_config['username']
    max_url_length = int(main_config['max_url_length'])
    # Shorten only when enabled AND the optional gdshortener import succeeded;
    # otherwise the lookup of `gdshortener` below would raise NameError.
    use_shortener = get_bool_conf_option('use_shortener') and 'gdshortener' in globals()
    shortener_stats = use_shortener and get_bool_conf_option('shortener_stats')

    db = anydbm.open(main_config['db_filename'], 'c')
    try:
        twister = AuthServiceProxy(main_config['rpc_url'])

        for feed_url in get_array_conf_option('feeds'):
            logging.info(feed_url)
            feed = feedparser.parse(feed_url)
            n_items = 0

            for e in feed.entries:
                eid = '{0}|{1}'.format(feed_url, e.id)

                if db.has_key(eid):  # been there, done that (or not - for a reason)
                    logging.debug('Skipping duplicate {0}'.format(eid))
                    continue

                # Construct the link, possibly with shortener
                if use_shortener:
                    entry_url = gdshortener.ISGDShortener().shorten(url=e.link, log_stat=shortener_stats)[0]
                else:
                    entry_url = e.link

                # Format as a <=140 character string
                if len(entry_url) <= max_url_length:
                    msg = u'{0} {1}'.format(entry_url, e.title)
                    if len(msg) > 140:  # Truncate (and hope it's still meaningful)
                        msg = msg[:137] + u'...'
                else:  # Link too long. Not enough space left for text :(
                    msg = ''

                utfmsg = truncated_utf8(msg, 140)  # limit is 140 utf-8 bytes (not chars)
                msg = unicode(utfmsg, 'utf-8')  # AuthServiceProxy needs unicode [we just needed to know where to truncate, and that's utf-8]
                db[eid] = utfmsg  # anydbm, on the other hand, can't handle unicode, so it's a good thing we've also kept the utf-8 :)

                if not msg:  # We've marked it as "posted", but no sense really posting it.
                    logging.warning(u'Link too long at {0}'.format(eid))
                    continue

                if n_items >= max_items:  # Avoid accidental flooding
                    logging.warning(u'Skipping "over quota" item: {0}'.format(msg))
                    continue

                logging.info(u'posting {0}'.format(msg))

                try:
                    twister.newpostmsg(username, get_next_k(twister, username), msg)
                except Exception as err:  # named `err` so the loop variable `e` is not clobbered
                    logging.error(repr(err))  # usually not very informative :(

                # Count the item against this feed's quota.
                n_items += 1
    finally:
        # Always release the db handle, even if a feed or an RPC call blew up.
        db.close()
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Per-feed posting limit: command line first, then config file, then 0.
    # `-n 0` is a meaningful request ("catch up" without posting), so compare
    # against None; a truthiness test would silently discard an explicit 0.
    if args.maxitems is not None:
        n = args.maxitems
    elif main_config.get('max_new_items_per_feed'):
        n = int(main_config['max_new_items_per_feed'])
    else:
        n = 0

    main(n)
|
||||
|
Loading…
x
Reference in New Issue
Block a user