diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..07017cc --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "dependencies/python-bitcoinrpc"] + path = dependencies/python-bitcoinrpc + url = https://github.com/thedod/python-bitcoinrpc.git diff --git a/README.md b/README.md index 5b108ea..50a4e72 100644 --- a/README.md +++ b/README.md @@ -8,11 +8,17 @@ Feed2twister is a simple script to post items from RSS/ATOM feeds to [Twister](h ### Installing -Copy `config-example.py` to `config.py` and edit it to taste. + * Run `git submodule update --init` + (to install a [patched version](https://github.com/thedod/python-bitcoinrpc/tree/unicode-fix-for-twister) + of python-bitcoinrpc that includes a twister-related unicode fix). + If you don't have git, you can [download the zip](https://github.com/thedod/python-bitcoinrpc/archive/unicode-fix-for-twister.zip), + and copy the bitcoinrpc directory into this directory (overwrite whatever is there now, probably an empty folder). + + * Copy `config-example.py` to `config.py` and edit it to taste. ### Running -Normally, you would run this as a cron task: `python feed2twister.py` [`N`] +Normally, you would run this as a cron task: `cd /path/to/this ; python feed2twister.py` [`N`] if [optional] `N` is supplied, it's used as the maximum items to post (per feed). Default is `conf.MAX_NEW_ITEMS_PER_FEED`. 
diff --git a/bitcoinrpc b/bitcoinrpc new file mode 120000 index 0000000..bd72d1c --- /dev/null +++ b/bitcoinrpc @@ -0,0 +1 @@ +dependencies/python-bitcoinrpc/bitcoinrpc \ No newline at end of file diff --git a/dependencies/python-bitcoinrpc b/dependencies/python-bitcoinrpc new file mode 160000 index 0000000..666d96a --- /dev/null +++ b/dependencies/python-bitcoinrpc @@ -0,0 +1 @@ +Subproject commit 666d96a2ceadf173f8099ae08ebf5eb831c16312 diff --git a/feed2twister.py b/feed2twister.py index 3258ead..4ee8430 100644 --- a/feed2twister.py +++ b/feed2twister.py @@ -2,6 +2,22 @@ from conf import * import feedparser,anydbm,sys from bitcoinrpc.authproxy import AuthServiceProxy +### truncated_utf8() is based on http://stackoverflow.com/a/13738452 +def _is_utf8_lead_byte(b): + '''A UTF-8 intermediate byte starts with the bits 10xxxxxx.''' + return (ord(b) & 0xC0) != 0x80 + +def truncated_utf8(text,max_bytes,ellipsis='\xe2\x80\xa6'): + '''If text[max_bytes] is not a lead byte, back up until a lead byte is + found and truncate before that character.''' + utf8 = text.encode('utf8') + if len(utf8) <= max_bytes: + return utf8 + i = max_bytes-len(ellipsis) + while i > 0 and not _is_utf8_lead_byte(utf8[i]): + i -= 1 + return utf8[:i]+ellipsis + def get_next_k(twister,username): try: return twister.getposts(1,[{'username':username}])[0]['userpost']['k']+1 @@ -26,7 +42,9 @@ def main(max_items): msg = msg[:137]+u'...' else: # Link too long. Not enough space left for text :( msg = '' - db[eid] = msg.encode('utf8') # Anydbm can't do unicode. utf8 may become >140, but it doesn't matter ;) + utfmsg = truncated_utf8(msg,140)# limit is 140 utf-8 bytes (not chars) + msg = unicode(utfmsg,'utf-8') # AuthServiceProxy needs unicode [we just needed to know where to truncate, and that's utf-8] + db[eid] = utfmsg # anydbm, on the other hand, can't handle unicode, so it's a good thing we've also kept the utf-8 :) if not msg: # We've marked it as "posted", but no sense really posting it. 
logging.warn(u'Link too long at {0}'.format(eid)) continue @@ -36,8 +54,8 @@ def main(max_items): logging.info(u'posting {0}'.format(msg)) try: twister.newpostmsg(USERNAME,get_next_k(twister,USERNAME),msg) - except Exception,e: # To do: find out why some unicode chars screw this and how to do this "right" - logging.error(`e`) + except Exception,e: + logging.error(`e`) # usually not very informative :( n_items+=1 if __name__=='__main__':