# From a3046784ac6933b3fe785b34adf0d2bf40411bb7 Mon Sep 17 00:00:00 2001
# From: Miguel Freitas
# Date: Tue, 10 Dec 2013 18:34:08 -0200
# Subject: [PATCH] This sample script is a username crawler: it will obtain
#  all known usernames from block chain and then try to download avatar and
#  profiles for all of them. The report is shown as an html file.
#
#  contrib/HTML.py            |  70 +++++++++++++++++++++++
#  contrib/usernameCrawler.py | 110 +++++++++++++++++++++++++++++++++++++
#  2 files changed, 180 insertions(+)
#  create mode 100644 contrib/HTML.py
#  create mode 100755 contrib/usernameCrawler.py
#
# ---------------------------------------------------------------------------
# contrib/HTML.py
# ---------------------------------------------------------------------------

try:
    from html import escape as _html_escape  # Python 3
except ImportError:
    from cgi import escape  # Python 2
else:
    def escape(s, quote=False):
        '''Escape ``&``, ``<`` and ``>`` (and quotes only when *quote* is true).

        Mirrors the default behaviour of the old ``cgi.escape`` so that the
        output is the same on Python 2 and Python 3.
        '''
        return _html_escape(s, quote=quote)


class HTML(object):
    '''Easily generate HTML.

    Attribute access creates a child tag, calling a tag sets its content
    and attributes, and using a tag as a context manager makes it the
    parent of every tag created inside the ``with`` block.

    Example::

        h = HTML()
        p = h.p('hello, world!')
        p.text('more text')
        with h.table(border='1', newlines=True):
            for i in range(2):
                with h.tr:
                    h.td('he<l>lo', a='"foo"')
                    h.td('there')
        print(h)

    prints::

        <p>hello, world!more text</p><table border="1">
        <tr><td a="&quot;foo&quot;">he&lt;l&gt;lo</td><td>there</td></tr>
        <tr><td a="&quot;foo&quot;">he&lt;l&gt;lo</td><td>there</td></tr>
        </table>

    A tag that never receives content is rendered without a closing tag
    (``h.col(span='1')`` gives ``<col span="1">``); pass an empty string
    (``h.body('')``) to force one.
    '''

    # Instance attributes assigned in __init__.  If __getattr__ is asked for
    # one of these, the instance was created without __init__ (copy, pickle)
    # and must not be treated as a request for a new tag.
    _RESERVED = ('name', 'content', 'attrs', 'newlines', 'stack')

    def __init__(self, name=None, stack=None):
        '''Create a tag called *name*; ``name=None`` is the document root.

        *stack* is the list of currently open tags shared by the whole
        document; the root creates it with itself as the only entry.
        '''
        self.name = name
        self.content = []
        self.attrs = {}
        # insert newlines between content?
        self.newlines = False
        if stack is None:
            stack = [self]
        self.stack = stack

    def __getattr__(self, name):
        '''Add a new tag (or a newline) to the innermost open tag.

        ``h.newline`` appends a literal newline; any other name appends and
        returns a new ``HTML`` element of that name.
        '''
        # Protocol probes (hasattr(h, '__html__'), copy, pickle ...) must not
        # silently add bogus tags, and a missing 'stack' must not recurse.
        if name in self._RESERVED or (name.startswith('__')
                                      and name.endswith('__')):
            raise AttributeError(name)
        if name == 'newline':
            e = '\n'
        else:
            e = HTML(name, self.stack)
        self.stack[-1].content.append(e)
        return e

    def text(self, text):
        '''Append *text*, HTML-escaped, to this tag's content.'''
        self.content.append(escape(text))

    def __call__(self, *content, **kw):
        '''Customise the tag with content and/or attributes; returns self.

        Positional arguments replace the current content (each one is
        escaped).  Keyword arguments become attributes, except ``newlines``
        which controls whether children are separated by newlines.
        '''
        if content:
            # A real list: later code appends to it and tests its truthiness.
            self.content = [escape(c) for c in content]
        if 'newlines' in kw:
            # special-case to allow control over newlines
            self.newlines = kw.pop('newlines')
        for k in kw:
            # Attribute values are always double-quoted on output, so the
            # double quote has to be encoded as well.
            self.attrs[k] = escape(kw[k]).replace('"', '&quot;')
        return self

    def __enter__(self):
        # we're now adding tags to me!
        self.stack.append(self)
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        # we're done adding tags to me!
        self.stack.pop()

    def __str__(self):
        '''Serialise this tag and everything below it.'''
        join = '\n' if self.newlines else ''
        if self.name is None:
            # The root has no tag of its own.
            return join.join(map(str, self.content))
        a = ['%s="%s"' % i for i in self.attrs.items()]
        l = [self.name] + a
        s = '<%s>%s' % (' '.join(l), join)
        if self.content:
            s += join.join(map(str, self.content))
            # Only tags with content are closed; see the class docstring.
            s += join + '</%s>' % self.name
        return s


# ---------------------------------------------------------------------------
# contrib/usernameCrawler.py (mode 100755)
# ---------------------------------------------------------------------------
#!/usr/bin/python
#
# This sample script is a username crawler: it will obtain all known usernames
# from block chain and then try to download avatar and profiles for all of
# them. The report is shown as an html file.
#
# Downloaded data is cached in a python pickle file, so the script may be
# executed again without fetching everything all over again (you may run it
# from cron scripts, for example).

import os
import sys
import time

try:
    import cPickle  # Python 2
except ImportError:
    import pickle as cPickle  # Python 3

dbFileName = "usernameCrawler.pickle"
htmlFileName = "userlist.html"
cacheTimeout = 24*3600

# Default JSON-RPC endpoint; may be overridden by the first command line
# argument.
serverUrl = "http://user:pwd@127.0.0.1:28332"


class User:
    # Profile data for one username; the empty defaults mean "not fetched".
    avatar = ""
    fullname = ""
    location = ""
    # time.time() of the last refresh; 0 forces a refresh on the first run.
    updateTime = 0


class MyDb:
    # Hash of the last block that was scanned for usernames.
    lastBlockHash = 0


def loadDb(fname):
    """Return the cached database stored in *fname*, or None if unusable.

    A missing file is the normal first-run case.  Any other failure to load
    is reported on stderr and treated as "no cache", so the crawl restarts
    from the first block instead of aborting.
    """
    if not os.path.exists(fname):
        return None
    try:
        # NOTE: unpickling executes code chosen by whoever wrote the file;
        # only point this at a cache file this script created itself.
        with open(fname, "rb") as f:
            db = cPickle.load(f)
    except Exception as exc:  # unpickling can raise nearly anything
        sys.stderr.write("Warning: ignoring unreadable cache %s (%s)\n"
                         % (fname, exc))
        return None
    if not hasattr(db, "usernames"):
        return None
    return db


def saveDb(db, fname):
    """Pickle *db* into *fname*."""
    with open(fname, "wb") as f:
        cPickle.dump(db, f)


def dhtValue(reply):
    """Return ``reply[0]["p"]["v"]`` from a dhtget reply, or None.

    None is returned unless the reply holds exactly one entry carrying a
    "p" mapping with a "v" member.
    """
    if not reply or len(reply) != 1:
        return None
    try:
        return reply[0]["p"]["v"]
    except (KeyError, TypeError):
        return None


def toText(value):
    """Coerce *value* to the native ``str`` type that HTML.py can serialise.

    On Python 2 the JSON-RPC layer presumably hands back ``unicode``
    strings (TODO confirm); HTML.__str__ applies str() to every piece of
    content, which raises UnicodeEncodeError for non-ASCII text, so such
    values are encoded as UTF-8 first.
    """
    if isinstance(value, str):
        return value
    if value is None:
        return ""
    try:
        return value.encode("utf-8")
    except AttributeError:
        return str(value)


def crawlBlocks(twister, db, nextHash):
    """Walk the chain from *nextHash* to the tip, registering new usernames.

    db.lastBlockHash is updated after every block so the next run resumes
    from the last block seen.
    """
    while True:
        block = twister.getblock(nextHash)
        db.lastBlockHash = block["hash"]
        # Progress indicator, rewritten in place on a single line.
        sys.stdout.write(str(block["height"]) + "\r")
        sys.stdout.flush()
        for u in block["usernames"]:
            if u not in db.usernames:
                db.usernames[u] = User()
        if "nextblockhash" not in block:
            break
        nextHash = block["nextblockhash"]


def refreshProfiles(twister, db, now):
    """Fetch avatar and profile for every user whose cache entry expired.

    An entry is refreshed when it is older than cacheTimeout seconds
    relative to *now*.  Fields missing from a reply keep their previous
    value.
    """
    for u in list(db.usernames):
        user = db.usernames[u]
        if user.updateTime + cacheTimeout >= now:
            continue

        print("getting avatar for %s ..." % u)
        avatar = dhtValue(twister.dhtget(u, "avatar", "s"))
        if avatar is not None:
            user.avatar = avatar

        print("getting profile for %s ..." % u)
        profile = dhtValue(twister.dhtget(u, "profile", "s"))
        if isinstance(profile, dict):
            # Profiles are free-form; either field may be absent.
            user.fullname = profile.get("fullname", user.fullname)
            user.location = profile.get("location", user.location)

        user.updateTime = now


def outputHtmlUserlist(fname, db, keys):
    """Write an HTML table of the users named in *keys* to *fname*.

    One row per user: avatar image, username, full name, location.
    """
    # Local module shipped next to this script.
    from HTML import HTML

    h = HTML()
    h.head("")
    with h.body(""):
        with h.table(border='1', newlines=True):
            with h.colgroup:
                h.col(span="1", style="width: 64px;")
                h.col(span="1", style="width: 130px;")
                h.col(span="1", style="width: 250px;")
                h.col(span="1", style="width: 250px;")
            with h.tr:
                h.th("avatar")
                h.th("username")
                h.th("fullname")
                h.th("location")
            for u in keys:
                user = db.usernames[u]
                with h.tr:
                    with h.td():
                        # HTML() escapes content and attribute values
                        # itself; escaping here as well would show
                        # "&amp;" literally in the report.
                        h.img('', src=toText(user.avatar),
                              width="64", height="64")
                    h.td(toText(u))
                    h.td(toText(user.fullname))
                    h.td(toText(user.location))
    with open(fname, "w") as f:
        f.write(str(h))


def main(argv):
    """Crawl usernames, refresh stale profiles, save the cache, write HTML."""
    try:
        from bitcoinrpc.authproxy import AuthServiceProxy
    except ImportError:
        sys.stderr.write("Error: install python-bitcoinrpc (https://github.com/jgarzik/python-bitcoinrpc)\n")
        sys.exit(-1)

    url = argv[1] if len(argv) > 1 else serverUrl
    twister = AuthServiceProxy(url)

    db = loadDb(dbFileName)
    if db is None:
        # No usable cache: start from the first block.
        db = MyDb()
        db.usernames = {}
        nextHash = twister.getblockhash(0)
    else:
        nextHash = db.lastBlockHash

    crawlBlocks(twister, db, nextHash)
    refreshProfiles(twister, db, time.time())
    saveDb(db, dbFileName)

    print("Generating %s ..." % htmlFileName)
    outputHtmlUserlist(htmlFileName, db, sorted(db.usernames))  # by username


if __name__ == "__main__":
    main(sys.argv)