This sample script is a username crawler: it obtains all known usernames
from the block chain and then tries to download the avatar and profile for each
of them. The report is written to an HTML file.
This commit is contained in:
Miguel Freitas 2013-12-10 18:34:08 -02:00
parent fd404d0927
commit a3046784ac
2 changed files with 180 additions and 0 deletions

70
contrib/HTML.py Normal file
View File

@ -0,0 +1,70 @@
from cgi import escape
class HTML(object):
    '''Easily generate HTML.

    Accessing an attribute creates a child tag of that name, calling a tag
    sets its text content and attributes, and using a tag as a context
    manager nests the tags created afterwards inside it.  Text content and
    attribute values are escaped automatically, so callers pass raw text.

    >>> h = HTML()
    >>> p = h.p('hello, world!')
    >>> p.text('more text')
    >>> with h.table(border='1', newlines=True):
    ...     for i in range(2):
    ...         with h.tr:
    ...             _ = h.td('he<l>lo', a='"foo"')
    ...             _ = h.td('there')
    ...
    >>> print(str(h))
    <p>hello, world!more text</p><table border="1">
    <tr><td a="&quot;foo&quot;">he&lt;l&gt;lo</td><td>there</td></tr>
    <tr><td a="&quot;foo&quot;">he&lt;l&gt;lo</td><td>there</td></tr>
    </table>
    '''

    def __init__(self, name=None, stack=None):
        """Create a tag called *name*, or the document root when name is None.

        *stack* is the list of currently open tags, shared by every tag of
        one document; the root creates it and puts itself at the bottom.
        """
        self.name = name
        self.content = []
        self.attrs = {}
        # insert newlines between content?
        self.newlines = False
        if stack is None:
            stack = [self]
        self.stack = stack

    def __getattr__(self, name):
        """Add a new child tag (or a newline for ``newline``) and return it.

        Only called for names that normal attribute lookup did not find.
        The new element is appended to the innermost open tag.
        """
        # Special-method probes (copy, pickle, hasattr, ...) must fail
        # normally instead of silently creating a tag such as
        # <__deepcopy__>, or recursing forever on an uninitialised instance.
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)
        if name == 'newline':
            e = '\n'
        else:
            e = HTML(name, self.stack)
        self.stack[-1].content.append(e)
        return e

    def text(self, text):
        """Append *text*, escaped, to this tag's content."""
        self.content.append(escape(text))

    def __call__(self, *content, **kw):
        """Set this tag's content and attributes; return the tag itself.

        Positional arguments replace the current content (each one is
        escaped).  The keyword ``newlines`` controls whether content is
        separated by newlines; every other keyword becomes an attribute.
        """
        if content:
            # A real list is required: text() and child tags append to it.
            self.content = [escape(c) for c in content]
        if 'newlines' in kw:
            # special-case to allow control over newlines
            self.newlines = kw.pop('newlines')
        for k in kw:
            # The value is emitted inside double quotes, so embedded double
            # quotes have to become entities as well.
            self.attrs[k] = escape(kw[k]).replace('"', '&quot;')
        return self

    def __enter__(self):
        # we're now adding tags to me!
        self.stack.append(self)
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        # we're done adding tags to me!
        self.stack.pop()

    def __str__(self):
        """Render this tag and everything inside it as text."""
        join = '\n' if self.newlines else ''
        if self.name is None:
            # the root has no tag of its own
            return join.join(map(str, self.content))
        attributes = ['%s="%s"' % item for item in self.attrs.items()]
        opening = [self.name] + attributes
        s = '<%s>%s' % (' '.join(opening), join)
        if self.content:
            # A tag that never received content is emitted as an opening
            # tag only; callers pass '' to force a closing tag.
            s += join.join(map(str, self.content))
            s += join + '</%s>' % self.name
        return s

110
contrib/usernameCrawler.py Executable file
View File

@ -0,0 +1,110 @@
#!/usr/bin/python
#
# This sample script is a username crawler: it obtains all known usernames
# from the block chain and then tries to download the avatar and profile for
# each of them. The report is written to an HTML file.
#
# Downloaded data is cached in a Python pickle file, so the script can be run
# again without fetching everything from scratch (you may run it from a cron
# job, for example).
import sys, cPickle, time
# File the crawler state is pickled to between runs.
dbFileName = "usernameCrawler.pickle"
# File the HTML report is written to.
htmlFileName = "userlist.html"
# Seconds a user's downloaded avatar/profile stays fresh (one day).
cacheTimeout = 24*3600

try:
    from bitcoinrpc.authproxy import AuthServiceProxy
except ImportError:
    sys.stderr.write("Error: install python-bitcoinrpc (https://github.com/jgarzik/python-bitcoinrpc)\n")
    # sys.exit rather than the bare exit(): the latter is only injected by
    # the site module and is missing when Python runs without it.
    sys.exit(-1)

# Default RPC endpoint; the first command-line argument overrides it.
serverUrl = "http://user:pwd@127.0.0.1:28332"
if len(sys.argv) > 1:
    serverUrl = sys.argv[1]

# RPC proxy used for every getblock/getblockhash/dhtget call below.
twister = AuthServiceProxy(serverUrl)
class User:
    """Cached data for one username, stored in db.usernames and pickled.

    The class-level values are the defaults a new User reports until the
    crawler assigns values on the instance.
    """
    avatar = ""      # "v" payload of the user's "avatar" dhtget reply
    fullname = ""    # "fullname" entry of the user's "profile" dhtget reply
    location = ""    # "location" entry of the user's "profile" dhtget reply
    updateTime = 0   # time.time() of the last refresh; 0 until first refreshed
class MyDb:
    """Crawler state that is pickled to dbFileName between runs.

    Besides the attribute below, the script attaches a ``usernames`` dict
    (username -> User) to each newly created instance.
    """
    lastBlockHash = 0  # hash of the last block fetched; 0 until one was read
# Resume from the cached database when one can be loaded; otherwise start
# with an empty database at block 0.
# NOTE: unpickling can execute arbitrary code -- only load a cache file that
# this script wrote itself.
try:
    with open(dbFileName) as dbFile:
        db = cPickle.load(dbFile)
    nextHash = db.lastBlockHash
except Exception:
    # A missing, truncated or incompatible cache all mean "start over".
    # Exception (not a bare except) so Ctrl-C and sys.exit still propagate
    # instead of silently discarding the cache.
    db = MyDb()
    db.usernames = {}
    nextHash = twister.getblockhash(0)
while True:
block = twister.getblock(nextHash)
db.lastBlockHash = block["hash"]
print str(block["height"]) + "\r",
usernames = block["usernames"]
for u in usernames:
if not db.usernames.has_key(u):
db.usernames[u] = User()
if block.has_key("nextblockhash"):
nextHash = block["nextblockhash"]
else:
break
now = time.time()
for u in db.usernames.keys():
if db.usernames[u].updateTime + cacheTimeout < now:
print "getting avatar for", u, "..."
d = twister.dhtget(u,"avatar","s")
if len(d) == 1 and d[0].has_key("p") and d[0]["p"].has_key("v"):
db.usernames[u].avatar = d[0]["p"]["v"]
print "getting profile for", u, "..."
d = twister.dhtget(u,"profile","s")
if len(d) == 1 and d[0].has_key("p") and d[0]["p"].has_key("v"):
db.usernames[u].fullname = d[0]["p"]["v"]["fullname"]
db.usernames[u].location = d[0]["p"]["v"]["location"]
db.usernames[u].updateTime = now
cPickle.dump(db,open(dbFileName,"w"))
from HTML import HTML
from cgi import escape
def outputHtmlUserlist(fname, db, keys):
    """Write the user list as an HTML table to the file *fname*.

    fname -- path of the HTML file to (over)write
    db    -- object whose ``usernames`` dict maps username -> User
    keys  -- usernames to list, in output order

    The table has one row per username with the columns avatar (as an
    image), username, fullname and location.
    """
    h = HTML()
    # '' as content makes HTML emit the closing tag as well
    h.head("")
    with h.body(""):
        with h.table(border='1', newlines=True):
            with h.colgroup:
                h.col(span="1", style="width: 64px;")
                h.col(span="1", style="width: 130px;")
                h.col(span="1", style="width: 250px;")
                h.col(span="1", style="width: 250px;")
            with h.tr:
                h.th("avatar")
                h.th("username")
                h.th("fullname")
                h.th("location")
            for u in keys:
                user = db.usernames[u]
                with h.tr:
                    # HTML escapes content and attribute values itself;
                    # escaping here as well would double-escape them
                    # (e.g. "&" would show up as "&amp;" on the page).
                    with h.td():
                        h.img('', src=user.avatar, width="64", height="64")
                    h.td(u)
                    h.td(user.fullname)
                    h.td(user.location)
    # NOTE(review): str(h) applies str() to every content item; under
    # Python 2 that may fail for non-ASCII unicode names -- confirm.
    with open(fname, "w") as out:
        out.write(str(h))
print "Generating", htmlFileName, "..."
keys = db.usernames.keys()
keys.sort() # sorted by username
outputHtmlUserlist(htmlFileName, db, keys)