Browse Source

This sample script is a username crawler: it will obtain all known usernames

from block chain and then try to download avatar and profiles for all of
them. The report is shown as an html file.
miguelfreitas
Miguel Freitas 11 years ago
parent
commit
a3046784ac
  1. 70
      contrib/HTML.py
  2. 110
      contrib/usernameCrawler.py

70
contrib/HTML.py

@ -0,0 +1,70 @@ @@ -0,0 +1,70 @@
try:
    from html import escape  # Python 3.2+ (cgi.escape was removed in 3.8)
except ImportError:
    from cgi import escape  # Python 2
class HTML(object):
    '''Easily generate HTML.

    Reading an attribute that is not otherwise defined on the object creates
    a child tag of that name and appends it to the tag currently being filled
    (the top of the shared stack).  Calling a tag sets its text content and
    attributes; using a tag as a context manager makes it the target for
    newly created tags.  The special name ``newline`` appends a literal
    newline instead of a tag.

    Names that already exist on the object (``name``, ``content``, ``attrs``,
    ``newlines``, ``stack``, ``text``) cannot be used as tag names.

    Example::

        h = HTML()
        p = h.p('hello, world!')
        p.text('more text')
        with h.table(border='1', newlines=True):
            for i in range(2):
                with h.tr:
                    h.td('he<l>lo', a='"foo"')
                    h.td('there')
        print(h)

    prints::

        <p>hello, world!more text</p><table border="1">
        <tr><td a="&quot;foo&quot;">he&lt;l&gt;lo</td><td>there</td></tr>
        <tr><td a="&quot;foo&quot;">he&lt;l&gt;lo</td><td>there</td></tr>
        </table>
    '''

    def __init__(self, name=None, stack=None):
        """Create a tag called *name*, or the document root when it is None.

        *stack* is the list of currently open tags shared by every tag of one
        document; the root creates it with itself as the only entry.
        """
        self.name = name
        self.content = []
        self.attrs = {}
        # insert newlines between content?
        self.newlines = False
        if stack is None:
            stack = [self]
        self.stack = stack

    def __getattr__(self, name):
        """Create tag *name* (or a newline) inside the innermost open tag."""
        # Dunder lookups are protocol probes (hasattr, copy, pickle, ...),
        # not tag requests; answering them with a new tag corrupts the
        # document and breaks those protocols.
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)
        if name == 'newline':
            e = '\n'
        else:
            e = HTML(name, self.stack)
        self.stack[-1].content.append(e)
        return e

    def text(self, text):
        """Append *text*, HTML-escaped, to this tag's content."""
        self.content.append(escape(text, quote=False))

    def __call__(self, *content, **kw):
        """Set text content and attributes; return self so calls can chain.

        Positional arguments replace the tag's content (each one is escaped).
        Keyword arguments become attributes, except ``newlines`` which
        controls whether the children are separated by newlines.
        """
        if content:
            # A real list is required: text() and child tags append to it.
            self.content = [escape(c, quote=False) for c in content]
        if 'newlines' in kw:
            # special-case to allow control over newlines
            self.newlines = kw.pop('newlines')
        for k in kw:
            # quote=True also escapes the double quote, which would otherwise
            # terminate the attribute value early.
            self.attrs[k] = escape(kw[k], quote=True)
        return self

    def __enter__(self):
        # we're now adding tags to me!
        self.stack.append(self)
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        # we're done adding tags to me!
        self.stack.pop()

    def __str__(self):
        """Render this tag and all of its content as markup."""
        join = '\n' if self.newlines else ''
        body = join.join([str(c) for c in self.content])
        if self.name is None:
            # The root has no tag of its own.
            return body
        attrs = ['%s="%s"' % item for item in self.attrs.items()]
        s = '<%s>%s' % (' '.join([self.name] + attrs), join)
        if self.content:
            s += body
        s += join + '</%s>' % self.name
        return s

110
contrib/usernameCrawler.py

@ -0,0 +1,110 @@ @@ -0,0 +1,110 @@
#!/usr/bin/python
#
# This sample script is a username crawler: it obtains all known usernames
# from the block chain and then tries to download the avatar and profile of
# each of them. The report is written to an HTML file.
#
# Downloaded data is cached in a python pickle file, so it may be executed
# again and it won't need to get everything all over again (you may run it
# from cron scripts, for example)
import sys, cPickle, time
# Pickle file that caches the crawl results between runs.
dbFileName = "usernameCrawler.pickle"
# HTML report written at the end of the run.
htmlFileName = "userlist.html"
# Seconds before a user's cached avatar/profile is fetched again (24 hours).
cacheTimeout = 24*3600

try:
    from bitcoinrpc.authproxy import AuthServiceProxy
except ImportError:
    sys.stderr.write("Error: install python-bitcoinrpc (https://github.com/jgarzik/python-bitcoinrpc)\n")
    # sys.exit rather than the bare exit() helper, which only exists when the
    # site module has been loaded.
    sys.exit(-1)

# Default RPC endpoint; a different URL may be given as the first argument.
serverUrl = "http://user:pwd@127.0.0.1:28332"
if len(sys.argv) > 1:
    serverUrl = sys.argv[1]
twister = AuthServiceProxy(serverUrl)
class User:
    """Cached data for one username.

    The class attributes are the defaults of a freshly discovered user; the
    crawler assigns per-instance values once it has fetched them.
    """
    # time.time() value of the last refresh attempt; 0 until the first one
    updateTime = 0
    # value of the user's "avatar" DHT resource
    avatar = ""
    # "fullname" and "location" fields of the user's "profile" DHT resource
    fullname = ""
    location = ""
class MyDb:
    """Crawler state that is pickled to disk between runs.

    The script also attaches a ``usernames`` dict (username -> User) to each
    instance it creates.
    """
    # hash of the last block scanned; the next run resumes from this block
    lastBlockHash = 0
try:
db = cPickle.load(open(dbFileName))
nextHash = db.lastBlockHash
except:
db = MyDb()
db.usernames = {}
nextHash = twister.getblockhash(0)
while True:
block = twister.getblock(nextHash)
db.lastBlockHash = block["hash"]
print str(block["height"]) + "\r",
usernames = block["usernames"]
for u in usernames:
if not db.usernames.has_key(u):
db.usernames[u] = User()
if block.has_key("nextblockhash"):
nextHash = block["nextblockhash"]
else:
break
now = time.time()
for u in db.usernames.keys():
if db.usernames[u].updateTime + cacheTimeout < now:
print "getting avatar for", u, "..."
d = twister.dhtget(u,"avatar","s")
if len(d) == 1 and d[0].has_key("p") and d[0]["p"].has_key("v"):
db.usernames[u].avatar = d[0]["p"]["v"]
print "getting profile for", u, "..."
d = twister.dhtget(u,"profile","s")
if len(d) == 1 and d[0].has_key("p") and d[0]["p"].has_key("v"):
db.usernames[u].fullname = d[0]["p"]["v"]["fullname"]
db.usernames[u].location = d[0]["p"]["v"]["location"]
db.usernames[u].updateTime = now
cPickle.dump(db,open(dbFileName,"w"))
from HTML import HTML
from cgi import escape
def outputHtmlUserlist(fname, db, keys):
    """Write an HTML table of users to the file *fname*.

    fname -- path of the HTML file to create or overwrite
    db    -- object whose ``usernames`` dict maps each username to a record
             with ``avatar``, ``fullname`` and ``location`` attributes
    keys  -- the usernames to list, in output order
    """
    h = HTML()
    h.head("")
    with h.body(""):
        with h.table(border='1', newlines=True):
            with h.colgroup:
                for width in ("64px", "130px", "250px", "250px"):
                    h.col(span="1", style="width: %s;" % width)
            with h.tr:
                for title in ("avatar", "username", "fullname", "location"):
                    h.th(title)
            for u in keys:
                user = db.usernames[u]
                with h.tr:
                    # HTML escapes tag content and attribute values itself;
                    # escaping here as well would show "&" as "&amp;amp;".
                    with h.td():
                        h.img('', src=user.avatar, width="64", height="64")
                    h.td(u)
                    h.td(user.fullname)
                    h.td(user.location)
    with open(fname, "w") as out:
        out.write(str(h))
print "Generating", htmlFileName, "..."
keys = db.usernames.keys()
keys.sort() # sorted by username
outputHtmlUserlist(htmlFileName, db, keys)
Loading…
Cancel
Save