This sample script is a username crawler: it obtains all known usernames from the block chain and then tries to download the avatar and profile of each of them. The report is written to an HTML file.
2025-01-22 12:34:24 +00:00 · 2013-12-10 18:34:08 -02:00 · 2013-12-10 18:34:08 -02:00 · a3046784ac
commit a3046784ac
parent fd404d0927
2 changed files with 180 additions and 0 deletions
--- a/contrib/HTML.py
+++ b/contrib/HTML.py
@@ -0,0 +1,70 @@
+from cgi import escape
+class HTML(object):
+    '''Easily generate HTML.
+
+        >>> h = HTML()
+        >>> p = h.p('hello, world!')
+        >>> p.text('more text')
+        >>> with h.table(border='1', newlines=True):
+        ...     for i in range(2):
+        ...         with h.tr:
+        ...             h.td('he<l>lo', a='"foo"')
+        ...             h.td('there')
+        ... 
+        >>> print h
+        <p>hello, world!more text</p>
+        <table border="1">
+        <tr><td a="&quot;foo&quot;">he&lt;l&gt;lo</td><td>there</td></tr>
+        <tr><td a="&quot;foo&quot;">he&lt;l&gt;lo</td><td>there</td></tr>
+        </table>
+
+    '''
+    def __init__(self, name=None, stack=None):
+        '''Create a tag called *name*; name=None is the unnamed document root.'''
+        self.name = name
+        self.content = []   # already-escaped strings and child HTML objects
+        self.attrs = {}
+        # insert newlines between content?
+        self.newlines = False
+        # stack[-1] is the tag that receives new children; all tags of a document share it
+        self.stack = [self] if stack is None else stack
+    def __getattr__(self, name):
+        '''Append a new child tag called *name* (or a raw newline) to the open tag.'''
+        if name.startswith('__'):
+            raise AttributeError(name)  # protocol probes (copy, pickle) must not create tags
+        e = '\n' if name == 'newline' else HTML(name, self.stack)
+        self.stack[-1].content.append(e)
+        return e
+    def text(self, text):
+        '''Append *text*, HTML-escaped, to this tag's content.'''
+        self.content.append(escape(text))
+    def __call__(self, *content, **kw):
+        '''Replace the content and/or set attributes of this tag; returns self.'''
+        if content:
+            self.content = [escape(c) for c in content]
+        if 'newlines' in kw:
+            # special-case: a formatting flag, not an HTML attribute
+            self.newlines = kw.pop('newlines')
+        for k in kw:
+            # cgi.escape() leaves '"' alone by default, so quote it for the attribute value
+            self.attrs[k] = escape(kw[k]).replace('"', '&quot;')
+        return self
+    def __enter__(self):
+        '''Make this tag the target for newly created child tags.'''
+        self.stack.append(self)
+        return self
+    def __exit__(self, exc_type, exc_value, exc_tb):
+        '''Stop adding child tags to this tag.'''
+        self.stack.pop()
+    def __str__(self):
+        '''Render this tag, its attributes and its content as markup.'''
+        join = '\n' if self.newlines else ''
+        if self.name is None:
+            return join.join(map(str, self.content))
+        l = [self.name] + ['%s="%s"'%i for i in self.attrs.items()]
+        s = '<%s>%s'%(' '.join(l), join)
+        # a tag without content is emitted without a closing tag
+        if self.content:
+            s += join.join(map(str, self.content))
+            s += join + '</%s>'%self.name
+        return s
--- a/contrib/usernameCrawler.py
+++ b/contrib/usernameCrawler.py
@@ -0,0 +1,110 @@
+#!/usr/bin/python
+#
+# This sample script is a username crawler: it will obtain all known usernames
+# from block chain and then try to download avatar and profiles for all of
+# them. The report is shown as an html file.
+#
+# Downloaded data is cached in a python pickle file, so it may be executed
+# again and it won't need to get everything all over again (you may run it
+# from cron scripts, for example)
+
+import sys, cPickle, time
+
+dbFileName = "usernameCrawler.pickle"  # pickle cache of the crawled user data
+htmlFileName = "userlist.html"  # report written at the end of the run
+cacheTimeout = 24 * 60 * 60  # seconds a fetched avatar/profile stays fresh (one day)
+
+try:  # the RPC client library is a third-party dependency
+    from bitcoinrpc.authproxy import AuthServiceProxy
+except ImportError:
+    sys.stderr.write("Error: install python-bitcoinrpc (https://github.com/jgarzik/python-bitcoinrpc)\n")
+    sys.exit(-1)  # sys.exit, not the interactive-only exit() helper added by site
+
+# RPC endpoint: the first command line argument if given, else the local default
+serverUrl = sys.argv[1] if len(sys.argv) > 1 else "http://user:pwd@127.0.0.1:28332"
+
+# proxy object used for every RPC call below
+twister = AuthServiceProxy(serverUrl)
+
+class User:
+    # Cached data of one user; the class attributes are the defaults before any fetch.
+    avatar = fullname = location = ""
+    # time.time() of the last refresh; 0 makes a new user immediately stale
+    updateTime = 0
+
+class MyDb:  # crawl state that is pickled to dbFileName between runs
+    lastBlockHash = 0  # hash of the last block scanned; 0 until a block has been read
+
+try:
+    with open(dbFileName) as dbFile:  # reuse the cache of a previous run, if any
+        db = cPickle.load(dbFile)
+    nextHash = db.lastBlockHash
+except Exception:  # no usable cache: start over at block 0 (Ctrl-C is no longer swallowed)
+    db, nextHash = MyDb(), twister.getblockhash(0)
+    db.usernames = {}
+
+# Walk the chain forward from nextHash, registering every username seen.
+while True:
+    block = twister.getblock(nextHash)
+    db.lastBlockHash = block["hash"]  # resume point for the next run
+    print str(block["height"]) + "\r",
+    for name in block["usernames"]:
+        if name not in db.usernames:
+            db.usernames[name] = User()
+    # stop at the first block without a "nextblockhash" entry (presumably the chain tip)
+    if "nextblockhash" not in block:
+        break
+    nextHash = block["nextblockhash"]
+
+# Refresh avatar and profile of every user whose cached data is older than cacheTimeout.
+now = time.time()
+for u, user in db.usernames.items():
+    if user.updateTime + cacheTimeout < now:
+        print "getting avatar for", u, "..."
+        d = twister.dhtget(u,"avatar","s")
+        if len(d) == 1 and "p" in d[0] and "v" in d[0]["p"]:
+            user.avatar = d[0]["p"]["v"]
+        print "getting profile for", u, "..."
+        d = twister.dhtget(u,"profile","s")
+        if len(d) == 1 and "p" in d[0] and "v" in d[0]["p"]:
+            profile = d[0]["p"]["v"]
+            # a profile may omit either field; keep the cached value instead of raising KeyError
+            user.fullname = profile.get("fullname", user.fullname)
+            user.location = profile.get("location", user.location)
+        user.updateTime = now
+
+with open(dbFileName,"w") as dbFile:  # close (and flush) the cache file before going on
+    cPickle.dump(db, dbFile)
+
+from HTML import HTML
+from cgi import escape
+def outputHtmlUserlist(fname, db, keys):
+    '''Write an HTML table (avatar, username, fullname, location) for *keys* to *fname*.'''
+    h = HTML()
+    h.head("")
+    with h.body(""):
+        with h.table(border='1', newlines=True):
+            with h.colgroup:
+                for width in ("64px", "130px", "250px", "250px"):
+                    h.col(span="1", style="width: %s;" % width)
+            with h.tr:
+                for title in ("avatar", "username", "fullname", "location"):
+                    h.th(title)
+            for u in keys:
+                user = db.usernames[u]
+                with h.tr:
+                    # HTML() escapes content and attribute values itself, so do not escape again here
+                    with h.td():
+                        h.img('', src=user.avatar, width="64", height="64")
+                    h.td(u)
+                    h.td(user.fullname)
+                    h.td(user.location)
+    with open(fname, "w") as out:
+        out.write(str(h))
+
+# Final step: write the report, listing users in username order.
+print "Generating", htmlFileName, "..."
+
+keys = sorted(db.usernames)
+outputHtmlUserlist(htmlFileName, db, keys)
+