This sample script is a username crawler: it obtains all known usernames from the block chain and then tries to download the avatar and profile for each of them. The report is generated as an HTML file.
11 years ago · a3046784ac
2 changed files with 180 additions and 0 deletions
--- a/contrib/HTML.py
+++ b/contrib/HTML.py
@@ -0,0 +1,70 @@
+from cgi import escape
+class HTML(object):
+    '''Easily generate HTML.
+
+    Attribute access (``h.p``, ``h.table``, ...) creates a child tag and
+    appends it to the tag currently open via ``with``; calling a tag sets
+    its text content and attributes. Text content and attribute values
+    are HTML-escaped. A tag without content renders without a closing
+    tag.
+
+        >>> h = HTML()
+        >>> p = h.p('hello, world!')
+        >>> p.text('more text')
+        >>> with h.table(border='1', newlines=True):
+        ...     for i in range(2):
+        ...         with h.tr:
+        ...             h.td('he<l>lo', a='"foo"')
+        ...             h.td('there')
+        ... 
+        >>> print h
+        <p>hello, world!more text</p>
+        <table border="1">
+        <tr><td a="&quot;foo&quot;">he&lt;l&gt;lo</td><td>there</td></tr>
+        <tr><td a="&quot;foo&quot;">he&lt;l&gt;lo</td><td>there</td></tr>
+        </table>
+
+    '''
+    def __init__(self, name=None, stack=None):
+        # name is the tag name; None marks the root container, which
+        # renders only its children.
+        self.name = name
+        self.content = []
+        self.attrs = {}
+        # insert newlines between content?
+        self.newlines = False
+        # The stack is shared by every tag created from the same root; its
+        # last entry is the tag that currently receives new children.
+        if stack is None:
+            stack = [self]
+        self.stack = stack
+    def __getattr__(self, name):
+        # Dunder probes (copy, pickle, hasattr, ...) must fail normally
+        # instead of silently appending a bogus tag to the document.
+        if name.startswith('__') and name.endswith('__'):
+            raise AttributeError(name)
+        # adding a new tag or newline
+        if name == 'newline':
+            e = '\n'
+        else:
+            e = HTML(name, self.stack)
+        self.stack[-1].content.append(e)
+        return e
+    def text(self, text):
+        # adding (escaped) text to this tag
+        self.content.append(escape(text))
+    def __call__(self, *content, **kw):
+        # customising a tag with content or attributes; returns self so
+        # the call can be used directly in a ``with`` statement
+        if content:
+            # Build a real list so text() can still append afterwards.
+            self.content = [escape(c) for c in content]
+        if 'newlines' in kw:
+            # special-case to allow control over newlines
+            self.newlines = kw.pop('newlines')
+        for k in kw:
+            # escape() takes care of &, < and >; double quotes must be
+            # encoded too because values are rendered inside "...".
+            self.attrs[k] = escape(kw[k]).replace('"', '&quot;')
+        return self
+    def __enter__(self):
+        # we're now adding tags to me!
+        self.stack.append(self)
+        return self
+    def __exit__(self, exc_type, exc_value, exc_tb):
+        # we're done adding tags to me!
+        self.stack.pop()
+    def __str__(self):
+        # turn me and my content into text
+        join = '\n' if self.newlines else ''
+        if self.name is None:
+            # root container: just the children, no surrounding tag
+            return join.join(map(str, self.content))
+        a = ['%s="%s"'%i for i in self.attrs.items()]
+        l = [self.name] + a
+        s = '<%s>%s'%(' '.join(l), join)
+        if self.content:
+            # the closing tag is only emitted when there is content
+            s += join.join(map(str, self.content))
+            s += join + '</%s>'%self.name
+        return s
--- a/contrib/usernameCrawler.py
+++ b/contrib/usernameCrawler.py
@@ -0,0 +1,110 @@
+#!/usr/bin/python
+#
+# This sample script is a username crawler: it will obtain all known usernames
+# from block chain and then try to download avatar and profiles for all of
+# them. The report is shown as an html file.
+#
+# Downloaded data is cached in a python pickle file, so it may be executed
+# again and it won't need to get everything all over again (you may run it
+# from cron scripts, for example)
+
+import sys, cPickle, time
+
+# Cache file holding the pickled crawler state between runs.
+dbFileName = "usernameCrawler.pickle"
+# HTML report written at the end of the run.
+htmlFileName = "userlist.html"
+# Seconds before a user's avatar/profile is fetched again (24 hours).
+cacheTimeout = 24*3600
+
+try:
+    from bitcoinrpc.authproxy import AuthServiceProxy
+except ImportError:
+    sys.stderr.write("Error: install python-bitcoinrpc (https://github.com/jgarzik/python-bitcoinrpc)\n")
+    # sys.exit rather than the site-provided exit(), which is meant for
+    # the interactive interpreter and is not guaranteed to exist.
+    sys.exit(-1)
+
+# Default RPC endpoint; the first command-line argument overrides it.
+serverUrl = "http://user:pwd@127.0.0.1:28332"
+if len(sys.argv) > 1:
+    serverUrl = sys.argv[1]
+
+twister = AuthServiceProxy(serverUrl)
+
+class User:
+    # Per-username record stored in db.usernames. The class attributes
+    # below serve as defaults until the crawler assigns fetched values.
+    # Value of the user's "avatar" DHT resource.
+    avatar = ""
+    # "fullname" field of the user's "profile" DHT resource.
+    fullname = ""
+    # "location" field of the user's "profile" DHT resource.
+    location = ""
+    # time.time() of the last avatar/profile fetch; 0 means never fetched.
+    updateTime = 0
+
+class MyDb:
+    # Crawler state that is pickled to dbFileName. A "usernames" dict is
+    # attached to the instance when a fresh database is created.
+    # Hash of the last block processed; 0 until a block has been crawled.
+    lastBlockHash = 0
+
+# Resume from the cached state when possible; otherwise start from block 0
+# with an empty user table.
+# NOTE: unpickling can execute arbitrary code, so the cache file must only
+# ever come from a trusted source (i.e. a previous run of this script).
+try:
+    with open(dbFileName) as dbFile:
+        db = cPickle.load(dbFile)
+    nextHash = db.lastBlockHash
+except Exception:
+    # Best effort: a missing or unreadable cache just means a full crawl.
+    # Exception (not a bare except) so Ctrl-C and sys.exit still propagate.
+    db = MyDb()
+    db.usernames = {}
+    nextHash = twister.getblockhash(0)
+
+# Walk the chain from nextHash up to the newest block, creating a User
+# record for every username that has not been seen before.
+while True:
+    block = twister.getblock(nextHash)
+    db.lastBlockHash = block["hash"]
+    # Progress indicator: block height, rewritten in place on one line.
+    print str(block["height"]) + "\r",
+    for name in block["usernames"]:
+        if name not in db.usernames:
+            db.usernames[name] = User()
+    # The newest block carries no "nextblockhash" entry: we are done.
+    if "nextblockhash" not in block:
+        break
+    nextHash = block["nextblockhash"]
+
+def _dhtValue(reply):
+    # Return the value stored in a single-entry dhtget reply, or None when
+    # the reply does not have the expected [{"p": {"v": ...}}] shape.
+    if len(reply) != 1:
+        return None
+    payload = reply[0].get("p")
+    if not isinstance(payload, dict):
+        return None
+    return payload.get("v")
+
+now = time.time()
+for u in db.usernames:
+    user = db.usernames[u]
+    # Skip users whose cached data is still fresh.
+    if user.updateTime + cacheTimeout >= now:
+        continue
+
+    print "getting avatar for", u, "..."
+    avatar = _dhtValue(twister.dhtget(u,"avatar","s"))
+    if avatar is not None:
+        user.avatar = avatar
+
+    print "getting profile for", u, "..."
+    profile = _dhtValue(twister.dhtget(u,"profile","s"))
+    if isinstance(profile, dict):
+        # Profiles may omit fields; keep the cached value instead of
+        # aborting the whole run (and losing the crawl) with a KeyError.
+        user.fullname = profile.get("fullname", user.fullname)
+        user.location = profile.get("location", user.location)
+
+    user.updateTime = now
+
+# Close the file explicitly so the cache is reliably flushed to disk.
+with open(dbFileName,"w") as dbFile:
+    cPickle.dump(db,dbFile)
+
+
+from HTML import HTML
+from cgi import escape
+def outputHtmlUserlist(fname, db, keys):
+    # Write an HTML table (avatar, username, fullname, location) to fname,
+    # with one row per entry of keys, looked up in db.usernames.
+    #
+    # Values are handed to HTML unescaped: HTML.__call__ already escapes
+    # both tag content and attribute values, so escaping here as well
+    # would double-encode them ("&" would be written as "&amp;amp;").
+    # NOTE(review): non-ASCII profile text may make str(h) fail under
+    # Python 2 -- confirm against real data.
+    h = HTML()
+    # Empty-string content makes the tag render with a closing tag.
+    h.head("")
+    with h.body(""):
+        with h.table(border='1', newlines=True):
+            with h.colgroup:
+                h.col(span="1", style="width: 64px;")
+                h.col(span="1", style="width: 130px;")
+                h.col(span="1", style="width: 250px;")
+                h.col(span="1", style="width: 250px;")
+            with h.tr:
+                h.th("avatar")
+                h.th("username")
+                h.th("fullname")
+                h.th("location")
+            for u in keys:
+                user = db.usernames[u]
+                with h.tr:
+                    with h.td():
+                        h.img('',src=user.avatar, width="64", height="64")
+                    h.td(u)
+                    h.td(user.fullname)
+                    h.td(user.location)
+    # Close the report file explicitly so it is flushed to disk.
+    with open(fname, "w") as out:
+        out.write(str(h))
+
+print "Generating", htmlFileName, "..."
+
+# Rows of the report are emitted in username order.
+keys = sorted(db.usernames)
+outputHtmlUserlist(htmlFileName, db, keys)
+