mirror of
https://github.com/twisterarmy/twister-core.git
synced 2025-01-22 12:34:24 +00:00
This sample script is a username crawler: it obtains all known usernames
from the block chain and then tries to download the avatar and profile for each of them. The report is written to an HTML file.
This commit is contained in:
parent
fd404d0927
commit
a3046784ac
70
contrib/HTML.py
Normal file
70
contrib/HTML.py
Normal file
@ -0,0 +1,70 @@
|
||||
from cgi import escape
|
||||
class HTML(object):
    '''Easily generate HTML.

    Tags are created by attribute access, customised by calling them
    (positional arguments become escaped text content, keyword arguments
    become attributes) and nested with the ``with`` statement.  Illustration:

    >>> h = HTML()
    >>> p = h.p('hello, world!')
    >>> p.text('more text')
    >>> with h.table(border='1', newlines=True):
    ...     for i in range(2):
    ...         with h.tr:
    ...             h.td('he<l>lo', a='"foo"')
    ...             h.td('there')
    ...
    >>> print(h)
    <p>hello, world!more text</p><table border="1">
    <tr><td a="&quot;foo&quot;">he&lt;l&gt;lo</td><td>there</td></tr>
    <tr><td a="&quot;foo&quot;">he&lt;l&gt;lo</td><td>there</td></tr>
    </table>

    The special keyword ``newlines`` is not emitted as an attribute; it
    controls whether a newline is inserted between the tag's children.
    The special attribute ``newline`` appends a literal newline instead of
    creating a tag.
    '''

    def __init__(self, name=None, stack=None):
        '''Create a tag called *name*; ``name=None`` is the unnamed root.

        *stack* is the list of currently open tags shared by every tag of one
        document; the root creates it with itself as the only entry.
        '''
        self.name = name
        self.content = []
        self.attrs = {}
        # insert newlines between content?
        self.newlines = False
        if stack is None:
            stack = [self]
        self.stack = stack

    def __getattr__(self, name):
        '''Add a new tag (or a newline) to the innermost open tag.

        Only called for names that are not real attributes or methods.
        Returns the new HTML element, or the string '\\n' for ``newline``.
        '''
        # Protocol probes such as hasattr(h, '__iter__'), copy or pickle must
        # fail normally instead of silently appending a bogus tag.
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)
        if name == 'newline':
            element = '\n'
        else:
            element = HTML(name, self.stack)
        # New elements always go to the top of the shared stack, i.e. the tag
        # whose ``with`` block is currently active (or the root).
        self.stack[-1].content.append(element)
        return element

    def text(self, text):
        '''Append *text*, HTML-escaped, to this tag's content.'''
        self.content.append(escape(text, quote=False))

    def __call__(self, *content, **kw):
        '''Customise this tag with content and/or attributes; returns self.

        Positional arguments replace the current content (each one escaped).
        ``newlines=<bool>`` sets the newline mode; every other keyword becomes
        an attribute whose value is escaped, including double quotes.
        '''
        if content:
            # A real list (not a lazy map object) so text() can append to it.
            self.content = [escape(item, quote=False) for item in content]
        if 'newlines' in kw:
            # special-case to allow control over newlines
            self.newlines = kw.pop('newlines')
        for key in kw:
            # Values are rendered inside double quotes, so '"' must be
            # replaced as well as '&', '<' and '>'.
            value = escape(kw[key], quote=False)
            self.attrs[key] = value.replace('"', '&quot;')
        return self

    def __enter__(self):
        # we're now adding tags to me!
        self.stack.append(self)
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        # we're done adding tags to me!
        self.stack.pop()

    def __str__(self):
        '''Render this tag and all of its content as text.'''
        join = '\n' if self.newlines else ''
        rendered = join.join([str(item) for item in self.content])
        if self.name is None:
            # The root has no tag of its own, only content.
            return rendered
        attributes = ['%s="%s"' % pair for pair in self.attrs.items()]
        opening = '<%s>%s' % (' '.join([self.name] + attributes), join)
        if self.content:
            opening += rendered
        return opening + join + '</%s>' % self.name
|
110
contrib/usernameCrawler.py
Executable file
110
contrib/usernameCrawler.py
Executable file
@ -0,0 +1,110 @@
|
||||
#!/usr/bin/python
|
||||
#
|
||||
# This sample script is a username crawler: it will obtain all known usernames
|
||||
# from block chain and then try to download avatar and profiles for all of
|
||||
# them. The report is shown as an html file.
|
||||
#
|
||||
# Downloaded data is cached in a python pickle file, so it may be executed
|
||||
# again and it won't need to get everything all over again (you may run it
|
||||
# from cron scripts, for example)
|
||||
|
||||
import sys, cPickle, time
|
||||
|
||||
# Pickle file in which crawl results are cached between runs.
dbFileName = "usernameCrawler.pickle"
# File the HTML report is written to.
htmlFileName = "userlist.html"
# A user's avatar/profile is fetched again once its updateTime is older than
# this many seconds (compared against time.time()); 24 hours.
cacheTimeout = 24*3600

try:
    from bitcoinrpc.authproxy import AuthServiceProxy
except ImportError:
    sys.stderr.write("Error: install python-bitcoinrpc (https://github.com/jgarzik/python-bitcoinrpc)\n")
    # sys.exit() instead of the exit() helper, which is only installed by the
    # site module and is meant for interactive sessions.
    sys.exit(-1)

# Default RPC endpoint; the first command line argument overrides it.
serverUrl = "http://user:pwd@127.0.0.1:28332"
if len(sys.argv) > 1:
    serverUrl = sys.argv[1]

# RPC proxy used for every request made by this script.
twister = AuthServiceProxy(serverUrl)
|
||||
|
||||
class User:
    """Cached data about one username.

    All fields are class-level defaults, so an instance that was never
    refreshed carries no instance attributes of its own.
    """
    # time.time() value of the last refresh; 0 means never refreshed
    updateTime = 0
    # profile fields, empty until downloaded
    location = ""
    fullname = ""
    # avatar value as returned by the DHT, empty until downloaded
    avatar = ""
|
||||
|
||||
class MyDb:
    """Container that is pickled to disk between runs.

    The script additionally stores a ``usernames`` dict on each instance.
    """
    # hash of the last block that was scanned; 0 until a block has been read
    lastBlockHash = 0
|
||||
|
||||
# Resume from the cached database when it can be loaded; otherwise start
# with an empty one at the block of height 0.
# NOTE(review): unpickling can execute arbitrary code -- only point
# dbFileName at a cache file written by this script.
try:
    with open(dbFileName) as dbFile:
        db = cPickle.load(dbFile)
    nextHash = db.lastBlockHash
except Exception:
    # Best effort: a missing or unreadable cache simply means a full crawl.
    # Unlike a bare "except:", this lets Ctrl-C and SystemExit through.
    db = MyDb()
    db.usernames = {}
    nextHash = twister.getblockhash(0)
|
||||
|
||||
# Walk the chain from nextHash to the tip, registering every username that
# is not in the database yet.
while True:
    block = twister.getblock(nextHash)
    db.lastBlockHash = block["hash"]
    # Progress indicator: the height is rewritten in place using "\r".  It is
    # flushed explicitly because no newline is written to trigger a flush.
    sys.stdout.write(str(block["height"]) + "\r")
    sys.stdout.flush()
    for u in block["usernames"]:
        if u not in db.usernames:
            db.usernames[u] = User()
    # The tip of the chain has no "nextblockhash" entry.
    if "nextblockhash" not in block:
        break
    nextHash = block["nextblockhash"]
|
||||
|
||||
def _dhtValue(reply):
    # Return the ["p"]["v"] payload of a dhtget reply holding exactly one
    # entry, or None when the reply does not have that shape.
    if len(reply) == 1 and "p" in reply[0] and "v" in reply[0]["p"]:
        return reply[0]["p"]["v"]
    return None


# Refresh avatar and profile of every user whose cached data has expired.
now = time.time()
try:
    for u in db.usernames:
        user = db.usernames[u]
        if user.updateTime + cacheTimeout >= now:
            continue  # cached data is still fresh

        print("getting avatar for %s ..." % (u,))
        avatar = _dhtValue(twister.dhtget(u, "avatar", "s"))
        if avatar is not None:
            user.avatar = avatar

        print("getting profile for %s ..." % (u,))
        profile = _dhtValue(twister.dhtget(u, "profile", "s"))
        if isinstance(profile, dict):
            # A profile may lack either field; keep the cached value then
            # instead of aborting the whole crawl with a KeyError.
            user.fullname = profile.get("fullname", user.fullname)
            user.location = profile.get("location", user.location)

        user.updateTime = now
finally:
    # Save even when a request fails or the run is interrupted, so the work
    # done so far is not lost.
    with open(dbFileName, "w") as dbFile:
        cPickle.dump(db, dbFile)
|
||||
|
||||
|
||||
from HTML import HTML
|
||||
from cgi import escape
|
||||
def outputHtmlUserlist(fname, db, keys):
    """Write an HTML table with one row per user to the file *fname*.

    fname -- path of the HTML file to (over)write
    db    -- object whose ``usernames`` dict maps a username to an object
             with ``avatar``, ``fullname`` and ``location`` attributes
    keys  -- usernames to list, in the order the rows should appear
    """
    h = HTML()
    h.head("")
    with h.body(""):
        with h.table(border='1', newlines=True):
            with h.colgroup:
                for width in ("64px", "130px", "250px", "250px"):
                    h.col(span="1", style="width: %s;" % width)
            with h.tr:
                for title in ("avatar", "username", "fullname", "location"):
                    h.th(title)
            for u in keys:
                user = db.usernames[u]
                with h.tr:
                    # Values are passed in raw: HTML escapes content and
                    # attribute values itself, so escaping them here as well
                    # would render "&" as "&amp;amp;".
                    with h.td():
                        h.img('', src=user.avatar, width="64", height="64")
                    h.td(u)
                    h.td(user.fullname)
                    h.td(user.location)
    with open(fname, "w") as out:
        out.write(str(h))
|
||||
|
||||
# Produce the report, one row per known username.
print("Generating %s ..." % (htmlFileName,))

# sorted by username
keys = sorted(db.usernames.keys())
outputHtmlUserlist(htmlFileName, db, keys)
|
||||
|
Loading…
x
Reference in New Issue
Block a user