11 years ago
4 changed files with 488 additions and 0 deletions
@ -0,0 +1,88 @@
@@ -0,0 +1,88 @@
# -*- coding: utf-8 -*- |
from datetime import datetime |
from string import Template |
from os.path import expanduser |
__author__ = 'Giacomo Lacava' |
from twistscraper import TwisterScraper |
# Load the HTML page template once, at import time. The file is opened in
# binary mode, so its bytes must be decoded before string.Template can use them.
# NOTE(review): the argument of Template(...) was missing from the reviewed
# text; decoding the file content as UTF-8 (the charset the template declares)
# is a reconstruction - confirm against the original source.
# NOTE(review): "map.html" is resolved against the current working directory,
# so the script has to be started from the directory holding the template.
with open("map.html", "rb") as mapTemplate:
    TEMPLATE = Template(mapTemplate.read().decode('utf-8'))
def generate_map(userdb):
    """Render the map of Twister users as a complete HTML page.

    Loads the user database through ``TwisterScraper``, geocodes every
    self-reported location via ``User.locate()``, merges location names that
    resolve to the same coordinates, and fills the module-level ``TEMPLATE``
    with the marker list and the user statistics.

    :param userdb: path of the user database file, handed to ``TwisterScraper``.
    :return: the rendered HTML document as a ``str``.
    """
    # Function-scope imports: only this function needs them.
    import json
    from html import escape

    ts = TwisterScraper(userdb)
    loc_users = [u for u in ts.db.users.values() if u.location != '']
    noLoc_user_num = len(ts.db.users) - len(loc_users)
    loc_users_fake_num = 0

    # First pass: group usernames by the location string exactly as typed.
    locDb = {}
    for u in loc_users:
        if u.location in locDb:
            locDb[u.location]['users'].append(u.username)
            continue
        locData = u.locate()
        if locData is None:
            # The geocoder could not place this location.
            loc_users_fake_num += 1
        else:
            locDb[u.location] = {'coordinates': locData, 'users': [u.username]}

    # Second pass: aggregate misspellings, i.e. different location strings
    # that resolved to the same coordinates. Grouping by coordinates keeps
    # this linear instead of comparing every location with every other one.
    byCoord = {}
    for loc, locDict in locDb.items():
        coords = locDict['coordinates']
        byCoord.setdefault((coords['lat'], coords['lng']), []).append((loc, locDict))

    newLocDb = {}
    for sameCoord in byCoord.values():
        if len(sameCoord) == 1:
            # Only one spelling: copy it straight to the new dict.
            loc, locDict = sameCoord[0]
            newLocDb[loc] = locDict
            continue
        # Several spellings share these coordinates: keep the most popular
        # name (the first one wins a tie) and merge everybody under it.
        location, locDict = max(sameCoord, key=lambda item: len(item[1]['users']))
        users = sorted(name for _, entry in sameCoord for name in entry['users'])
        newLocDb[location] = {'users': users, 'coordinates': locDict['coordinates']}

    # Location names and usernames are user-controlled text that ends up as
    # HTML inside a JavaScript string literal inside a <script> element:
    # HTML-escape the text, let json.dumps produce the JS literal, and break
    # up any "</" so the literal can never close the script element.
    locStrings = []
    for name, entry in newLocDb.items():
        info = '<h4>{name} - {numusers}</h4><small>{users}</small>'.format(
            name=escape(name),
            numusers=len(entry['users']),
            users=',<br />'.join(escape(user) for user in entry['users']))
        locStrings.append('[{info}, {lat}, {lng}]'.format(
            info=json.dumps(info).replace('</', '<\\/'),
            lat=entry['coordinates']['lat'],
            lng=entry['coordinates']['lng']))
    locStrings.sort()

    # NOTE(review): the last keyword argument was missing from the reviewed
    # text. The template requires $timestamp and this module imports
    # datetime, so the current time is supplied - confirm the exact format.
    return TEMPLATE.substitute(locations=',\n'.join(locStrings),
                               # users that have a location AND could be geocoded
                               users_real_loc=len(loc_users) - loc_users_fake_num,
                               users_fake_loc=loc_users_fake_num,
                               users_no_loc=noLoc_user_num,
                               timestamp=datetime.now().isoformat())
if __name__ == '__main__':
    # Script entry point: render the map from the default user database and
    # store the page, UTF-8 encoded, in the user's home directory.
    db_path = expanduser('~/.twister/_localusersdb')
    out_path = expanduser('~/twistermap.html')
    page_bytes = generate_map(db_path).encode('utf-8')
    with open(out_path, 'wb') as out_file:
        out_file.write(page_bytes)
@ -0,0 +1,111 @@
@@ -0,0 +1,111 @@
<!DOCTYPE html> |
<html> |
<head> |
<meta http-equiv="content-type" content="text/html; charset=UTF-8"/> |
<title>Map of Twister Users</title> |
<script src=""></script> |
<script src=""></script> |
<style> |
body { |
font-family: Helvetica, Verdana, Arial, sans-serif; |
} |
h1, h4 { |
padding-bottom: 0; |
margin-bottom: 0; |
} |
</style> |
</head> |
<body> |
<h1>Map of Twister Users</h1> |
<p> |
<small>(as self-reported in profile)</small> |
</p> |
<div id="map" style="width: 600px; height: 400px;"></div> |
<ul> |
<li>Updated at: $timestamp</li> |
<li>Users with realistic location: $users_real_loc</li> |
<li>Users with unrealistic location: $users_fake_loc</li> |
<li>Users without location: $users_no_loc</li> |
</ul> |
<div id="bottom">For any feedback, ping @toyg on Twister.</div> |
<script type="text/javascript">
    // Marker data filled in by the page generator; each entry is
    // [info-window HTML, latitude, longitude].
    var locations = [$locations];

    // Marker icons, cycled through in order.
    // NOTE(review): the prefix is empty in the reviewed text, so the icon
    // URLs resolve relative to the page - confirm the intended base URL.
    var iconURLPrefix = '';
    var icons = [
        iconURLPrefix + 'red-dot.png',
        iconURLPrefix + 'green-dot.png',
        iconURLPrefix + 'blue-dot.png',
        iconURLPrefix + 'orange-dot.png',
        iconURLPrefix + 'purple-dot.png',
        iconURLPrefix + 'pink-dot.png',
        iconURLPrefix + 'yellow-dot.png'
    ];
    var icons_length = icons.length;
    var shadow = {
        anchor: new google.maps.Point(15, 33),
        url: iconURLPrefix + 'msmarker.shadow.png'
    };

    // The initial centre and zoom are placeholders: AutoCenter() below
    // refits the view to the markers as soon as there is at least one.
    var map = new google.maps.Map(document.getElementById('map'), {
        zoom: 10,
        center: new google.maps.LatLng(-37.92, 151.25),
        mapTypeId: google.maps.MapTypeId.ROADMAP,
        mapTypeControl: false,
        streetViewControl: false,
        panControl: false,
        zoomControlOptions: {
            position: google.maps.ControlPosition.LEFT_BOTTOM
        }
    });

    // A single info window is shared by all markers.
    var infowindow = new google.maps.InfoWindow({
        maxWidth: 160
    });

    var marker;
    var markers = [];

    // Add the markers and their click handlers to the map.
    for (var i = 0; i < locations.length; i++) {
        marker = new google.maps.Marker({
            position: new google.maps.LatLng(locations[i][1], locations[i][2]),
            map: map,
            icon: icons[i % icons_length],
            shadow: shadow
        });
        markers.push(marker);

        // The immediately-invoked function captures this iteration's marker
        // and index; without it every handler would see the last ones.
        google.maps.event.addListener(marker, 'click', (function (marker, i) {
            return function () {
                infowindow.setContent(locations[i][0]);
                // NOTE(review): the call name was missing from the reviewed
                // text; opening the shared info window on the clicked marker
                // is a reconstruction - confirm.
                infowindow.open(map, marker);
            };
        })(marker, i));
    }

    // Fit the viewport around all markers. Nothing to fit without markers,
    // so the placeholder view is kept in that case.
    function AutoCenter() {
        if (markers.length === 0) {
            return;
        }
        var bounds = new google.maps.LatLngBounds();
        // Native iteration: no jQuery needed just to loop over an array.
        markers.forEach(function (m) {
            bounds.extend(m.getPosition());
        });
        map.fitBounds(bounds);
    }
    AutoCenter();
</script>
</body> |
</html> |
@ -0,0 +1,100 @@
@@ -0,0 +1,100 @@
# -*- coding: utf-8 -*- |
import pickle |
from operator import attrgetter |
from threading import Thread |
from time import sleep |
from os.path import expanduser |
import feedparser |
from twistscraper import TwisterScraper |
__author__ = 'Giacomo Lacava' |
# URL templates for the monitored GitHub repositories.
# NOTE(review): in the reviewed text GITHUB_COMMIT_FEED_TEMPLATE was used but
# never defined, and GITHUB_REPO_URL had lost its scheme and host. Both values
# below are reconstructed from GitHub's public URL layout (repository page and
# per-branch commits Atom feed) - confirm the branch name and the exact URLs.
GITHUB_COMMIT_FEED_TEMPLATE = 'https://github.com/{user}/{repo}/commits/master.atom'
GITHUB_REPO_URL = 'https://github.com/{user}/{repo}'

# Commit feeds polled by the monitor threads.
CORE_COMMIT_FEED = GITHUB_COMMIT_FEED_TEMPLATE.format(user='miguelfreitas', repo='twister-core')
HTML_COMMIT_FEED = GITHUB_COMMIT_FEED_TEMPLATE.format(user='miguelfreitas', repo='twister-html')
SEED_COMMIT_FEED = GITHUB_COMMIT_FEED_TEMPLATE.format(user='miguelfreitas', repo='twister-seeder')

# Repository pages linked from the announcement posts.
CORE_REPO_URL = GITHUB_REPO_URL.format(user='miguelfreitas', repo='twister-core')
HTML_REPO_URL = GITHUB_REPO_URL.format(user='miguelfreitas', repo='twister-html')
SEED_REPO_URL = GITHUB_REPO_URL.format(user='miguelfreitas', repo='twister-seeder')
class TwisterMonitor(Thread):
    """Thread that announces new entries of a commit feed as Twister posts.

    Every feed entry that has not been announced yet is posted from the
    configured account. Announced entry ids are remembered in a pickled
    cache file shared by all monitors, keyed by feed URL.
    """

    MESSAGE = 'Twister update: {msg} - Pull it now: {url}'
    # Longest message the monitor posts (the original loop shortened
    # messages until they were below 140 characters).
    MAX_MESSAGE_LENGTH = 139

    def __init__(self, twister_monitor, username, repo_feed=CORE_COMMIT_FEED, repo_url=CORE_REPO_URL):
        """
        :param twister_monitor: object exposing the RPC proxy as ``.twister``
            (the script entry point passes a ``TwisterScraper``).
        :param username: Twister account the announcements are posted from.
        :param repo_feed: URL of the commit feed to poll.
        :param repo_url: repository URL quoted in every announcement.
        """
        super().__init__()
        self.ts = twister_monitor
        self.cacheFile = expanduser('~/.twister/_twm_cache')
        self.cache = {}
        self.username = username
        self.feed = repo_feed
        self.repo = repo_url
        self.loadCache()

    def _read_cache_file(self):
        """Return the cache stored on disk, or an empty dict if there is none."""
        # NOTE(review): pickle must only ever be fed trusted data; this file
        # is expected to be written by this program alone.
        try:
            with open(self.cacheFile, 'rb') as f:
                return pickle.load(f)
        except FileNotFoundError:
            return {}

    def loadCache(self):
        """Replace the in-memory cache with the content of the cache file."""
        self.cache = self._read_cache_file()

    def _build_message(self, title):
        """Format the announcement for *title*, shortening the title to fit.

        A title that does not fit is cut and suffixed with '...'. If even an
        empty title does not fit (very long repository URL) the message is
        returned over-long instead of looping forever.
        """
        message = TwisterMonitor.MESSAGE.format(msg=title, url=self.repo)
        if len(message) <= self.MAX_MESSAGE_LENGTH:
            return message
        fixed_length = len(TwisterMonitor.MESSAGE.format(msg='', url=self.repo))
        room = self.MAX_MESSAGE_LENGTH - fixed_length - len('...')
        # At least one character is always dropped, as in the original loop.
        keep = max(0, min(len(title) - 1, room))
        return TwisterMonitor.MESSAGE.format(msg=title[:keep] + '...', url=self.repo)

    def get_commits(self):
        """Fetch the feed and post every entry that was not announced before.

        :raises Exception: when feedparser flags the feed as malformed.
        """
        print("Fetching {0}".format(self.feed))
        f = feedparser.parse(self.feed)
        if f['bozo'] == 1:
            # A failed fetch may carry no HTTP status at all, hence .get().
            raise Exception('Bad feed! Status: {status} - Error {err}'.format(
                status=f.get('status'), err=f.get('bozo_exception')))
        if self.feed not in self.cache:
            self.cache[self.feed] = []
        # Oldest first, so announcements appear in commit order.
        f.entries.sort(key=attrgetter('updated_parsed'))
        for entry in f.entries:
            # NOTE(review): the attribute identifying an entry was missing
            # from the reviewed text in three places; the feed entry id is a
            # reconstruction - confirm.
            print("Checking {0}".format(entry.id))
            if entry.id in self.cache[self.feed]:
                continue
            message = self._build_message(entry.title)
            print("Checking last post key...")
            # The new post takes the key following the account's latest
            # post, or 1 when the account has no posts yet.
            key = 1
            lastpost = self.ts.twister.getposts(1, [{"username": self.username}])
            if len(lastpost) == 1:
                key = lastpost[0]['userpost']['k'] + 1
            print("Posting '{0}' with key {1}...".format(message, key))
            self.ts.twister.newpostmsg(self.username, key, message)
            print("Posted!")
            self.cache[self.feed].append(entry.id)
            self.saveCache()
            # Pause between posts.
            # NOTE(review): indentation was lost in the reviewed text; the
            # pause is assumed to follow each post - confirm.
            sleep(10 * 60)

    def saveCache(self):
        """Write the cache to disk without dropping other monitors' entries.

        Several monitors share one cache file, each responsible for its own
        feed. The file is re-read and only this monitor's feed is replaced,
        so a save no longer overwrites what another monitor stored since
        this one loaded. Two saves at the very same moment can still race.
        """
        merged = self._read_cache_file()
        if self.feed in self.cache:
            merged[self.feed] = self.cache[self.feed]
        self.cache = merged
        with open(self.cacheFile, 'wb') as f:
            pickle.dump(self.cache, f)

    def run(self):
        """Poll the feed forever, once per hour, surviving any failure."""
        while True:
            try:
                self.get_commits()
            except Exception as e:
                # Deliberate catch-all: one bad poll must not kill the thread.
                print("Exception following!")
                print(e)
            sleep(60 * 60)  # in seconds
if __name__ == '__main__':
    # Script entry point: one monitor thread per repository, all posting
    # from the same account, started a few minutes apart.
    botID = 'twmonitor'
    ts = TwisterScraper(expanduser('~/.twister/_localusersdb'))
    # (feed, repository URL, seconds to wait after starting the thread)
    schedule = [
        (CORE_COMMIT_FEED, CORE_REPO_URL, 4 * 60),
        (HTML_COMMIT_FEED, HTML_REPO_URL, 6 * 60),
        (SEED_COMMIT_FEED, SEED_REPO_URL, 0),
    ]
    monitors = []
    for feed_url, repo_url, pause in schedule:
        worker = TwisterMonitor(ts, botID, feed_url, repo_url)
        worker.start()
        monitors.append(worker)
        if pause:
            sleep(pause)
@ -0,0 +1,189 @@
@@ -0,0 +1,189 @@
# -*- coding: utf-8 -*- |
import json |
from http.client import HTTPException |
from urllib.parse import urlencode |
from urllib.request import urlopen |
from os.path import expanduser |
__author__ = 'Giacomo Lacava' |
import time, datetime |
import pickle |
import sys |
cacheTimeout = 24 * 3600 |
try: |
from bitcoinrpc.authproxy import AuthServiceProxy |
except ImportError as exc: |
sys.stderr.write("Error: install python-bitcoinrpc (\n") |
sys.exit(-1) |
class User:
    """Profile data of one Twister user, as stored in the scraper database."""

    # Class-level defaults; they also serve instances restored from older
    # pickles, which are rebuilt without running __init__.
    username = ""
    avatar = ""
    fullname = ""
    location = ""
    coords = None
    bio = ""
    url = ""
    updateTime = 0
    following = []

    # NOTE(review): the URL had been reduced to "{query}" in the reviewed
    # text, which urlopen() rejects. The Google Geocoding JSON endpoint is a
    # reconstruction based on the response fields read in locate(); the
    # service nowadays also expects an API key - confirm.
    _GMAP_URL = "https://maps.googleapis.com/maps/api/geocode/json?{query}"

    def __init__(self):
        # Give every instance its own list: the class-level default is one
        # single list object shared by all instances.
        self.following = []

    def locate(self):
        """
        Query Google API and save coordinates. Should work until we start having more than 50 new locatable
        users per hour.

        The lookup is skipped when the user has no location or when the
        coordinates were already resolved by an earlier call.

        :return: dict with coordinates { 'lat':12345, 'lng':13245 }, or None
            when there is no location or it cannot be placed.
        """
        if self.location == '':
            return None
        if self.coords is not None:
            return self.coords

        loc = urlencode({'address': self.location})
        # read() instead of readall(): HTTPResponse no longer offers
        # readall(). The with-block closes the connection in every case.
        with urlopen(User._GMAP_URL.format(query=loc)) as urldoc:
            jsObj = json.loads(urldoc.read().decode('utf-8'))

        results = jsObj.get('results', [])
        if len(results) > 0:
            # discard commercial results
            locTypes = results[0]['address_components'][0]['types']
            rejected = ('premise', 'route', 'establishment', 'subpremise')
            if not any(kind in locTypes for kind in rejected):
                self.coords = results[0]['geometry']['location']
                return self.coords
        # still here? it's all rubbish
        return None
class TwisterDb:
    """Container for everything the scraper persists between runs."""

    def __init__(self):
        # username -> User
        self.users = {}
        # hash of the last block visited while walking the chain
        self.lastBlockHash = None
class TwisterScraper:
    """Collects Twister user profiles through the daemon's JSON-RPC interface
    and keeps them in a pickled ``TwisterDb`` file."""

    # Age after which a stored profile is fetched again.
    CACHE_MAX_DURATION = datetime.timedelta(7)  # ([days [, seconds [,microseconds]]])

    def __init__(self, dbPath, server='localhost', port=28332, user='user', password='pwd', protocol='http'):
        """
        :param dbPath: path of the pickled database; created when missing.
        :param server: host of the RPC server.
        :param port: port of the RPC server.
        :param user: RPC user name.
        :param password: RPC password.
        :param protocol: URL scheme used to reach the RPC server.
        """
        self.serverUrl = '{protocol}://{user}:{passwd}@{server}:{port}'.format(protocol=protocol,
                                                                               server=server,
                                                                               port=port,
                                                                               user=user,
                                                                               passwd=password)
        self.twister = AuthServiceProxy(self.serverUrl)
        self.dbFile = dbPath
        # NOTE(review): pickle must only ever be fed trusted data; this file
        # is expected to be written by this program alone.
        try:
            with open(self.dbFile, 'rb') as dbFile:
                self.db = pickle.load(dbFile)
        except FileNotFoundError:
            self.db = TwisterDb()
            self.saveDb()

    def get_user(self, username):
        """Return the stored ``User`` called *username*, or None if unknown."""
        return self.db.users.get(username)

    def scrape_users(self):
        """Walk the whole block chain for usernames, then fetch the profile
        of every new user and of every user whose stored profile is older
        than ``CACHE_MAX_DURATION``. The database is saved after each fetch.
        """
        # Always start from the first block.
        nextHash = self.twister.getblockhash(0)

        usernames = set()
        index = 0
        while True:
            block = self.twister.getblock(nextHash)
            self.db.lastBlockHash = block['hash']
            usernames.update(block['usernames'])
            if len(usernames) > index:
                index = len(usernames)
                print('Found {0} usernames'.format(index))
            if "nextblockhash" not in block:
                break
            nextHash = block["nextblockhash"]

        if len(self.db.users) == 0:
            # first run: register a placeholder for every username so the
            # database knows all of them even if fetching is interrupted.
            # (These placeholders used to be created and then thrown away.)
            for u in usernames:
                blankUser = User()
                blankUser.username = u
                # Oldest possible time: always considered stale below.
                blankUser.updateTime = datetime.datetime.min
                self.db.users[u] = blankUser
            self.saveDb()

        # NOTE(review): the right-hand side of this assignment was missing
        # from the reviewed text; the current local time is a reconstruction
        # that matches the timedelta arithmetic below - confirm.
        now = datetime.datetime.now()
        old_users = set(self.db.users)
        need_refresh = {u for u in old_users
                        if (self.db.users[u].updateTime + self.CACHE_MAX_DURATION) < now}
        to_fetch = (usernames - old_users) | need_refresh

        total_to_fetch = len(to_fetch)
        # Count from 1 so the last line reads "(total of total)".
        for n, u in enumerate(to_fetch, start=1):
            try:
                user = self._fetch_user_details(u)
            except HTTPException as e:
                print("Connection error retrieving user {0}: {1}".format(u, str(e)))
                continue
            self.db.users[user.username] = user
            # Saved after every user so an interrupted run keeps its progress.
            self.saveDb()
            print("({line} of {total}) Fetched {user} ...".format(user=u, line=n, total=total_to_fetch))

    def saveDb(self):
        """Pickle the database to ``self.dbFile``."""
        with open(self.dbFile, 'wb') as dbFile:
            pickle.dump(self.db, dbFile)

    def get_posts_since(self, username, dateObj, maxNum=1000):
        """Return *username*'s posts made at or after *dateObj*, oldest first.

        :param username: account whose posts are requested.
        :param dateObj: datetime (local time) marking the start of the range.
        :param maxNum: how many posts to request from the daemon.
        :return: list of post dicts sorted by ascending post time.
        """
        # Function-scope import: only this method needs it.
        from bisect import bisect_left

        since_epoch = time.mktime(dateObj.timetuple())
        all_posts = self.twister.getposts(maxNum, [{'username': username}])
        all_posts = sorted(all_posts, key=lambda x: x['userpost']['time'])
        # Binary search for the first post that is not older than the
        # cut-off. (The previous hand-written search never ran, because its
        # loop condition "0 > index > len(all_posts)" can never hold.)
        post_times = [post['userpost']['time'] for post in all_posts]
        return all_posts[bisect_left(post_times, since_epoch):]

    @staticmethod
    def _dht_value(records):
        """Return the 'v' payload of a single-record dhtget answer, else None."""
        if len(records) != 1:
            return None
        return records[0].get('p', {}).get('v')

    def _fetch_user_details(self, username):
        """Build a fresh ``User`` for *username* from the avatar and profile
        DHT records and the list of followed accounts."""
        user = User()
        user.username = username

        avatar = self._dht_value(self.twister.dhtget(username, "avatar", "s"))
        if avatar is not None:
            user.avatar = avatar

        profile = self._dht_value(self.twister.dhtget(username, 'profile', 's'))
        if profile is not None:
            for key in ['location', 'url', 'bio', 'fullname']:
                if key in profile:
                    setattr(user, key, profile[key])

        user.following = self.twister.getfollowing(username)
        # NOTE(review): the right-hand side of this assignment was missing
        # from the reviewed text; the current local time is a reconstruction
        # consistent with the staleness check in scrape_users - confirm.
        user.updateTime = datetime.datetime.now()
        return user
if __name__ == '__main__':
    # Script entry point: refresh the default user database from the local
    # daemon and report how many users it holds afterwards.
    db_path = expanduser('~/.twister/_localusersdb')
    ts = TwisterScraper(db_path, 'localhost')
    ts.scrape_users()
    user_count = len(ts.db.users)
    print("Total users in db: {0}".format(user_count))
Reference in new issue