toyg
11 years ago
4 changed files with 488 additions and 0 deletions
@@ -0,0 +1,88 @@
# -*- coding: utf-8 -*-
from datetime import datetime
from string import Template
from os.path import expanduser

from twistscraper import TwisterScraper

__author__ = 'Giacomo Lacava'

# Load the HTML template as text so string.Template can substitute into it
with open("map.html", "r", encoding="utf-8") as mapTemplate:
    TEMPLATE = Template(mapTemplate.read())


def generate_map(userdb):
    ts = TwisterScraper(userdb)
    loc_users = [u for u in ts.db.users.values() if u.location != '']
    noLoc_user_num = len(ts.db.users) - len(loc_users)
    loc_users_fake_num = 0
    locDb = {}

    # first pass: group users by the location string they report
    for u in loc_users:
        if u.location in locDb:
            locDb[u.location]['users'].append(u.username)
        else:
            locData = u.locate()
            if locData is not None:
                locDb[u.location] = {'coordinates': locData,
                                     'users': [u.username]}
            else:
                loc_users_fake_num += 1

    # second pass: aggregate misspellings, i.e. different location strings
    # that geocode to the same coordinates
    done = []
    newLocDb = {}
    for loc, locDict in locDb.items():
        # find all elements with the same coordinates
        sameCoord = [(l, lObj['users']) for l, lObj in locDb.items()
                     if lObj['coordinates'] == locDict['coordinates']]
        if len(sameCoord) == 1:
            # only one spelling: copy it straight to the new dict
            newLocDb[loc] = locDict
        elif len(sameCoord) > 1:
            # several spellings map to the same coordinates:
            # keep the most popular one
            locMax = max(sameCoord, key=lambda x: len(x[1]))
            location = locMax[0]
            coordHash = '/'.join([str(locDict['coordinates']['lat']),
                                  str(locDict['coordinates']['lng'])])
            # if we haven't seen this set of coordinates yet...
            if coordHash not in done:
                # ... collect all users ...
                users = []
                for l, us in sameCoord:
                    users.extend(us)
                users.sort()

                # ... and add the aggregated result
                if location not in newLocDb:
                    newLocDb[location] = {}
                newLocDb[location]['users'] = users
                newLocDb[location]['coordinates'] = locDict['coordinates']
                done.append(coordHash)

    locStrings = []
    for k in newLocDb.keys():
        locStrings.append("['<h4>{name} - {numusers}</h4><small>{users}</small>', {lat}, {lng}]".format(
            name=k.replace("'", "&#39;"),  # escape quotes so the JS string literal doesn't break
            lat=newLocDb[k]['coordinates']['lat'],
            lng=newLocDb[k]['coordinates']['lng'],
            users=',<br />'.join(newLocDb[k]['users']),
            numusers=len(newLocDb[k]['users'])))
    locStrings.sort()
    return TEMPLATE.substitute(locations=',\n'.join(locStrings),
                               users_real_loc=len(loc_users),
                               users_fake_loc=loc_users_fake_num,
                               users_no_loc=noLoc_user_num,
                               timestamp=datetime.now().isoformat())


if __name__ == '__main__':
    html = generate_map(expanduser('~/.twister/_localusersdb'))
    with open(expanduser('~/twistermap.html'), 'wb') as tmf:
        tmf.write(html.encode('utf-8'))
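The generator and the map.html template below are glued together by string.Template: each entry in locStrings becomes one row of the JavaScript locations array, substituted into the $locations placeholder. A minimal sketch of that hand-off, using a made-up template string and sample data rather than the real files:

    from string import Template

    # stand-in for map.html; only the $locations placeholder is shown
    page = Template("var locations = [$locations];")

    # each row: [info-window HTML, latitude, longitude]
    rows = ["['<h4>London - 2</h4><small>alice,<br />bob</small>', 51.5074, -0.1278]"]

    print(page.substitute(locations=',\n'.join(rows)))
    # -> var locations = [['<h4>London - 2</h4><small>alice,<br />bob</small>', 51.5074, -0.1278]];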
@@ -0,0 +1,111 @@
<!DOCTYPE html>
<html>
<head>
    <meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
    <title>Map of Twister Users</title>
    <script src="http://maps.google.com/maps/api/js?sensor=false"></script>
    <script src="http://ajax.aspnetcdn.com/ajax/jQuery/jquery-1.10.1.min.js"></script>
    <style>
        body {
            font-family: Helvetica, Verdana, Arial, sans-serif;
        }

        h1, h4 {
            padding-bottom: 0;
            margin-bottom: 0;
        }
    </style>
</head>
<body>
<h1>Map of Twister Users</h1>

<p>
    <small>(as self-reported in profile)</small>
</p>

<div id="map" style="width: 600px; height: 400px;"></div>
<ul>
    <li>Updated at: $timestamp</li>
    <li>Users with realistic location: $users_real_loc</li>
    <li>Users with unrealistic location: $users_fake_loc</li>
    <li>Users without location: $users_no_loc</li>
</ul>
<div id="bottom">For any feedback, ping @toyg on Twister.</div>
<script type="text/javascript">
    // Define the locations: HTML content for the info window, latitude, longitude.
    // $locations is filled in by the Python generator via string.Template.
    var locations = [$locations];

    // Set up the different icons and shadows
    var iconURLPrefix = 'http://maps.google.com/mapfiles/ms/icons/';
    var icons = [
        iconURLPrefix + 'red-dot.png',
        iconURLPrefix + 'green-dot.png',
        iconURLPrefix + 'blue-dot.png',
        iconURLPrefix + 'orange-dot.png',
        iconURLPrefix + 'purple-dot.png',
        iconURLPrefix + 'pink-dot.png',
        iconURLPrefix + 'yellow-dot.png'
    ];
    var icons_length = icons.length;

    var shadow = {
        anchor: new google.maps.Point(15, 33),
        url: iconURLPrefix + 'msmarker.shadow.png'
    };

    var map = new google.maps.Map(document.getElementById('map'), {
        zoom: 10,
        center: new google.maps.LatLng(-37.92, 151.25),
        mapTypeId: google.maps.MapTypeId.ROADMAP,
        mapTypeControl: false,
        streetViewControl: false,
        panControl: false,
        zoomControlOptions: {
            position: google.maps.ControlPosition.LEFT_BOTTOM
        }
    });

    var infowindow = new google.maps.InfoWindow({
        maxWidth: 160
    });

    var marker;
    var markers = new Array();

    var iconCounter = 0;

    // Add the markers and info windows to the map
    for (var i = 0; i < locations.length; i++) {
        marker = new google.maps.Marker({
            position: new google.maps.LatLng(locations[i][1], locations[i][2]),
            map: map,
            icon: icons[iconCounter],
            shadow: shadow
        });

        markers.push(marker);

        google.maps.event.addListener(marker, 'click', (function (marker, i) {
            return function () {
                infowindow.setContent(locations[i][0]);
                infowindow.open(map, marker);
            }
        })(marker, i));

        // cycle through the icon colours
        iconCounter++;
        if (iconCounter >= icons_length) {
            iconCounter = 0;
        }
    }

    // Fit the viewport to the plotted markers ($$ renders as a literal $, i.e. jQuery)
    function AutoCenter() {
        var bounds = new google.maps.LatLngBounds();
        $$.each(markers, function (index, marker) {
            bounds.extend(marker.position);
        });
        map.fitBounds(bounds);
    }
    AutoCenter();
</script>
</body>
</html>
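Note that this template goes through Python's string.Template before it reaches the browser, so jQuery's $ has to be written as $$ (as in the AutoCenter function above), while $locations, $timestamp and the user counters are real placeholders. A quick sketch of that escaping behaviour:

    from string import Template

    # $$ collapses to a literal $, so the emitted JavaScript calls jQuery's $.each
    snippet = Template("$$.each(markers, fn); var locations = [$locations];")
    print(snippet.substitute(locations="['<h4>demo</h4>', 0, 0]"))
    # -> $.each(markers, fn); var locations = [['<h4>demo</h4>', 0, 0]];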
@@ -0,0 +1,100 @@
# -*- coding: utf-8 -*-
import pickle
from operator import attrgetter
from threading import Thread
from time import sleep
from os.path import expanduser

import feedparser

from twistscraper import TwisterScraper

__author__ = 'Giacomo Lacava'

GITHUB_REPO_URL = 'https://github.com/{user}/{repo}'
GITHUB_COMMIT_FEED_TEMPLATE = GITHUB_REPO_URL + '/commits/master.atom'

CORE_COMMIT_FEED = GITHUB_COMMIT_FEED_TEMPLATE.format(user='miguelfreitas', repo='twister-core')
HTML_COMMIT_FEED = GITHUB_COMMIT_FEED_TEMPLATE.format(user='miguelfreitas', repo='twister-html')
SEED_COMMIT_FEED = GITHUB_COMMIT_FEED_TEMPLATE.format(user='miguelfreitas', repo='twister-seeder')
CORE_REPO_URL = GITHUB_REPO_URL.format(user='miguelfreitas', repo='twister-core')
HTML_REPO_URL = GITHUB_REPO_URL.format(user='miguelfreitas', repo='twister-html')
SEED_REPO_URL = GITHUB_REPO_URL.format(user='miguelfreitas', repo='twister-seeder')


class TwisterMonitor(Thread):
    MESSAGE = 'Twister update: {msg} - Pull it now: {url}'

    def __init__(self, twister_scraper, username, repo_feed=CORE_COMMIT_FEED, repo_url=CORE_REPO_URL):
        # twister_scraper is a TwisterScraper instance; we only use its RPC proxy
        Thread.__init__(self)
        self.ts = twister_scraper
        self.cacheFile = expanduser('~/.twister/_twm_cache')
        self.cache = {}
        self.username = username
        self.feed = repo_feed
        self.repo = repo_url
        self.loadCache()

    def loadCache(self):
        try:
            with open(self.cacheFile, 'rb') as f:
                self.cache = pickle.load(f)
        except FileNotFoundError:
            self.cache = {}

    def get_commits(self):
        print("Fetching {0}".format(self.feed))
        f = feedparser.parse(self.feed)
        if f['bozo'] == 1:
            raise Exception('Bad feed! Status: {status} - Error: {err}'.format(status=f.status, err=f.bozo_exception))

        if self.feed not in self.cache:
            self.cache[self.feed] = []

        # post commits oldest-first, skipping anything already announced
        f.entries.sort(key=attrgetter('updated_parsed'))
        for entry in f.entries:
            print("Checking {0}".format(entry.id))
            if entry.id not in self.cache[self.feed]:
                # trim the commit title until the whole post fits in 140 characters
                message = TwisterMonitor.MESSAGE.format(msg=entry.title, url=self.repo)
                cut = 1
                while len(message) >= 140:
                    message = TwisterMonitor.MESSAGE.format(msg=(entry.title[:-cut] + '...'), url=self.repo)
                    cut += 1

                print("Checking last post key...")
                key = 1
                lastpost = self.ts.twister.getposts(1, [{"username": self.username}])
                if len(lastpost) == 1:
                    key = lastpost[0]['userpost']['k'] + 1
                print("Posting '{0}' with key {1}...".format(message, key))
                self.ts.twister.newpostmsg(self.username, key, message)
                print("Posted!")
                self.cache[self.feed].append(entry.id)
                self.saveCache()
                sleep(10 * 60)

    def saveCache(self):
        with open(self.cacheFile, 'wb') as f:
            pickle.dump(self.cache, f)

    def run(self):
        while True:
            try:
                self.get_commits()
            except Exception as e:
                print("Exception while checking the feed:")
                print(e)
            sleep(60 * 60)  # in seconds


if __name__ == '__main__':
    botID = 'twmonitor'
    ts = TwisterScraper(expanduser('~/.twister/_localusersdb'))
    monitor = TwisterMonitor(ts, botID, CORE_COMMIT_FEED, CORE_REPO_URL)
    monitor.start()
    sleep(4 * 60)
    monitor_ui = TwisterMonitor(ts, botID, HTML_COMMIT_FEED, HTML_REPO_URL)
    monitor_ui.start()
    sleep(6 * 60)
    monitor_seed = TwisterMonitor(ts, botID, SEED_COMMIT_FEED, SEED_REPO_URL)
    monitor_seed.start()
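The only subtle part of the monitor is the truncation loop in get_commits: it keeps shaving characters off the commit title until the formatted post drops under Twister's 140-character limit. A standalone sketch of that behaviour, using a hypothetical helper name and a made-up title:

    MESSAGE = 'Twister update: {msg} - Pull it now: {url}'

    def shorten(title, url):
        # hypothetical helper reproducing the loop from TwisterMonitor.get_commits
        message = MESSAGE.format(msg=title, url=url)
        cut = 1
        while len(message) >= 140:
            message = MESSAGE.format(msg=(title[:-cut] + '...'), url=url)
            cut += 1
        return message

    print(shorten('a' * 200, 'https://github.com/miguelfreitas/twister-core'))  # ends in '...', under 140 chars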
@@ -0,0 +1,189 @@
# -*- coding: utf-8 -*-
import json
import pickle
import sys
import time
import datetime
from http.client import HTTPException
from urllib.parse import urlencode
from urllib.request import urlopen
from os.path import expanduser

__author__ = 'Giacomo Lacava'

cacheTimeout = 24 * 3600

try:
    from bitcoinrpc.authproxy import AuthServiceProxy
except ImportError:
    sys.stderr.write("Error: install python-bitcoinrpc (https://github.com/jgarzik/python-bitcoinrpc)\n")
    sys.exit(-1)


class User:
    username = ""
    avatar = ""
    fullname = ""
    location = ""
    coords = None
    bio = ""
    url = ""
    updateTime = 0
    following = []

    _GMAP_URL = "https://maps.googleapis.com/maps/api/geocode/json?sensor=false&{query}"

    def locate(self):
        """
        Query the Google geocoding API and cache the coordinates on the user.
        Should keep working until we start having more than 50 new locatable users per hour.
        :return: dict with coordinates, e.g. { 'lat': 12.345, 'lng': 13.245 }, or None
        """
        if self.location == '':
            return None
        if self.coords is not None:
            return self.coords

        loc = urlencode({'address': self.location})
        urldoc = urlopen(User._GMAP_URL.format(query=loc))
        jsObj = json.loads(urldoc.read().decode('utf-8'))
        if len(jsObj['results']) > 0:
            # discard street-level / commercial results
            locTypes = jsObj['results'][0]['address_components'][0]['types']
            if not any(t in locTypes for t in ('premise', 'route', 'establishment', 'subpremise')):
                self.coords = jsObj['results'][0]['geometry']['location']
                return self.coords
        # still here? it's all rubbish
        return None


class TwisterDb:
    def __init__(self):
        self.lastBlockHash = None
        self.users = {}


class TwisterScraper:
    CACHE_MAX_DURATION = datetime.timedelta(7)  # timedelta(days[, seconds[, microseconds]])

    def __init__(self, dbPath, server='localhost', port=28332, user='user', password='pwd', protocol='http'):
        self.serverUrl = '{protocol}://{user}:{passwd}@{server}:{port}'.format(protocol=protocol,
                                                                               server=server,
                                                                               port=port,
                                                                               user=user,
                                                                               passwd=password)
        self.twister = AuthServiceProxy(self.serverUrl)
        self.dbFile = dbPath

        try:
            with open(self.dbFile, 'rb') as dbFile:
                self.db = pickle.load(dbFile)
        except FileNotFoundError:
            self.db = TwisterDb()
            self.saveDb()

    def get_user(self, username):
        if username in self.db.users:
            return self.db.users[username]
        else:
            return None

    def scrape_users(self):
        nextHash = 0
        #if self.db.lastBlockHash is not None and len(self.db.users) != 0:
        #    nextHash = self.db.lastBlockHash
        #else:
        nextHash = self.twister.getblockhash(0)

        # walk the whole blockchain and collect every registered username
        usernames = set()
        index = 0
        while True:
            block = self.twister.getblock(nextHash)
            self.db.lastBlockHash = block['hash']
            usernames = usernames.union(set(block['usernames']))
            if len(usernames) > index:
                index = len(usernames)
                print('Found {0} usernames'.format(index))
            if "nextblockhash" in block:
                nextHash = block["nextblockhash"]
            else:
                break

        if len(self.db.users) == 0:
            # first run: seed the database with blank users whose updateTime
            # is old enough to force a refresh below
            for u in usernames:
                blankUser = User()
                blankUser.username = u
                blankUser.updateTime = datetime.datetime.now() - self.CACHE_MAX_DURATION
                self.db.users[u] = blankUser
            self.saveDb()

        now = datetime.datetime.now()
        old_users = self.db.users.keys()
        need_refresh = [u for u in old_users if (self.db.users[u].updateTime + self.CACHE_MAX_DURATION) < now]
        new_users = usernames.difference(set(old_users))
        to_fetch = new_users.union(set(need_refresh))

        total_to_fetch = len(to_fetch)
        for n, u in enumerate(to_fetch):
            try:
                user = self._fetch_user_details(u)
                self.db.users[user.username] = user
                self.saveDb()
                print("({line} of {total}) Fetched {user} ...".format(user=u, line=n + 1, total=total_to_fetch))
            except HTTPException as e:
                print("Connection error retrieving user {0}: {1}".format(u, str(e)))

    def saveDb(self):
        with open(self.dbFile, 'wb') as dbFile:
            pickle.dump(self.db, dbFile)

    def get_posts_since(self, username, dateObj, maxNum=1000):
        # fetch the latest posts for the user and return only those newer than dateObj
        since_epoch = time.mktime(dateObj.timetuple())
        all_posts = self.twister.getposts(maxNum, [{'username': username}])
        all_posts = sorted(all_posts, key=lambda x: x['userpost']['time'])
        return [p for p in all_posts if p['userpost']['time'] >= since_epoch]

    def _fetch_user_details(self, username):
        user = User()
        user.username = username

        avatarData = self.twister.dhtget(username, "avatar", "s")
        if len(avatarData) == 1 and 'p' in avatarData[0] and 'v' in avatarData[0]['p']:
            user.avatar = avatarData[0]['p']['v']

        profileData = self.twister.dhtget(username, 'profile', 's')
        if len(profileData) == 1 and 'p' in profileData[0] and 'v' in profileData[0]['p']:
            profile = profileData[0]['p']['v']
            for key in ['location', 'url', 'bio', 'fullname']:
                if key in profile:
                    setattr(user, key, profile[key])

        user.following = self.twister.getfollowing(username)

        user.updateTime = datetime.datetime.now()
        return user


if __name__ == '__main__':
    ts = TwisterScraper(expanduser('~/.twister/_localusersdb'), 'localhost')
    ts.scrape_users()
    print("Total users in db: {0}".format(len(ts.db.users)))