# -*- coding: utf-8 -*- from datetime import datetime from string import Template from os.path import expanduser __author__ = 'Giacomo Lacava' from twistscraper import TwisterScraper TEMPLATE = None with open("map.html", "rb") as mapTemplate: TEMPLATE = Template(mapTemplate.read()) def generate_map(userdb): ts = TwisterScraper(userdb) loc_users = [u for u in ts.db.users.values() if u.location != ''] noLoc_user_num = len(ts.db.users) - len(loc_users) loc_users_fake_num = 0 locDb = {} for u in loc_users: if u.location in locDb: locDb[u.location]['users'].append(u.username) else: locData = u.locate() if locData is not None: locDb[u.location] = {} locDb[u.location]['coordinates'] = locData locDb[u.location]['users'] = [u.username] else: loc_users_fake_num += 1 # second pass to aggregate misspellings done = [] newLocDb = {} for loc, locDict in locDb.items(): # find all elements with same coordinates sameCoord = [(l, lObj['users']) for l, lObj in locDb.items() if lObj['coordinates'] == locDict['coordinates']] if len(sameCoord) == 1: # if only one element, copy it straight to the new dict newLocDb[loc] = locDict elif len(sameCoord) > 1: # if we're here, multiple locations have the same name # find the most popular name locMax = max(sameCoord, key=lambda x: len(x[1])) location = locMax[0] coordHash = '/'.join([str(locDict['coordinates']['lat']), str(locDict['coordinates']['lng'])]) # if we haven't seen this set of coordinates yet... if coordHash not in done: # ... collect all users ... users = [] for l, us in sameCoord: for u in us: users.append(u) users.sort() # ... and add the aggregated result if location not in newLocDb: newLocDb[location] = {} newLocDb[location]['users'] = users newLocDb[location]['coordinates'] = locDict['coordinates'] done.append(coordHash) locStrings = [] for k in newLocDb.keys(): locStrings.append("['