Browse Source

fixed bad handling of cache through Borg, revamped feed handling

master
toyg 11 years ago
parent
commit
09ef85f862
  1. 89
      twistmonitor.py

89
twistmonitor.py

@ -7,58 +7,81 @@ from os.path import expanduser
import feedparser import feedparser
from twistscraper import TwisterScraper, GeoLocationService, User from twistscraper import TwisterScraper
__author__ = 'Giacomo Lacava' __author__ = 'Giacomo Lacava'
GITHUB_REPO_URL = 'https://github.com/{user}/{repo}'
GITHUB_COMMIT_FEED_TEMPLATE = GITHUB_REPO_URL + '/commits/master.atom'
CORE_COMMIT_FEED = GITHUB_COMMIT_FEED_TEMPLATE.format(user='miguelfreitas', repo='twister-core') class FeedCache:
HTML_COMMIT_FEED = GITHUB_COMMIT_FEED_TEMPLATE.format(user='miguelfreitas', repo='twister-html') _shared_state = {}
SEED_COMMIT_FEED = GITHUB_COMMIT_FEED_TEMPLATE.format(user='miguelfreitas', repo='twister-seeder')
CORE_REPO_URL = GITHUB_REPO_URL.format(user='miguelfreitas', repo='twister-core')
HTML_REPO_URL = GITHUB_REPO_URL.format(user='miguelfreitas', repo='twister-html')
SEED_REPO_URL = GITHUB_REPO_URL.format(user='miguelfreitas', repo='twister-seeder')
def __init__(self):
class TwisterMonitor(Thread): self.__dict__ = self._shared_state
MESSAGE = 'Twister update: {msg} - Pull it now: {url}' if len(self.__dict__) == 0:
# first instance setup
def __init__(self, twister_monitor, username, repo_feed=CORE_COMMIT_FEED, repo_url=CORE_REPO_URL):
Thread.__init__(self)
self.ts = twister_monitor
self.cacheFile = expanduser('~/.twister/_twm_cache') self.cacheFile = expanduser('~/.twister/_twm_cache')
self.cache = {} self.cache = {}
self.username = username self._load_cache()
self.feed = repo_feed
self.repo = repo_url
self.loadCache()
def loadCache(self): def _load_cache(self):
try: try:
with open(self.cacheFile, 'rb') as f: with open(self.cacheFile, 'rb') as f:
self.cache = pickle.load(f) self.cache = pickle.load(f)
except FileNotFoundError: except FileNotFoundError:
self.cache = {} self.cache = {}
def _save_cache(self):
with open(self.cacheFile, 'wb') as f:
pickle.dump(self.cache, f)
def get_feed_cache(self, feedUrl):
if feedUrl not in self.cache:
self.cache[feedUrl] = []
return self.cache[feedUrl]
def add_entry(self, feedUrl, entryID):
feed = self.get_feed_cache(feedUrl)
feed.append(entryID)
self._save_cache()
class TwisterMonitor(Thread):
MESSAGE = '{repo}: {msg} - {url}'
GITHUB_REPO_URL = 'https://github.com/{user}/{repo}'
GITHUB_COMMIT_FEED_TEMPLATE = GITHUB_REPO_URL + '/commits/master.atom'
def __init__(self, scraperObj, username, github_user, github_repo):
Thread.__init__(self)
self.ts = scraperObj
self.cacheObj = FeedCache()
self.username = username
self.feed = TwisterMonitor.GITHUB_COMMIT_FEED_TEMPLATE.format(user=github_user, repo=github_repo)
self.repo = TwisterMonitor.GITHUB_REPO_URL.format(user=github_user, repo=github_repo)
self.github_user = github_user
self.github_repo = github_repo
self.cache = self.cacheObj.get_feed_cache(self.feed)
def get_commits(self): def get_commits(self):
print("Fetching {0}".format(self.feed)) print("Fetching {0}".format(self.feed))
f = feedparser.parse(self.feed) f = feedparser.parse(self.feed)
if f['bozo'] == 1: if f['bozo'] == 1:
raise Exception('Bad feed! Status: {status} - Error {err}'.format(status=f.status, err=f.bozo_exception)) raise Exception('Bad feed! Status: {status} - Error {err}'.format(status=f.status, err=f.bozo_exception))
if self.feed not in self.cache:
self.cache[self.feed] = []
f.entries.sort(key=attrgetter('updated_parsed')) f.entries.sort(key=attrgetter('updated_parsed'))
for entry in f.entries: for entry in f.entries:
print("Checking {0}".format(entry.id)) print("Checking {0}".format(entry.id))
if entry.id not in self.cache[self.feed]: if entry.id not in self.cache:
message = TwisterMonitor.MESSAGE.format(msg=entry.title, url=self.repo) message = TwisterMonitor.MESSAGE.format(msg=entry.title,
url=self.repo,
repo=self.github_repo)
cut = 1 cut = 1
while len(message) >= 140: while len(message) >= 140:
message = TwisterMonitor.MESSAGE.format(msg=(entry.title[:-cut] + '...'), url=self.repo) message = TwisterMonitor.MESSAGE.format(msg=(entry.title[:-cut] + '...'),
url=self.repo,
repo=self.github_repo)
cut += 1 cut += 1
print("Checking last post key...") print("Checking last post key...")
@ -69,13 +92,9 @@ class TwisterMonitor(Thread):
print("Posting '{0}' with key {1}...".format(message, key)) print("Posting '{0}' with key {1}...".format(message, key))
self.ts.twister.newpostmsg(self.username, key, message) self.ts.twister.newpostmsg(self.username, key, message)
print("Posted!") print("Posted!")
self.cache[self.feed].append(entry.id) self.cacheObj.add_entry(self.feed, entry.id)
self.saveCache()
sleep(10 * 60) sleep(10 * 60)
def saveCache(self):
with open(self.cacheFile, 'wb') as f:
pickle.dump(self.cache, f)
def run(self): def run(self):
while True: while True:
@ -90,11 +109,11 @@ class TwisterMonitor(Thread):
if __name__ == '__main__': if __name__ == '__main__':
botID = 'twmonitor' botID = 'twmonitor'
ts = TwisterScraper(expanduser('~/.twister/_localusersdb')) ts = TwisterScraper(expanduser('~/.twister/_localusersdb'))
monitor = TwisterMonitor(ts, botID, CORE_COMMIT_FEED, CORE_REPO_URL) monitor = TwisterMonitor(ts, botID, "miguelfreitas", "twister-core")
monitor.start() monitor.start()
sleep(4 * 60) sleep(4 * 60)
monitor_ui = TwisterMonitor(ts, botID, HTML_COMMIT_FEED, HTML_REPO_URL) monitor_ui = TwisterMonitor(ts, botID, "miguelfreitas", "twister-html")
monitor_ui.start() monitor_ui.start()
sleep(6 * 60) sleep(6 * 60)
monitor_seed = TwisterMonitor(ts, botID, SEED_COMMIT_FEED, SEED_REPO_URL) monitor_seed = TwisterMonitor(ts, botID, "miguelfreitas", "twister-seeder")
monitor_seed.start() monitor_seed.start()

Loading…
Cancel
Save