diff --git a/src/searchengine/nova/engines/torrentreactor.py b/src/searchengine/nova/engines/torrentreactor.py index b09953edb..ee74f4e75 100644 --- a/src/searchengine/nova/engines/torrentreactor.py +++ b/src/searchengine/nova/engines/torrentreactor.py @@ -30,7 +30,7 @@ from novaprinter import prettyPrinter from helpers import retrieve_url, download_file from urllib2 import HTTPError -import sgmllib +from HTMLParser import HTMLParser import urllib import re @@ -42,14 +42,19 @@ class torrentreactor(object): def download_torrent(self, info): print download_file(info) - class SimpleSGMLParser(sgmllib.SGMLParser): + class SimpleHTMLParser(HTMLParser): def __init__(self, results, url, *args): - sgmllib.SGMLParser.__init__(self) + HTMLParser.__init__(self) self.td_counter = None self.current_item = None self.results = results self.id = None self.url = url + self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td } + + def handle_starttag(self, tag, attrs): + if tag in self.dispatcher: + self.dispatcher[tag](attrs) def start_a(self, attr): params = dict(attr) @@ -93,14 +98,14 @@ class torrentreactor(object): def __init__(self): self.results = [] - self.parser = self.SimpleSGMLParser(self.results, self.url) + self.parser = self.SimpleHTMLParser(self.results, self.url) def search(self, what, cat='all'): i = 0 dat = '' while True and i<11: results = [] - parser = self.SimpleSGMLParser(results, self.url) + parser = self.SimpleHTMLParser(results, self.url) try: dat = retrieve_url(self.url+'/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat])) diff --git a/src/searchengine/nova3/engines/torrentreactor.py b/src/searchengine/nova3/engines/torrentreactor.py index 7ae7eb11d..a099ec5ab 100644 --- a/src/searchengine/nova3/engines/torrentreactor.py +++ b/src/searchengine/nova3/engines/torrentreactor.py @@ -30,7 +30,7 @@ from novaprinter import prettyPrinter from helpers import retrieve_url, download_file from urllib import error, parse -import sgmllib3 +from html.parser import HTMLParser import re class torrentreactor(object): @@ -41,14 +41,19 @@ class torrentreactor(object): def download_torrent(self, info): print(download_file(info)) - class SimpleSGMLParser(sgmllib3.SGMLParser): + class SimpleHTMLParser(HTMLParser): def __init__(self, results, url, *args): - sgmllib3.SGMLParser.__init__(self) + HTMLParser.__init__(self) self.td_counter = None self.current_item = None self.results = results self.id = None self.url = url + self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td } + + def handle_starttag(self, tag, attrs): + if tag in self.dispatcher: + self.dispatcher[tag](attrs) def start_a(self, attr): params = dict(attr) @@ -92,14 +97,14 @@ class torrentreactor(object): def __init__(self): self.results = [] - self.parser = self.SimpleSGMLParser(self.results, self.url) + self.parser = self.SimpleHTMLParser(self.results, self.url) def search(self, what, cat='all'): i = 0 dat = '' while True and i<11: results = [] - parser = self.SimpleSGMLParser(results, self.url) + parser = self.SimpleHTMLParser(results, self.url) try: dat = retrieve_url(self.url+'/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat]))