Browse Source

Replace deprecated sgmllib with HTMLParser/html.parser

adaptive-webui-19844
Bruno Barbieri 10 years ago
parent
commit
698e5ef0f1
  1. 15
      src/searchengine/nova/engines/torrentreactor.py
  2. 15
      src/searchengine/nova3/engines/torrentreactor.py

15
src/searchengine/nova/engines/torrentreactor.py

@@ -30,7 +30,7 @@
from novaprinter import prettyPrinter from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file from helpers import retrieve_url, download_file
from urllib2 import HTTPError from urllib2 import HTTPError
import sgmllib from HTMLParser import HTMLParser
import urllib import urllib
import re import re
@@ -42,14 +42,19 @@ class torrentreactor(object):
def download_torrent(self, info): def download_torrent(self, info):
print download_file(info) print download_file(info)
class SimpleSGMLParser(sgmllib.SGMLParser): class SimpleHTMLParser(HTMLParser):
def __init__(self, results, url, *args): def __init__(self, results, url, *args):
sgmllib.SGMLParser.__init__(self) HTMLParser.__init__(self)
self.td_counter = None self.td_counter = None
self.current_item = None self.current_item = None
self.results = results self.results = results
self.id = None self.id = None
self.url = url self.url = url
self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td }
def handle_starttag(self, tag, attrs):
if tag in self.dispatcher:
self.dispatcher[tag](attrs)
def start_a(self, attr): def start_a(self, attr):
params = dict(attr) params = dict(attr)
@@ -93,14 +98,14 @@ class torrentreactor(object):
def __init__(self): def __init__(self):
self.results = [] self.results = []
self.parser = self.SimpleSGMLParser(self.results, self.url) self.parser = self.SimpleHTMLParser(self.results, self.url)
def search(self, what, cat='all'): def search(self, what, cat='all'):
i = 0 i = 0
dat = '' dat = ''
while True and i<11: while True and i<11:
results = [] results = []
parser = self.SimpleSGMLParser(results, self.url) parser = self.SimpleHTMLParser(results, self.url)
try: try:
dat = retrieve_url(self.url+'/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat])) dat = retrieve_url(self.url+'/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat]))

15
src/searchengine/nova3/engines/torrentreactor.py

@@ -30,7 +30,7 @@
from novaprinter import prettyPrinter from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file from helpers import retrieve_url, download_file
from urllib import error, parse from urllib import error, parse
import sgmllib3 from html.parser import HTMLParser
import re import re
class torrentreactor(object): class torrentreactor(object):
@@ -41,14 +41,19 @@ class torrentreactor(object):
def download_torrent(self, info): def download_torrent(self, info):
print(download_file(info)) print(download_file(info))
class SimpleSGMLParser(sgmllib3.SGMLParser): class SimpleHTMLParser(HTMLParser):
def __init__(self, results, url, *args): def __init__(self, results, url, *args):
sgmllib3.SGMLParser.__init__(self) HTMLParser.__init__(self)
self.td_counter = None self.td_counter = None
self.current_item = None self.current_item = None
self.results = results self.results = results
self.id = None self.id = None
self.url = url self.url = url
self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td }
def handle_starttag(self, tag, attrs):
if tag in self.dispatcher:
self.dispatcher[tag](attrs)
def start_a(self, attr): def start_a(self, attr):
params = dict(attr) params = dict(attr)
@@ -92,14 +97,14 @@ class torrentreactor(object):
def __init__(self): def __init__(self):
self.results = [] self.results = []
self.parser = self.SimpleSGMLParser(self.results, self.url) self.parser = self.SimpleHTMLParser(self.results, self.url)
def search(self, what, cat='all'): def search(self, what, cat='all'):
i = 0 i = 0
dat = '' dat = ''
while True and i<11: while True and i<11:
results = [] results = []
parser = self.SimpleSGMLParser(results, self.url) parser = self.SimpleHTMLParser(results, self.url)
try: try:
dat = retrieve_url(self.url+'/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat])) dat = retrieve_url(self.url+'/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat]))

Loading…
Cancel
Save