Browse Source

[search engine] Update Legit Torrents to remove sgmllib

adaptive-webui-19844
DoumanAsh 10 years ago
parent
commit
033817f70b
  1. 128
      src/searchengine/nova/engines/legittorrents.py
  2. 2
      src/searchengine/nova/engines/versions.txt
  3. 128
      src/searchengine/nova3/engines/legittorrents.py
  4. 2
      src/searchengine/nova3/engines/versions.txt

128
src/searchengine/nova/engines/legittorrents.py

@ -1,5 +1,6 @@
#VERSION: 1.05 #VERSION: 2.00
#AUTHORS: Christophe Dumez (chris@qbittorrent.org) #AUTHORS: Christophe Dumez (chris@qbittorrent.org)
# Douman (custparasite@gmx.se)
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met: # modification, are permitted provided that the following conditions are met:
@ -28,78 +29,73 @@
from novaprinter import prettyPrinter from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file from helpers import retrieve_url, download_file
import sgmllib from HTMLParser import HTMLParser
import re from re import compile as re_compile
class legittorrents(object): class legittorrents(object):
url = 'http://www.legittorrents.info' url = 'http://www.legittorrents.info'
name = 'Legit Torrents' name = 'Legit Torrents'
supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'} supported_categories = {'all': '0', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'}
def download_torrent(self, info): def download_torrent(self, info):
print download_file(info) print(download_file(info))
class SimpleSGMLParser(sgmllib.SGMLParser): class MyHtmlParseWithBlackJack(HTMLParser):
def __init__(self, results, url, *args): """ Parser class """
sgmllib.SGMLParser.__init__(self) def __init__(self, url):
self.url = url HTMLParser.__init__(self)
self.td_counter = None self.url = url
self.current_item = None self.current_item = None
self.start_name = False self.save_item_key = None
self.results = results
def start_a(self, attr): def handle_starttag(self, tag, attrs):
params = dict(attr) """ Parser's start tag handler """
if params.has_key('href') and params['href'].startswith('download.php?'): if self.current_item:
self.current_item['link'] = self.url + '/' + params['href'].strip() params = dict(attrs)
elif params.has_key('href') and params['href'].startswith('index.php?page=torrent-details'): if tag == "a":
self.current_item = {} link = params["href"]
self.td_counter = 0 if link.startswith("index") and "title" in params:
self.current_item['desc_link'] = self.url + '/' + params['href'].strip() #description link
self.current_item["name"] = params["title"][14:]
self.current_item["desc_link"] = "/".join((self.url, link))
elif link.startswith("download"):
self.current_item["link"] = "/".join((self.url, link))
elif tag == "td":
if "class" in params and params["class"].startswith("#FF"):
self.save_item_key = "leech" if "seeds" in self.current_item else "seeds"
def handle_data(self, data): elif tag == "tr":
if self.td_counter == 0: self.current_item = {}
if not self.current_item.has_key('name'): self.current_item["size"] = ""
self.current_item['name'] = data.strip() self.current_item["engine_url"] = self.url
elif self.td_counter == 3:
if not self.current_item.has_key('seeds'):
self.current_item['seeds'] = ''
self.current_item['seeds']+= data.strip()
elif self.td_counter == 4:
if not self.current_item.has_key('leech'):
self.current_item['leech'] = ''
self.current_item['leech']+= data.strip()
def start_td(self,attr): def handle_endtag(self, tag):
if isinstance(self.td_counter,int): """ Parser's end tag handler """
self.td_counter += 1 if self.current_item and tag == "tr":
if self.td_counter > 5: if len(self.current_item) > 4:
self.td_counter = None prettyPrinter(self.current_item)
# Display item self.current_item = None
if self.current_item:
self.current_item['engine_url'] = self.url def handle_data(self, data):
if not self.current_item['seeds'].isdigit(): """ Parser's data handler """
self.current_item['seeds'] = 0 if self.save_item_key:
if not self.current_item['leech'].isdigit(): self.current_item[self.save_item_key] = data.strip()
self.current_item['leech'] = 0 self.save_item_key = None
self.current_item['size'] = ''
prettyPrinter(self.current_item) def search(self, what, cat='all'):
self.results.append('a') """ Performs search """
query = "".join((self.url, "/index.php?page=torrents&search=", what, "&category=", self.supported_categories.get(cat, '0'), "&active=1"))
get_table = re_compile('(?s)<table\sclass="lista".*>(.*)</table>')
data = get_table.search(retrieve_url(query)).group(0)
#extract first ten pages of next results
next_pages = re_compile('(?m)<option value="(.*)">[0-9]+</option>')
next_pages = ["".join((self.url, page)) for page in next_pages.findall(data)[:10]]
def search(self, what, cat='all'): parser = self.MyHtmlParseWithBlackJack(self.url)
ret = [] parser.feed(data)
i = 1
while True and i<11:
results = []
parser = self.SimpleSGMLParser(results, self.url)
dat = retrieve_url(self.url+'/index.php?page=torrents&search=%s&category=%s&active=1&order=3&by=2&pages=%d'%(what, self.supported_categories[cat], i))
results_re = re.compile('(?s)<table width="100%" class="lista">.*')
for match in results_re.finditer(dat):
res_tab = match.group(0)
parser.feed(res_tab)
parser.close() parser.close()
break
if len(results) <= 0:
break
i += 1
for page in next_pages:
parser.feed(get_table.search(retrieve_url(page)).group(0))
parser.close()

2
src/searchengine/nova/engines/versions.txt

@ -2,7 +2,7 @@ btdigg: 1.25
demonoid: 1.1 demonoid: 1.1
extratorrent: 2.0 extratorrent: 2.0
kickasstorrents: 1.27 kickasstorrents: 1.27
legittorrents: 1.05 legittorrents: 2.00
mininova: 2.00 mininova: 2.00
piratebay: 2.11 piratebay: 2.11
torrentreactor: 1.36 torrentreactor: 1.36

128
src/searchengine/nova3/engines/legittorrents.py

@ -1,5 +1,6 @@
#VERSION: 1.05 #VERSION: 2.00
#AUTHORS: Christophe Dumez (chris@qbittorrent.org) #AUTHORS: Christophe Dumez (chris@qbittorrent.org)
# Douman (custparasite@gmx.se)
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met: # modification, are permitted provided that the following conditions are met:
@ -28,78 +29,73 @@
from novaprinter import prettyPrinter from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file from helpers import retrieve_url, download_file
import sgmllib3 as sgmllib from html.parser import HTMLParser
import re from re import compile as re_compile
class legittorrents(object): class legittorrents(object):
url = 'http://www.legittorrents.info' url = 'http://www.legittorrents.info'
name = 'Legit Torrents' name = 'Legit Torrents'
supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'} supported_categories = {'all': '0', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'}
def download_torrent(self, info): def download_torrent(self, info):
print(download_file(info)) print(download_file(info))
class SimpleSGMLParser(sgmllib.SGMLParser): class MyHtmlParseWithBlackJack(HTMLParser):
def __init__(self, results, url, *args): """ Parser class """
sgmllib.SGMLParser.__init__(self) def __init__(self, url):
self.url = url HTMLParser.__init__(self)
self.td_counter = None self.url = url
self.current_item = None self.current_item = None
self.start_name = False self.save_item_key = None
self.results = results
def start_a(self, attr): def handle_starttag(self, tag, attrs):
params = dict(attr) """ Parser's start tag handler """
if 'href' in params and params['href'].startswith('download.php?'): if self.current_item:
self.current_item['link'] = self.url + '/' + params['href'].strip() params = dict(attrs)
elif 'href' in params and params['href'].startswith('index.php?page=torrent-details'): if tag == "a":
self.current_item = {} link = params["href"]
self.td_counter = 0 if link.startswith("index") and "title" in params:
self.current_item['desc_link'] = self.url + '/' + params['href'].strip() #description link
self.current_item["name"] = params["title"][14:]
self.current_item["desc_link"] = "/".join((self.url, link))
elif link.startswith("download"):
self.current_item["link"] = "/".join((self.url, link))
elif tag == "td":
if "class" in params and params["class"].startswith("#FF"):
self.save_item_key = "leech" if "seeds" in self.current_item else "seeds"
def handle_data(self, data): elif tag == "tr":
if self.td_counter == 0: self.current_item = {}
if 'name' not in self.current_item: self.current_item["size"] = ""
self.current_item['name'] = data.strip() self.current_item["engine_url"] = self.url
elif self.td_counter == 3:
if 'seeds' not in self.current_item:
self.current_item['seeds'] = ''
self.current_item['seeds']+= data.strip()
elif self.td_counter == 4:
if 'leech' not in self.current_item:
self.current_item['leech'] = ''
self.current_item['leech']+= data.strip()
def start_td(self,attr): def handle_endtag(self, tag):
if isinstance(self.td_counter,int): """ Parser's end tag handler """
self.td_counter += 1 if self.current_item and tag == "tr":
if self.td_counter > 5: if len(self.current_item) > 4:
self.td_counter = None prettyPrinter(self.current_item)
# Display item self.current_item = None
if self.current_item:
self.current_item['engine_url'] = self.url def handle_data(self, data):
if not self.current_item['seeds'].isdigit(): """ Parser's data handler """
self.current_item['seeds'] = 0 if self.save_item_key:
if not self.current_item['leech'].isdigit(): self.current_item[self.save_item_key] = data.strip()
self.current_item['leech'] = 0 self.save_item_key = None
self.current_item['size'] = ''
prettyPrinter(self.current_item) def search(self, what, cat='all'):
self.results.append('a') """ Performs search """
query = "".join((self.url, "/index.php?page=torrents&search=", what, "&category=", self.supported_categories.get(cat, '0'), "&active=1"))
get_table = re_compile('(?s)<table\sclass="lista".*>(.*)</table>')
data = get_table.search(retrieve_url(query)).group(0)
#extract first ten pages of next results
next_pages = re_compile('(?m)<option value="(.*)">[0-9]+</option>')
next_pages = ["".join((self.url, page)) for page in next_pages.findall(data)[:10]]
def search(self, what, cat='all'): parser = self.MyHtmlParseWithBlackJack(self.url)
ret = [] parser.feed(data)
i = 1
while True and i<11:
results = []
parser = self.SimpleSGMLParser(results, self.url)
dat = retrieve_url(self.url+'/index.php?page=torrents&search=%s&category=%s&active=1&order=3&by=2&pages=%d'%(what, self.supported_categories[cat], i))
results_re = re.compile('(?s)<table width="100%" class="lista">.*')
for match in results_re.finditer(dat):
res_tab = match.group(0)
parser.feed(res_tab)
parser.close() parser.close()
break
if len(results) <= 0:
break
i += 1
for page in next_pages:
parser.feed(get_table.search(retrieve_url(page)).group(0))
parser.close()

2
src/searchengine/nova3/engines/versions.txt

@ -2,7 +2,7 @@ btdigg: 1.25
demonoid: 1.1 demonoid: 1.1
extratorrent: 2.0 extratorrent: 2.0
kickasstorrents: 1.27 kickasstorrents: 1.27
legittorrents: 1.05 legittorrents: 2.00
mininova: 2.00 mininova: 2.00
piratebay: 2.11 piratebay: 2.11
torrentreactor: 1.36 torrentreactor: 1.36

Loading…
Cancel
Save