Browse Source

[search engine] engines update

adaptive-webui-19844
DoumanAsh 10 years ago
parent
commit
d6d0f422f5
  1. 211
      src/searchengine/nova/engines/extratorrent.py
  2. 6
      src/searchengine/nova/engines/legittorrents.py
  3. 200
      src/searchengine/nova/engines/mininova.py
  4. 154
      src/searchengine/nova/engines/torrentreactor.py
  5. 9
      src/searchengine/nova/engines/versions.txt
  6. 211
      src/searchengine/nova3/engines/extratorrent.py
  7. 6
      src/searchengine/nova3/engines/legittorrents.py
  8. 200
      src/searchengine/nova3/engines/mininova.py
  9. 153
      src/searchengine/nova3/engines/torrentreactor.py
  10. 9
      src/searchengine/nova3/engines/versions.txt

211
src/searchengine/nova/engines/extratorrent.py

@ -1,4 +1,4 @@
#VERSION: 1.2 #VERSION: 2.0
#AUTHORS: Christophe Dumez (chris@qbittorrent.org) #AUTHORS: Christophe Dumez (chris@qbittorrent.org)
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
@ -25,92 +25,135 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
from HTMLParser import HTMLParser
from httplib import HTTPConnection as http
#qBt
from novaprinter import prettyPrinter from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file from helpers import download_file
import sgmllib
import re
class extratorrent(object): class extratorrent(object):
url = 'http://extratorrent.cc' """ Search engine class """
name = 'extratorrent' url = 'http://extratorrent.cc'
supported_categories = {'all': '', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'books': '2', 'pictures': '6'} name = 'ExtraTorrent'
supported_categories = {'all' : '0',
def __init__(self): 'movies' : '4',
self.results = [] 'tv' : '8',
self.parser = self.SimpleSGMLParser(self.results, self.url) 'music' : '5',
'games' : '3',
def download_torrent(self, info): 'anime' : '1',
print download_file(info) 'software' : '7',
'books' : '2',
class SimpleSGMLParser(sgmllib.SGMLParser): 'pictures' : '6'}
def __init__(self, results, url, *args):
sgmllib.SGMLParser.__init__(self) def download_torrent(self, info):
self.url = url """ Downloader """
self.td_counter = None print(download_file(info))
self.current_item = None
self.start_name = False class MyHtmlParseWithBlackJack(HTMLParser):
self.results = results """ Parser class """
def __init__(self, list_searches, url):
def start_a(self, attr): HTMLParser.__init__(self)
params = dict(attr) self.url = url
#print params self.list_searches = list_searches
if params.has_key('href') and params['href'].startswith("/torrent_download/"): self.current_item = None
self.current_item = {} self.cur_item_name = None
self.td_counter = 0 self.pending_size = False
self.start_name = False self.next_queries = True
torrent_id = '/'.join(params['href'].split('/')[2:]) self.pending_next_queries = False
self.current_item['link']=self.url+'/download/'+torrent_id
elif params.has_key('href') and params['href'].startswith("/torrent/") and params['href'].endswith(".html"): def handle_starttag(self, tag, attrs):
self.current_item['desc_link'] = self.url + params['href'].strip() if self.current_item:
self.start_name = True if tag == "a":
params = dict(attrs)
def handle_data(self, data): link = params['href']
if self.td_counter == 2:
if not self.current_item.has_key('name') and self.start_name: if not link.startswith("/torrent"):
self.current_item['name'] = data.strip() return
elif self.td_counter == 3:
if not self.current_item.has_key('size'): if link[8] == "/":
self.current_item['size'] = '' #description
self.current_item['size']+= data.replace(" ", " ").strip() self.current_item["desc_link"] = "".join((self.url, link))
elif self.td_counter == 4: #remove view at the beginning
if not self.current_item.has_key('seeds'): self.current_item["name"] = params["title"][5:]
self.current_item['seeds'] = '' self.pending_size = True
self.current_item['seeds']+= data.strip() elif link[8] == "_":
elif self.td_counter == 5: #download link
if not self.current_item.has_key('leech'): link = link.replace("torrent_", "", 1)
self.current_item['leech'] = '' self.current_item["link"] = "".join((self.url, link))
self.current_item['leech']+= data.strip()
elif tag == "td":
def start_td(self,attr): if self.pending_size:
if isinstance(self.td_counter,int): self.cur_item_name = "size"
self.td_counter += 1 self.current_item["size"] = ""
if self.td_counter > 5: self.pending_size = False
self.td_counter = None
# Display item for attr in attrs:
if attr[0] == "class":
if attr[1][0] == "s":
self.cur_item_name = "seeds"
self.current_item["seeds"] = ""
elif attr[1][0] == "l":
self.cur_item_name = "leech"
self.current_item["leech"] = ""
break
elif tag == "tr":
for attr in attrs:
if attr[0] == "class" and attr[1].startswith("tl"):
self.current_item = dict()
self.current_item["engine_url"] = self.url
break
elif self.pending_next_queries:
if tag == "a":
params = dict(attrs)
self.list_searches.append(params['href'])
if params["title"] == "10":
self.pending_next_queries = False
else:
self.pending_next_queries = False
elif self.next_queries:
if tag == "b" and ("class", "pager_no_link") in attrs:
self.next_queries = False
self.pending_next_queries = True
def handle_data(self, data):
if self.cur_item_name:
temp = self.current_item[self.cur_item_name]
self.current_item[self.cur_item_name] = " ".join((temp, data))
#Due to utf-8 we need to handle data two times if there is space
if not self.cur_item_name == "size":
self.cur_item_name = None
def handle_endtag(self, tag):
if self.current_item: if self.current_item:
self.current_item['engine_url'] = self.url if tag == "tr":
if not self.current_item['seeds'].isdigit(): prettyPrinter(self.current_item)
self.current_item['seeds'] = 0 self.current_item = None
if not self.current_item['leech'].isdigit():
self.current_item['leech'] = 0 def search(self, what, cat="all"):
prettyPrinter(self.current_item) """ Performs search """
self.results.append('a') connection = http("extratorrent.cc")
def search(self, what, cat='all'): query = "".join(("/search/?new=1&search=", what, "&s_cat=", self.supported_categories[cat]))
ret = []
i = 1 connection.request("GET", query)
while True and i<11: response = connection.getresponse()
results = [] if response.status != 200:
parser = self.SimpleSGMLParser(results, self.url) return
dat = retrieve_url(self.url+'/advanced_search/?with=%s&s_cat=%s&page=%d'%(what, self.supported_categories[cat], i))
results_re = re.compile('(?s)<table class="tl"><thead>.*') list_searches = []
for match in results_re.finditer(dat): parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
res_tab = match.group(0) parser.feed(response.read().decode('utf-8'))
parser.feed(res_tab)
parser.close() parser.close()
break
if len(results) <= 0:
break
i += 1
for search_query in list_searches:
connection.request("GET", search_query)
response = connection.getresponse()
parser.feed(response.read().decode('utf-8'))
parser.close()
connection.close()
return

6
src/searchengine/nova/engines/legittorrents.py

@ -1,4 +1,4 @@
#VERSION: 1.02 #VERSION: 1.03
#AUTHORS: Christophe Dumez (chris@qbittorrent.org) #AUTHORS: Christophe Dumez (chris@qbittorrent.org)
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
@ -36,10 +36,6 @@ class legittorrents(object):
name = 'legittorrents' name = 'legittorrents'
supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'} supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'}
def __init__(self):
self.results = []
self.parser = self.SimpleSGMLParser(self.results, self.url)
def download_torrent(self, info): def download_torrent(self, info):
print download_file(info) print download_file(info)

200
src/searchengine/nova/engines/mininova.py

@ -1,4 +1,4 @@
#VERSION: 1.51 #VERSION: 2.00
#AUTHORS: Christophe Dumez (chris@qbittorrent.org) #AUTHORS: Christophe Dumez (chris@qbittorrent.org)
#CONTRIBUTORS: Diego de las Heras (diegodelasheras@gmail.com) #CONTRIBUTORS: Diego de las Heras (diegodelasheras@gmail.com)
@ -26,90 +26,124 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
from HTMLParser import HTMLParser
from httplib import HTTPConnection as http
from novaprinter import prettyPrinter from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file from helpers import download_file
import sgmllib
import re
class mininova(object): class mininova(object):
# Mandatory properties """ Search engine class """
url = 'http://www.mininova.org' url = 'http://www.mininova.org'
name = 'Mininova' name = 'Mininova'
supported_categories = {'all': '0', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'pictures': '6', 'books': '2'} supported_categories = {'all' : '0',
'movies' : '4',
def __init__(self): 'tv' : '8',
self.results = [] 'music' : '5',
self.parser = self.SimpleSGMLParser(self.results, self.url) 'games' : '3',
'anime' : '1',
def download_torrent(self, info): 'software' : '7',
print download_file(info) 'pictures' : '6',
'books' : '2'}
class SimpleSGMLParser(sgmllib.SGMLParser):
def __init__(self, results, url, *args): def download_torrent(self, info):
sgmllib.SGMLParser.__init__(self) print(download_file(info))
self.url = url
self.td_counter = None class MyHtmlParseWithBlackJack(HTMLParser):
self.current_item = None """ Parser class """
self.results = results def __init__(self, list_searches, url):
HTMLParser.__init__(self)
def start_a(self, attr): self.list_searches = list_searches
params = dict(attr) self.url = url
#print params self.table_results = False
if params.has_key('href'): self.current_item = None
if params['href'].startswith("/get/"): self.cur_item_name = None
self.current_item = {} self.next_queries = True
self.td_counter = 0
self.current_item['link']=self.url+params['href'].strip() def handle_starttag_tr(self, _):
elif params['href'].startswith("/tor/") and self.current_item is not None: """ Handler of tr start tag """
self.current_item['desc_link']=self.url+params['href'].strip() self.current_item = dict()
def handle_data(self, data): def handle_starttag_a(self, attrs):
if self.td_counter == 0: """ Handler of a start tag """
if not self.current_item.has_key('name'): params = dict(attrs)
self.current_item['name'] = '' link = params["href"]
self.current_item['name']+= data
elif self.td_counter == 1: if link.startswith("/get/"):
if not self.current_item.has_key('size'): #download link
self.current_item['size'] = '' self.current_item["link"] = "".join((self.url, link))
self.current_item['size']+= data.strip() elif link.startswith("/tor/"):
elif self.td_counter == 2: #description
if not self.current_item.has_key('seeds'): self.current_item["desc_link"] = "".join((self.url, link))
self.current_item['seeds'] = '' self.cur_item_name = "name"
self.current_item['seeds']+= data.strip() self.current_item["name"] = ""
elif self.td_counter == 3: elif self.next_queries and link.startswith("/search"):
if not self.current_item.has_key('leech'): if params["title"].startswith("Page"):
self.current_item['leech'] = '' self.list_searches.append(link)
self.current_item['leech']+= data.strip()
def handle_starttag_td(self, attrs):
def start_td(self,attr): """ Handler of td start tag """
if isinstance(self.td_counter,int): if ("align", "right") in attrs:
self.td_counter += 1 if not "size" in self.current_item.keys():
if self.td_counter > 4: self.cur_item_name = "size"
self.td_counter = None self.current_item["size"] = ""
# Display item
if self.current_item: def handle_starttag_span(self, attrs):
self.current_item['engine_url'] = self.url """ Handler of span start tag """
if not self.current_item['seeds'].isdigit(): if ("class", "g") in attrs:
self.current_item['seeds'] = 0 self.cur_item_name = "seeds"
if not self.current_item['leech'].isdigit(): self.current_item["seeds"] = ""
self.current_item['leech'] = 0 elif ("class", "b") in attrs:
prettyPrinter(self.current_item) self.cur_item_name = "leech"
self.results.append('a') self.current_item["leech"] = ""
def search(self, what, cat='all'): def handle_starttag(self, tag, attrs):
ret = [] """ Parser's start tag handler """
i = 1 if self.table_results:
while True and i<11: dispatcher = getattr(self, "_".join(("handle_starttag", tag)), None)
results = [] if dispatcher:
parser = self.SimpleSGMLParser(results, self.url) dispatcher(attrs)
dat = retrieve_url(self.url+'/search/%s/%s/seeds/%d'%(what, self.supported_categories[cat], i))
results_re = re.compile('(?s)<h1>Search results for.*') elif tag == "table":
for match in results_re.finditer(dat): self.table_results = ("class", "maintable") in attrs
res_tab = match.group(0)
parser.feed(res_tab) def handle_endtag(self, tag):
""" Parser's end tag handler """
if tag == "tr" and self.current_item:
self.current_item["engine_url"] = self.url
prettyPrinter(self.current_item)
self.current_item = None
elif self.cur_item_name:
if tag == "a" or tag == "span":
self.cur_item_name = None
def handle_data(self, data):
""" Parser's data handler """
if self.cur_item_name:
temp = self.current_item[self.cur_item_name]
self.current_item[self.cur_item_name] = " ".join((temp, data))
def search(self, what, cat="all"):
""" Performs search """
connection = http("www.mininova.org")
query = "/".join(("/search", what, self.supported_categories[cat], "seeds"))
connection.request("GET", query)
response = connection.getresponse()
if response.status != 200:
return
list_searches = []
parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
parser.feed(response.read().decode('utf-8'))
parser.close() parser.close()
break
if len(results) <= 0:
break
i += 1
parser.next_queries = False
for search_query in list_searches:
connection.request("GET", search_query)
response = connection.getresponse()
parser.feed(response.read().decode('utf-8'))
parser.close()
connection.close()
return

154
src/searchengine/nova/engines/torrentreactor.py

@ -1,4 +1,4 @@
#VERSION: 1.33 #VERSION: 1.35
#AUTHORS: Gekko Dam Beer (gekko04@users.sourceforge.net) #AUTHORS: Gekko Dam Beer (gekko04@users.sourceforge.net)
#CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org) #CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org)
# Bruno Barbieri (brunorex@gmail.com) # Bruno Barbieri (brunorex@gmail.com)
@ -28,92 +28,94 @@
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
from novaprinter import prettyPrinter from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file from helpers import download_file
from urllib2 import HTTPError
from HTMLParser import HTMLParser
import urllib import urllib
from HTMLParser import HTMLParser
from httplib import HTTPConnection as http
import re import re
class torrentreactor(object): class torrentreactor(object):
url = 'http://www.torrentreactor.net' url = 'http://www.torrentreactor.net'
name = 'TorrentReactor.Net' name = 'TorrentReactor.Net'
supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'} supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'}
def download_torrent(self, info):
print(download_file(info))
def download_torrent(self, info): class SimpleHTMLParser(HTMLParser):
print download_file(info) def __init__(self, results, url, *args):
HTMLParser.__init__(self)
self.td_counter = None
self.current_item = None
self.results = results
self.id = None
self.url = url
self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td }
class SimpleHTMLParser(HTMLParser): def handle_starttag(self, tag, attrs):
def __init__(self, results, url, *args): if tag in self.dispatcher:
HTMLParser.__init__(self) self.dispatcher[tag](attrs)
self.td_counter = None
self.current_item = None
self.results = results
self.id = None
self.url = url
self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td }
def handle_starttag(self, tag, attrs): def start_a(self, attr):
if tag in self.dispatcher: params = dict(attr)
self.dispatcher[tag](attrs) if re.match("/torrents/\d+.*", params['href']):
self.current_item = {}
self.current_item['desc_link'] = self.url+params['href'].strip()
elif 'torrentreactor.net/download.php' in params['href']:
self.td_counter = 0
self.current_item['link'] = params['href'].strip()
self.current_item['name'] = urllib.unquote_plus(params['href'].split('&')[1].split('name=')[1])
def start_a(self, attr): def handle_data(self, data):
params = dict(attr) if self.td_counter == 1:
if re.match("/torrents/\d+.*", params['href']): if 'size' not in self.current_item:
self.current_item = {} self.current_item['size'] = ''
self.current_item['desc_link'] = self.url+params['href'].strip() self.current_item['size']+= data.strip()
elif 'torrentreactor.net/download.php' in params['href']: elif self.td_counter == 2:
self.td_counter = 0 if 'seeds' not in self.current_item:
self.current_item['link'] = params['href'].strip() self.current_item['seeds'] = ''
self.current_item['name'] = urllib.unquote_plus(params['href'].split('&')[1].split('name=')[1]) self.current_item['seeds']+= data.strip()
elif self.td_counter == 3:
if 'leech' not in self.current_item:
self.current_item['leech'] = ''
self.current_item['leech']+= data.strip()
def handle_data(self, data): def start_td(self,attr):
if self.td_counter == 1: if isinstance(self.td_counter,int):
if not self.current_item.has_key('size'): self.td_counter += 1
self.current_item['size'] = '' if self.td_counter > 3:
self.current_item['size']+= data.strip() self.td_counter = None
elif self.td_counter == 2: # add item to results
if not self.current_item.has_key('seeds'): if self.current_item:
self.current_item['seeds'] = '' self.current_item['engine_url'] = self.url
self.current_item['seeds']+= data.strip() if not self.current_item['seeds'].isdigit():
elif self.td_counter == 3: self.current_item['seeds'] = 0
if not self.current_item.has_key('leech'): if not self.current_item['leech'].isdigit():
self.current_item['leech'] = '' self.current_item['leech'] = 0
self.current_item['leech']+= data.strip() prettyPrinter(self.current_item)
self.has_results = True
self.results.append('a')
def start_td(self,attr): def search(self, what, cat='all'):
if isinstance(self.td_counter,int): i = 0
self.td_counter += 1 dat = ''
if self.td_counter > 3: connection = http("www.torrentreactor.net")
self.td_counter = None
# add item to results
if self.current_item:
self.current_item['engine_url'] = self.url
if not self.current_item['seeds'].isdigit():
self.current_item['seeds'] = 0
if not self.current_item['leech'].isdigit():
self.current_item['leech'] = 0
prettyPrinter(self.current_item)
self.has_results = True
self.results.append('a')
def __init__(self): while True and i<11:
self.results = [] results = []
self.parser = self.SimpleHTMLParser(self.results, self.url) parser = self.SimpleHTMLParser(results, self.url)
query = '/torrents-search/%s/%d?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat])
connection.request("GET", query)
response = connection.getresponse()
if response.status != 200:
break
def search(self, what, cat='all'): dat = response.read().decode('utf-8')
i = 0
dat = ''
while True and i<11:
results = []
parser = self.SimpleHTMLParser(results, self.url)
try: parser.feed(dat)
dat = retrieve_url(self.url+'/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat])) parser.close()
except HTTPError: if len(results) <= 0:
break break
i += 1
parser.feed(dat) connection.close()
parser.close()
if len(results) <= 0:
break
i += 1

9
src/searchengine/nova/engines/versions.txt

@ -1,8 +1,9 @@
torrentreactor: 1.33
mininova: 1.51
piratebay: 2.11
extratorrent: 1.2 extratorrent: 1.2
torrentreactor: 1.35
mininova: 2.00
piratebay: 2.11
extratorrent: 2.0
kickasstorrents: 1.26 kickasstorrents: 1.26
btdigg: 1.24 btdigg: 1.24
legittorrents: 1.02
torrentz: 2.13 torrentz: 2.13
legittorrents: 1.03

211
src/searchengine/nova3/engines/extratorrent.py

@ -1,4 +1,4 @@
#VERSION: 1.2 #VERSION: 2.0
#AUTHORS: Christophe Dumez (chris@qbittorrent.org) #AUTHORS: Christophe Dumez (chris@qbittorrent.org)
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
@ -25,92 +25,135 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
from html.parser import HTMLParser
from http.client import HTTPConnection as http
#qBt
from novaprinter import prettyPrinter from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file from helpers import download_file
import sgmllib3
import re
class extratorrent(object): class extratorrent(object):
url = 'http://extratorrent.cc' """ Search engine class """
name = 'extratorrent' url = 'http://extratorrent.cc'
supported_categories = {'all': '', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'books': '2', 'pictures': '6'} name = 'ExtraTorrent'
supported_categories = {'all' : '0',
def __init__(self): 'movies' : '4',
self.results = [] 'tv' : '8',
self.parser = self.SimpleSGMLParser(self.results, self.url) 'music' : '5',
'games' : '3',
def download_torrent(self, info): 'anime' : '1',
print(download_file(info)) 'software' : '7',
'books' : '2',
class SimpleSGMLParser(sgmllib3.SGMLParser): 'pictures' : '6'}
def __init__(self, results, url, *args):
sgmllib3.SGMLParser.__init__(self) def download_torrent(self, info):
self.url = url """ Downloader """
self.td_counter = None print(download_file(info))
self.current_item = None
self.start_name = False class MyHtmlParseWithBlackJack(HTMLParser):
self.results = results """ Parser class """
def __init__(self, list_searches, url):
def start_a(self, attr): HTMLParser.__init__(self)
params = dict(attr) self.url = url
#print params self.list_searches = list_searches
if 'href' in params and params['href'].startswith("/torrent_download/"): self.current_item = None
self.current_item = {} self.cur_item_name = None
self.td_counter = 0 self.pending_size = False
self.start_name = False self.next_queries = True
torrent_id = '/'.join(params['href'].split('/')[2:]) self.pending_next_queries = False
self.current_item['link']=self.url+'/download/'+torrent_id
elif 'href' in params and params['href'].startswith("/torrent/") and params['href'].endswith(".html"): def handle_starttag(self, tag, attrs):
self.current_item['desc_link'] = self.url + params['href'].strip() if self.current_item:
self.start_name = True if tag == "a":
params = dict(attrs)
def handle_data(self, data): link = params['href']
if self.td_counter == 2:
if 'name' not in self.current_item and self.start_name: if not link.startswith("/torrent"):
self.current_item['name'] = data.strip() return
elif self.td_counter == 3:
if 'size' not in self.current_item: if link[8] == "/":
self.current_item['size'] = '' #description
self.current_item['size']+= data.replace("&nbsp;", " ").strip() self.current_item["desc_link"] = "".join((self.url, link))
elif self.td_counter == 4: #remove view at the beginning
if 'seeds' not in self.current_item: self.current_item["name"] = params["title"][5:]
self.current_item['seeds'] = '' self.pending_size = True
self.current_item['seeds']+= data.strip() elif link[8] == "_":
elif self.td_counter == 5: #download link
if 'leech' not in self.current_item: link = link.replace("torrent_", "", 1)
self.current_item['leech'] = '' self.current_item["link"] = "".join((self.url, link))
self.current_item['leech']+= data.strip()
elif tag == "td":
def start_td(self,attr): if self.pending_size:
if isinstance(self.td_counter,int): self.cur_item_name = "size"
self.td_counter += 1 self.current_item["size"] = ""
if self.td_counter > 5: self.pending_size = False
self.td_counter = None
# Display item for attr in attrs:
if attr[0] == "class":
if attr[1][0] == "s":
self.cur_item_name = "seeds"
self.current_item["seeds"] = ""
elif attr[1][0] == "l":
self.cur_item_name = "leech"
self.current_item["leech"] = ""
break
elif tag == "tr":
for attr in attrs:
if attr[0] == "class" and attr[1].startswith("tl"):
self.current_item = dict()
self.current_item["engine_url"] = self.url
break
elif self.pending_next_queries:
if tag == "a":
params = dict(attrs)
self.list_searches.append(params['href'])
if params["title"] == "10":
self.pending_next_queries = False
else:
self.pending_next_queries = False
elif self.next_queries:
if tag == "b" and ("class", "pager_no_link") in attrs:
self.next_queries = False
self.pending_next_queries = True
def handle_data(self, data):
if self.cur_item_name:
temp = self.current_item[self.cur_item_name]
self.current_item[self.cur_item_name] = " ".join((temp, data))
#Due to utf-8 we need to handle data two times if there is space
if not self.cur_item_name == "size":
self.cur_item_name = None
def handle_endtag(self, tag):
if self.current_item: if self.current_item:
self.current_item['engine_url'] = self.url if tag == "tr":
if not self.current_item['seeds'].isdigit(): prettyPrinter(self.current_item)
self.current_item['seeds'] = 0 self.current_item = None
if not self.current_item['leech'].isdigit():
self.current_item['leech'] = 0 def search(self, what, cat="all"):
prettyPrinter(self.current_item) """ Performs search """
self.results.append('a') connection = http("extratorrent.cc")
def search(self, what, cat='all'): query = "".join(("/search/?new=1&search=", what, "&s_cat=", self.supported_categories[cat]))
ret = []
i = 1 connection.request("GET", query)
while True and i<11: response = connection.getresponse()
results = [] if response.status != 200:
parser = self.SimpleSGMLParser(results, self.url) return
dat = retrieve_url(self.url+'/advanced_search/?with=%s&s_cat=%s&page=%d'%(what, self.supported_categories[cat], i))
results_re = re.compile('(?s)<table class="tl"><thead>.*') list_searches = []
for match in results_re.finditer(dat): parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
res_tab = match.group(0) parser.feed(response.read().decode('utf-8'))
parser.feed(res_tab)
parser.close() parser.close()
break
if len(results) <= 0:
break
i += 1
for search_query in list_searches:
connection.request("GET", search_query)
response = connection.getresponse()
parser.feed(response.read().decode('utf-8'))
parser.close()
connection.close()
return

6
src/searchengine/nova3/engines/legittorrents.py

@ -1,4 +1,4 @@
#VERSION: 1.03 #VERSION: 1.04
#AUTHORS: Christophe Dumez (chris@qbittorrent.org) #AUTHORS: Christophe Dumez (chris@qbittorrent.org)
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
@ -36,10 +36,6 @@ class legittorrents(object):
name = 'legittorrents' name = 'legittorrents'
supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'} supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'}
def __init__(self):
self.results = []
self.parser = self.SimpleSGMLParser(self.results, self.url)
def download_torrent(self, info): def download_torrent(self, info):
print(download_file(info)) print(download_file(info))

200
src/searchengine/nova3/engines/mininova.py

@ -1,4 +1,4 @@
#VERSION: 1.51 #VERSION: 2.00
#AUTHORS: Christophe Dumez (chris@qbittorrent.org) #AUTHORS: Christophe Dumez (chris@qbittorrent.org)
#CONTRIBUTORS: Diego de las Heras (diegodelasheras@gmail.com) #CONTRIBUTORS: Diego de las Heras (diegodelasheras@gmail.com)
@ -26,90 +26,124 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
from html.parser import HTMLParser
from http.client import HTTPConnection as http
from novaprinter import prettyPrinter from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file from helpers import download_file
import sgmllib3
import re
class mininova(object): class mininova(object):
# Mandatory properties """ Search engine class """
url = 'http://www.mininova.org' url = 'http://www.mininova.org'
name = 'Mininova' name = 'Mininova'
supported_categories = {'all': '0', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'pictures': '6', 'books': '2'} supported_categories = {'all' : '0',
'movies' : '4',
def __init__(self): 'tv' : '8',
self.results = [] 'music' : '5',
self.parser = self.SimpleSGMLParser(self.results, self.url) 'games' : '3',
'anime' : '1',
def download_torrent(self, info): 'software' : '7',
print(download_file(info)) 'pictures' : '6',
'books' : '2'}
class SimpleSGMLParser(sgmllib3.SGMLParser):
def __init__(self, results, url, *args): def download_torrent(self, info):
sgmllib3.SGMLParser.__init__(self) print(download_file(info))
self.url = url
self.td_counter = None class MyHtmlParseWithBlackJack(HTMLParser):
self.current_item = None """ Parser class """
self.results = results def __init__(self, list_searches, url):
HTMLParser.__init__(self)
def start_a(self, attr): self.list_searches = list_searches
params = dict(attr) self.url = url
#print params self.table_results = False
if 'href' in params: self.current_item = None
if params['href'].startswith("/get/"): self.cur_item_name = None
self.current_item = {} self.next_queries = True
self.td_counter = 0
self.current_item['link']=self.url+params['href'].strip() def handle_starttag_tr(self, _):
elif params['href'].startswith("/tor/") and self.current_item is not None: """ Handler of tr start tag """
self.current_item['desc_link']=self.url+params['href'].strip() self.current_item = dict()
def handle_data(self, data): def handle_starttag_a(self, attrs):
if self.td_counter == 0: """ Handler of a start tag """
if 'name' not in self.current_item: params = dict(attrs)
self.current_item['name'] = '' link = params["href"]
self.current_item['name']+= data
elif self.td_counter == 1: if link.startswith("/get/"):
if 'size' not in self.current_item: #download link
self.current_item['size'] = '' self.current_item["link"] = "".join((self.url, link))
self.current_item['size']+= data.strip() elif link.startswith("/tor/"):
elif self.td_counter == 2: #description
if 'seeds' not in self.current_item: self.current_item["desc_link"] = "".join((self.url, link))
self.current_item['seeds'] = '' self.cur_item_name = "name"
self.current_item['seeds']+= data.strip() self.current_item["name"] = ""
elif self.td_counter == 3: elif self.next_queries and link.startswith("/search"):
if 'leech' not in self.current_item: if params["title"].startswith("Page"):
self.current_item['leech'] = '' self.list_searches.append(link)
self.current_item['leech']+= data.strip()
def handle_starttag_td(self, attrs):
def start_td(self,attr): """ Handler of td start tag """
if isinstance(self.td_counter,int): if ("align", "right") in attrs:
self.td_counter += 1 if not "size" in self.current_item.keys():
if self.td_counter > 4: self.cur_item_name = "size"
self.td_counter = None self.current_item["size"] = ""
# Display item
if self.current_item: def handle_starttag_span(self, attrs):
self.current_item['engine_url'] = self.url """ Handler of span start tag """
if not self.current_item['seeds'].isdigit(): if ("class", "g") in attrs:
self.current_item['seeds'] = 0 self.cur_item_name = "seeds"
if not self.current_item['leech'].isdigit(): self.current_item["seeds"] = ""
self.current_item['leech'] = 0 elif ("class", "b") in attrs:
prettyPrinter(self.current_item) self.cur_item_name = "leech"
self.results.append('a') self.current_item["leech"] = ""
def search(self, what, cat='all'): def handle_starttag(self, tag, attrs):
ret = [] """ Parser's start tag handler """
i = 1 if self.table_results:
while True and i<11: dispatcher = getattr(self, "_".join(("handle_starttag", tag)), None)
results = [] if dispatcher:
parser = self.SimpleSGMLParser(results, self.url) dispatcher(attrs)
dat = retrieve_url(self.url+'/search/%s/%s/seeds/%d'%(what, self.supported_categories[cat], i))
results_re = re.compile('(?s)<h1>Search results for.*') elif tag == "table":
for match in results_re.finditer(dat): self.table_results = ("class", "maintable") in attrs
res_tab = match.group(0)
parser.feed(res_tab) def handle_endtag(self, tag):
""" Parser's end tag handler """
if tag == "tr" and self.current_item:
self.current_item["engine_url"] = self.url
prettyPrinter(self.current_item)
self.current_item = None
elif self.cur_item_name:
if tag == "a" or tag == "span":
self.cur_item_name = None
def handle_data(self, data):
""" Parser's data handler """
if self.cur_item_name:
temp = self.current_item[self.cur_item_name]
self.current_item[self.cur_item_name] = " ".join((temp, data))
def search(self, what, cat="all"):
""" Performs search """
connection = http("www.mininova.org")
query = "/".join(("/search", what, self.supported_categories[cat], "seeds"))
connection.request("GET", query)
response = connection.getresponse()
if response.status != 200:
return
list_searches = []
parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
parser.feed(response.read().decode('utf-8'))
parser.close() parser.close()
break
if len(results) <= 0:
break
i += 1
parser.next_queries = False
for search_query in list_searches:
connection.request("GET", search_query)
response = connection.getresponse()
parser.feed(response.read().decode('utf-8'))
parser.close()
connection.close()
return

153
src/searchengine/nova3/engines/torrentreactor.py

@ -1,4 +1,4 @@
#VERSION: 1.33 #VERSION: 1.35
#AUTHORS: Gekko Dam Beer (gekko04@users.sourceforge.net) #AUTHORS: Gekko Dam Beer (gekko04@users.sourceforge.net)
#CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org) #CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org)
# Bruno Barbieri (brunorex@gmail.com) # Bruno Barbieri (brunorex@gmail.com)
@ -28,91 +28,94 @@
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
from novaprinter import prettyPrinter from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file from helpers import download_file
from urllib import error, parse from urllib import parse
from html.parser import HTMLParser from html.parser import HTMLParser
from http.client import HTTPConnection as http
import re import re
class torrentreactor(object): class torrentreactor(object):
url = 'http://www.torrentreactor.net' url = 'http://www.torrentreactor.net'
name = 'TorrentReactor.Net' name = 'TorrentReactor.Net'
supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'} supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'}
def download_torrent(self, info): def download_torrent(self, info):
print(download_file(info)) print(download_file(info))
class SimpleHTMLParser(HTMLParser): class SimpleHTMLParser(HTMLParser):
def __init__(self, results, url, *args): def __init__(self, results, url, *args):
HTMLParser.__init__(self) HTMLParser.__init__(self)
self.td_counter = None self.td_counter = None
self.current_item = None self.current_item = None
self.results = results self.results = results
self.id = None self.id = None
self.url = url self.url = url
self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td } self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td }
def handle_starttag(self, tag, attrs): def handle_starttag(self, tag, attrs):
if tag in self.dispatcher: if tag in self.dispatcher:
self.dispatcher[tag](attrs) self.dispatcher[tag](attrs)
def start_a(self, attr): def start_a(self, attr):
params = dict(attr) params = dict(attr)
if re.match("/torrents/\d+.*", params['href']): if re.match("/torrents/\d+.*", params['href']):
self.current_item = {} self.current_item = {}
self.current_item['desc_link'] = self.url+params['href'].strip() self.current_item['desc_link'] = self.url+params['href'].strip()
elif 'torrentreactor.net/download.php' in params['href']: elif 'torrentreactor.net/download.php' in params['href']:
self.td_counter = 0 self.td_counter = 0
self.current_item['link'] = params['href'].strip() self.current_item['link'] = params['href'].strip()
self.current_item['name'] = parse.unquote_plus(params['href'].split('&')[1].split('name=')[1]) self.current_item['name'] = parse.unquote_plus(params['href'].split('&')[1].split('name=')[1])
def handle_data(self, data): def handle_data(self, data):
if self.td_counter == 1: if self.td_counter == 1:
if 'size' not in self.current_item: if 'size' not in self.current_item:
self.current_item['size'] = '' self.current_item['size'] = ''
self.current_item['size']+= data.strip() self.current_item['size']+= data.strip()
elif self.td_counter == 2: elif self.td_counter == 2:
if 'seeds' not in self.current_item: if 'seeds' not in self.current_item:
self.current_item['seeds'] = '' self.current_item['seeds'] = ''
self.current_item['seeds']+= data.strip() self.current_item['seeds']+= data.strip()
elif self.td_counter == 3: elif self.td_counter == 3:
if 'leech' not in self.current_item: if 'leech' not in self.current_item:
self.current_item['leech'] = '' self.current_item['leech'] = ''
self.current_item['leech']+= data.strip() self.current_item['leech']+= data.strip()
def start_td(self,attr): def start_td(self,attr):
if isinstance(self.td_counter,int): if isinstance(self.td_counter,int):
self.td_counter += 1 self.td_counter += 1
if self.td_counter > 3: if self.td_counter > 3:
self.td_counter = None self.td_counter = None
# add item to results # add item to results
if self.current_item: if self.current_item:
self.current_item['engine_url'] = self.url self.current_item['engine_url'] = self.url
if not self.current_item['seeds'].isdigit(): if not self.current_item['seeds'].isdigit():
self.current_item['seeds'] = 0 self.current_item['seeds'] = 0
if not self.current_item['leech'].isdigit(): if not self.current_item['leech'].isdigit():
self.current_item['leech'] = 0 self.current_item['leech'] = 0
prettyPrinter(self.current_item) prettyPrinter(self.current_item)
self.has_results = True self.has_results = True
self.results.append('a') self.results.append('a')
def __init__(self): def search(self, what, cat='all'):
self.results = [] i = 0
self.parser = self.SimpleHTMLParser(self.results, self.url) dat = ''
connection = http("www.torrentreactor.net")
def search(self, what, cat='all'): while True and i<11:
i = 0 results = []
dat = '' parser = self.SimpleHTMLParser(results, self.url)
while True and i<11: query = '/torrents-search/%s/%d?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat])
results = [] connection.request("GET", query)
parser = self.SimpleHTMLParser(results, self.url) response = connection.getresponse()
if response.status != 200:
break
try: dat = response.read().decode('utf-8')
dat = retrieve_url(self.url+'/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat]))
except error.HTTPError:
break
parser.feed(dat) parser.feed(dat)
parser.close() parser.close()
if len(results) <= 0: if len(results) <= 0:
break break
i += 1 i += 1
connection.close()

9
src/searchengine/nova3/engines/versions.txt

@ -1,8 +1,9 @@
torrentreactor: 1.33
mininova: 1.51
piratebay: 2.11
extratorrent: 1.2 extratorrent: 1.2
torrentreactor: 1.35
mininova: 2.00
piratebay: 2.11
extratorrent: 2.0
kickasstorrents: 1.26 kickasstorrents: 1.26
btdigg: 1.23 btdigg: 1.23
legittorrents: 1.03
torrentz: 2.13 torrentz: 2.13
legittorrents: 1.04

Loading…
Cancel
Save