Browse Source

Merge pull request #2550 from DoumanAsh/multiprocessor_search

[search engine] Replace threading with multiprocessing
adaptive-webui-19844
sledgehammer999 10 years ago
parent
commit
3d40834c57
  1. 197
      src/searchengine/nova/engines/extratorrent.py
  2. 6
      src/searchengine/nova/engines/legittorrents.py
  3. 181
      src/searchengine/nova/engines/mininova.py
  4. BIN
      src/searchengine/nova/engines/torrentreactor.png
  5. 36
      src/searchengine/nova/engines/torrentreactor.py
  6. 4
      src/searchengine/nova/engines/torrentz.py
  7. 11
      src/searchengine/nova/engines/versions.txt
  8. 188
      src/searchengine/nova/nova2.py
  9. 12
      src/searchengine/nova/nova2dl.py
  10. 15
      src/searchengine/nova/novaprinter.py
  11. 195
      src/searchengine/nova3/engines/extratorrent.py
  12. 6
      src/searchengine/nova3/engines/legittorrents.py
  13. 179
      src/searchengine/nova3/engines/mininova.py
  14. BIN
      src/searchengine/nova3/engines/torrentreactor.png
  15. 27
      src/searchengine/nova3/engines/torrentreactor.py
  16. 2
      src/searchengine/nova3/engines/torrentz.py
  17. 11
      src/searchengine/nova3/engines/versions.txt
  18. 180
      src/searchengine/nova3/nova2.py
  19. 2
      src/searchengine/nova3/nova2dl.py
  20. 6
      src/searchengine/nova3/novaprinter.py

197
src/searchengine/nova/engines/extratorrent.py

@ -1,4 +1,4 @@
#VERSION: 1.2 #VERSION: 2.0
#AUTHORS: Christophe Dumez (chris@qbittorrent.org) #AUTHORS: Christophe Dumez (chris@qbittorrent.org)
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
@ -25,92 +25,139 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
from HTMLParser import HTMLParser
from httplib import HTTPConnection as http
#qBt
from novaprinter import prettyPrinter from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file from helpers import download_file
import sgmllib
import re
class extratorrent(object): class extratorrent(object):
""" Search engine class """
url = 'http://extratorrent.cc' url = 'http://extratorrent.cc'
name = 'extratorrent' name = 'ExtraTorrent'
supported_categories = {'all': '', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'books': '2', 'pictures': '6'} supported_categories = {'all' : '0',
'movies' : '4',
def __init__(self): 'tv' : '8',
self.results = [] 'music' : '5',
self.parser = self.SimpleSGMLParser(self.results, self.url) 'games' : '3',
'anime' : '1',
'software' : '7',
'books' : '2',
'pictures' : '6'}
def download_torrent(self, info): def download_torrent(self, info):
print download_file(info) """ Downloader """
print(download_file(info))
class SimpleSGMLParser(sgmllib.SGMLParser): class MyHtmlParseWithBlackJack(HTMLParser):
def __init__(self, results, url, *args): """ Parser class """
sgmllib.SGMLParser.__init__(self) def __init__(self, list_searches, url):
HTMLParser.__init__(self)
self.url = url self.url = url
self.td_counter = None self.list_searches = list_searches
self.current_item = None self.current_item = None
self.start_name = False self.cur_item_name = None
self.results = results self.pending_size = False
self.next_queries = True
def start_a(self, attr): self.pending_next_queries = False
params = dict(attr) self.next_queries_set = set()
#print params
if params.has_key('href') and params['href'].startswith("/torrent_download/"): def handle_starttag(self, tag, attrs):
self.current_item = {} if self.current_item:
self.td_counter = 0 if tag == "a":
self.start_name = False params = dict(attrs)
torrent_id = '/'.join(params['href'].split('/')[2:]) link = params['href']
self.current_item['link']=self.url+'/download/'+torrent_id
elif params.has_key('href') and params['href'].startswith("/torrent/") and params['href'].endswith(".html"): if not link.startswith("/torrent"):
self.current_item['desc_link'] = self.url + params['href'].strip() return
self.start_name = True
if link[8] == "/":
#description
self.current_item["desc_link"] = "".join((self.url, link))
#remove view at the beginning
self.current_item["name"] = params["title"][5:].replace("&amp;", "&")
self.pending_size = True
elif link[8] == "_":
#download link
link = link.replace("torrent_", "", 1)
self.current_item["link"] = "".join((self.url, link))
elif tag == "td":
if self.pending_size:
self.cur_item_name = "size"
self.current_item["size"] = ""
self.pending_size = False
for attr in attrs:
if attr[0] == "class":
if attr[1][0] == "s":
self.cur_item_name = "seeds"
self.current_item["seeds"] = ""
elif attr[1][0] == "l":
self.cur_item_name = "leech"
self.current_item["leech"] = ""
break
elif tag == "tr":
for attr in attrs:
if attr[0] == "class" and attr[1].startswith("tl"):
self.current_item = dict()
self.current_item["engine_url"] = self.url
break
elif self.pending_next_queries:
if tag == "a":
params = dict(attrs)
if params["title"] in self.next_queries_set:
return
self.list_searches.append(params['href'])
self.next_queries_set.add(params["title"])
if params["title"] == "10":
self.pending_next_queries = False
else:
self.pending_next_queries = False
elif self.next_queries:
if tag == "b" and ("class", "pager_no_link") in attrs:
self.next_queries = False
self.pending_next_queries = True
def handle_data(self, data): def handle_data(self, data):
if self.td_counter == 2: if self.cur_item_name:
if not self.current_item.has_key('name') and self.start_name: temp = self.current_item[self.cur_item_name]
self.current_item['name'] = data.strip() self.current_item[self.cur_item_name] = " ".join((temp, data))
elif self.td_counter == 3: #Due to utf-8 we need to handle data two times if there is space
if not self.current_item.has_key('size'): if not self.cur_item_name == "size":
self.current_item['size'] = '' self.cur_item_name = None
self.current_item['size']+= data.replace("&nbsp;", " ").strip()
elif self.td_counter == 4: def handle_endtag(self, tag):
if not self.current_item.has_key('seeds'):
self.current_item['seeds'] = ''
self.current_item['seeds']+= data.strip()
elif self.td_counter == 5:
if not self.current_item.has_key('leech'):
self.current_item['leech'] = ''
self.current_item['leech']+= data.strip()
def start_td(self,attr):
if isinstance(self.td_counter,int):
self.td_counter += 1
if self.td_counter > 5:
self.td_counter = None
# Display item
if self.current_item: if self.current_item:
self.current_item['engine_url'] = self.url if tag == "tr":
if not self.current_item['seeds'].isdigit():
self.current_item['seeds'] = 0
if not self.current_item['leech'].isdigit():
self.current_item['leech'] = 0
prettyPrinter(self.current_item) prettyPrinter(self.current_item)
self.results.append('a') self.current_item = None
def search(self, what, cat='all'): def search(self, what, cat="all"):
ret = [] """ Performs search """
i = 1 connection = http("extratorrent.cc")
while True and i<11:
results = [] query = "".join(("/search/?new=1&search=", what, "&s_cat=", self.supported_categories[cat]))
parser = self.SimpleSGMLParser(results, self.url)
dat = retrieve_url(self.url+'/advanced_search/?with=%s&s_cat=%s&page=%d'%(what, self.supported_categories[cat], i)) connection.request("GET", query)
results_re = re.compile('(?s)<table class="tl"><thead>.*') response = connection.getresponse()
for match in results_re.finditer(dat): if response.status != 200:
res_tab = match.group(0) return
parser.feed(res_tab)
list_searches = []
parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
parser.feed(response.read().decode('utf-8'))
parser.close()
for search_query in list_searches:
connection.request("GET", search_query)
response = connection.getresponse()
parser.feed(response.read().decode('utf-8'))
parser.close() parser.close()
break
if len(results) <= 0:
break
i += 1
connection.close()
return

6
src/searchengine/nova/engines/legittorrents.py

@ -1,4 +1,4 @@
#VERSION: 1.02 #VERSION: 1.03
#AUTHORS: Christophe Dumez (chris@qbittorrent.org) #AUTHORS: Christophe Dumez (chris@qbittorrent.org)
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
@ -36,10 +36,6 @@ class legittorrents(object):
name = 'legittorrents' name = 'legittorrents'
supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'} supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'}
def __init__(self):
self.results = []
self.parser = self.SimpleSGMLParser(self.results, self.url)
def download_torrent(self, info): def download_torrent(self, info):
print download_file(info) print download_file(info)

181
src/searchengine/nova/engines/mininova.py

@ -1,4 +1,4 @@
#VERSION: 1.51 #VERSION: 2.00
#AUTHORS: Christophe Dumez (chris@qbittorrent.org) #AUTHORS: Christophe Dumez (chris@qbittorrent.org)
#CONTRIBUTORS: Diego de las Heras (diegodelasheras@gmail.com) #CONTRIBUTORS: Diego de las Heras (diegodelasheras@gmail.com)
@ -26,90 +26,123 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
from HTMLParser import HTMLParser
from httplib import HTTPConnection as http
from novaprinter import prettyPrinter from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file from helpers import download_file
import sgmllib
import re
class mininova(object): class mininova(object):
# Mandatory properties """ Search engine class """
url = 'http://www.mininova.org' url = 'http://www.mininova.org'
name = 'Mininova' name = 'Mininova'
supported_categories = {'all': '0', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'pictures': '6', 'books': '2'} supported_categories = {'all' : '0',
'movies' : '4',
def __init__(self): 'tv' : '8',
self.results = [] 'music' : '5',
self.parser = self.SimpleSGMLParser(self.results, self.url) 'games' : '3',
'anime' : '1',
'software' : '7',
'pictures' : '6',
'books' : '2'}
def download_torrent(self, info): def download_torrent(self, info):
print download_file(info) print(download_file(info))
class SimpleSGMLParser(sgmllib.SGMLParser): class MyHtmlParseWithBlackJack(HTMLParser):
def __init__(self, results, url, *args): """ Parser class """
sgmllib.SGMLParser.__init__(self) def __init__(self, list_searches, url):
HTMLParser.__init__(self)
self.list_searches = list_searches
self.url = url self.url = url
self.td_counter = None self.table_results = False
self.current_item = None self.current_item = None
self.results = results self.cur_item_name = None
self.next_queries = True
def start_a(self, attr):
params = dict(attr)
#print params
if params.has_key('href'):
if params['href'].startswith("/get/"):
self.current_item = {}
self.td_counter = 0
self.current_item['link']=self.url+params['href'].strip()
elif params['href'].startswith("/tor/") and self.current_item is not None:
self.current_item['desc_link']=self.url+params['href'].strip()
def handle_data(self, data): def handle_starttag_tr(self, _):
if self.td_counter == 0: """ Handler of tr start tag """
if not self.current_item.has_key('name'): self.current_item = dict()
self.current_item['name'] = ''
self.current_item['name']+= data def handle_starttag_a(self, attrs):
elif self.td_counter == 1: """ Handler of a start tag """
if not self.current_item.has_key('size'): params = dict(attrs)
self.current_item['size'] = '' link = params["href"]
self.current_item['size']+= data.strip()
elif self.td_counter == 2: if link.startswith("/tor/"):
if not self.current_item.has_key('seeds'): #description
self.current_item['seeds'] = '' self.current_item["desc_link"] = "".join((self.url, link))
self.current_item['seeds']+= data.strip() #get download link from description by id
elif self.td_counter == 3: self.current_item["link"] = "".join((self.url, "/get/", link[5:-2]))
if not self.current_item.has_key('leech'): self.cur_item_name = "name"
self.current_item['leech'] = '' self.current_item["name"] = ""
self.current_item['leech']+= data.strip() elif self.next_queries and link.startswith("/search"):
if params["title"].startswith("Page"):
def start_td(self,attr): self.list_searches.append(link)
if isinstance(self.td_counter,int):
self.td_counter += 1 def handle_starttag_td(self, attrs):
if self.td_counter > 4: """ Handler of td start tag """
self.td_counter = None if ("align", "right") in attrs:
# Display item if not "size" in self.current_item:
if self.current_item: self.cur_item_name = "size"
self.current_item['engine_url'] = self.url self.current_item["size"] = ""
if not self.current_item['seeds'].isdigit():
self.current_item['seeds'] = 0 def handle_starttag_span(self, attrs):
if not self.current_item['leech'].isdigit(): """ Handler of span start tag """
self.current_item['leech'] = 0 if ("class", "g") in attrs:
self.cur_item_name = "seeds"
self.current_item["seeds"] = ""
elif ("class", "b") in attrs:
self.cur_item_name = "leech"
self.current_item["leech"] = ""
def handle_starttag(self, tag, attrs):
""" Parser's start tag handler """
if self.table_results:
dispatcher = getattr(self, "_".join(("handle_starttag", tag)), None)
if dispatcher:
dispatcher(attrs)
elif tag == "table":
self.table_results = ("class", "maintable") in attrs
def handle_endtag(self, tag):
""" Parser's end tag handler """
if tag == "tr" and self.current_item:
self.current_item["engine_url"] = self.url
prettyPrinter(self.current_item) prettyPrinter(self.current_item)
self.results.append('a') self.current_item = None
elif self.cur_item_name:
def search(self, what, cat='all'): if tag == "a" or tag == "td":
ret = [] self.cur_item_name = None
i = 1
while True and i<11: def handle_data(self, data):
results = [] """ Parser's data handler """
parser = self.SimpleSGMLParser(results, self.url) if self.cur_item_name:
dat = retrieve_url(self.url+'/search/%s/%s/seeds/%d'%(what, self.supported_categories[cat], i)) temp = self.current_item[self.cur_item_name]
results_re = re.compile('(?s)<h1>Search results for.*') self.current_item[self.cur_item_name] = " ".join((temp, data))
for match in results_re.finditer(dat):
res_tab = match.group(0) def search(self, what, cat="all"):
parser.feed(res_tab) """ Performs search """
connection = http("www.mininova.org")
query = "/".join(("/search", what, self.supported_categories[cat], "seeds"))
connection.request("GET", query)
response = connection.getresponse()
if response.status != 200:
return
list_searches = []
parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
parser.feed(response.read().decode('utf-8'))
parser.close()
parser.next_queries = False
for search_query in list_searches:
connection.request("GET", search_query)
response = connection.getresponse()
parser.feed(response.read().decode('utf-8'))
parser.close() parser.close()
break
if len(results) <= 0:
break
i += 1
connection.close()
return

BIN
src/searchengine/nova/engines/torrentreactor.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 252 B

After

Width:  |  Height:  |  Size: 951 B

36
src/searchengine/nova/engines/torrentreactor.py

@ -1,4 +1,4 @@
#VERSION: 1.33 #VERSION: 1.36
#AUTHORS: Gekko Dam Beer (gekko04@users.sourceforge.net) #AUTHORS: Gekko Dam Beer (gekko04@users.sourceforge.net)
#CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org) #CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org)
# Bruno Barbieri (brunorex@gmail.com) # Bruno Barbieri (brunorex@gmail.com)
@ -28,19 +28,18 @@
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
from novaprinter import prettyPrinter from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file from helpers import download_file, retrieve_url
from urllib2 import HTTPError
from HTMLParser import HTMLParser
import urllib import urllib
import re from HTMLParser import HTMLParser
from re import compile as re_compile
class torrentreactor(object): class torrentreactor(object):
url = 'http://www.torrentreactor.net' url = 'http://www.torrentreactor.net'
name = 'TorrentReactor.Net' name = 'TorrentReactor'
supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'} supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'}
def download_torrent(self, info): def download_torrent(self, info):
print download_file(info) print(download_file(info))
class SimpleHTMLParser(HTMLParser): class SimpleHTMLParser(HTMLParser):
def __init__(self, results, url, *args): def __init__(self, results, url, *args):
@ -50,6 +49,7 @@ class torrentreactor(object):
self.results = results self.results = results
self.id = None self.id = None
self.url = url self.url = url
self.torrents_matcher = re_compile("/torrents/\d+.*")
self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td } self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td }
def handle_starttag(self, tag, attrs): def handle_starttag(self, tag, attrs):
@ -58,7 +58,7 @@ class torrentreactor(object):
def start_a(self, attr): def start_a(self, attr):
params = dict(attr) params = dict(attr)
if re.match("/torrents/\d+.*", params['href']): if self.torrents_matcher.match(params['href']):
self.current_item = {} self.current_item = {}
self.current_item['desc_link'] = self.url+params['href'].strip() self.current_item['desc_link'] = self.url+params['href'].strip()
elif 'torrentreactor.net/download.php' in params['href']: elif 'torrentreactor.net/download.php' in params['href']:
@ -68,15 +68,15 @@ class torrentreactor(object):
def handle_data(self, data): def handle_data(self, data):
if self.td_counter == 1: if self.td_counter == 1:
if not self.current_item.has_key('size'): if 'size' not in self.current_item:
self.current_item['size'] = '' self.current_item['size'] = ''
self.current_item['size']+= data.strip() self.current_item['size']+= data.strip()
elif self.td_counter == 2: elif self.td_counter == 2:
if not self.current_item.has_key('seeds'): if 'seeds' not in self.current_item:
self.current_item['seeds'] = '' self.current_item['seeds'] = ''
self.current_item['seeds']+= data.strip() self.current_item['seeds']+= data.strip()
elif self.td_counter == 3: elif self.td_counter == 3:
if not self.current_item.has_key('leech'): if 'leech' not in self.current_item:
self.current_item['leech'] = '' self.current_item['leech'] = ''
self.current_item['leech']+= data.strip() self.current_item['leech']+= data.strip()
@ -96,22 +96,14 @@ class torrentreactor(object):
self.has_results = True self.has_results = True
self.results.append('a') self.results.append('a')
def __init__(self):
self.results = []
self.parser = self.SimpleHTMLParser(self.results, self.url)
def search(self, what, cat='all'): def search(self, what, cat='all'):
i = 0 i = 0
dat = '' dat = ''
while True and i<11:
while i < 11:
results = [] results = []
parser = self.SimpleHTMLParser(results, self.url) parser = self.SimpleHTMLParser(results, self.url)
dat = retrieve_url('%s/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(self.url, what, (i*35), self.supported_categories[cat]))
try:
dat = retrieve_url(self.url+'/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat]))
except HTTPError:
break
parser.feed(dat) parser.feed(dat)
parser.close() parser.close()
if len(results) <= 0: if len(results) <= 0:

4
src/searchengine/nova/engines/torrentz.py

@ -1,4 +1,4 @@
#VERSION: 2.13 #VERSION: 2.14
#AUTHORS: Diego de las Heras (diegodelasheras@gmail.com) #AUTHORS: Diego de las Heras (diegodelasheras@gmail.com)
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
@ -105,7 +105,7 @@ class torrentz(object):
while i < 6: while i < 6:
results_list = [] results_list = []
# "what" is already urlencoded # "what" is already urlencoded
html = retrieve_url(self.url + '/any?f=%s&p=%d' % (what, i)) html = retrieve_url('%s/any?f=%s&p=%d' % (self.url, what, i))
parser = self.MyHtmlParser(results_list, self.url, trackers) parser = self.MyHtmlParser(results_list, self.url, trackers)
parser.feed(html) parser.feed(html)
parser.close() parser.close()

11
src/searchengine/nova/engines/versions.txt

@ -1,8 +1,9 @@
torrentreactor: 1.33
mininova: 1.51
piratebay: 2.11
extratorrent: 1.2 extratorrent: 1.2
torrentreactor: 1.36
mininova: 2.00
piratebay: 2.11
extratorrent: 2.0
kickasstorrents: 1.26 kickasstorrents: 1.26
btdigg: 1.24 btdigg: 1.24
legittorrents: 1.02 torrentz: 2.14
torrentz: 2.13 legittorrents: 1.03

188
src/searchengine/nova/nova2.py

@ -26,7 +26,7 @@
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
#VERSION: 1.32 #VERSION: 1.40
# Author: # Author:
# Fabien Devaux <fab AT gnux DOT info> # Fabien Devaux <fab AT gnux DOT info>
@ -37,16 +37,15 @@
# #
# Licence: BSD # Licence: BSD
import sys
import threading
import os
import glob
import urllib import urllib
from os import path
import fix_encoding from glob import glob
from sys import argv
from multiprocessing import Pool, cpu_count
from fix_encoding import fix_encoding
THREADED = True THREADED = True
CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books') CATEGORIES = {'all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books'}
################################################################################ ################################################################################
# Every engine should have a "search" method taking # Every engine should have a "search" method taking
@ -56,34 +55,50 @@ CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pic
# As a convention, try to list results by decrasing number of seeds or similar # As a convention, try to list results by decrasing number of seeds or similar
################################################################################ ################################################################################
def initialize_engines():
""" Import available engines
Return list of available engines
"""
supported_engines = [] supported_engines = []
engines = glob.glob(os.path.join(os.path.dirname(__file__), 'engines','*.py')) engines = glob(path.join(path.dirname(__file__), 'engines', '*.py'))
for engine in engines: for engine in engines:
e = engine.split(os.sep)[-1][:-3] engi = path.basename(engine).split('.')[0].strip()
if len(e.strip()) == 0: continue if len(engi) == 0 or engi.startswith('_'):
if e.startswith('_'): continue continue
try: try:
exec "from engines.%s import %s"%(e,e) #import engines.[engine]
supported_engines.append(e) engine_module = __import__(".".join(("engines", engi)))
#get low-level module
engine_module = getattr(engine_module, engi)
#bind class name
globals()[engi] = getattr(engine_module, engi)
supported_engines.append(engi)
except: except:
pass pass
def engineToXml(short_name): return supported_engines
xml = "<%s>\n"%short_name
exec "engine = %s()"%short_name def engines_to_xml(supported_engines):
xml += "<name>%s</name>\n"%engine.name """ Generates xml for supported engines """
xml += "<url>%s</url>\n"%engine.url tab = " " * 4
xml += "<categories>"
if hasattr(engine, 'supported_categories'): for short_name in supported_engines:
supported_categories = engine.supported_categories.keys() search_engine = globals()[short_name]()
supported_categories.remove('all')
xml += " ".join(supported_categories) supported_categories = ""
xml += "</categories>\n" if hasattr(search_engine, "supported_categories"):
xml += "</%s>\n"%short_name supported_categories = " ".join((key for key in search_engine.supported_categories.keys()
return xml if key is not "all"))
def displayCapabilities(): yield "".join((tab, "<", short_name, ">\n",
tab, tab, "<name>", search_engine.name, "</name>\n",
tab, tab, "<url>", search_engine.url, "</url>\n",
tab, tab, "<categories>", supported_categories, "</categories>\n",
tab, "</", short_name, ">\n"))
def displayCapabilities(supported_engines):
""" """
Display capabilities in XML format Display capabilities in XML format
<capabilities> <capabilities>
@ -94,70 +109,75 @@ def displayCapabilities():
</engine_short_name> </engine_short_name>
</capabilities> </capabilities>
""" """
xml = "<capabilities>" xml = "".join(("<capabilities>\n",
for short_name in supported_engines: "".join(engines_to_xml(supported_engines)),
xml += engineToXml(short_name) "</capabilities>"))
xml += "</capabilities>" print(xml)
print xml
def run_search(engine_list):
class EngineLauncher(threading.Thread): """ Run search in engine
def __init__(self, engine, what, cat='all'):
threading.Thread.__init__(self) @param engine_list List with engine, query and category
self.engine = engine
self.what = what @retval False if any exceptions occured
self.cat = cat @retval True otherwise
def run(self): """
if hasattr(self.engine, 'supported_categories'): engine, what, cat = engine_list
if self.cat == 'all' or self.cat in self.engine.supported_categories.keys(): try:
self.engine.search(self.what, self.cat) engine = engine()
elif self.cat == 'all': #avoid exceptions due to invalid category
self.engine.search(self.what) if hasattr(engine, 'supported_categories'):
cat = cat if cat in engine.supported_categories else "all"
if __name__ == '__main__': engine.search(what, cat)
# Make sure we enforce utf-8 encoding
fix_encoding.fix_encoding()
if len(sys.argv) < 2:
raise SystemExit('./nova2.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'%
(','.join(supported_engines)))
if len(sys.argv) == 2:
if sys.argv[1] == "--capabilities":
displayCapabilities()
sys.exit(0)
else: else:
raise SystemExit('./nova.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'% engine.search(what)
(','.join(supported_engines))) return True
except:
return False
def main(args):
fix_encoding()
supported_engines = initialize_engines()
engines_list = [e.lower() for e in sys.argv[1].strip().split(',')] if not args:
raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
"available engines: %s" % (','.join(supported_engines)))
elif args[0] == "--capabilities":
displayCapabilities(supported_engines)
return
elif len(args) < 3:
raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
"available engines: %s" % (','.join(supported_engines)))
#get only unique engines with set
engines_list = set(e.lower() for e in args[0].strip().split(','))
if 'all' in engines_list: if 'all' in engines_list:
engines_list = supported_engines engines_list = supported_engines
else:
#discard un-supported engines
engines_list = [engine for engine in engines_list
if engine in supported_engines]
cat = sys.argv[2].lower() if not engines_list:
#engine list is empty. Nothing to do here
return
cat = args[1].lower()
if cat not in CATEGORIES: if cat not in CATEGORIES:
raise SystemExit('Invalid category!') raise SystemExit(" - ".join(('Invalid category', cat)))
what = urllib.quote(' '.join(sys.argv[3:])) what = urllib.quote(' '.join(args[2:]))
threads = []
for engine in engines_list:
try:
if THREADED: if THREADED:
exec "l = EngineLauncher(%s(), what, cat)"%engine #child process spawning is controlled min(number of searches, number of cpu)
threads.append(l) pool = Pool(min(len(engines_list), cpu_count()))
l.start() pool.map(run_search, ([globals()[engine], what, cat] for engine in engines_list))
else: else:
exec "e = %s()"%engine map(run_search, ([globals()[engine], what, cat] for engine in engines_list))
if hasattr(engine, 'supported_categories'):
if cat == 'all' or cat in e.supported_categories.keys(): if __name__ == "__main__":
e.search(what, cat) main(argv[1:])
elif self.cat == 'all':
e.search(what)
engine().search(what, cat)
except:
pass
if THREADED:
for t in threads:
t.join()

12
src/searchengine/nova/nova2dl.py

@ -25,7 +25,7 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
#VERSION: 1.10 #VERSION: 1.20
# Author: # Author:
# Christophe DUMEZ (chris@qbittorrent.org) # Christophe DUMEZ (chris@qbittorrent.org)
@ -43,8 +43,8 @@ for engine in engines:
if len(e.strip()) == 0: continue if len(e.strip()) == 0: continue
if e.startswith('_'): continue if e.startswith('_'): continue
try: try:
exec "from engines.%s import %s"%(e,e) exec("from engines.%s import %s"%(e,e))
exec "engine_url = %s.url"%e exec("engine_url = %s.url"%e)
supported_engines[engine_url] = e supported_engines[engine_url] = e
except: except:
pass pass
@ -54,11 +54,11 @@ if __name__ == '__main__':
raise SystemExit('./nova2dl.py engine_url download_parameter') raise SystemExit('./nova2dl.py engine_url download_parameter')
engine_url = sys.argv[1].strip() engine_url = sys.argv[1].strip()
download_param = sys.argv[2].strip() download_param = sys.argv[2].strip()
if engine_url not in supported_engines.keys(): if engine_url not in list(supported_engines.keys()):
raise SystemExit('./nova2dl.py: this engine_url was not recognized') raise SystemExit('./nova2dl.py: this engine_url was not recognized')
exec "engine = %s()"%supported_engines[engine_url] exec("engine = %s()"%supported_engines[engine_url])
if hasattr(engine, 'download_torrent'): if hasattr(engine, 'download_torrent'):
engine.download_torrent(download_param) engine.download_torrent(download_param)
else: else:
print download_file(download_param) print(download_file(download_param))
sys.exit(0) sys.exit(0)

15
src/searchengine/nova/novaprinter.py

@ -25,20 +25,19 @@
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
import sys, codecs import sys, codecs
from io import open
# Force UTF-8 printing # Force UTF-8 printing
sys.stdout = codecs.getwriter('utf-8')(sys.stdout) sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
def prettyPrinter(dictionary): def prettyPrinter(dictionary):
# Convert everything to unicode for safe printing
for key,value in dictionary.items():
if isinstance(dictionary[key], str):
dictionary[key] = unicode(dictionary[key], 'utf-8')
dictionary['size'] = anySizeToBytes(dictionary['size']) dictionary['size'] = anySizeToBytes(dictionary['size'])
if dictionary.has_key('desc_link'): outtext = "|".join((dictionary["link"], dictionary["name"].replace("|", " "), str(dictionary["size"]), str(dictionary["seeds"]), str(dictionary["leech"]), dictionary["engine_url"]))
print u"%s|%s|%s|%s|%s|%s|%s"%(dictionary['link'],dictionary['name'].replace('|',' '),dictionary['size'],dictionary['seeds'],dictionary['leech'],dictionary['engine_url'],dictionary['desc_link']) if 'desc_link' in dictionary:
else: outtext = "|".join((outtext, dictionary["desc_link"]))
print u"%s|%s|%s|%s|%s|%s"%(dictionary['link'],dictionary['name'].replace('|',' '),dictionary['size'],dictionary['seeds'],dictionary['leech'],dictionary['engine_url'])
with open(1, 'w', encoding='utf-8', closefd=False) as utf8_stdout:
utf8_stdout.write(unicode("".join((outtext, "\n"))))
def anySizeToBytes(size_string): def anySizeToBytes(size_string):
""" """

195
src/searchengine/nova3/engines/extratorrent.py

@ -1,4 +1,4 @@
#VERSION: 1.2 #VERSION: 2.0
#AUTHORS: Christophe Dumez (chris@qbittorrent.org) #AUTHORS: Christophe Dumez (chris@qbittorrent.org)
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
@ -25,92 +25,139 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
from html.parser import HTMLParser
from http.client import HTTPConnection as http
#qBt
from novaprinter import prettyPrinter from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file from helpers import download_file
import sgmllib3
import re
class extratorrent(object): class extratorrent(object):
""" Search engine class """
url = 'http://extratorrent.cc' url = 'http://extratorrent.cc'
name = 'extratorrent' name = 'ExtraTorrent'
supported_categories = {'all': '', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'books': '2', 'pictures': '6'} supported_categories = {'all' : '0',
'movies' : '4',
def __init__(self): 'tv' : '8',
self.results = [] 'music' : '5',
self.parser = self.SimpleSGMLParser(self.results, self.url) 'games' : '3',
'anime' : '1',
'software' : '7',
'books' : '2',
'pictures' : '6'}
def download_torrent(self, info): def download_torrent(self, info):
""" Downloader """
print(download_file(info)) print(download_file(info))
class SimpleSGMLParser(sgmllib3.SGMLParser): class MyHtmlParseWithBlackJack(HTMLParser):
def __init__(self, results, url, *args): """ Parser class """
sgmllib3.SGMLParser.__init__(self) def __init__(self, list_searches, url):
HTMLParser.__init__(self)
self.url = url self.url = url
self.td_counter = None self.list_searches = list_searches
self.current_item = None self.current_item = None
self.start_name = False self.cur_item_name = None
self.results = results self.pending_size = False
self.next_queries = True
def start_a(self, attr): self.pending_next_queries = False
params = dict(attr) self.next_queries_set = set()
#print params
if 'href' in params and params['href'].startswith("/torrent_download/"): def handle_starttag(self, tag, attrs):
self.current_item = {} if self.current_item:
self.td_counter = 0 if tag == "a":
self.start_name = False params = dict(attrs)
torrent_id = '/'.join(params['href'].split('/')[2:]) link = params['href']
self.current_item['link']=self.url+'/download/'+torrent_id
elif 'href' in params and params['href'].startswith("/torrent/") and params['href'].endswith(".html"): if not link.startswith("/torrent"):
self.current_item['desc_link'] = self.url + params['href'].strip() return
self.start_name = True
if link[8] == "/":
#description
self.current_item["desc_link"] = "".join((self.url, link))
#remove view at the beginning
self.current_item["name"] = params["title"][5:].replace("&amp;", "&")
self.pending_size = True
elif link[8] == "_":
#download link
link = link.replace("torrent_", "", 1)
self.current_item["link"] = "".join((self.url, link))
elif tag == "td":
if self.pending_size:
self.cur_item_name = "size"
self.current_item["size"] = ""
self.pending_size = False
for attr in attrs:
if attr[0] == "class":
if attr[1][0] == "s":
self.cur_item_name = "seeds"
self.current_item["seeds"] = ""
elif attr[1][0] == "l":
self.cur_item_name = "leech"
self.current_item["leech"] = ""
break
elif tag == "tr":
for attr in attrs:
if attr[0] == "class" and attr[1].startswith("tl"):
self.current_item = dict()
self.current_item["engine_url"] = self.url
break
elif self.pending_next_queries:
if tag == "a":
params = dict(attrs)
if params["title"] in self.next_queries_set:
return
self.list_searches.append(params['href'])
self.next_queries_set.add(params["title"])
if params["title"] == "10":
self.pending_next_queries = False
else:
self.pending_next_queries = False
elif self.next_queries:
if tag == "b" and ("class", "pager_no_link") in attrs:
self.next_queries = False
self.pending_next_queries = True
def handle_data(self, data): def handle_data(self, data):
if self.td_counter == 2: if self.cur_item_name:
if 'name' not in self.current_item and self.start_name: temp = self.current_item[self.cur_item_name]
self.current_item['name'] = data.strip() self.current_item[self.cur_item_name] = " ".join((temp, data))
elif self.td_counter == 3: #Due to utf-8 we need to handle data two times if there is space
if 'size' not in self.current_item: if not self.cur_item_name == "size":
self.current_item['size'] = '' self.cur_item_name = None
self.current_item['size']+= data.replace("&nbsp;", " ").strip()
elif self.td_counter == 4: def handle_endtag(self, tag):
if 'seeds' not in self.current_item:
self.current_item['seeds'] = ''
self.current_item['seeds']+= data.strip()
elif self.td_counter == 5:
if 'leech' not in self.current_item:
self.current_item['leech'] = ''
self.current_item['leech']+= data.strip()
def start_td(self,attr):
if isinstance(self.td_counter,int):
self.td_counter += 1
if self.td_counter > 5:
self.td_counter = None
# Display item
if self.current_item: if self.current_item:
self.current_item['engine_url'] = self.url if tag == "tr":
if not self.current_item['seeds'].isdigit():
self.current_item['seeds'] = 0
if not self.current_item['leech'].isdigit():
self.current_item['leech'] = 0
prettyPrinter(self.current_item) prettyPrinter(self.current_item)
self.results.append('a') self.current_item = None
def search(self, what, cat='all'): def search(self, what, cat="all"):
ret = [] """ Performs search """
i = 1 connection = http("extratorrent.cc")
while True and i<11:
results = [] query = "".join(("/search/?new=1&search=", what, "&s_cat=", self.supported_categories[cat]))
parser = self.SimpleSGMLParser(results, self.url)
dat = retrieve_url(self.url+'/advanced_search/?with=%s&s_cat=%s&page=%d'%(what, self.supported_categories[cat], i)) connection.request("GET", query)
results_re = re.compile('(?s)<table class="tl"><thead>.*') response = connection.getresponse()
for match in results_re.finditer(dat): if response.status != 200:
res_tab = match.group(0) return
parser.feed(res_tab)
list_searches = []
parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
parser.feed(response.read().decode('utf-8'))
parser.close()
for search_query in list_searches:
connection.request("GET", search_query)
response = connection.getresponse()
parser.feed(response.read().decode('utf-8'))
parser.close() parser.close()
break
if len(results) <= 0:
break
i += 1
connection.close()
return

6
src/searchengine/nova3/engines/legittorrents.py

@ -1,4 +1,4 @@
#VERSION: 1.03 #VERSION: 1.04
#AUTHORS: Christophe Dumez (chris@qbittorrent.org) #AUTHORS: Christophe Dumez (chris@qbittorrent.org)
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
@ -36,10 +36,6 @@ class legittorrents(object):
name = 'legittorrents' name = 'legittorrents'
supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'} supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'}
def __init__(self):
self.results = []
self.parser = self.SimpleSGMLParser(self.results, self.url)
def download_torrent(self, info): def download_torrent(self, info):
print(download_file(info)) print(download_file(info))

179
src/searchengine/nova3/engines/mininova.py

@ -1,4 +1,4 @@
#VERSION: 1.51 #VERSION: 2.00
#AUTHORS: Christophe Dumez (chris@qbittorrent.org) #AUTHORS: Christophe Dumez (chris@qbittorrent.org)
#CONTRIBUTORS: Diego de las Heras (diegodelasheras@gmail.com) #CONTRIBUTORS: Diego de las Heras (diegodelasheras@gmail.com)
@ -26,90 +26,123 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
from html.parser import HTMLParser
from http.client import HTTPConnection as http
from novaprinter import prettyPrinter from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file from helpers import download_file
import sgmllib3
import re
class mininova(object): class mininova(object):
# Mandatory properties """ Search engine class """
url = 'http://www.mininova.org' url = 'http://www.mininova.org'
name = 'Mininova' name = 'Mininova'
supported_categories = {'all': '0', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'pictures': '6', 'books': '2'} supported_categories = {'all' : '0',
'movies' : '4',
def __init__(self): 'tv' : '8',
self.results = [] 'music' : '5',
self.parser = self.SimpleSGMLParser(self.results, self.url) 'games' : '3',
'anime' : '1',
'software' : '7',
'pictures' : '6',
'books' : '2'}
def download_torrent(self, info): def download_torrent(self, info):
print(download_file(info)) print(download_file(info))
class SimpleSGMLParser(sgmllib3.SGMLParser): class MyHtmlParseWithBlackJack(HTMLParser):
def __init__(self, results, url, *args): """ Parser class """
sgmllib3.SGMLParser.__init__(self) def __init__(self, list_searches, url):
HTMLParser.__init__(self)
self.list_searches = list_searches
self.url = url self.url = url
self.td_counter = None self.table_results = False
self.current_item = None self.current_item = None
self.results = results self.cur_item_name = None
self.next_queries = True
def start_a(self, attr):
params = dict(attr)
#print params
if 'href' in params:
if params['href'].startswith("/get/"):
self.current_item = {}
self.td_counter = 0
self.current_item['link']=self.url+params['href'].strip()
elif params['href'].startswith("/tor/") and self.current_item is not None:
self.current_item['desc_link']=self.url+params['href'].strip()
def handle_data(self, data): def handle_starttag_tr(self, _):
if self.td_counter == 0: """ Handler of tr start tag """
if 'name' not in self.current_item: self.current_item = dict()
self.current_item['name'] = ''
self.current_item['name']+= data def handle_starttag_a(self, attrs):
elif self.td_counter == 1: """ Handler of a start tag """
if 'size' not in self.current_item: params = dict(attrs)
self.current_item['size'] = '' link = params["href"]
self.current_item['size']+= data.strip()
elif self.td_counter == 2: if link.startswith("/tor/"):
if 'seeds' not in self.current_item: #description
self.current_item['seeds'] = '' self.current_item["desc_link"] = "".join((self.url, link))
self.current_item['seeds']+= data.strip() #get download link from description by id
elif self.td_counter == 3: self.current_item["link"] = "".join((self.url, "/get/", link[5:-2]))
if 'leech' not in self.current_item: self.cur_item_name = "name"
self.current_item['leech'] = '' self.current_item["name"] = ""
self.current_item['leech']+= data.strip() elif self.next_queries and link.startswith("/search"):
if params["title"].startswith("Page"):
def start_td(self,attr): self.list_searches.append(link)
if isinstance(self.td_counter,int):
self.td_counter += 1 def handle_starttag_td(self, attrs):
if self.td_counter > 4: """ Handler of td start tag """
self.td_counter = None if ("align", "right") in attrs:
# Display item if not "size" in self.current_item:
if self.current_item: self.cur_item_name = "size"
self.current_item['engine_url'] = self.url self.current_item["size"] = ""
if not self.current_item['seeds'].isdigit():
self.current_item['seeds'] = 0 def handle_starttag_span(self, attrs):
if not self.current_item['leech'].isdigit(): """ Handler of span start tag """
self.current_item['leech'] = 0 if ("class", "g") in attrs:
self.cur_item_name = "seeds"
self.current_item["seeds"] = ""
elif ("class", "b") in attrs:
self.cur_item_name = "leech"
self.current_item["leech"] = ""
def handle_starttag(self, tag, attrs):
""" Parser's start tag handler """
if self.table_results:
dispatcher = getattr(self, "_".join(("handle_starttag", tag)), None)
if dispatcher:
dispatcher(attrs)
elif tag == "table":
self.table_results = ("class", "maintable") in attrs
def handle_endtag(self, tag):
""" Parser's end tag handler """
if tag == "tr" and self.current_item:
self.current_item["engine_url"] = self.url
prettyPrinter(self.current_item) prettyPrinter(self.current_item)
self.results.append('a') self.current_item = None
elif self.cur_item_name:
def search(self, what, cat='all'): if tag == "a" or tag == "td":
ret = [] self.cur_item_name = None
i = 1
while True and i<11: def handle_data(self, data):
results = [] """ Parser's data handler """
parser = self.SimpleSGMLParser(results, self.url) if self.cur_item_name:
dat = retrieve_url(self.url+'/search/%s/%s/seeds/%d'%(what, self.supported_categories[cat], i)) temp = self.current_item[self.cur_item_name]
results_re = re.compile('(?s)<h1>Search results for.*') self.current_item[self.cur_item_name] = " ".join((temp, data))
for match in results_re.finditer(dat):
res_tab = match.group(0) def search(self, what, cat="all"):
parser.feed(res_tab) """ Performs search """
connection = http("www.mininova.org")
query = "/".join(("/search", what, self.supported_categories[cat], "seeds"))
connection.request("GET", query)
response = connection.getresponse()
if response.status != 200:
return
list_searches = []
parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
parser.feed(response.read().decode('utf-8'))
parser.close()
parser.next_queries = False
for search_query in list_searches:
connection.request("GET", search_query)
response = connection.getresponse()
parser.feed(response.read().decode('utf-8'))
parser.close() parser.close()
break
if len(results) <= 0:
break
i += 1
connection.close()
return

BIN
src/searchengine/nova3/engines/torrentreactor.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 252 B

After

Width:  |  Height:  |  Size: 951 B

27
src/searchengine/nova3/engines/torrentreactor.py

@ -1,4 +1,4 @@
#VERSION: 1.33 #VERSION: 1.36
#AUTHORS: Gekko Dam Beer (gekko04@users.sourceforge.net) #AUTHORS: Gekko Dam Beer (gekko04@users.sourceforge.net)
#CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org) #CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org)
# Bruno Barbieri (brunorex@gmail.com) # Bruno Barbieri (brunorex@gmail.com)
@ -28,14 +28,14 @@
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
from novaprinter import prettyPrinter from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file from helpers import download_file, retrieve_url
from urllib import error, parse from urllib import parse
from html.parser import HTMLParser from html.parser import HTMLParser
import re from re import compile as re_compile
class torrentreactor(object): class torrentreactor(object):
url = 'http://www.torrentreactor.net' url = 'http://www.torrentreactor.net'
name = 'TorrentReactor.Net' name = 'TorrentReactor'
supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'} supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'}
def download_torrent(self, info): def download_torrent(self, info):
@ -49,6 +49,7 @@ class torrentreactor(object):
self.results = results self.results = results
self.id = None self.id = None
self.url = url self.url = url
self.torrents_matcher = re_compile("/torrents/\d+.*")
self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td } self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td }
def handle_starttag(self, tag, attrs): def handle_starttag(self, tag, attrs):
@ -57,7 +58,7 @@ class torrentreactor(object):
def start_a(self, attr): def start_a(self, attr):
params = dict(attr) params = dict(attr)
if re.match("/torrents/\d+.*", params['href']): if self.torrents_matcher.match(params['href']):
self.current_item = {} self.current_item = {}
self.current_item['desc_link'] = self.url+params['href'].strip() self.current_item['desc_link'] = self.url+params['href'].strip()
elif 'torrentreactor.net/download.php' in params['href']: elif 'torrentreactor.net/download.php' in params['href']:
@ -95,22 +96,14 @@ class torrentreactor(object):
self.has_results = True self.has_results = True
self.results.append('a') self.results.append('a')
def __init__(self):
self.results = []
self.parser = self.SimpleHTMLParser(self.results, self.url)
def search(self, what, cat='all'): def search(self, what, cat='all'):
i = 0 i = 0
dat = '' dat = ''
while True and i<11:
while i < 11:
results = [] results = []
parser = self.SimpleHTMLParser(results, self.url) parser = self.SimpleHTMLParser(results, self.url)
dat = retrieve_url('%s/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(self.url, what, (i*35), self.supported_categories[cat]))
try:
dat = retrieve_url(self.url+'/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat]))
except error.HTTPError:
break
parser.feed(dat) parser.feed(dat)
parser.close() parser.close()
if len(results) <= 0: if len(results) <= 0:

2
src/searchengine/nova3/engines/torrentz.py

@ -1,4 +1,4 @@
#VERSION: 2.13 #VERSION: 2.14
#AUTHORS: Diego de las Heras (diegodelasheras@gmail.com) #AUTHORS: Diego de las Heras (diegodelasheras@gmail.com)
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without

11
src/searchengine/nova3/engines/versions.txt

@ -1,8 +1,9 @@
torrentreactor: 1.33
mininova: 1.51
piratebay: 2.11
extratorrent: 1.2 extratorrent: 1.2
torrentreactor: 1.36
mininova: 2.00
piratebay: 2.11
extratorrent: 2.0
kickasstorrents: 1.26 kickasstorrents: 1.26
btdigg: 1.23 btdigg: 1.23
legittorrents: 1.03 torrentz: 2.14
torrentz: 2.13 legittorrents: 1.04

180
src/searchengine/nova3/nova2.py

@ -26,7 +26,7 @@
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
#VERSION: 1.24 #VERSION: 1.40
# Author: # Author:
# Fabien Devaux <fab AT gnux DOT info> # Fabien Devaux <fab AT gnux DOT info>
@ -37,14 +37,14 @@
# #
# Licence: BSD # Licence: BSD
import sys
import threading
import os
import glob
import urllib.parse import urllib.parse
from os import path, cpu_count
from glob import glob
from sys import argv
from multiprocessing import Pool
THREADED = True THREADED = True
CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books') CATEGORIES = {'all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books'}
################################################################################ ################################################################################
# Every engine should have a "search" method taking # Every engine should have a "search" method taking
@ -54,34 +54,50 @@ CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pic
# As a convention, try to list results by decrasing number of seeds or similar # As a convention, try to list results by decrasing number of seeds or similar
################################################################################ ################################################################################
def initialize_engines():
""" Import available engines
Return list of available engines
"""
supported_engines = [] supported_engines = []
engines = glob.glob(os.path.join(os.path.dirname(__file__), 'engines','*.py')) engines = glob(path.join(path.dirname(__file__), 'engines', '*.py'))
for engine in engines: for engine in engines:
e = engine.split(os.sep)[-1][:-3] engi = path.basename(engine).split('.')[0].strip()
if len(e.strip()) == 0: continue if len(engi) == 0 or engi.startswith('_'):
if e.startswith('_'): continue continue
try: try:
exec("from engines.%s import %s"%(e,e)) #import engines.[engine]
supported_engines.append(e) engine_module = __import__(".".join(("engines", engi)))
#get low-level module
engine_module = getattr(engine_module, engi)
#bind class name
globals()[engi] = getattr(engine_module, engi)
supported_engines.append(engi)
except: except:
pass pass
def engineToXml(short_name): return supported_engines
xml = "<%s>\n"%short_name
exec("search_engine = %s()"%short_name, globals()) def engines_to_xml(supported_engines):
xml += "<name>%s</name>\n"%search_engine.name """ Generates xml for supported engines """
xml += "<url>%s</url>\n"%search_engine.url tab = " " * 4
xml += "<categories>"
if hasattr(search_engine, 'supported_categories'): for short_name in supported_engines:
supported_categories = list(search_engine.supported_categories.keys()) search_engine = globals()[short_name]()
supported_categories.remove('all')
xml += " ".join(supported_categories) supported_categories = ""
xml += "</categories>\n" if hasattr(search_engine, "supported_categories"):
xml += "</%s>\n"%short_name supported_categories = " ".join((key for key in search_engine.supported_categories.keys()
return xml if key is not "all"))
def displayCapabilities(): yield "".join((tab, "<", short_name, ">\n",
tab, tab, "<name>", search_engine.name, "</name>\n",
tab, tab, "<url>", search_engine.url, "</url>\n",
tab, tab, "<categories>", supported_categories, "</categories>\n",
tab, "</", short_name, ">\n"))
def displayCapabilities(supported_engines):
""" """
Display capabilities in XML format Display capabilities in XML format
<capabilities> <capabilities>
@ -92,67 +108,75 @@ def displayCapabilities():
</engine_short_name> </engine_short_name>
</capabilities> </capabilities>
""" """
xml = "<capabilities>" xml = "".join(("<capabilities>\n",
for short_name in supported_engines: "".join(engines_to_xml(supported_engines)),
xml += engineToXml(short_name) "</capabilities>"))
xml += "</capabilities>"
print(xml) print(xml)
class EngineLauncher(threading.Thread): def run_search(engine_list):
def __init__(self, engine, what, cat='all'): """ Run search in engine
threading.Thread.__init__(self)
self.engine = engine @param engine_list List with engine, query and category
self.what = what
self.cat = cat @retval False if any exceptions occured
def run(self): @retval True otherwise
if hasattr(self.engine, 'supported_categories'): """
if self.cat == 'all' or self.cat in list(self.engine.supported_categories.keys()): engine, what, cat = engine_list
self.engine.search(self.what, self.cat) try:
elif self.cat == 'all': engine = engine()
self.engine.search(self.what) #avoid exceptions due to invalid category
if hasattr(engine, 'supported_categories'):
if __name__ == '__main__': cat = cat if cat in engine.supported_categories else "all"
if len(sys.argv) < 2: engine.search(what, cat)
raise SystemExit('./nova2.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'%
(','.join(supported_engines)))
if len(sys.argv) == 2:
if sys.argv[1] == "--capabilities":
displayCapabilities()
sys.exit(0)
else: else:
raise SystemExit('./nova.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'% engine.search(what)
(','.join(supported_engines)))
engines_list = [e.lower() for e in sys.argv[1].strip().split(',')] return True
except:
return False
def main(args):
supported_engines = initialize_engines()
if not args:
raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
"available engines: %s" % (','.join(supported_engines)))
elif args[0] == "--capabilities":
displayCapabilities(supported_engines)
return
elif len(args) < 3:
raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
"available engines: %s" % (','.join(supported_engines)))
#get only unique engines with set
engines_list = set(e.lower() for e in args[0].strip().split(','))
if 'all' in engines_list: if 'all' in engines_list:
engines_list = supported_engines engines_list = supported_engines
else:
#discard un-supported engines
engines_list = [engine for engine in engines_list
if engine in supported_engines]
cat = sys.argv[2].lower() if not engines_list:
#engine list is empty. Nothing to do here
return
if cat not in CATEGORIES: cat = args[1].lower()
raise SystemExit('Invalid category!')
what = urllib.parse.quote(' '.join(sys.argv[3:])) if cat not in CATEGORIES:
raise SystemExit(" - ".join(('Invalid category', cat)))
threads = [] what = urllib.parse.quote(' '.join(args[2:]))
for engine in engines_list:
try:
if THREADED: if THREADED:
exec("l = EngineLauncher(%s(), what, cat)"%engine) #child process spawning is controlled min(number of searches, number of cpu)
threads.append(l) with Pool(min(len(engines_list), cpu_count())) as pool:
l.start() pool.map(run_search, ([globals()[engine], what, cat] for engine in engines_list))
else: else:
exec("e = %s()"%engine) #py3 note: map is needed to be evaluated for content to be executed
if hasattr(engine, 'supported_categories'): all(map(run_search, ([globals()[engine], what, cat] for engine in engines_list)))
if cat == 'all' or cat in list(e.supported_categories.keys()):
e.search(what, cat) if __name__ == "__main__":
elif self.cat == 'all': main(argv[1:])
e.search(what)
engine().search(what, cat)
except:
pass
if THREADED:
for t in threads:
t.join()

2
src/searchengine/nova3/nova2dl.py

@ -25,7 +25,7 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
#VERSION: 1.10 #VERSION: 1.20
# Author: # Author:
# Christophe DUMEZ (chris@qbittorrent.org) # Christophe DUMEZ (chris@qbittorrent.org)

6
src/searchengine/nova3/novaprinter.py

@ -26,12 +26,10 @@
def prettyPrinter(dictionary): def prettyPrinter(dictionary):
outtext = ''
dictionary['size'] = anySizeToBytes(dictionary['size']) dictionary['size'] = anySizeToBytes(dictionary['size'])
outtext = "|".join((dictionary["link"], dictionary["name"].replace("|", " "), str(dictionary["size"]), str(dictionary["seeds"]), str(dictionary["leech"]), dictionary["engine_url"]))
if 'desc_link' in dictionary: if 'desc_link' in dictionary:
outtext = '%s|%s|%s|%s|%s|%s|%s'%(dictionary['link'],dictionary['name'].replace('|',' '),dictionary['size'],dictionary['seeds'],dictionary['leech'],dictionary['engine_url'],dictionary['desc_link']) outtext = "|".join((outtext, dictionary["desc_link"]))
else:
outtext = '%s|%s|%s|%s|%s|%s'%(dictionary['link'],dictionary['name'].replace('|',' '),dictionary['size'],dictionary['seeds'],dictionary['leech'],dictionary['engine_url'])
# fd 1 is stdout # fd 1 is stdout
with open(1, 'w', encoding='utf-8', closefd=False) as utf8stdout: with open(1, 'w', encoding='utf-8', closefd=False) as utf8stdout:

Loading…
Cancel
Save