diff --git a/src/searchengine/nova/engines/extratorrent.py b/src/searchengine/nova/engines/extratorrent.py
index 2956406f4..19fce553c 100644
--- a/src/searchengine/nova/engines/extratorrent.py
+++ b/src/searchengine/nova/engines/extratorrent.py
@@ -1,4 +1,4 @@
-#VERSION: 1.2
+#VERSION: 2.0
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
# Redistribution and use in source and binary forms, with or without
@@ -25,92 +25,135 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
-
+from HTMLParser import HTMLParser
+from httplib import HTTPConnection as http
+#qBt
from novaprinter import prettyPrinter
-from helpers import retrieve_url, download_file
-import sgmllib
-import re
+from helpers import download_file
class extratorrent(object):
- url = 'http://extratorrent.cc'
- name = 'extratorrent'
- supported_categories = {'all': '', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'books': '2', 'pictures': '6'}
-
- def __init__(self):
- self.results = []
- self.parser = self.SimpleSGMLParser(self.results, self.url)
-
- def download_torrent(self, info):
- print download_file(info)
-
- class SimpleSGMLParser(sgmllib.SGMLParser):
- def __init__(self, results, url, *args):
- sgmllib.SGMLParser.__init__(self)
- self.url = url
- self.td_counter = None
- self.current_item = None
- self.start_name = False
- self.results = results
-
- def start_a(self, attr):
- params = dict(attr)
- #print params
- if params.has_key('href') and params['href'].startswith("/torrent_download/"):
- self.current_item = {}
- self.td_counter = 0
- self.start_name = False
- torrent_id = '/'.join(params['href'].split('/')[2:])
- self.current_item['link']=self.url+'/download/'+torrent_id
- elif params.has_key('href') and params['href'].startswith("/torrent/") and params['href'].endswith(".html"):
- self.current_item['desc_link'] = self.url + params['href'].strip()
- self.start_name = True
-
- def handle_data(self, data):
- if self.td_counter == 2:
- if not self.current_item.has_key('name') and self.start_name:
- self.current_item['name'] = data.strip()
- elif self.td_counter == 3:
- if not self.current_item.has_key('size'):
- self.current_item['size'] = ''
- self.current_item['size']+= data.replace(" ", " ").strip()
- elif self.td_counter == 4:
- if not self.current_item.has_key('seeds'):
- self.current_item['seeds'] = ''
- self.current_item['seeds']+= data.strip()
- elif self.td_counter == 5:
- if not self.current_item.has_key('leech'):
- self.current_item['leech'] = ''
- self.current_item['leech']+= data.strip()
-
- def start_td(self,attr):
- if isinstance(self.td_counter,int):
- self.td_counter += 1
- if self.td_counter > 5:
- self.td_counter = None
- # Display item
+ """ Search engine class """
+ url = 'http://extratorrent.cc'
+ name = 'ExtraTorrent'
+ supported_categories = {'all' : '0',
+ 'movies' : '4',
+ 'tv' : '8',
+ 'music' : '5',
+ 'games' : '3',
+ 'anime' : '1',
+ 'software' : '7',
+ 'books' : '2',
+ 'pictures' : '6'}
+
+ def download_torrent(self, info):
+ """ Downloader """
+ print(download_file(info))
+
+ class MyHtmlParseWithBlackJack(HTMLParser):
+ """ Parser class """
+ def __init__(self, list_searches, url):
+ HTMLParser.__init__(self)
+ self.url = url
+ self.list_searches = list_searches
+ self.current_item = None
+ self.cur_item_name = None
+ self.pending_size = False
+ self.next_queries = True
+ self.pending_next_queries = False
+
+ def handle_starttag(self, tag, attrs):
if self.current_item:
- self.current_item['engine_url'] = self.url
- if not self.current_item['seeds'].isdigit():
- self.current_item['seeds'] = 0
- if not self.current_item['leech'].isdigit():
- self.current_item['leech'] = 0
- prettyPrinter(self.current_item)
- self.results.append('a')
-
- def search(self, what, cat='all'):
- ret = []
- i = 1
- while True and i<11:
- results = []
- parser = self.SimpleSGMLParser(results, self.url)
- dat = retrieve_url(self.url+'/advanced_search/?with=%s&s_cat=%s&page=%d'%(what, self.supported_categories[cat], i))
- results_re = re.compile('(?s)<table class="tl">.*')
- for match in results_re.finditer(dat):
- res_tab = match.group(0)
- parser.feed(res_tab)
+ if tag == "a":
+ params = dict(attrs)
+ link = params['href']
+
+ if not link.startswith("/torrent"):
+ return
+
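+ #index 8 of the href tells the link type apart: "/torrent/..." has "/" (details page), "/torrent_download/..." has "_" (torrent file)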
+ if link[8] == "/":
+ #description
+ self.current_item["desc_link"] = "".join((self.url, link))
+ #the title attribute starts with "view ", strip it to keep just the torrent name
+ self.current_item["name"] = params["title"][5:]
+ self.pending_size = True
+ elif link[8] == "_":
+ #download link
+ link = link.replace("torrent_", "", 1)
+ self.current_item["link"] = "".join((self.url, link))
+
+ elif tag == "td":
+ if self.pending_size:
+ self.cur_item_name = "size"
+ self.current_item["size"] = ""
+ self.pending_size = False
+
+ for attr in attrs:
+ if attr[0] == "class":
+ if attr[1][0] == "s":
+ self.cur_item_name = "seeds"
+ self.current_item["seeds"] = ""
+ elif attr[1][0] == "l":
+ self.cur_item_name = "leech"
+ self.current_item["leech"] = ""
+ break
+
+
+ elif tag == "tr":
+ for attr in attrs:
+ if attr[0] == "class" and attr[1].startswith("tl"):
+ self.current_item = dict()
+ self.current_item["engine_url"] = self.url
+ break
+
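+ #once the pager marker has been seen, collect the remaining result-page links (stop after the page titled "10" or at the first non-link tag)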
+ elif self.pending_next_queries:
+ if tag == "a":
+ params = dict(attrs)
+ self.list_searches.append(params['href'])
+ if params["title"] == "10":
+ self.pending_next_queries = False
+ else:
+ self.pending_next_queries = False
+
+ elif self.next_queries:
+ if tag == "b" and ("class", "pager_no_link") in attrs:
+ self.next_queries = False
+ self.pending_next_queries = True
+
+ def handle_data(self, data):
+ if self.cur_item_name:
+ temp = self.current_item[self.cur_item_name]
+ self.current_item[self.cur_item_name] = " ".join((temp, data))
+ #the size text arrives in several data chunks (split on the &nbsp; between number and unit), so keep "size" active; other fields only need the first chunk
+ if self.cur_item_name != "size":
+ self.cur_item_name = None
+
+ def handle_endtag(self, tag):
+ if self.current_item:
+ if tag == "tr":
+ prettyPrinter(self.current_item)
+ self.current_item = None
+
+ def search(self, what, cat="all"):
+ """ Performs search """
+ connection = http("extratorrent.cc")
+
+ query = "".join(("/search/?new=1&search=", what, "&s_cat=", self.supported_categories[cat]))
+
+ connection.request("GET", query)
+ response = connection.getresponse()
+ if response.status != 200:
+ return
+
+ list_searches = []
+ parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
+ parser.feed(response.read().decode('utf-8'))
parser.close()
- break
- if len(results) <= 0:
- break
- i += 1
-
+
+ for search_query in list_searches:
+ connection.request("GET", search_query)
+ response = connection.getresponse()
+ parser.feed(response.read().decode('utf-8'))
+ parser.close()
+
+ connection.close()
+ return
diff --git a/src/searchengine/nova/engines/legittorrents.py b/src/searchengine/nova/engines/legittorrents.py
index be083053e..a6b9b6f18 100644
--- a/src/searchengine/nova/engines/legittorrents.py
+++ b/src/searchengine/nova/engines/legittorrents.py
@@ -1,4 +1,4 @@
-#VERSION: 1.02
+#VERSION: 1.03
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
# Redistribution and use in source and binary forms, with or without
@@ -36,10 +36,6 @@ class legittorrents(object):
name = 'legittorrents'
supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'}
- def __init__(self):
- self.results = []
- self.parser = self.SimpleSGMLParser(self.results, self.url)
-
def download_torrent(self, info):
print download_file(info)
diff --git a/src/searchengine/nova/engines/mininova.py b/src/searchengine/nova/engines/mininova.py
index 5355b0ec7..dc132cd6c 100644
--- a/src/searchengine/nova/engines/mininova.py
+++ b/src/searchengine/nova/engines/mininova.py
@@ -1,4 +1,4 @@
-#VERSION: 1.51
+#VERSION: 2.00
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
#CONTRIBUTORS: Diego de las Heras (diegodelasheras@gmail.com)
@@ -26,90 +26,124 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
+from HTMLParser import HTMLParser
+from httplib import HTTPConnection as http
from novaprinter import prettyPrinter
-from helpers import retrieve_url, download_file
-import sgmllib
-import re
+from helpers import download_file
class mininova(object):
- # Mandatory properties
- url = 'http://www.mininova.org'
- name = 'Mininova'
- supported_categories = {'all': '0', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'pictures': '6', 'books': '2'}
-
- def __init__(self):
- self.results = []
- self.parser = self.SimpleSGMLParser(self.results, self.url)
-
- def download_torrent(self, info):
- print download_file(info)
-
- class SimpleSGMLParser(sgmllib.SGMLParser):
- def __init__(self, results, url, *args):
- sgmllib.SGMLParser.__init__(self)
- self.url = url
- self.td_counter = None
- self.current_item = None
- self.results = results
-
- def start_a(self, attr):
- params = dict(attr)
- #print params
- if params.has_key('href'):
- if params['href'].startswith("/get/"):
- self.current_item = {}
- self.td_counter = 0
- self.current_item['link']=self.url+params['href'].strip()
- elif params['href'].startswith("/tor/") and self.current_item is not None:
- self.current_item['desc_link']=self.url+params['href'].strip()
-
- def handle_data(self, data):
- if self.td_counter == 0:
- if not self.current_item.has_key('name'):
- self.current_item['name'] = ''
- self.current_item['name']+= data
- elif self.td_counter == 1:
- if not self.current_item.has_key('size'):
- self.current_item['size'] = ''
- self.current_item['size']+= data.strip()
- elif self.td_counter == 2:
- if not self.current_item.has_key('seeds'):
- self.current_item['seeds'] = ''
- self.current_item['seeds']+= data.strip()
- elif self.td_counter == 3:
- if not self.current_item.has_key('leech'):
- self.current_item['leech'] = ''
- self.current_item['leech']+= data.strip()
-
- def start_td(self,attr):
- if isinstance(self.td_counter,int):
- self.td_counter += 1
- if self.td_counter > 4:
- self.td_counter = None
- # Display item
- if self.current_item:
- self.current_item['engine_url'] = self.url
- if not self.current_item['seeds'].isdigit():
- self.current_item['seeds'] = 0
- if not self.current_item['leech'].isdigit():
- self.current_item['leech'] = 0
- prettyPrinter(self.current_item)
- self.results.append('a')
-
- def search(self, what, cat='all'):
- ret = []
- i = 1
- while True and i<11:
- results = []
- parser = self.SimpleSGMLParser(results, self.url)
- dat = retrieve_url(self.url+'/search/%s/%s/seeds/%d'%(what, self.supported_categories[cat], i))
- results_re = re.compile('(?s)Search results for.*')
- for match in results_re.finditer(dat):
- res_tab = match.group(0)
- parser.feed(res_tab)
+ """ Search engine class """
+ url = 'http://www.mininova.org'
+ name = 'Mininova'
+ supported_categories = {'all' : '0',
+ 'movies' : '4',
+ 'tv' : '8',
+ 'music' : '5',
+ 'games' : '3',
+ 'anime' : '1',
+ 'software' : '7',
+ 'pictures' : '6',
+ 'books' : '2'}
+
+ def download_torrent(self, info):
+ print(download_file(info))
+
+ class MyHtmlParseWithBlackJack(HTMLParser):
+ """ Parser class """
+ def __init__(self, list_searches, url):
+ HTMLParser.__init__(self)
+ self.list_searches = list_searches
+ self.url = url
+ self.table_results = False
+ self.current_item = None
+ self.cur_item_name = None
+ self.next_queries = True
+
+ def handle_starttag_tr(self, _):
+ """ Handler of tr start tag """
+ self.current_item = dict()
+
+ def handle_starttag_a(self, attrs):
+ """ Handler of a start tag """
+ params = dict(attrs)
+ link = params["href"]
+
+ if link.startswith("/get/"):
+ #download link
+ self.current_item["link"] = "".join((self.url, link))
+ elif link.startswith("/tor/"):
+ #description
+ self.current_item["desc_link"] = "".join((self.url, link))
+ self.cur_item_name = "name"
+ self.current_item["name"] = ""
+ elif self.next_queries and link.startswith("/search"):
+ if params["title"].startswith("Page"):
+ self.list_searches.append(link)
+
+ def handle_starttag_td(self, attrs):
+ """ Handler of td start tag """
+ if ("align", "right") in attrs:
+ if not "size" in self.current_item.keys():
+ self.cur_item_name = "size"
+ self.current_item["size"] = ""
+
+ def handle_starttag_span(self, attrs):
+ """ Handler of span start tag """
+ if ("class", "g") in attrs:
+ self.cur_item_name = "seeds"
+ self.current_item["seeds"] = ""
+ elif ("class", "b") in attrs:
+ self.cur_item_name = "leech"
+ self.current_item["leech"] = ""
+
+ def handle_starttag(self, tag, attrs):
+ """ Parser's start tag handler """
+ if self.table_results:
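+ #route the tag to the matching handle_starttag_<tag> helper above, if one is defined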
+ dispatcher = getattr(self, "_".join(("handle_starttag", tag)), None)
+ if dispatcher:
+ dispatcher(attrs)
+
+ elif tag == "table":
+ self.table_results = ("class", "maintable") in attrs
+
+ def handle_endtag(self, tag):
+ """ Parser's end tag handler """
+ if tag == "tr" and self.current_item:
+ self.current_item["engine_url"] = self.url
+ prettyPrinter(self.current_item)
+ self.current_item = None
+ elif self.cur_item_name:
+ if tag == "a" or tag == "span":
+ self.cur_item_name = None
+
+ def handle_data(self, data):
+ """ Parser's data handler """
+ if self.cur_item_name:
+ temp = self.current_item[self.cur_item_name]
+ self.current_item[self.cur_item_name] = " ".join((temp, data))
+
+ def search(self, what, cat="all"):
+ """ Performs search """
+ connection = http("www.mininova.org")
+
+ query = "/".join(("/search", what, self.supported_categories[cat], "seeds"))
+
+ connection.request("GET", query)
+ response = connection.getresponse()
+ if response.status != 200:
+ return
+
+ list_searches = []
+ parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
+ parser.feed(response.read().decode('utf-8'))
parser.close()
- break
- if len(results) <= 0:
- break
- i += 1
-
+
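+ #page links were already collected from the first page; don't collect them again while walking the remaining pages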
+ parser.next_queries = False
+ for search_query in list_searches:
+ connection.request("GET", search_query)
+ response = connection.getresponse()
+ parser.feed(response.read().decode('utf-8'))
+ parser.close()
+
+ connection.close()
+ return
diff --git a/src/searchengine/nova/engines/torrentreactor.py b/src/searchengine/nova/engines/torrentreactor.py
index ee74f4e75..dff7d35f2 100644
--- a/src/searchengine/nova/engines/torrentreactor.py
+++ b/src/searchengine/nova/engines/torrentreactor.py
@@ -1,4 +1,4 @@
-#VERSION: 1.33
+#VERSION: 1.35
#AUTHORS: Gekko Dam Beer (gekko04@users.sourceforge.net)
#CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org)
# Bruno Barbieri (brunorex@gmail.com)
@@ -28,92 +28,94 @@
# POSSIBILITY OF SUCH DAMAGE.
from novaprinter import prettyPrinter
-from helpers import retrieve_url, download_file
-from urllib2 import HTTPError
-from HTMLParser import HTMLParser
+from helpers import download_file
import urllib
+from HTMLParser import HTMLParser
+from httplib import HTTPConnection as http
import re
class torrentreactor(object):
- url = 'http://www.torrentreactor.net'
- name = 'TorrentReactor.Net'
- supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'}
+ url = 'http://www.torrentreactor.net'
+ name = 'TorrentReactor.Net'
+ supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'}
+
+ def download_torrent(self, info):
+ print(download_file(info))
- def download_torrent(self, info):
- print download_file(info)
+ class SimpleHTMLParser(HTMLParser):
+ def __init__(self, results, url, *args):
+ HTMLParser.__init__(self)
+ self.td_counter = None
+ self.current_item = None
+ self.results = results
+ self.id = None
+ self.url = url
+ self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td }
- class SimpleHTMLParser(HTMLParser):
- def __init__(self, results, url, *args):
- HTMLParser.__init__(self)
- self.td_counter = None
- self.current_item = None
- self.results = results
- self.id = None
- self.url = url
- self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td }
+ def handle_starttag(self, tag, attrs):
+ if tag in self.dispatcher:
+ self.dispatcher[tag](attrs)
- def handle_starttag(self, tag, attrs):
- if tag in self.dispatcher:
- self.dispatcher[tag](attrs)
+ def start_a(self, attr):
+ params = dict(attr)
+ if re.match("/torrents/\d+.*", params['href']):
+ self.current_item = {}
+ self.current_item['desc_link'] = self.url+params['href'].strip()
+ elif 'torrentreactor.net/download.php' in params['href']:
+ self.td_counter = 0
+ self.current_item['link'] = params['href'].strip()
+ self.current_item['name'] = urllib.unquote_plus(params['href'].split('&')[1].split('name=')[1])
- def start_a(self, attr):
- params = dict(attr)
- if re.match("/torrents/\d+.*", params['href']):
- self.current_item = {}
- self.current_item['desc_link'] = self.url+params['href'].strip()
- elif 'torrentreactor.net/download.php' in params['href']:
- self.td_counter = 0
- self.current_item['link'] = params['href'].strip()
- self.current_item['name'] = urllib.unquote_plus(params['href'].split('&')[1].split('name=')[1])
+ def handle_data(self, data):
+ if self.td_counter == 1:
+ if 'size' not in self.current_item:
+ self.current_item['size'] = ''
+ self.current_item['size']+= data.strip()
+ elif self.td_counter == 2:
+ if 'seeds' not in self.current_item:
+ self.current_item['seeds'] = ''
+ self.current_item['seeds']+= data.strip()
+ elif self.td_counter == 3:
+ if 'leech' not in self.current_item:
+ self.current_item['leech'] = ''
+ self.current_item['leech']+= data.strip()
- def handle_data(self, data):
- if self.td_counter == 1:
- if not self.current_item.has_key('size'):
- self.current_item['size'] = ''
- self.current_item['size']+= data.strip()
- elif self.td_counter == 2:
- if not self.current_item.has_key('seeds'):
- self.current_item['seeds'] = ''
- self.current_item['seeds']+= data.strip()
- elif self.td_counter == 3:
- if not self.current_item.has_key('leech'):
- self.current_item['leech'] = ''
- self.current_item['leech']+= data.strip()
+ def start_td(self,attr):
+ if isinstance(self.td_counter,int):
+ self.td_counter += 1
+ if self.td_counter > 3:
+ self.td_counter = None
+ # add item to results
+ if self.current_item:
+ self.current_item['engine_url'] = self.url
+ if not self.current_item['seeds'].isdigit():
+ self.current_item['seeds'] = 0
+ if not self.current_item['leech'].isdigit():
+ self.current_item['leech'] = 0
+ prettyPrinter(self.current_item)
+ self.has_results = True
+ self.results.append('a')
- def start_td(self,attr):
- if isinstance(self.td_counter,int):
- self.td_counter += 1
- if self.td_counter > 3:
- self.td_counter = None
- # add item to results
- if self.current_item:
- self.current_item['engine_url'] = self.url
- if not self.current_item['seeds'].isdigit():
- self.current_item['seeds'] = 0
- if not self.current_item['leech'].isdigit():
- self.current_item['leech'] = 0
- prettyPrinter(self.current_item)
- self.has_results = True
- self.results.append('a')
+ def search(self, what, cat='all'):
+ i = 0
+ dat = ''
+ connection = http("www.torrentreactor.net")
- def __init__(self):
- self.results = []
- self.parser = self.SimpleHTMLParser(self.results, self.url)
+ while True and i<11:
+ results = []
+ parser = self.SimpleHTMLParser(results, self.url)
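+ #each iteration requests the next page of results (the offset advances by 35 per page)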
+ query = '/torrents-search/%s/%d?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat])
+ connection.request("GET", query)
+ response = connection.getresponse()
+ if response.status != 200:
+ break
- def search(self, what, cat='all'):
- i = 0
- dat = ''
- while True and i<11:
- results = []
- parser = self.SimpleHTMLParser(results, self.url)
+ dat = response.read().decode('utf-8')
- try:
- dat = retrieve_url(self.url+'/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat]))
- except HTTPError:
- break
+ parser.feed(dat)
+ parser.close()
+ if len(results) <= 0:
+ break
+ i += 1
- parser.feed(dat)
- parser.close()
- if len(results) <= 0:
- break
- i += 1
+ connection.close()
diff --git a/src/searchengine/nova/engines/versions.txt b/src/searchengine/nova/engines/versions.txt
index d581a676f..77fb875d5 100644
--- a/src/searchengine/nova/engines/versions.txt
+++ b/src/searchengine/nova/engines/versions.txt
@@ -1,8 +1,8 @@
-torrentreactor: 1.33
-mininova: 1.51
-piratebay: 2.11
-extratorrent: 1.2
+torrentreactor: 1.35
+mininova: 2.00
+piratebay: 2.11
+extratorrent: 2.0
kickasstorrents: 1.26
btdigg: 1.24
-legittorrents: 1.02
torrentz: 2.13
+legittorrents: 1.03
diff --git a/src/searchengine/nova3/engines/extratorrent.py b/src/searchengine/nova3/engines/extratorrent.py
index df1ef9b24..de3dcb9a2 100644
--- a/src/searchengine/nova3/engines/extratorrent.py
+++ b/src/searchengine/nova3/engines/extratorrent.py
@@ -1,4 +1,4 @@
-#VERSION: 1.2
+#VERSION: 2.0
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
# Redistribution and use in source and binary forms, with or without
@@ -25,92 +25,135 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
-
+from html.parser import HTMLParser
+from http.client import HTTPConnection as http
+#qBt
from novaprinter import prettyPrinter
-from helpers import retrieve_url, download_file
-import sgmllib3
-import re
+from helpers import download_file
class extratorrent(object):
- url = 'http://extratorrent.cc'
- name = 'extratorrent'
- supported_categories = {'all': '', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'books': '2', 'pictures': '6'}
-
- def __init__(self):
- self.results = []
- self.parser = self.SimpleSGMLParser(self.results, self.url)
-
- def download_torrent(self, info):
- print(download_file(info))
-
- class SimpleSGMLParser(sgmllib3.SGMLParser):
- def __init__(self, results, url, *args):
- sgmllib3.SGMLParser.__init__(self)
- self.url = url
- self.td_counter = None
- self.current_item = None
- self.start_name = False
- self.results = results
-
- def start_a(self, attr):
- params = dict(attr)
- #print params
- if 'href' in params and params['href'].startswith("/torrent_download/"):
- self.current_item = {}
- self.td_counter = 0
- self.start_name = False
- torrent_id = '/'.join(params['href'].split('/')[2:])
- self.current_item['link']=self.url+'/download/'+torrent_id
- elif 'href' in params and params['href'].startswith("/torrent/") and params['href'].endswith(".html"):
- self.current_item['desc_link'] = self.url + params['href'].strip()
- self.start_name = True
-
- def handle_data(self, data):
- if self.td_counter == 2:
- if 'name' not in self.current_item and self.start_name:
- self.current_item['name'] = data.strip()
- elif self.td_counter == 3:
- if 'size' not in self.current_item:
- self.current_item['size'] = ''
- self.current_item['size']+= data.replace(" ", " ").strip()
- elif self.td_counter == 4:
- if 'seeds' not in self.current_item:
- self.current_item['seeds'] = ''
- self.current_item['seeds']+= data.strip()
- elif self.td_counter == 5:
- if 'leech' not in self.current_item:
- self.current_item['leech'] = ''
- self.current_item['leech']+= data.strip()
-
- def start_td(self,attr):
- if isinstance(self.td_counter,int):
- self.td_counter += 1
- if self.td_counter > 5:
- self.td_counter = None
- # Display item
+ """ Search engine class """
+ url = 'http://extratorrent.cc'
+ name = 'ExtraTorrent'
+ supported_categories = {'all' : '0',
+ 'movies' : '4',
+ 'tv' : '8',
+ 'music' : '5',
+ 'games' : '3',
+ 'anime' : '1',
+ 'software' : '7',
+ 'books' : '2',
+ 'pictures' : '6'}
+
+ def download_torrent(self, info):
+ """ Downloader """
+ print(download_file(info))
+
+ class MyHtmlParseWithBlackJack(HTMLParser):
+ """ Parser class """
+ def __init__(self, list_searches, url):
+ HTMLParser.__init__(self)
+ self.url = url
+ self.list_searches = list_searches
+ self.current_item = None
+ self.cur_item_name = None
+ self.pending_size = False
+ self.next_queries = True
+ self.pending_next_queries = False
+
+ def handle_starttag(self, tag, attrs):
if self.current_item:
- self.current_item['engine_url'] = self.url
- if not self.current_item['seeds'].isdigit():
- self.current_item['seeds'] = 0
- if not self.current_item['leech'].isdigit():
- self.current_item['leech'] = 0
- prettyPrinter(self.current_item)
- self.results.append('a')
-
- def search(self, what, cat='all'):
- ret = []
- i = 1
- while True and i<11:
- results = []
- parser = self.SimpleSGMLParser(results, self.url)
- dat = retrieve_url(self.url+'/advanced_search/?with=%s&s_cat=%s&page=%d'%(what, self.supported_categories[cat], i))
- results_re = re.compile('(?s)<table class="tl">.*')
- for match in results_re.finditer(dat):
- res_tab = match.group(0)
- parser.feed(res_tab)
+ if tag == "a":
+ params = dict(attrs)
+ link = params['href']
+
+ if not link.startswith("/torrent"):
+ return
+
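+ #index 8 of the href tells the link type apart: "/torrent/..." has "/" (details page), "/torrent_download/..." has "_" (torrent file)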
+ if link[8] == "/":
+ #description
+ self.current_item["desc_link"] = "".join((self.url, link))
+ #the title attribute starts with "view ", strip it to keep just the torrent name
+ self.current_item["name"] = params["title"][5:]
+ self.pending_size = True
+ elif link[8] == "_":
+ #download link
+ link = link.replace("torrent_", "", 1)
+ self.current_item["link"] = "".join((self.url, link))
+
+ elif tag == "td":
+ if self.pending_size:
+ self.cur_item_name = "size"
+ self.current_item["size"] = ""
+ self.pending_size = False
+
+ for attr in attrs:
+ if attr[0] == "class":
+ if attr[1][0] == "s":
+ self.cur_item_name = "seeds"
+ self.current_item["seeds"] = ""
+ elif attr[1][0] == "l":
+ self.cur_item_name = "leech"
+ self.current_item["leech"] = ""
+ break
+
+
+ elif tag == "tr":
+ for attr in attrs:
+ if attr[0] == "class" and attr[1].startswith("tl"):
+ self.current_item = dict()
+ self.current_item["engine_url"] = self.url
+ break
+
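+ #once the pager marker has been seen, collect the remaining result-page links (stop after the page titled "10" or at the first non-link tag)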
+ elif self.pending_next_queries:
+ if tag == "a":
+ params = dict(attrs)
+ self.list_searches.append(params['href'])
+ if params["title"] == "10":
+ self.pending_next_queries = False
+ else:
+ self.pending_next_queries = False
+
+ elif self.next_queries:
+ if tag == "b" and ("class", "pager_no_link") in attrs:
+ self.next_queries = False
+ self.pending_next_queries = True
+
+ def handle_data(self, data):
+ if self.cur_item_name:
+ temp = self.current_item[self.cur_item_name]
+ self.current_item[self.cur_item_name] = " ".join((temp, data))
+ #the size text arrives in several data chunks (split on the &nbsp; between number and unit), so keep "size" active; other fields only need the first chunk
+ if self.cur_item_name != "size":
+ self.cur_item_name = None
+
+ def handle_endtag(self, tag):
+ if self.current_item:
+ if tag == "tr":
+ prettyPrinter(self.current_item)
+ self.current_item = None
+
+ def search(self, what, cat="all"):
+ """ Performs search """
+ connection = http("extratorrent.cc")
+
+ query = "".join(("/search/?new=1&search=", what, "&s_cat=", self.supported_categories[cat]))
+
+ connection.request("GET", query)
+ response = connection.getresponse()
+ if response.status != 200:
+ return
+
+ list_searches = []
+ parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
+ parser.feed(response.read().decode('utf-8'))
parser.close()
- break
- if len(results) <= 0:
- break
- i += 1
-
+
+ for search_query in list_searches:
+ connection.request("GET", search_query)
+ response = connection.getresponse()
+ parser.feed(response.read().decode('utf-8'))
+ parser.close()
+
+ connection.close()
+ return
diff --git a/src/searchengine/nova3/engines/legittorrents.py b/src/searchengine/nova3/engines/legittorrents.py
index 290852f0d..60297c574 100644
--- a/src/searchengine/nova3/engines/legittorrents.py
+++ b/src/searchengine/nova3/engines/legittorrents.py
@@ -1,4 +1,4 @@
-#VERSION: 1.03
+#VERSION: 1.04
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
# Redistribution and use in source and binary forms, with or without
@@ -36,10 +36,6 @@ class legittorrents(object):
name = 'legittorrents'
supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'}
- def __init__(self):
- self.results = []
- self.parser = self.SimpleSGMLParser(self.results, self.url)
-
def download_torrent(self, info):
print(download_file(info))
diff --git a/src/searchengine/nova3/engines/mininova.py b/src/searchengine/nova3/engines/mininova.py
index 96d6ed8e5..12544db09 100644
--- a/src/searchengine/nova3/engines/mininova.py
+++ b/src/searchengine/nova3/engines/mininova.py
@@ -1,4 +1,4 @@
-#VERSION: 1.51
+#VERSION: 2.00
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
#CONTRIBUTORS: Diego de las Heras (diegodelasheras@gmail.com)
@@ -26,90 +26,124 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
+from html.parser import HTMLParser
+from http.client import HTTPConnection as http
from novaprinter import prettyPrinter
-from helpers import retrieve_url, download_file
-import sgmllib3
-import re
+from helpers import download_file
class mininova(object):
- # Mandatory properties
- url = 'http://www.mininova.org'
- name = 'Mininova'
- supported_categories = {'all': '0', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'pictures': '6', 'books': '2'}
-
- def __init__(self):
- self.results = []
- self.parser = self.SimpleSGMLParser(self.results, self.url)
-
- def download_torrent(self, info):
- print(download_file(info))
-
- class SimpleSGMLParser(sgmllib3.SGMLParser):
- def __init__(self, results, url, *args):
- sgmllib3.SGMLParser.__init__(self)
- self.url = url
- self.td_counter = None
- self.current_item = None
- self.results = results
-
- def start_a(self, attr):
- params = dict(attr)
- #print params
- if 'href' in params:
- if params['href'].startswith("/get/"):
- self.current_item = {}
- self.td_counter = 0
- self.current_item['link']=self.url+params['href'].strip()
- elif params['href'].startswith("/tor/") and self.current_item is not None:
- self.current_item['desc_link']=self.url+params['href'].strip()
-
- def handle_data(self, data):
- if self.td_counter == 0:
- if 'name' not in self.current_item:
- self.current_item['name'] = ''
- self.current_item['name']+= data
- elif self.td_counter == 1:
- if 'size' not in self.current_item:
- self.current_item['size'] = ''
- self.current_item['size']+= data.strip()
- elif self.td_counter == 2:
- if 'seeds' not in self.current_item:
- self.current_item['seeds'] = ''
- self.current_item['seeds']+= data.strip()
- elif self.td_counter == 3:
- if 'leech' not in self.current_item:
- self.current_item['leech'] = ''
- self.current_item['leech']+= data.strip()
-
- def start_td(self,attr):
- if isinstance(self.td_counter,int):
- self.td_counter += 1
- if self.td_counter > 4:
- self.td_counter = None
- # Display item
- if self.current_item:
- self.current_item['engine_url'] = self.url
- if not self.current_item['seeds'].isdigit():
- self.current_item['seeds'] = 0
- if not self.current_item['leech'].isdigit():
- self.current_item['leech'] = 0
- prettyPrinter(self.current_item)
- self.results.append('a')
-
- def search(self, what, cat='all'):
- ret = []
- i = 1
- while True and i<11:
- results = []
- parser = self.SimpleSGMLParser(results, self.url)
- dat = retrieve_url(self.url+'/search/%s/%s/seeds/%d'%(what, self.supported_categories[cat], i))
- results_re = re.compile('(?s)Search results for.*')
- for match in results_re.finditer(dat):
- res_tab = match.group(0)
- parser.feed(res_tab)
+ """ Search engine class """
+ url = 'http://www.mininova.org'
+ name = 'Mininova'
+ supported_categories = {'all' : '0',
+ 'movies' : '4',
+ 'tv' : '8',
+ 'music' : '5',
+ 'games' : '3',
+ 'anime' : '1',
+ 'software' : '7',
+ 'pictures' : '6',
+ 'books' : '2'}
+
+ def download_torrent(self, info):
+ print(download_file(info))
+
+ class MyHtmlParseWithBlackJack(HTMLParser):
+ """ Parser class """
+ def __init__(self, list_searches, url):
+ HTMLParser.__init__(self)
+ self.list_searches = list_searches
+ self.url = url
+ self.table_results = False
+ self.current_item = None
+ self.cur_item_name = None
+ self.next_queries = True
+
+ def handle_starttag_tr(self, _):
+ """ Handler of tr start tag """
+ self.current_item = dict()
+
+ def handle_starttag_a(self, attrs):
+ """ Handler of a start tag """
+ params = dict(attrs)
+ link = params["href"]
+
+ if link.startswith("/get/"):
+ #download link
+ self.current_item["link"] = "".join((self.url, link))
+ elif link.startswith("/tor/"):
+ #description
+ self.current_item["desc_link"] = "".join((self.url, link))
+ self.cur_item_name = "name"
+ self.current_item["name"] = ""
+ elif self.next_queries and link.startswith("/search"):
+ if params["title"].startswith("Page"):
+ self.list_searches.append(link)
+
+ def handle_starttag_td(self, attrs):
+ """ Handler of td start tag """
+ if ("align", "right") in attrs:
+ if not "size" in self.current_item.keys():
+ self.cur_item_name = "size"
+ self.current_item["size"] = ""
+
+ def handle_starttag_span(self, attrs):
+ """ Handler of span start tag """
+ if ("class", "g") in attrs:
+ self.cur_item_name = "seeds"
+ self.current_item["seeds"] = ""
+ elif ("class", "b") in attrs:
+ self.cur_item_name = "leech"
+ self.current_item["leech"] = ""
+
+ def handle_starttag(self, tag, attrs):
+ """ Parser's start tag handler """
+ if self.table_results:
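+ #route the tag to the matching handle_starttag_<tag> helper above, if one is defined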
+ dispatcher = getattr(self, "_".join(("handle_starttag", tag)), None)
+ if dispatcher:
+ dispatcher(attrs)
+
+ elif tag == "table":
+ self.table_results = ("class", "maintable") in attrs
+
+ def handle_endtag(self, tag):
+ """ Parser's end tag handler """
+ if tag == "tr" and self.current_item:
+ self.current_item["engine_url"] = self.url
+ prettyPrinter(self.current_item)
+ self.current_item = None
+ elif self.cur_item_name:
+ if tag == "a" or tag == "span":
+ self.cur_item_name = None
+
+ def handle_data(self, data):
+ """ Parser's data handler """
+ if self.cur_item_name:
+ temp = self.current_item[self.cur_item_name]
+ self.current_item[self.cur_item_name] = " ".join((temp, data))
+
+ def search(self, what, cat="all"):
+ """ Performs search """
+ connection = http("www.mininova.org")
+
+ query = "/".join(("/search", what, self.supported_categories[cat], "seeds"))
+
+ connection.request("GET", query)
+ response = connection.getresponse()
+ if response.status != 200:
+ return
+
+ list_searches = []
+ parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
+ parser.feed(response.read().decode('utf-8'))
parser.close()
- break
- if len(results) <= 0:
- break
- i += 1
-
+
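+ #page links were already collected from the first page; don't collect them again while walking the remaining pages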
+ parser.next_queries = False
+ for search_query in list_searches:
+ connection.request("GET", search_query)
+ response = connection.getresponse()
+ parser.feed(response.read().decode('utf-8'))
+ parser.close()
+
+ connection.close()
+ return
diff --git a/src/searchengine/nova3/engines/torrentreactor.py b/src/searchengine/nova3/engines/torrentreactor.py
index a099ec5ab..da6391cba 100644
--- a/src/searchengine/nova3/engines/torrentreactor.py
+++ b/src/searchengine/nova3/engines/torrentreactor.py
@@ -1,4 +1,4 @@
-#VERSION: 1.33
+#VERSION: 1.35
#AUTHORS: Gekko Dam Beer (gekko04@users.sourceforge.net)
#CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org)
# Bruno Barbieri (brunorex@gmail.com)
@@ -28,91 +28,94 @@
# POSSIBILITY OF SUCH DAMAGE.
from novaprinter import prettyPrinter
-from helpers import retrieve_url, download_file
-from urllib import error, parse
+from helpers import download_file
+from urllib import parse
from html.parser import HTMLParser
+from http.client import HTTPConnection as http
import re
class torrentreactor(object):
- url = 'http://www.torrentreactor.net'
- name = 'TorrentReactor.Net'
- supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'}
+ url = 'http://www.torrentreactor.net'
+ name = 'TorrentReactor.Net'
+ supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'}
- def download_torrent(self, info):
- print(download_file(info))
+ def download_torrent(self, info):
+ print(download_file(info))
- class SimpleHTMLParser(HTMLParser):
- def __init__(self, results, url, *args):
- HTMLParser.__init__(self)
- self.td_counter = None
- self.current_item = None
- self.results = results
- self.id = None
- self.url = url
- self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td }
+ class SimpleHTMLParser(HTMLParser):
+ def __init__(self, results, url, *args):
+ HTMLParser.__init__(self)
+ self.td_counter = None
+ self.current_item = None
+ self.results = results
+ self.id = None
+ self.url = url
+ self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td }
- def handle_starttag(self, tag, attrs):
- if tag in self.dispatcher:
- self.dispatcher[tag](attrs)
+ def handle_starttag(self, tag, attrs):
+ if tag in self.dispatcher:
+ self.dispatcher[tag](attrs)
- def start_a(self, attr):
- params = dict(attr)
- if re.match("/torrents/\d+.*", params['href']):
- self.current_item = {}
- self.current_item['desc_link'] = self.url+params['href'].strip()
- elif 'torrentreactor.net/download.php' in params['href']:
- self.td_counter = 0
- self.current_item['link'] = params['href'].strip()
- self.current_item['name'] = parse.unquote_plus(params['href'].split('&')[1].split('name=')[1])
+ def start_a(self, attr):
+ params = dict(attr)
+ if re.match("/torrents/\d+.*", params['href']):
+ self.current_item = {}
+ self.current_item['desc_link'] = self.url+params['href'].strip()
+ elif 'torrentreactor.net/download.php' in params['href']:
+ self.td_counter = 0
+ self.current_item['link'] = params['href'].strip()
+ self.current_item['name'] = parse.unquote_plus(params['href'].split('&')[1].split('name=')[1])
- def handle_data(self, data):
- if self.td_counter == 1:
- if 'size' not in self.current_item:
- self.current_item['size'] = ''
- self.current_item['size']+= data.strip()
- elif self.td_counter == 2:
- if 'seeds' not in self.current_item:
- self.current_item['seeds'] = ''
- self.current_item['seeds']+= data.strip()
- elif self.td_counter == 3:
- if 'leech' not in self.current_item:
- self.current_item['leech'] = ''
- self.current_item['leech']+= data.strip()
+ def handle_data(self, data):
+ if self.td_counter == 1:
+ if 'size' not in self.current_item:
+ self.current_item['size'] = ''
+ self.current_item['size']+= data.strip()
+ elif self.td_counter == 2:
+ if 'seeds' not in self.current_item:
+ self.current_item['seeds'] = ''
+ self.current_item['seeds']+= data.strip()
+ elif self.td_counter == 3:
+ if 'leech' not in self.current_item:
+ self.current_item['leech'] = ''
+ self.current_item['leech']+= data.strip()
- def start_td(self,attr):
- if isinstance(self.td_counter,int):
- self.td_counter += 1
- if self.td_counter > 3:
- self.td_counter = None
- # add item to results
- if self.current_item:
- self.current_item['engine_url'] = self.url
- if not self.current_item['seeds'].isdigit():
- self.current_item['seeds'] = 0
- if not self.current_item['leech'].isdigit():
- self.current_item['leech'] = 0
- prettyPrinter(self.current_item)
- self.has_results = True
- self.results.append('a')
+ def start_td(self,attr):
+ if isinstance(self.td_counter,int):
+ self.td_counter += 1
+ if self.td_counter > 3:
+ self.td_counter = None
+ # add item to results
+ if self.current_item:
+ self.current_item['engine_url'] = self.url
+ if not self.current_item['seeds'].isdigit():
+ self.current_item['seeds'] = 0
+ if not self.current_item['leech'].isdigit():
+ self.current_item['leech'] = 0
+ prettyPrinter(self.current_item)
+ self.has_results = True
+ self.results.append('a')
- def __init__(self):
- self.results = []
- self.parser = self.SimpleHTMLParser(self.results, self.url)
+ def search(self, what, cat='all'):
+ i = 0
+ dat = ''
+ connection = http("www.torrentreactor.net")
- def search(self, what, cat='all'):
- i = 0
- dat = ''
- while True and i<11:
- results = []
- parser = self.SimpleHTMLParser(results, self.url)
+ while True and i<11:
+ results = []
+ parser = self.SimpleHTMLParser(results, self.url)
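+ #each iteration requests the next page of results (the offset advances by 35 per page)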
+ query = '/torrents-search/%s/%d?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat])
+ connection.request("GET", query)
+ response = connection.getresponse()
+ if response.status != 200:
+ break
- try:
- dat = retrieve_url(self.url+'/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat]))
- except error.HTTPError:
- break
+ dat = response.read().decode('utf-8')
- parser.feed(dat)
- parser.close()
- if len(results) <= 0:
- break
- i += 1
+ parser.feed(dat)
+ parser.close()
+ if len(results) <= 0:
+ break
+ i += 1
+
+ connection.close()
diff --git a/src/searchengine/nova3/engines/versions.txt b/src/searchengine/nova3/engines/versions.txt
index c0e097583..739171208 100644
--- a/src/searchengine/nova3/engines/versions.txt
+++ b/src/searchengine/nova3/engines/versions.txt
@@ -1,8 +1,8 @@
-torrentreactor: 1.33
-mininova: 1.51
-piratebay: 2.11
-extratorrent: 1.2
+torrentreactor: 1.35
+mininova: 2.00
+piratebay: 2.11
+extratorrent: 2.0
kickasstorrents: 1.26
btdigg: 1.23
-legittorrents: 1.03
torrentz: 2.13
+legittorrents: 1.04