[search engine] engines update

10 years ago · d6d0f422f5
10 changed files with 660 additions and 507 deletions
--- a/src/searchengine/nova/engines/extratorrent.py
+++ b/src/searchengine/nova/engines/extratorrent.py
@ -1,4 +1,4 @@
-#VERSION: 1.2
+#VERSION: 2.0
 #AUTHORS: Christophe Dumez (chris@qbittorrent.org)
 # Redistribution and use in source and binary forms, with or without
@ -25,92 +25,135 @@
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
-
+from HTMLParser import HTMLParser
 from httplib import HTTPConnection as http
 #qBt
 from novaprinter import prettyPrinter
-from helpers import retrieve_url, download_file
+from helpers import download_file
 import sgmllib
 import re
 class extratorrent(object):
-  url = 'http://extratorrent.cc'
+    """ Search engine class """
-  name = 'extratorrent'
+    url = 'http://extratorrent.cc'
-  supported_categories = {'all': '', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'books': '2', 'pictures': '6'}
+    name = 'ExtraTorrent'
-
+    supported_categories = {'all'       : '0',
-  def __init__(self):
+                            'movies'    : '4',
-    self.results = []
+                            'tv'        : '8',
-    self.parser = self.SimpleSGMLParser(self.results, self.url)
+                            'music'     : '5',
-
+                            'games'     : '3',
-  def download_torrent(self, info):
+                            'anime'     : '1',
-    print download_file(info)
+                            'software'  : '7',
-
+                            'books'     : '2',
-  class SimpleSGMLParser(sgmllib.SGMLParser):
+                            'pictures'  : '6'}
-    def __init__(self, results, url, *args):
+
-      sgmllib.SGMLParser.__init__(self)
+    def download_torrent(self, info):
-      self.url = url
+        """ Downloader """
-      self.td_counter = None
+        print(download_file(info))
-      self.current_item = None
+
-      self.start_name = False
+    class MyHtmlParseWithBlackJack(HTMLParser):
-      self.results = results
+        """ Parser class """
-      
+        def __init__(self, list_searches, url):
-    def start_a(self, attr):
+            HTMLParser.__init__(self)
-      params = dict(attr)
+            self.url = url
-      #print params
+            self.list_searches = list_searches
-      if params.has_key('href') and params['href'].startswith("/torrent_download/"):
+            self.current_item = None
-        self.current_item = {}
+            self.cur_item_name = None
-        self.td_counter = 0
+            self.pending_size = False
-        self.start_name = False
+            self.next_queries = True
-        torrent_id = '/'.join(params['href'].split('/')[2:])
+            self.pending_next_queries = False
-        self.current_item['link']=self.url+'/download/'+torrent_id
+
-      elif params.has_key('href') and params['href'].startswith("/torrent/") and params['href'].endswith(".html"):
+        def handle_starttag(self, tag, attrs):
-        self.current_item['desc_link'] = self.url + params['href'].strip()
+            if self.current_item:
-        self.start_name = True
+                if tag == "a":
-    
+                    params = dict(attrs)
-    def handle_data(self, data):
+                    link = params['href']
-      if self.td_counter == 2:
+
-        if not self.current_item.has_key('name') and self.start_name:
+                    if not link.startswith("/torrent"):
-          self.current_item['name'] = data.strip()
+                        return
-      elif self.td_counter == 3:
+
-        if not self.current_item.has_key('size'):
+                    if link[8] == "/":
-          self.current_item['size'] = ''
+                        #description
-        self.current_item['size']+= data.replace("&nbsp;", " ").strip()
+                        self.current_item["desc_link"] = "".join((self.url, link))
-      elif self.td_counter == 4:
+                        #remove view at the beginning
-        if not self.current_item.has_key('seeds'):
+                        self.current_item["name"] = params["title"][5:]
-          self.current_item['seeds'] = ''
+                        self.pending_size = True
-        self.current_item['seeds']+= data.strip()
+                    elif link[8] == "_":
-      elif self.td_counter == 5:
+                        #download link
-        if not self.current_item.has_key('leech'):
+                        link = link.replace("torrent_", "", 1)
-          self.current_item['leech'] = ''
+                        self.current_item["link"] = "".join((self.url, link))
-        self.current_item['leech']+= data.strip()
+
-      
+                elif tag == "td":
-    def start_td(self,attr):
+                    if self.pending_size:
-        if isinstance(self.td_counter,int):
+                        self.cur_item_name = "size"
-          self.td_counter += 1
+                        self.current_item["size"] = ""
-          if self.td_counter > 5:
+                        self.pending_size = False
-            self.td_counter = None
+
-            # Display item
+                    for attr in attrs:
                        if attr[0] == "class":
                            if attr[1][0] == "s":
                                self.cur_item_name = "seeds"
                                self.current_item["seeds"] = ""
                            elif attr[1][0] == "l":
                                self.cur_item_name = "leech"
                                self.current_item["leech"] = ""
                        break
            elif tag == "tr":
                for attr in attrs:
                    if attr[0] == "class" and attr[1].startswith("tl"):
                        self.current_item = dict()
                        self.current_item["engine_url"] = self.url
                        break
            elif self.pending_next_queries:
                if tag == "a":
                    params = dict(attrs)
                    self.list_searches.append(params['href'])
                    if params["title"] == "10":
                        self.pending_next_queries = False
                else:
                    self.pending_next_queries = False
            elif self.next_queries:
                if tag == "b" and ("class", "pager_no_link") in attrs:
                    self.next_queries = False
                    self.pending_next_queries = True
        def handle_data(self, data):
            """ Parser's data handler

            Appends the text chunk `data` to the field of the current row that
            is named by self.cur_item_name. Does nothing when no field is
            being collected.
            """
            field = self.cur_item_name
            if not field:
                return

            item = self.current_item
            if item is None or field not in item:
                #handle_endtag drops current_item at the end of a row but leaves
                #cur_item_name untouched, so a field name can outlive its row;
                #forget it instead of failing on the missing row or key
                self.cur_item_name = None
                return

            item[field] = " ".join((item[field], data))
            #Due to utf-8 the size text can arrive in more than one chunk, so
            #"size" stays active; every other field is done after one chunk
            if field != "size":
                self.cur_item_name = None
        def handle_endtag(self, tag):
            if self.current_item:
-              self.current_item['engine_url'] = self.url
+                if tag == "tr":
-              if not self.current_item['seeds'].isdigit():
+                    prettyPrinter(self.current_item)
-                self.current_item['seeds'] = 0
+                    self.current_item = None
-              if not self.current_item['leech'].isdigit():
+
-                self.current_item['leech'] = 0
+    def search(self, what, cat="all"):
-              prettyPrinter(self.current_item)
+        """ Performs search """
-              self.results.append('a')
+        connection = http("extratorrent.cc")
-
+
-  def search(self, what, cat='all'):
+        query = "".join(("/search/?new=1&search=", what, "&s_cat=", self.supported_categories[cat]))
-    ret = []
+
-    i = 1
+        connection.request("GET", query)
-    while True and i<11:
+        response = connection.getresponse()
-      results = []
+        if response.status != 200:
-      parser = self.SimpleSGMLParser(results, self.url)
+            return
-      dat = retrieve_url(self.url+'/advanced_search/?with=%s&s_cat=%s&page=%d'%(what, self.supported_categories[cat], i))
+
-      results_re = re.compile('(?s)<table class="tl"><thead>.*')
+        list_searches = []
-      for match in results_re.finditer(dat):
+        parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
-        res_tab = match.group(0)
+        parser.feed(response.read().decode('utf-8'))
        parser.feed(res_tab)
        parser.close()
        break
      if len(results) <= 0:
        break
      i += 1
        for search_query in list_searches:
            connection.request("GET", search_query)
            response = connection.getresponse()
            parser.feed(response.read().decode('utf-8'))
            parser.close()
        connection.close()
        return
--- a/src/searchengine/nova/engines/legittorrents.py
+++ b/src/searchengine/nova/engines/legittorrents.py
@ -1,4 +1,4 @@
-#VERSION: 1.02
+#VERSION: 1.03
 #AUTHORS: Christophe Dumez (chris@qbittorrent.org)
 # Redistribution and use in source and binary forms, with or without
@ -36,10 +36,6 @@ class legittorrents(object):
  name = 'legittorrents'
  supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'}
  def __init__(self):
    self.results = []
    self.parser = self.SimpleSGMLParser(self.results, self.url)
  def download_torrent(self, info):
    print download_file(info)
--- a/src/searchengine/nova/engines/mininova.py
+++ b/src/searchengine/nova/engines/mininova.py
@ -1,4 +1,4 @@
-#VERSION: 1.51
+#VERSION: 2.00
 #AUTHORS: Christophe Dumez (chris@qbittorrent.org)
 #CONTRIBUTORS: Diego de las Heras (diegodelasheras@gmail.com)
@ -26,90 +26,124 @@
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
 from HTMLParser import HTMLParser
 from httplib import HTTPConnection as http
 from novaprinter import prettyPrinter
-from helpers import retrieve_url, download_file
+from helpers import download_file
 import sgmllib
 import re
 class mininova(object):
-  # Mandatory properties
+    """ Search engine class """
-  url = 'http://www.mininova.org'
+    url = 'http://www.mininova.org'
-  name = 'Mininova'
+    name = 'Mininova'
-  supported_categories = {'all': '0', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'pictures': '6', 'books': '2'}
+    supported_categories = {'all'       : '0',
-
+                            'movies'    : '4',
-  def __init__(self):
+                            'tv'        : '8',
-    self.results = []
+                            'music'     : '5',
-    self.parser = self.SimpleSGMLParser(self.results, self.url)
+                            'games'     : '3',
-
+                            'anime'     : '1',
-  def download_torrent(self, info):
+                            'software'  : '7',
-    print download_file(info)
+                            'pictures'  : '6',
-
+                            'books'     : '2'}
-  class SimpleSGMLParser(sgmllib.SGMLParser):
+
-    def __init__(self, results, url, *args):
+    def download_torrent(self, info):
-      sgmllib.SGMLParser.__init__(self)
+        print(download_file(info))
-      self.url = url
+
-      self.td_counter = None
+    class MyHtmlParseWithBlackJack(HTMLParser):
-      self.current_item = None
+        """ Parser class """
-      self.results = results
+        def __init__(self, list_searches, url):
-      
+            HTMLParser.__init__(self)
-    def start_a(self, attr):
+            self.list_searches = list_searches
-      params = dict(attr)
+            self.url = url
-      #print params
+            self.table_results = False
-      if params.has_key('href'):
+            self.current_item = None
-        if params['href'].startswith("/get/"):
+            self.cur_item_name = None
-          self.current_item = {}
+            self.next_queries = True
-          self.td_counter = 0
+
-          self.current_item['link']=self.url+params['href'].strip()
+        def handle_starttag_tr(self, _):
-        elif params['href'].startswith("/tor/") and self.current_item is not None:
+            """ Handler of tr start tag """
-          self.current_item['desc_link']=self.url+params['href'].strip()
+            self.current_item = dict()
-    
+
-    def handle_data(self, data):
+        def handle_starttag_a(self, attrs):
-      if self.td_counter == 0:
+            """ Handler of a start tag """
-        if not self.current_item.has_key('name'):
+            params = dict(attrs)
-          self.current_item['name'] = ''
+            link = params["href"]
-        self.current_item['name']+= data
+
-      elif self.td_counter == 1:
+            if link.startswith("/get/"):
-        if not self.current_item.has_key('size'):
+                #download link
-          self.current_item['size'] = ''
+                self.current_item["link"] = "".join((self.url, link))
-        self.current_item['size']+= data.strip()
+            elif link.startswith("/tor/"):
-      elif self.td_counter == 2:
+                #description
-        if not self.current_item.has_key('seeds'):
+                self.current_item["desc_link"] = "".join((self.url, link))
-          self.current_item['seeds'] = ''
+                self.cur_item_name = "name"
-        self.current_item['seeds']+= data.strip()
+                self.current_item["name"] = ""
-      elif self.td_counter == 3:
+            elif self.next_queries and link.startswith("/search"):
-        if not self.current_item.has_key('leech'):
+                if params["title"].startswith("Page"):
-          self.current_item['leech'] = ''
+                    self.list_searches.append(link)
-        self.current_item['leech']+= data.strip()
+
-      
+        def handle_starttag_td(self, attrs):
-    def start_td(self,attr):
+            """ Handler of td start tag """
-        if isinstance(self.td_counter,int):
+            if ("align", "right") in attrs:
-          self.td_counter += 1
+                if not "size" in self.current_item.keys():
-          if self.td_counter > 4:
+                    self.cur_item_name = "size"
-            self.td_counter = None
+                    self.current_item["size"] = ""
-            # Display item
+
-            if self.current_item:
+        def handle_starttag_span(self, attrs):
-              self.current_item['engine_url'] = self.url
+            """ Handler of span start tag """
-              if not self.current_item['seeds'].isdigit():
+            if ("class", "g") in attrs:
-                self.current_item['seeds'] = 0
+                self.cur_item_name = "seeds"
-              if not self.current_item['leech'].isdigit():
+                self.current_item["seeds"] = ""
-                self.current_item['leech'] = 0
+            elif ("class", "b") in attrs:
-              prettyPrinter(self.current_item)
+                self.cur_item_name = "leech"
-              self.results.append('a')
+                self.current_item["leech"] = ""
-
+
-  def search(self, what, cat='all'):
+        def handle_starttag(self, tag, attrs):
-    ret = []
+            """ Parser's start tag handler """
-    i = 1
+            if self.table_results:
-    while True and i<11:
+                dispatcher = getattr(self, "_".join(("handle_starttag", tag)), None)
-      results = []
+                if dispatcher:
-      parser = self.SimpleSGMLParser(results, self.url)
+                    dispatcher(attrs)
-      dat = retrieve_url(self.url+'/search/%s/%s/seeds/%d'%(what, self.supported_categories[cat], i))
+
-      results_re = re.compile('(?s)<h1>Search results for.*')
+            elif tag == "table":
-      for match in results_re.finditer(dat):
+                self.table_results = ("class", "maintable") in attrs
-        res_tab = match.group(0)
+
-        parser.feed(res_tab)
+        def handle_endtag(self, tag):
            """ Parser's end tag handler """
            if tag == "tr" and self.current_item:
                self.current_item["engine_url"] = self.url
                prettyPrinter(self.current_item)
                self.current_item = None
            elif self.cur_item_name:
                if tag == "a" or tag == "span":
                    self.cur_item_name = None
        def handle_data(self, data):
            """ Parser's data handler

            Appends the text chunk `data` to the field of the current row that
            is named by self.cur_item_name. Does nothing when no field is
            being collected.
            """
            field = self.cur_item_name
            if not field:
                return

            item = self.current_item
            if item is None or field not in item:
                #handle_endtag only clears cur_item_name on </a> and </span>, so
                #a field opened by a td (size) can outlive its row; forget it
                #instead of failing on the missing row or key
                self.cur_item_name = None
                return

            item[field] = " ".join((item[field], data))
    def search(self, what, cat="all"):
        """ Performs search

        Requests the first result page, feeds it to the parser (which prints
        every result row through prettyPrinter and collects links to further
        result pages), then requests and parses each collected page over the
        same connection.

        what -- search terms, inserted unchanged into the request path
                (presumably already URL-encoded by the caller; verify)
        cat  -- key of supported_categories
        """
        connection = http("www.mininova.org")
        try:
            query = "/".join(("/search", what, self.supported_categories[cat], "seeds"))

            connection.request("GET", query)
            response = connection.getresponse()
            if response.status != 200:
                return

            list_searches = []
            parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
            parser.feed(response.read().decode('utf-8'))
            parser.close()

            #page links are taken from the first page only; stop collecting
            #them so the list is not extended while it is being iterated
            parser.next_queries = False
            for search_query in list_searches:
                connection.request("GET", search_query)
                response = connection.getresponse()
                parser.feed(response.read().decode('utf-8'))
                parser.close()
        finally:
            #release the socket on every exit path, including the early
            #return above and any network or parsing error
            connection.close()
--- a/src/searchengine/nova/engines/torrentreactor.py
+++ b/src/searchengine/nova/engines/torrentreactor.py
@ -1,4 +1,4 @@
-#VERSION: 1.33
+#VERSION: 1.35
 #AUTHORS: Gekko Dam Beer (gekko04@users.sourceforge.net)
 #CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org)
 #              Bruno Barbieri (brunorex@gmail.com)
@ -28,92 +28,94 @@
 # POSSIBILITY OF SUCH DAMAGE.
 from novaprinter import prettyPrinter
-from helpers import retrieve_url, download_file
+from helpers import download_file
 from urllib2 import HTTPError 
 from HTMLParser import HTMLParser
 import urllib
 from HTMLParser import HTMLParser
 from httplib import HTTPConnection as http
 import re
 class torrentreactor(object):
-	url = 'http://www.torrentreactor.net'
+    url = 'http://www.torrentreactor.net'
-	name = 'TorrentReactor.Net'
+    name = 'TorrentReactor.Net'
-	supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'}
+    supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'}
    def download_torrent(self, info):
        """ Downloader: prints whatever download_file returns for info """
        downloaded = download_file(info)
        print(downloaded)
-	def download_torrent(self, info):
+    class SimpleHTMLParser(HTMLParser):
-		print download_file(info)
+        def __init__(self, results, url, *args):
            HTMLParser.__init__(self)
            self.td_counter = None
            self.current_item = None
            self.results = results
            self.id = None
            self.url = url
            self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td }
-	class SimpleHTMLParser(HTMLParser):
+        def handle_starttag(self, tag, attrs):
-		def __init__(self, results, url, *args):
+            if tag in self.dispatcher:
-			HTMLParser.__init__(self)
+                self.dispatcher[tag](attrs)
 			self.td_counter = None
 			self.current_item = None
 			self.results = results
 			self.id = None
 			self.url = url
 			self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td }
-		def handle_starttag(self, tag, attrs):
+        def start_a(self, attr):
-			if tag in self.dispatcher:
+            params = dict(attr)
-				self.dispatcher[tag](attrs)
+            if re.match("/torrents/\d+.*", params['href']):
                self.current_item = {}
                self.current_item['desc_link'] = self.url+params['href'].strip()
            elif 'torrentreactor.net/download.php' in params['href']:
                self.td_counter = 0
                self.current_item['link'] = params['href'].strip()
                self.current_item['name'] = urllib.unquote_plus(params['href'].split('&')[1].split('name=')[1])
-		def start_a(self, attr):
+        def handle_data(self, data):
-			params = dict(attr)
+            if self.td_counter == 1:
-			if re.match("/torrents/\d+.*", params['href']):
+                if 'size' not in self.current_item:
-				self.current_item = {}
+                    self.current_item['size'] = ''
-				self.current_item['desc_link'] = self.url+params['href'].strip()
+                self.current_item['size']+= data.strip()
-			elif 'torrentreactor.net/download.php' in params['href']:
+            elif self.td_counter == 2:
-				self.td_counter = 0
+                if 'seeds' not in self.current_item:
-				self.current_item['link'] = params['href'].strip()
+                    self.current_item['seeds'] = ''
-				self.current_item['name'] = urllib.unquote_plus(params['href'].split('&')[1].split('name=')[1])
+                self.current_item['seeds']+= data.strip()
            elif self.td_counter == 3:
                if 'leech' not in self.current_item:
                    self.current_item['leech'] = ''
                self.current_item['leech']+= data.strip()
-		def handle_data(self, data):
+        def start_td(self,attr):
-			if self.td_counter == 1:
+            if isinstance(self.td_counter,int):
-				if not self.current_item.has_key('size'):
+                self.td_counter += 1
-					self.current_item['size'] = ''
+                if self.td_counter > 3:
-				self.current_item['size']+= data.strip()
+                    self.td_counter = None
-			elif self.td_counter == 2:
+                    # add item to results
-				if not self.current_item.has_key('seeds'):
+                    if self.current_item:
-					self.current_item['seeds'] = ''
+                        self.current_item['engine_url'] = self.url
-				self.current_item['seeds']+= data.strip()
+                        if not self.current_item['seeds'].isdigit():
-			elif self.td_counter == 3:
+                            self.current_item['seeds'] = 0
-				if not self.current_item.has_key('leech'):
+                        if not self.current_item['leech'].isdigit():
-					self.current_item['leech'] = ''
+                            self.current_item['leech'] = 0
-				self.current_item['leech']+= data.strip()
+                        prettyPrinter(self.current_item)
                        self.has_results = True
                        self.results.append('a')
-		def start_td(self,attr):
+    def search(self, what, cat='all'):
-			if isinstance(self.td_counter,int):
+        i = 0
-				self.td_counter += 1
+        dat = ''
-				if self.td_counter > 3:
+        connection = http("www.torrentreactor.net")
 					self.td_counter = None
 					# add item to results
 					if self.current_item:
 						self.current_item['engine_url'] = self.url
 						if not self.current_item['seeds'].isdigit():
 							self.current_item['seeds'] = 0
 						if not self.current_item['leech'].isdigit():
 							self.current_item['leech'] = 0
 						prettyPrinter(self.current_item)
 						self.has_results = True
 						self.results.append('a')
-	def __init__(self):
+        while True and i<11:
-		self.results = []
+            results = []
-		self.parser = self.SimpleHTMLParser(self.results, self.url)
+            parser = self.SimpleHTMLParser(results, self.url)
            query = '/torrents-search/%s/%d?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat])
            connection.request("GET", query)
            response = connection.getresponse()
            if response.status != 200:
                break
-	def search(self, what, cat='all'):
+            dat = response.read().decode('utf-8')
 		i = 0
 		dat = ''
 		while True and i<11:
 			results = []
 			parser = self.SimpleHTMLParser(results, self.url)
-			try:
+            parser.feed(dat)
-				dat = retrieve_url(self.url+'/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat]))
+            parser.close()
-			except HTTPError:
+            if len(results) <= 0:
-				break
+                break
            i += 1
-			parser.feed(dat)
+        connection.close()
 			parser.close()
 			if len(results) <= 0:
 				break
 			i += 1
--- a/src/searchengine/nova/engines/versions.txt
+++ b/src/searchengine/nova/engines/versions.txt
@ -1,8 +1,9 @@
 torrentreactor: 1.33
 mininova: 1.51
 piratebay: 2.11
 extratorrent: 1.2
 torrentreactor: 1.35
 mininova: 2.00
 piratebay: 2.11
 extratorrent: 2.0
 kickasstorrents: 1.26
 btdigg: 1.24
 legittorrents: 1.02
 torrentz: 2.13
 legittorrents: 1.03
--- a/src/searchengine/nova3/engines/extratorrent.py
+++ b/src/searchengine/nova3/engines/extratorrent.py
@ -1,4 +1,4 @@
-#VERSION: 1.2
+#VERSION: 2.0
 #AUTHORS: Christophe Dumez (chris@qbittorrent.org)
 # Redistribution and use in source and binary forms, with or without
@ -25,92 +25,135 @@
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
-
+from html.parser import HTMLParser
 from http.client import HTTPConnection as http
 #qBt
 from novaprinter import prettyPrinter
-from helpers import retrieve_url, download_file
+from helpers import download_file
 import sgmllib3
 import re
 class extratorrent(object):
-  url = 'http://extratorrent.cc'
+    """ Search engine class """
-  name = 'extratorrent'
+    url = 'http://extratorrent.cc'
-  supported_categories = {'all': '', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'books': '2', 'pictures': '6'}
+    name = 'ExtraTorrent'
-
+    supported_categories = {'all'       : '0',
-  def __init__(self):
+                            'movies'    : '4',
-    self.results = []
+                            'tv'        : '8',
-    self.parser = self.SimpleSGMLParser(self.results, self.url)
+                            'music'     : '5',
-
+                            'games'     : '3',
-  def download_torrent(self, info):
+                            'anime'     : '1',
-    print(download_file(info))
+                            'software'  : '7',
-
+                            'books'     : '2',
-  class SimpleSGMLParser(sgmllib3.SGMLParser):
+                            'pictures'  : '6'}
-    def __init__(self, results, url, *args):
+
-      sgmllib3.SGMLParser.__init__(self)
+    def download_torrent(self, info):
-      self.url = url
+        """ Downloader """
-      self.td_counter = None
+        print(download_file(info))
-      self.current_item = None
+
-      self.start_name = False
+    class MyHtmlParseWithBlackJack(HTMLParser):
-      self.results = results
+        """ Parser class """
-      
+        def __init__(self, list_searches, url):
-    def start_a(self, attr):
+            HTMLParser.__init__(self)
-      params = dict(attr)
+            self.url = url
-      #print params
+            self.list_searches = list_searches
-      if 'href' in params and params['href'].startswith("/torrent_download/"):
+            self.current_item = None
-        self.current_item = {}
+            self.cur_item_name = None
-        self.td_counter = 0
+            self.pending_size = False
-        self.start_name = False
+            self.next_queries = True
-        torrent_id = '/'.join(params['href'].split('/')[2:])
+            self.pending_next_queries = False
-        self.current_item['link']=self.url+'/download/'+torrent_id
+
-      elif 'href' in params and params['href'].startswith("/torrent/") and params['href'].endswith(".html"):
+        def handle_starttag(self, tag, attrs):
-        self.current_item['desc_link'] = self.url + params['href'].strip()
+            if self.current_item:
-        self.start_name = True
+                if tag == "a":
-    
+                    params = dict(attrs)
-    def handle_data(self, data):
+                    link = params['href']
-      if self.td_counter == 2:
+
-        if 'name' not in self.current_item and self.start_name:
+                    if not link.startswith("/torrent"):
-          self.current_item['name'] = data.strip()
+                        return
-      elif self.td_counter == 3:
+
-        if 'size' not in self.current_item:
+                    if link[8] == "/":
-          self.current_item['size'] = ''
+                        #description
-        self.current_item['size']+= data.replace("&nbsp;", " ").strip()
+                        self.current_item["desc_link"] = "".join((self.url, link))
-      elif self.td_counter == 4:
+                        #remove view at the beginning
-        if 'seeds' not in self.current_item:
+                        self.current_item["name"] = params["title"][5:]
-          self.current_item['seeds'] = ''
+                        self.pending_size = True
-        self.current_item['seeds']+= data.strip()
+                    elif link[8] == "_":
-      elif self.td_counter == 5:
+                        #download link
-        if 'leech' not in self.current_item:
+                        link = link.replace("torrent_", "", 1)
-          self.current_item['leech'] = ''
+                        self.current_item["link"] = "".join((self.url, link))
-        self.current_item['leech']+= data.strip()
+
-      
+                elif tag == "td":
-    def start_td(self,attr):
+                    if self.pending_size:
-        if isinstance(self.td_counter,int):
+                        self.cur_item_name = "size"
-          self.td_counter += 1
+                        self.current_item["size"] = ""
-          if self.td_counter > 5:
+                        self.pending_size = False
-            self.td_counter = None
+
-            # Display item
+                    for attr in attrs:
                        if attr[0] == "class":
                            if attr[1][0] == "s":
                                self.cur_item_name = "seeds"
                                self.current_item["seeds"] = ""
                            elif attr[1][0] == "l":
                                self.cur_item_name = "leech"
                                self.current_item["leech"] = ""
                        break
            elif tag == "tr":
                for attr in attrs:
                    if attr[0] == "class" and attr[1].startswith("tl"):
                        self.current_item = dict()
                        self.current_item["engine_url"] = self.url
                        break
            elif self.pending_next_queries:
                if tag == "a":
                    params = dict(attrs)
                    self.list_searches.append(params['href'])
                    if params["title"] == "10":
                        self.pending_next_queries = False
                else:
                    self.pending_next_queries = False
            elif self.next_queries:
                if tag == "b" and ("class", "pager_no_link") in attrs:
                    self.next_queries = False
                    self.pending_next_queries = True
        def handle_data(self, data):
            """ Parser's data handler

            Appends the text chunk `data` to the field of the current row that
            is named by self.cur_item_name. Does nothing when no field is
            being collected.
            """
            field = self.cur_item_name
            if not field:
                return

            item = self.current_item
            if item is None or field not in item:
                #handle_endtag drops current_item at the end of a row but leaves
                #cur_item_name untouched, so a field name can outlive its row;
                #forget it instead of failing on the missing row or key
                self.cur_item_name = None
                return

            item[field] = " ".join((item[field], data))
            #Due to utf-8 the size text can arrive in more than one chunk, so
            #"size" stays active; every other field is done after one chunk
            if field != "size":
                self.cur_item_name = None
        def handle_endtag(self, tag):
            if self.current_item:
-              self.current_item['engine_url'] = self.url
+                if tag == "tr":
-              if not self.current_item['seeds'].isdigit():
+                    prettyPrinter(self.current_item)
-                self.current_item['seeds'] = 0
+                    self.current_item = None
-              if not self.current_item['leech'].isdigit():
+
-                self.current_item['leech'] = 0
+    def search(self, what, cat="all"):
-              prettyPrinter(self.current_item)
+        """ Performs search """
-              self.results.append('a')
+        connection = http("extratorrent.cc")
-
+
-  def search(self, what, cat='all'):
+        query = "".join(("/search/?new=1&search=", what, "&s_cat=", self.supported_categories[cat]))
-    ret = []
+
-    i = 1
+        connection.request("GET", query)
-    while True and i<11:
+        response = connection.getresponse()
-      results = []
+        if response.status != 200:
-      parser = self.SimpleSGMLParser(results, self.url)
+            return
-      dat = retrieve_url(self.url+'/advanced_search/?with=%s&s_cat=%s&page=%d'%(what, self.supported_categories[cat], i))
+
-      results_re = re.compile('(?s)<table class="tl"><thead>.*')
+        list_searches = []
-      for match in results_re.finditer(dat):
+        parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
-        res_tab = match.group(0)
+        parser.feed(response.read().decode('utf-8'))
        parser.feed(res_tab)
        parser.close()
        break
      if len(results) <= 0:
        break
      i += 1
        for search_query in list_searches:
            connection.request("GET", search_query)
            response = connection.getresponse()
            parser.feed(response.read().decode('utf-8'))
            parser.close()
        connection.close()
        return
--- a/src/searchengine/nova3/engines/legittorrents.py
+++ b/src/searchengine/nova3/engines/legittorrents.py
@ -1,4 +1,4 @@
-#VERSION: 1.03
+#VERSION: 1.04
 #AUTHORS: Christophe Dumez (chris@qbittorrent.org)
 # Redistribution and use in source and binary forms, with or without
@ -36,10 +36,6 @@ class legittorrents(object):
  name = 'legittorrents'
  supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'}
  def __init__(self):
    self.results = []
    self.parser = self.SimpleSGMLParser(self.results, self.url)
  def download_torrent(self, info):
    print(download_file(info))
--- a/src/searchengine/nova3/engines/mininova.py
+++ b/src/searchengine/nova3/engines/mininova.py
@ -1,4 +1,4 @@
-#VERSION: 1.51
+#VERSION: 2.00
 #AUTHORS: Christophe Dumez (chris@qbittorrent.org)
 #CONTRIBUTORS: Diego de las Heras (diegodelasheras@gmail.com)
@ -26,90 +26,124 @@
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
 from html.parser import HTMLParser
 from http.client import HTTPConnection as http
 from novaprinter import prettyPrinter
-from helpers import retrieve_url, download_file
+from helpers import download_file
 import sgmllib3
 import re
 class mininova(object):
-  # Mandatory properties
+    """ Search engine class """
-  url = 'http://www.mininova.org'
+    url = 'http://www.mininova.org'
-  name = 'Mininova'
+    name = 'Mininova'
-  supported_categories = {'all': '0', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'pictures': '6', 'books': '2'}
+    supported_categories = {'all'       : '0',
-
+                            'movies'    : '4',
-  def __init__(self):
+                            'tv'        : '8',
-    self.results = []
+                            'music'     : '5',
-    self.parser = self.SimpleSGMLParser(self.results, self.url)
+                            'games'     : '3',
-
+                            'anime'     : '1',
-  def download_torrent(self, info):
+                            'software'  : '7',
-    print(download_file(info))
+                            'pictures'  : '6',
-
+                            'books'     : '2'}
-  class SimpleSGMLParser(sgmllib3.SGMLParser):
+
-    def __init__(self, results, url, *args):
+    def download_torrent(self, info):
-      sgmllib3.SGMLParser.__init__(self)
+        print(download_file(info))
-      self.url = url
+
-      self.td_counter = None
+    class MyHtmlParseWithBlackJack(HTMLParser):
-      self.current_item = None
+        """ Parser class """
-      self.results = results
+        def __init__(self, list_searches, url):
-      
+            HTMLParser.__init__(self)
-    def start_a(self, attr):
+            self.list_searches = list_searches
-      params = dict(attr)
+            self.url = url
-      #print params
+            self.table_results = False
-      if 'href' in params:
+            self.current_item = None
-        if params['href'].startswith("/get/"):
+            self.cur_item_name = None
-          self.current_item = {}
+            self.next_queries = True
-          self.td_counter = 0
+
-          self.current_item['link']=self.url+params['href'].strip()
+        def handle_starttag_tr(self, _):
-        elif params['href'].startswith("/tor/") and self.current_item is not None:
+            """ Handler of tr start tag """
-          self.current_item['desc_link']=self.url+params['href'].strip()
+            self.current_item = dict()
-    
+
-    def handle_data(self, data):
+        def handle_starttag_a(self, attrs):
-      if self.td_counter == 0:
+            """ Handler of a start tag """
-        if 'name' not in self.current_item:
+            params = dict(attrs)
-          self.current_item['name'] = ''
+            link = params["href"]
-        self.current_item['name']+= data
+
-      elif self.td_counter == 1:
+            if link.startswith("/get/"):
-        if 'size' not in self.current_item:
+                #download link
-          self.current_item['size'] = ''
+                self.current_item["link"] = "".join((self.url, link))
-        self.current_item['size']+= data.strip()
+            elif link.startswith("/tor/"):
-      elif self.td_counter == 2:
+                #description
-        if 'seeds' not in self.current_item:
+                self.current_item["desc_link"] = "".join((self.url, link))
-          self.current_item['seeds'] = ''
+                self.cur_item_name = "name"
-        self.current_item['seeds']+= data.strip()
+                self.current_item["name"] = ""
-      elif self.td_counter == 3:
+            elif self.next_queries and link.startswith("/search"):
-        if 'leech' not in self.current_item:
+                if params["title"].startswith("Page"):
-          self.current_item['leech'] = ''
+                    self.list_searches.append(link)
-        self.current_item['leech']+= data.strip()
+
-      
+        def handle_starttag_td(self, attrs):
-    def start_td(self,attr):
+            """ Handler of td start tag """
-        if isinstance(self.td_counter,int):
+            if ("align", "right") in attrs:
-          self.td_counter += 1
+                if not "size" in self.current_item.keys():
-          if self.td_counter > 4:
+                    self.cur_item_name = "size"
-            self.td_counter = None
+                    self.current_item["size"] = ""
-            # Display item
+
-            if self.current_item:
+        def handle_starttag_span(self, attrs):
-              self.current_item['engine_url'] = self.url
+            """ Handler of span start tag """
-              if not self.current_item['seeds'].isdigit():
+            if ("class", "g") in attrs:
-                self.current_item['seeds'] = 0
+                self.cur_item_name = "seeds"
-              if not self.current_item['leech'].isdigit():
+                self.current_item["seeds"] = ""
-                self.current_item['leech'] = 0
+            elif ("class", "b") in attrs:
-              prettyPrinter(self.current_item)
+                self.cur_item_name = "leech"
-              self.results.append('a')
+                self.current_item["leech"] = ""
-
+
-  def search(self, what, cat='all'):
+        def handle_starttag(self, tag, attrs):
-    ret = []
+            """ Parser's start tag handler """
-    i = 1
+            if self.table_results:
-    while True and i<11:
+                dispatcher = getattr(self, "_".join(("handle_starttag", tag)), None)
-      results = []
+                if dispatcher:
-      parser = self.SimpleSGMLParser(results, self.url)
+                    dispatcher(attrs)
-      dat = retrieve_url(self.url+'/search/%s/%s/seeds/%d'%(what, self.supported_categories[cat], i))
+
-      results_re = re.compile('(?s)<h1>Search results for.*')
+            elif tag == "table":
-      for match in results_re.finditer(dat):
+                self.table_results = ("class", "maintable") in attrs
-        res_tab = match.group(0)
+
-        parser.feed(res_tab)
+        def handle_endtag(self, tag):
            """ Parser's end tag handler """
            if tag == "tr" and self.current_item:
                self.current_item["engine_url"] = self.url
                prettyPrinter(self.current_item)
                self.current_item = None
            elif self.cur_item_name:
                if tag == "a" or tag == "span":
                    self.cur_item_name = None
        def handle_data(self, data):
            """ Parser's data handler

            Appends ``data`` to the field of the current row that is being
            collected (``self.cur_item_name``), separating chunks with a
            single space.

            Text is ignored when no field is being collected, when no row is
            open (``self.current_item`` is None or empty) or when the field
            has not been opened in the current row.
            """
            field = self.cur_item_name
            item = self.current_item
            # handle_endtag() drops the row on </tr> without clearing
            # cur_item_name, so a stale field name may outlive its row;
            # indexing blindly would raise TypeError (no row) or KeyError
            # (new row that has not opened this field yet).
            if not field or not item or field not in item:
                return
            item[field] = " ".join((item[field], data))
    def search(self, what, cat="all"):
        """ Performs search

        Requests ``/search/<what>/<category id>/seeds`` from www.mininova.org
        and feeds the page to the HTML parser, which prints every result row
        through prettyPrinter and collects the pagination links found on the
        first page. Each collected page is then fetched and parsed over the
        same connection.

        what -- search pattern, inserted into the request path as-is
                (presumably already URL-quoted by the caller; verify)
        cat  -- key of ``supported_categories``; an unknown key raises
                KeyError

        Returns None. Nothing is parsed when the first request does not
        answer with HTTP 200.
        """
        connection = http("www.mininova.org")
        # try/finally: the connection must be released on the early return
        # below and when a request or the parser raises.
        try:
            query = "/".join(("/search", what, self.supported_categories[cat], "seeds"))

            connection.request("GET", query)
            response = connection.getresponse()
            if response.status != 200:
                return

            list_searches = []
            parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
            parser.feed(response.read().decode('utf-8'))
            parser.close()

            # Pagination links are only harvested from the first page;
            # every following page would list the same links again.
            parser.next_queries = False
            for search_query in list_searches:
                connection.request("GET", search_query)
                response = connection.getresponse()
                parser.feed(response.read().decode('utf-8'))
                parser.close()
        finally:
            connection.close()
--- a/src/searchengine/nova3/engines/torrentreactor.py
+++ b/src/searchengine/nova3/engines/torrentreactor.py
@ -1,4 +1,4 @@
-#VERSION: 1.33
+#VERSION: 1.35
 #AUTHORS: Gekko Dam Beer (gekko04@users.sourceforge.net)
 #CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org)
 #              Bruno Barbieri (brunorex@gmail.com)
@ -28,91 +28,94 @@
 # POSSIBILITY OF SUCH DAMAGE.
 from novaprinter import prettyPrinter
-from helpers import retrieve_url, download_file
+from helpers import download_file
-from urllib import error, parse
+from urllib import parse
 from html.parser import HTMLParser
 from http.client import HTTPConnection as http
 import re
 class torrentreactor(object):
-	url = 'http://www.torrentreactor.net'
+    url = 'http://www.torrentreactor.net'
-	name = 'TorrentReactor.Net'
+    name = 'TorrentReactor.Net'
-	supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'}
+    supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'}
-	def download_torrent(self, info):
+    def download_torrent(self, info):
-		print(download_file(info))
+        print(download_file(info))
-	class SimpleHTMLParser(HTMLParser):
+    class SimpleHTMLParser(HTMLParser):
-		def __init__(self, results, url, *args):
+        def __init__(self, results, url, *args):
-			HTMLParser.__init__(self)
+            HTMLParser.__init__(self)
-			self.td_counter = None
+            self.td_counter = None
-			self.current_item = None
+            self.current_item = None
-			self.results = results
+            self.results = results
-			self.id = None
+            self.id = None
-			self.url = url
+            self.url = url
-			self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td }
+            self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td }
-		def handle_starttag(self, tag, attrs):
+        def handle_starttag(self, tag, attrs):
-			if tag in self.dispatcher:
+            if tag in self.dispatcher:
-				self.dispatcher[tag](attrs)
+                self.dispatcher[tag](attrs)
-		def start_a(self, attr):
+        def start_a(self, attr):
-			params = dict(attr)
+            params = dict(attr)
-			if re.match("/torrents/\d+.*", params['href']):
+            if re.match("/torrents/\d+.*", params['href']):
-				self.current_item = {}
+                self.current_item = {}
-				self.current_item['desc_link'] = self.url+params['href'].strip()
+                self.current_item['desc_link'] = self.url+params['href'].strip()
-			elif 'torrentreactor.net/download.php' in params['href']:
+            elif 'torrentreactor.net/download.php' in params['href']:
-				self.td_counter = 0
+                self.td_counter = 0
-				self.current_item['link'] = params['href'].strip()
+                self.current_item['link'] = params['href'].strip()
-				self.current_item['name'] = parse.unquote_plus(params['href'].split('&')[1].split('name=')[1])
+                self.current_item['name'] = parse.unquote_plus(params['href'].split('&')[1].split('name=')[1])
-		def handle_data(self, data):
+        def handle_data(self, data):
-			if self.td_counter == 1:
+            if self.td_counter == 1:
-				if 'size' not in self.current_item:
+                if 'size' not in self.current_item:
-					self.current_item['size'] = ''
+                    self.current_item['size'] = ''
-				self.current_item['size']+= data.strip()
+                self.current_item['size']+= data.strip()
-			elif self.td_counter == 2:
+            elif self.td_counter == 2:
-				if 'seeds' not in self.current_item:
+                if 'seeds' not in self.current_item:
-					self.current_item['seeds'] = ''
+                    self.current_item['seeds'] = ''
-				self.current_item['seeds']+= data.strip()
+                self.current_item['seeds']+= data.strip()
-			elif self.td_counter == 3:
+            elif self.td_counter == 3:
-				if 'leech' not in self.current_item:
+                if 'leech' not in self.current_item:
-					self.current_item['leech'] = ''
+                    self.current_item['leech'] = ''
-				self.current_item['leech']+= data.strip()
+                self.current_item['leech']+= data.strip()
-		def start_td(self,attr):
+        def start_td(self,attr):
-			if isinstance(self.td_counter,int):
+            if isinstance(self.td_counter,int):
-				self.td_counter += 1
+                self.td_counter += 1
-				if self.td_counter > 3:
+                if self.td_counter > 3:
-					self.td_counter = None
+                    self.td_counter = None
-					# add item to results
+                    # add item to results
-					if self.current_item:
+                    if self.current_item:
-						self.current_item['engine_url'] = self.url
+                        self.current_item['engine_url'] = self.url
-						if not self.current_item['seeds'].isdigit():
+                        if not self.current_item['seeds'].isdigit():
-							self.current_item['seeds'] = 0
+                            self.current_item['seeds'] = 0
-						if not self.current_item['leech'].isdigit():
+                        if not self.current_item['leech'].isdigit():
-							self.current_item['leech'] = 0
+                            self.current_item['leech'] = 0
-						prettyPrinter(self.current_item)
+                        prettyPrinter(self.current_item)
-						self.has_results = True
+                        self.has_results = True
-						self.results.append('a')
+                        self.results.append('a')
-	def __init__(self):
+    def search(self, what, cat='all'):
-		self.results = []
+        i = 0
-		self.parser = self.SimpleHTMLParser(self.results, self.url)
+        dat = ''
        connection = http("www.torrentreactor.net")
-	def search(self, what, cat='all'):
+        while True and i<11:
-		i = 0
+            results = []
-		dat = ''
+            parser = self.SimpleHTMLParser(results, self.url)
-		while True and i<11:
+            query = '/torrents-search/%s/%d?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat])
-			results = []
+            connection.request("GET", query)
-			parser = self.SimpleHTMLParser(results, self.url)
+            response = connection.getresponse()
            if response.status != 200:
                break
-			try:
+            dat = response.read().decode('utf-8')
 				dat = retrieve_url(self.url+'/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat]))
 			except error.HTTPError:
 				break
-			parser.feed(dat)
+            parser.feed(dat)
-			parser.close()
+            parser.close()
-			if len(results) <= 0:
+            if len(results) <= 0:
-				break
+                break
-			i += 1
+            i += 1
        connection.close()
--- a/src/searchengine/nova3/engines/versions.txt
+++ b/src/searchengine/nova3/engines/versions.txt
@ -1,8 +1,9 @@
 torrentreactor: 1.33
 mininova: 1.51
 piratebay: 2.11
 extratorrent: 1.2
 torrentreactor: 1.35
 mininova: 2.00
 piratebay: 2.11
 extratorrent: 2.0
 kickasstorrents: 1.26
 btdigg: 1.23
 legittorrents: 1.03
 torrentz: 2.13
 legittorrents: 1.04