Merge pull request #2550 from DoumanAsh/multiprocessor_search

[search engine] Replace threading with multiprocessing
10 years ago · 3d40834c57
22 changed files with 1683 additions and 1503 deletions
--- a/src/searchengine/nova/engines/extratorrent.py
+++ b/src/searchengine/nova/engines/extratorrent.py
@ -1,4 +1,4 @@
				@@ -1,4 +1,4 @@
-#VERSION: 1.2
+#VERSION: 2.0
 #AUTHORS: Christophe Dumez (chris@qbittorrent.org)

 # Redistribution and use in source and binary forms, with or without
@ -25,92 +25,139 @@
				@@ -25,92 +25,139 @@
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.

-
+from HTMLParser import HTMLParser
+from httplib import HTTPConnection as http
+#qBt
 from novaprinter import prettyPrinter
-from helpers import retrieve_url, download_file
-import sgmllib
-import re
+from helpers import download_file

 class extratorrent(object):
+    """ Search engine class """
    url = 'http://extratorrent.cc'
-  name = 'extratorrent'
-  supported_categories = {'all': '', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'books': '2', 'pictures': '6'}
-
-  def __init__(self):
-    self.results = []
-    self.parser = self.SimpleSGMLParser(self.results, self.url)
+    name = 'ExtraTorrent'
+    supported_categories = {'all'       : '0',
+                            'movies'    : '4',
+                            'tv'        : '8',
+                            'music'     : '5',
+                            'games'     : '3',
+                            'anime'     : '1',
+                            'software'  : '7',
+                            'books'     : '2',
+                            'pictures'  : '6'}

    def download_torrent(self, info):
-    print download_file(info)
+        """ Downloader """
+        print(download_file(info))

-  class SimpleSGMLParser(sgmllib.SGMLParser):
-    def __init__(self, results, url, *args):
-      sgmllib.SGMLParser.__init__(self)
+    class MyHtmlParseWithBlackJack(HTMLParser):
+        """ Parser class """
+        def __init__(self, list_searches, url):
+            HTMLParser.__init__(self)
            self.url = url
-      self.td_counter = None
+            self.list_searches = list_searches
            self.current_item = None
-      self.start_name = False
-      self.results = results
-      
-    def start_a(self, attr):
-      params = dict(attr)
-      #print params
-      if params.has_key('href') and params['href'].startswith("/torrent_download/"):
-        self.current_item = {}
-        self.td_counter = 0
-        self.start_name = False
-        torrent_id = '/'.join(params['href'].split('/')[2:])
-        self.current_item['link']=self.url+'/download/'+torrent_id
-      elif params.has_key('href') and params['href'].startswith("/torrent/") and params['href'].endswith(".html"):
-        self.current_item['desc_link'] = self.url + params['href'].strip()
-        self.start_name = True
+            self.cur_item_name = None
+            self.pending_size = False
+            self.next_queries = True
+            self.pending_next_queries = False
+            self.next_queries_set = set()
+
+        def handle_starttag(self, tag, attrs):
+            """ Parser's start tag handler
+
+                While a result row is open (current_item is set) the handler
+                fills the item from the row's <a> and <td> tags. Otherwise it
+                looks for the start of the next result row or collects the
+                pager links that follow <b class="pager_no_link">.
+
+                Missing or value-less attributes are skipped instead of raising,
+                so one unexpected tag does not abort parsing of the whole page.
+
+                @param tag   Tag name
+                @param attrs List of (name, value) pairs, value may be None
+            """
+            if self.current_item:
+                if tag == "a":
+                    params = dict(attrs)
+                    link = params.get("href")
+                    if not link:
+                        #anchor without a target, nothing to collect
+                        return
+
+                    if link.startswith("/torrent/"):
+                        #description
+                        self.current_item["desc_link"] = "".join((self.url, link))
+                        #remove view at the beginning
+                        title = params.get("title") or ""
+                        self.current_item["name"] = title[5:].replace("&amp;", "&")
+                        #the next <td> of this row holds the size
+                        self.pending_size = True
+                    elif link.startswith("/torrent_"):
+                        #download link
+                        link = link.replace("torrent_", "", 1)
+                        self.current_item["link"] = "".join((self.url, link))
+
+                elif tag == "td":
+                    if self.pending_size:
+                        self.cur_item_name = "size"
+                        self.current_item["size"] = ""
+                        self.pending_size = False
+
+                    #only the first attribute of the cell is inspected
+                    #NOTE(review): a class that is not the first attribute is
+                    #ignored - confirm this is intended
+                    if attrs and attrs[0][0] == "class":
+                        css_class = attrs[0][1] or ""
+                        if css_class.startswith("s"):
+                            self.cur_item_name = "seeds"
+                            self.current_item["seeds"] = ""
+                        elif css_class.startswith("l"):
+                            self.cur_item_name = "leech"
+                            self.current_item["leech"] = ""
+
+            elif tag == "tr":
+                for name, value in attrs:
+                    if name == "class" and value and value.startswith("tl"):
+                        #a result row starts a new item
+                        self.current_item = {"engine_url": self.url}
+                        break
+
+            elif self.pending_next_queries:
+                if tag == "a":
+                    params = dict(attrs)
+                    title = params.get("title")
+                    href = params.get("href")
+                    if title is None or href is None or title in self.next_queries_set:
+                        #unusable or already collected pager link
+                        return
+                    self.list_searches.append(href)
+                    self.next_queries_set.add(title)
+                    if title == "10":
+                        self.pending_next_queries = False
+                else:
+                    #first non-anchor tag ends the run of pager links
+                    self.pending_next_queries = False
+
+            elif self.next_queries:
+                if tag == "b" and ("class", "pager_no_link") in attrs:
+                    self.next_queries = False
+                    self.pending_next_queries = True

        def handle_data(self, data):
-      if self.td_counter == 2:
-        if not self.current_item.has_key('name') and self.start_name:
-          self.current_item['name'] = data.strip()
-      elif self.td_counter == 3:
-        if not self.current_item.has_key('size'):
-          self.current_item['size'] = ''
-        self.current_item['size']+= data.replace("&nbsp;", " ").strip()
-      elif self.td_counter == 4:
-        if not self.current_item.has_key('seeds'):
-          self.current_item['seeds'] = ''
-        self.current_item['seeds']+= data.strip()
-      elif self.td_counter == 5:
-        if not self.current_item.has_key('leech'):
-          self.current_item['leech'] = ''
-        self.current_item['leech']+= data.strip()
-      
-    def start_td(self,attr):
-        if isinstance(self.td_counter,int):
-          self.td_counter += 1
-          if self.td_counter > 5:
-            self.td_counter = None
-            # Display item
+            if self.cur_item_name:
+                temp = self.current_item[self.cur_item_name]
+                self.current_item[self.cur_item_name] = " ".join((temp, data))
+                #Due to utf-8 we need to handle data two times if there is space
+                if not self.cur_item_name == "size":
+                    self.cur_item_name = None
+
+        def handle_endtag(self, tag):
            if self.current_item:
-              self.current_item['engine_url'] = self.url
-              if not self.current_item['seeds'].isdigit():
-                self.current_item['seeds'] = 0
-              if not self.current_item['leech'].isdigit():
-                self.current_item['leech'] = 0
+                if tag == "tr":
                    prettyPrinter(self.current_item)
-              self.results.append('a')
-
-  def search(self, what, cat='all'):
-    ret = []
-    i = 1
-    while True and i<11:
-      results = []
-      parser = self.SimpleSGMLParser(results, self.url)
-      dat = retrieve_url(self.url+'/advanced_search/?with=%s&s_cat=%s&page=%d'%(what, self.supported_categories[cat], i))
-      results_re = re.compile('(?s)<table class="tl"><thead>.*')
-      for match in results_re.finditer(dat):
-        res_tab = match.group(0)
-        parser.feed(res_tab)
+                    self.current_item = None
+
+    def search(self, what, cat="all"):
+        """ Performs search """
+        connection = http("extratorrent.cc")
+
+        query = "".join(("/search/?new=1&search=", what, "&s_cat=", self.supported_categories[cat]))
+
+        connection.request("GET", query)
+        response = connection.getresponse()
+        if response.status != 200:
+            return
+
+        list_searches = []
+        parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
+        parser.feed(response.read().decode('utf-8'))
+        parser.close()
+
+        for search_query in list_searches:
+            connection.request("GET", search_query)
+            response = connection.getresponse()
+            parser.feed(response.read().decode('utf-8'))
            parser.close()
-        break
-      if len(results) <= 0:
-        break
-      i += 1

+        connection.close()
+        return
--- a/src/searchengine/nova/engines/legittorrents.py
+++ b/src/searchengine/nova/engines/legittorrents.py
@ -1,4 +1,4 @@
				@@ -1,4 +1,4 @@
-#VERSION: 1.02
+#VERSION: 1.03
 #AUTHORS: Christophe Dumez (chris@qbittorrent.org)

 # Redistribution and use in source and binary forms, with or without
@ -36,10 +36,6 @@ class legittorrents(object):
				@@ -36,10 +36,6 @@ class legittorrents(object):
  name = 'legittorrents'
  supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'}

-  def __init__(self):
-    self.results = []
-    self.parser = self.SimpleSGMLParser(self.results, self.url)
-
  def download_torrent(self, info):
    print download_file(info)

--- a/src/searchengine/nova/engines/mininova.py
+++ b/src/searchengine/nova/engines/mininova.py
@ -1,4 +1,4 @@
				@@ -1,4 +1,4 @@
-#VERSION: 1.51
+#VERSION: 2.00
 #AUTHORS: Christophe Dumez (chris@qbittorrent.org)
 #CONTRIBUTORS: Diego de las Heras (diegodelasheras@gmail.com)

@ -26,90 +26,123 @@
				@@ -26,90 +26,123 @@
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.

+from HTMLParser import HTMLParser
+from httplib import HTTPConnection as http
 from novaprinter import prettyPrinter
-from helpers import retrieve_url, download_file
-import sgmllib
-import re
+from helpers import download_file

 class mininova(object):
-  # Mandatory properties
+    """ Search engine class """
    url = 'http://www.mininova.org'
    name = 'Mininova'
-  supported_categories = {'all': '0', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'pictures': '6', 'books': '2'}
-
-  def __init__(self):
-    self.results = []
-    self.parser = self.SimpleSGMLParser(self.results, self.url)
+    supported_categories = {'all'       : '0',
+                            'movies'    : '4',
+                            'tv'        : '8',
+                            'music'     : '5',
+                            'games'     : '3',
+                            'anime'     : '1',
+                            'software'  : '7',
+                            'pictures'  : '6',
+                            'books'     : '2'}

    def download_torrent(self, info):
-    print download_file(info)
+        print(download_file(info))

-  class SimpleSGMLParser(sgmllib.SGMLParser):
-    def __init__(self, results, url, *args):
-      sgmllib.SGMLParser.__init__(self)
+    class MyHtmlParseWithBlackJack(HTMLParser):
+        """ Parser class """
+        def __init__(self, list_searches, url):
+            HTMLParser.__init__(self)
+            self.list_searches = list_searches
            self.url = url
-      self.td_counter = None
+            self.table_results = False
            self.current_item = None
-      self.results = results
-      
-    def start_a(self, attr):
-      params = dict(attr)
-      #print params
-      if params.has_key('href'):
-        if params['href'].startswith("/get/"):
-          self.current_item = {}
-          self.td_counter = 0
-          self.current_item['link']=self.url+params['href'].strip()
-        elif params['href'].startswith("/tor/") and self.current_item is not None:
-          self.current_item['desc_link']=self.url+params['href'].strip()
+            self.cur_item_name = None
+            self.next_queries = True

-    def handle_data(self, data):
-      if self.td_counter == 0:
-        if not self.current_item.has_key('name'):
-          self.current_item['name'] = ''
-        self.current_item['name']+= data
-      elif self.td_counter == 1:
-        if not self.current_item.has_key('size'):
-          self.current_item['size'] = ''
-        self.current_item['size']+= data.strip()
-      elif self.td_counter == 2:
-        if not self.current_item.has_key('seeds'):
-          self.current_item['seeds'] = ''
-        self.current_item['seeds']+= data.strip()
-      elif self.td_counter == 3:
-        if not self.current_item.has_key('leech'):
-          self.current_item['leech'] = ''
-        self.current_item['leech']+= data.strip()
-      
-    def start_td(self,attr):
-        if isinstance(self.td_counter,int):
-          self.td_counter += 1
-          if self.td_counter > 4:
-            self.td_counter = None
-            # Display item
-            if self.current_item:
-              self.current_item['engine_url'] = self.url
-              if not self.current_item['seeds'].isdigit():
-                self.current_item['seeds'] = 0
-              if not self.current_item['leech'].isdigit():
-                self.current_item['leech'] = 0
+        def handle_starttag_tr(self, _):
+            """ Open a fresh, empty result item for the table row being started """
+            self.current_item = {}
+
+        def handle_starttag_a(self, attrs):
+            """ Handler of a start tag
+
+                A "/tor/" link fills desc_link and link of the current item and
+                starts collecting its name. While next_queries is set, "/search"
+                links whose title starts with "Page" are appended to list_searches.
+
+                Anchors without href, and "/tor/" links seen while no row item
+                is open, are ignored instead of raising.
+
+                @param attrs List of (name, value) pairs of the tag
+            """
+            params = dict(attrs)
+            link = params.get("href")
+            if not link:
+                #anchor without a target, nothing to collect
+                return
+
+            if link.startswith("/tor/"):
+                if self.current_item is None:
+                    #torrent link outside of a row, there is no item to fill
+                    return
+                #description
+                self.current_item["desc_link"] = "".join((self.url, link))
+                #get download link from description by id
+                self.current_item["link"] = "".join((self.url, "/get/", link[5:-2]))
+                self.cur_item_name = "name"
+                self.current_item["name"] = ""
+            elif self.next_queries and link.startswith("/search"):
+                if (params.get("title") or "").startswith("Page"):
+                    self.list_searches.append(link)
+
+        def handle_starttag_td(self, attrs):
+            """ Handler of td start tag
+
+                The first right-aligned cell of a row starts the size field.
+                Cells seen while no row item is open are ignored.
+
+                @param attrs List of (name, value) pairs of the tag
+            """
+            if self.current_item is None:
+                #cell outside of a row, there is no item to fill
+                return
+
+            if ("align", "right") in attrs and "size" not in self.current_item:
+                self.cur_item_name = "size"
+                self.current_item["size"] = ""
+
+        def handle_starttag_span(self, attrs):
+            """ Handler of span start tag
+
+                class "g" starts the seeds field, class "b" the leech field.
+                Spans seen while no row item is open are ignored.
+
+                @param attrs List of (name, value) pairs of the tag
+            """
+            if self.current_item is None:
+                #span outside of a row, there is no item to fill
+                return
+
+            if ("class", "g") in attrs:
+                self.cur_item_name = "seeds"
+                self.current_item["seeds"] = ""
+            elif ("class", "b") in attrs:
+                self.cur_item_name = "leech"
+                self.current_item["leech"] = ""
+
+        def handle_starttag(self, tag, attrs):
+            """ Parser's start tag handler
+
+                Until the results table is found only <table> tags are examined.
+                Afterwards each tag is forwarded to handle_starttag_<tag> when
+                such a method exists.
+            """
+            if not self.table_results:
+                if tag == "table":
+                    self.table_results = ("class", "maintable") in attrs
+                return
+
+            handler = getattr(self, "handle_starttag_" + tag, None)
+            if handler:
+                handler(attrs)
+
+        def handle_endtag(self, tag):
+            """ Parser's end tag handler """
+            if tag == "tr" and self.current_item:
+                self.current_item["engine_url"] = self.url
                prettyPrinter(self.current_item)
-              self.results.append('a')
-
-  def search(self, what, cat='all'):
-    ret = []
-    i = 1
-    while True and i<11:
-      results = []
-      parser = self.SimpleSGMLParser(results, self.url)
-      dat = retrieve_url(self.url+'/search/%s/%s/seeds/%d'%(what, self.supported_categories[cat], i))
-      results_re = re.compile('(?s)<h1>Search results for.*')
-      for match in results_re.finditer(dat):
-        res_tab = match.group(0)
-        parser.feed(res_tab)
+                self.current_item = None
+            elif self.cur_item_name:
+                if tag == "a" or tag == "td":
+                    self.cur_item_name = None
+
+        def handle_data(self, data):
+            """ Parser's data handler
+
+                Appends text to the field named by cur_item_name, separated
+                from the text collected so far by a single space.
+
+                @param data Text found between tags
+            """
+            item = self.current_item
+            field = self.cur_item_name
+            #cur_item_name may outlive its item: </tr> drops the item without
+            #resetting the field name, so make sure the field is really there
+            if field and item is not None and field in item:
+                item[field] = " ".join((item[field], data))
+
+    def search(self, what, cat="all"):
+        """ Performs search """
+        connection = http("www.mininova.org")
+
+        query = "/".join(("/search", what, self.supported_categories[cat], "seeds"))
+
+        connection.request("GET", query)
+        response = connection.getresponse()
+        if response.status != 200:
+            return
+
+        list_searches = []
+        parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
+        parser.feed(response.read().decode('utf-8'))
+        parser.close()
+
+        parser.next_queries = False
+        for search_query in list_searches:
+            connection.request("GET", search_query)
+            response = connection.getresponse()
+            parser.feed(response.read().decode('utf-8'))
            parser.close()
-        break
-      if len(results) <= 0:
-        break
-      i += 1

+        connection.close()
+        return
--- a/src/searchengine/nova/engines/torrentreactor.png
+++ b/src/searchengine/nova/engines/torrentreactor.png
--- a/src/searchengine/nova/engines/torrentreactor.py
+++ b/src/searchengine/nova/engines/torrentreactor.py
@ -1,4 +1,4 @@
				@@ -1,4 +1,4 @@
-#VERSION: 1.33
+#VERSION: 1.36
 #AUTHORS: Gekko Dam Beer (gekko04@users.sourceforge.net)
 #CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org)
 #              Bruno Barbieri (brunorex@gmail.com)
@ -28,19 +28,18 @@
				@@ -28,19 +28,18 @@
 # POSSIBILITY OF SUCH DAMAGE.

 from novaprinter import prettyPrinter
-from helpers import retrieve_url, download_file
-from urllib2 import HTTPError 
-from HTMLParser import HTMLParser
+from helpers import download_file, retrieve_url
 import urllib
-import re
+from HTMLParser import HTMLParser
+from re import compile as re_compile

 class torrentreactor(object):
    url = 'http://www.torrentreactor.net'
-	name = 'TorrentReactor.Net'
+    name = 'TorrentReactor'
    supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'}

    def download_torrent(self, info):
-		print download_file(info)
+        print(download_file(info))

    class SimpleHTMLParser(HTMLParser):
        def __init__(self, results, url, *args):
@ -50,6 +49,7 @@ class torrentreactor(object):
				@@ -50,6 +49,7 @@ class torrentreactor(object):
            self.results = results
            self.id = None
            self.url = url
+            self.torrents_matcher = re_compile("/torrents/\d+.*")
            self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td }

        def handle_starttag(self, tag, attrs):
@ -58,7 +58,7 @@ class torrentreactor(object):
				@@ -58,7 +58,7 @@ class torrentreactor(object):

        def start_a(self, attr):
            params = dict(attr)
-			if re.match("/torrents/\d+.*", params['href']):
+            if self.torrents_matcher.match(params['href']):
                self.current_item = {}
                self.current_item['desc_link'] = self.url+params['href'].strip()
            elif 'torrentreactor.net/download.php' in params['href']:
@ -68,15 +68,15 @@ class torrentreactor(object):
				@@ -68,15 +68,15 @@ class torrentreactor(object):

        def handle_data(self, data):
            if self.td_counter == 1:
-				if not self.current_item.has_key('size'):
+                if 'size' not in self.current_item:
                    self.current_item['size'] = ''
                self.current_item['size']+= data.strip()
            elif self.td_counter == 2:
-				if not self.current_item.has_key('seeds'):
+                if 'seeds' not in self.current_item:
                    self.current_item['seeds'] = ''
                self.current_item['seeds']+= data.strip()
            elif self.td_counter == 3:
-				if not self.current_item.has_key('leech'):
+                if 'leech' not in self.current_item:
                    self.current_item['leech'] = ''
                self.current_item['leech']+= data.strip()

@ -96,22 +96,14 @@ class torrentreactor(object):
				@@ -96,22 +96,14 @@ class torrentreactor(object):
                        self.has_results = True
                        self.results.append('a')

-	def __init__(self):
-		self.results = []
-		self.parser = self.SimpleHTMLParser(self.results, self.url)
-
    def search(self, what, cat='all'):
        i = 0
        dat = ''
-		while True and i<11:
+
+        while i < 11:
            results = []
            parser = self.SimpleHTMLParser(results, self.url)
-
-			try:
-				dat = retrieve_url(self.url+'/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat]))
-			except HTTPError:
-				break
-
+            dat = retrieve_url('%s/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(self.url, what, (i*35), self.supported_categories[cat]))
            parser.feed(dat)
            parser.close()
            if len(results) <= 0:
--- a/src/searchengine/nova/engines/torrentz.py
+++ b/src/searchengine/nova/engines/torrentz.py
@ -1,4 +1,4 @@
				@@ -1,4 +1,4 @@
-#VERSION: 2.13
+#VERSION: 2.14
 #AUTHORS: Diego de las Heras (diegodelasheras@gmail.com)

 # Redistribution and use in source and binary forms, with or without
@ -105,7 +105,7 @@ class torrentz(object):
				@@ -105,7 +105,7 @@ class torrentz(object):
        while i < 6:
            results_list = []
            # "what" is already urlencoded
-            html = retrieve_url(self.url + '/any?f=%s&p=%d' % (what, i))
+            html = retrieve_url('%s/any?f=%s&p=%d' % (self.url, what, i))
            parser = self.MyHtmlParser(results_list, self.url, trackers)
            parser.feed(html)
            parser.close()
--- a/src/searchengine/nova/engines/versions.txt
+++ b/src/searchengine/nova/engines/versions.txt
@ -1,8 +1,9 @@
				@@ -1,8 +1,9 @@
-torrentreactor: 1.33
-mininova: 1.51
-piratebay: 2.11
 extratorrent: 1.2
+torrentreactor: 1.36
+mininova: 2.00
+piratebay: 2.11
+extratorrent: 2.0
 kickasstorrents: 1.26
 btdigg: 1.24
-legittorrents: 1.02
-torrentz: 2.13
+torrentz: 2.14
+legittorrents: 1.03
--- a/src/searchengine/nova/nova2.py
+++ b/src/searchengine/nova/nova2.py
@ -26,7 +26,7 @@
				@@ -26,7 +26,7 @@
 # POSSIBILITY OF SUCH DAMAGE.


-#VERSION: 1.32
+#VERSION: 1.40

 # Author:
 #  Fabien Devaux <fab AT gnux DOT info>
@ -37,16 +37,15 @@
				@@ -37,16 +37,15 @@
 #
 # Licence: BSD

-import sys
-import threading
-import os
-import glob
 import urllib
-
-import fix_encoding
+from os import path
+from glob import glob
+from sys import argv
+from multiprocessing import Pool, cpu_count
+from fix_encoding import fix_encoding

 THREADED = True
-CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books')
+CATEGORIES = {'all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books'}

 ################################################################################
 # Every engine should have a "search" method taking
@ -56,34 +55,50 @@ CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pic
				@@ -56,34 +55,50 @@ CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pic
 # As a convention, try to list results by decrasing number of seeds or similar
 ################################################################################

-supported_engines = []
+def initialize_engines():
+    """ Import available engines
+
+        Return list of available engines
+    """
+    supported_engines = []

-engines = glob.glob(os.path.join(os.path.dirname(__file__), 'engines','*.py'))
-for engine in engines:
-	e = engine.split(os.sep)[-1][:-3]
-	if len(e.strip()) == 0: continue
-	if e.startswith('_'): continue
+    engines = glob(path.join(path.dirname(__file__), 'engines', '*.py'))
+    for engine in engines:
+        engi = path.basename(engine).split('.')[0].strip()
+        if len(engi) == 0 or engi.startswith('_'):
+            continue
        try:
-		exec "from engines.%s import %s"%(e,e)
-		supported_engines.append(e)
+            #import engines.[engine]
+            engine_module = __import__(".".join(("engines", engi)))
+            #get low-level module
+            engine_module = getattr(engine_module, engi)
+            #bind class name
+            globals()[engi] = getattr(engine_module, engi)
+            supported_engines.append(engi)
        except:
            pass

-def engineToXml(short_name):
-	xml = "<%s>\n"%short_name
-	exec "engine = %s()"%short_name
-	xml += "<name>%s</name>\n"%engine.name
-	xml += "<url>%s</url>\n"%engine.url
-	xml += "<categories>"
-	if hasattr(engine, 'supported_categories'):
-		supported_categories = engine.supported_categories.keys()
-		supported_categories.remove('all')
-		xml += " ".join(supported_categories)
-	xml += "</categories>\n"
-	xml += "</%s>\n"%short_name
-	return xml
-
-def displayCapabilities():
+    return supported_engines
+
+def engines_to_xml(supported_engines):
+    """ Generates xml for supported engines
+
+        @param supported_engines Iterable of engine short names, each one has
+                                 to be bound to its engine class in globals()
+
+        Yields one xml fragment (string) per engine
+    """
+    tab = " " * 4
+
+    for short_name in supported_engines:
+        search_engine = globals()[short_name]()
+
+        supported_categories = ""
+        if hasattr(search_engine, "supported_categories"):
+            #compare by value: "is not" checks object identity, which equal
+            #strings are not guaranteed to share
+            supported_categories = " ".join(key for key in search_engine.supported_categories
+                                            if key != "all")
+
+        yield "".join((tab, "<", short_name, ">\n",
+                       tab, tab, "<name>", search_engine.name, "</name>\n",
+                       tab, tab, "<url>", search_engine.url, "</url>\n",
+                       tab, tab, "<categories>", supported_categories, "</categories>\n",
+                       tab, "</", short_name, ">\n"))
+
+def displayCapabilities(supported_engines):
    """
    Display capabilities in XML format
    <capabilities>
@ -94,70 +109,75 @@ def displayCapabilities():
				@@ -94,70 +109,75 @@ def displayCapabilities():
      </engine_short_name>
    </capabilities>
    """
-	xml = "<capabilities>"
-	for short_name in supported_engines:
-		xml += engineToXml(short_name)
-	xml += "</capabilities>"
-	print xml
-
-class EngineLauncher(threading.Thread):
-	def __init__(self, engine, what, cat='all'):
-		threading.Thread.__init__(self)
-		self.engine = engine
-		self.what = what
-		self.cat = cat
-	def run(self):
-		if hasattr(self.engine, 'supported_categories'):
-			if self.cat == 'all' or self.cat in self.engine.supported_categories.keys():
-				self.engine.search(self.what, self.cat)
-		elif self.cat == 'all':
-				self.engine.search(self.what)
-
-if __name__ == '__main__':
-	# Make sure we enforce utf-8 encoding
-	fix_encoding.fix_encoding()
-
-	if len(sys.argv) < 2:
-		raise SystemExit('./nova2.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'%
-				(','.join(supported_engines)))
-
-	if len(sys.argv) == 2:
-		if sys.argv[1] == "--capabilities":
-			displayCapabilities()
-			sys.exit(0)
+    xml = "".join(("<capabilities>\n",
+                   "".join(engines_to_xml(supported_engines)),
+                   "</capabilities>"))
+    print(xml)
+
+def run_search(engine_list):
+    """ Run search in engine
+
+        @param engine_list List with engine, query and category
+
+        @retval False if any exception occurred
+        @retval True  otherwise
+    """
+    engine, what, cat = engine_list
+    try:
+        engine = engine()
+        #avoid exceptions due to invalid category
+        if hasattr(engine, 'supported_categories'):
+            cat = cat if cat in engine.supported_categories else "all"
+            engine.search(what, cat)
        else:
-			raise SystemExit('./nova.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'%
-					(','.join(supported_engines)))
+            engine.search(what)
+        return True
+    except:
+        return False
+
+def main(args):
+    fix_encoding()
+    supported_engines = initialize_engines()

-	engines_list = [e.lower() for e in sys.argv[1].strip().split(',')]
+    if not args:
+        raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
+                         "available engines: %s" % (','.join(supported_engines)))
+
+    elif args[0] == "--capabilities":
+        displayCapabilities(supported_engines)
+        return
+
+    elif len(args) < 3:
+        raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
+                         "available engines: %s" % (','.join(supported_engines)))
+
+    #get only unique engines with set
+    engines_list = set(e.lower() for e in args[0].strip().split(','))

    if 'all' in engines_list:
        engines_list = supported_engines
+    else:
+        #discard un-supported engines
+        engines_list = [engine for engine in engines_list
+                        if engine in supported_engines]

-	cat = sys.argv[2].lower()
+    if not engines_list:
+        #engine list is empty. Nothing to do here
+        return
+
+    cat = args[1].lower()

    if cat not in CATEGORIES:
-		raise SystemExit('Invalid category!')
+        raise SystemExit(" - ".join(('Invalid category', cat)))

-	what = urllib.quote(' '.join(sys.argv[3:]))
+    what = urllib.quote(' '.join(args[2:]))

-	threads = []
-	for engine in engines_list:
-		try:
    if THREADED:
-				exec "l = EngineLauncher(%s(), what, cat)"%engine
-				threads.append(l)
-				l.start()
+        #child process spawning is controlled min(number of searches, number of cpu)
+        pool = Pool(min(len(engines_list), cpu_count()))
+        pool.map(run_search, ([globals()[engine], what, cat] for engine in engines_list))
    else:
-				exec "e = %s()"%engine
-				if hasattr(engine, 'supported_categories'):
-					if cat == 'all' or cat in e.supported_categories.keys():
-						e.search(what, cat)
-				elif self.cat == 'all':
-						e.search(what)
-						engine().search(what, cat)
-		except:
-			pass
-	if THREADED:
-		for t in threads:
-			t.join()
+        map(run_search, ([globals()[engine], what, cat] for engine in engines_list))
+
+if __name__ == "__main__":
+    main(argv[1:])
--- a/src/searchengine/nova/nova2dl.py
+++ b/src/searchengine/nova/nova2dl.py
@ -25,7 +25,7 @@
				@@ -25,7 +25,7 @@
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.

-#VERSION: 1.10
+#VERSION: 1.20

 # Author:
 #  Christophe DUMEZ (chris@qbittorrent.org)
@ -43,8 +43,8 @@ for engine in engines:
				@@ -43,8 +43,8 @@ for engine in engines:
    if len(e.strip()) == 0: continue
    if e.startswith('_'): continue
    try:
-		exec "from engines.%s import %s"%(e,e)
-		exec "engine_url = %s.url"%e
+        exec("from engines.%s import %s"%(e,e))
+        exec("engine_url = %s.url"%e)
        supported_engines[engine_url] = e
    except:
        pass
@ -54,11 +54,11 @@ if __name__ == '__main__':
				@@ -54,11 +54,11 @@ if __name__ == '__main__':
        raise SystemExit('./nova2dl.py engine_url download_parameter')
    engine_url = sys.argv[1].strip()
    download_param = sys.argv[2].strip()
-	if engine_url not in supported_engines.keys():
+    if engine_url not in list(supported_engines.keys()):
        raise SystemExit('./nova2dl.py: this engine_url was not recognized')
-	exec "engine = %s()"%supported_engines[engine_url]
+    exec("engine = %s()"%supported_engines[engine_url])
    if hasattr(engine, 'download_torrent'):
        engine.download_torrent(download_param)
    else:
-		print download_file(download_param)
+        print(download_file(download_param))
    sys.exit(0)
--- a/src/searchengine/nova/novaprinter.py
+++ b/src/searchengine/nova/novaprinter.py
@ -25,20 +25,19 @@
				@@ -25,20 +25,19 @@
 # POSSIBILITY OF SUCH DAMAGE.

 import sys, codecs
+from io import open

 # Force UTF-8 printing
 sys.stdout = codecs.getwriter('utf-8')(sys.stdout)

 def prettyPrinter(dictionary):
-	# Convert everything to unicode for safe printing
-	for key,value in dictionary.items():
-		if isinstance(dictionary[key], str):
-			dictionary[key] = unicode(dictionary[key], 'utf-8')
    dictionary['size'] = anySizeToBytes(dictionary['size'])
-	if dictionary.has_key('desc_link'):
-		print u"%s|%s|%s|%s|%s|%s|%s"%(dictionary['link'],dictionary['name'].replace('|',' '),dictionary['size'],dictionary['seeds'],dictionary['leech'],dictionary['engine_url'],dictionary['desc_link'])
-	else:
-		print u"%s|%s|%s|%s|%s|%s"%(dictionary['link'],dictionary['name'].replace('|',' '),dictionary['size'],dictionary['seeds'],dictionary['leech'],dictionary['engine_url'])
+    outtext = "|".join((dictionary["link"], dictionary["name"].replace("|", " "), str(dictionary["size"]), str(dictionary["seeds"]), str(dictionary["leech"]), dictionary["engine_url"]))
+    if 'desc_link' in dictionary:
+        outtext = "|".join((outtext, dictionary["desc_link"]))
+
+    with open(1, 'w', encoding='utf-8', closefd=False) as utf8_stdout:
+        utf8_stdout.write(unicode("".join((outtext, "\n"))))

 def anySizeToBytes(size_string):
    """
@ -63,7 +62,7 @@ def anySizeToBytes(size_string):
				@@ -63,7 +62,7 @@ def anySizeToBytes(size_string):
    short_unit = unit.upper()[0]

    # convert
-	units_dict = { 'T': 40, 'G': 30, 'M': 20, 'K': 10 }
-	if units_dict.has_key( short_unit ):
+    units_dict = {'T': 40, 'G': 30, 'M': 20, 'K': 10}
+    if units_dict.has_key(short_unit):
        size = size * 2**units_dict[short_unit]
    return int(size)
--- a/src/searchengine/nova3/engines/extratorrent.py
+++ b/src/searchengine/nova3/engines/extratorrent.py
@ -1,4 +1,4 @@
				@@ -1,4 +1,4 @@
-#VERSION: 1.2
+#VERSION: 2.0
 #AUTHORS: Christophe Dumez (chris@qbittorrent.org)

 # Redistribution and use in source and binary forms, with or without
@ -25,92 +25,139 @@
				@@ -25,92 +25,139 @@
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.

-
+from html.parser import HTMLParser
+from http.client import HTTPConnection as http
+#qBt
 from novaprinter import prettyPrinter
-from helpers import retrieve_url, download_file
-import sgmllib3
-import re
+from helpers import download_file

 class extratorrent(object):
+    """ Search engine class """
    url = 'http://extratorrent.cc'
-  name = 'extratorrent'
-  supported_categories = {'all': '', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'books': '2', 'pictures': '6'}
-
-  def __init__(self):
-    self.results = []
-    self.parser = self.SimpleSGMLParser(self.results, self.url)
+    name = 'ExtraTorrent'
+    supported_categories = {'all'       : '0',
+                            'movies'    : '4',
+                            'tv'        : '8',
+                            'music'     : '5',
+                            'games'     : '3',
+                            'anime'     : '1',
+                            'software'  : '7',
+                            'books'     : '2',
+                            'pictures'  : '6'}

    def download_torrent(self, info):
+        """ Downloader """
        print(download_file(info))

-  class SimpleSGMLParser(sgmllib3.SGMLParser):
-    def __init__(self, results, url, *args):
-      sgmllib3.SGMLParser.__init__(self)
+    class MyHtmlParseWithBlackJack(HTMLParser):
+        """ Parser class """
+        def __init__(self, list_searches, url):
+            HTMLParser.__init__(self)
            self.url = url
-      self.td_counter = None
+            self.list_searches = list_searches
            self.current_item = None
-      self.start_name = False
-      self.results = results
-      
-    def start_a(self, attr):
-      params = dict(attr)
-      #print params
-      if 'href' in params and params['href'].startswith("/torrent_download/"):
-        self.current_item = {}
-        self.td_counter = 0
-        self.start_name = False
-        torrent_id = '/'.join(params['href'].split('/')[2:])
-        self.current_item['link']=self.url+'/download/'+torrent_id
-      elif 'href' in params and params['href'].startswith("/torrent/") and params['href'].endswith(".html"):
-        self.current_item['desc_link'] = self.url + params['href'].strip()
-        self.start_name = True
+            self.cur_item_name = None
+            self.pending_size = False
+            self.next_queries = True
+            self.pending_next_queries = False
+            self.next_queries_set = set()
+
+        def handle_starttag(self, tag, attrs):
+            """ Parser's start tag handler
+
+                Inside a result row (current_item is set) anchors provide the
+                links and the name, cells select which field handle_data fills next.
+                Outside of a row a tr whose class starts with "tl" opens a new item
+                and the anchors following <b class="pager_no_link"> are collected
+                into list_searches as further result pages.
+            """
+            if self.current_item:
+                if tag == "a":
+                    params = dict(attrs)
+                    #anchors without href have no link to classify
+                    link = params.get("href")
+                    if not link or not link.startswith("/torrent"):
+                        return
+
+                    #character following "/torrent" tells the link type;
+                    #slicing gives "" instead of IndexError on a bare "/torrent"
+                    link_type = link[8:9]
+                    if link_type == "/":
+                        #description
+                        self.current_item["desc_link"] = "".join((self.url, link))
+                        #remove view at the beginning
+                        self.current_item["name"] = params["title"][5:].replace("&amp;", "&")
+                        self.pending_size = True
+                    elif link_type == "_":
+                        #download link
+                        link = link.replace("torrent_", "", 1)
+                        self.current_item["link"] = "".join((self.url, link))
+
+                elif tag == "td":
+                    if self.pending_size:
+                        self.cur_item_name = "size"
+                        self.current_item["size"] = ""
+                        self.pending_size = False
+
+                    for attr in attrs:
+                        if attr[0] == "class":
+                            #class without value is parsed as None
+                            css_class = attr[1] or ""
+                            if css_class.startswith("s"):
+                                self.cur_item_name = "seeds"
+                                self.current_item["seeds"] = ""
+                            elif css_class.startswith("l"):
+                                self.cur_item_name = "leech"
+                                self.current_item["leech"] = ""
+                            #only the class attribute matters, stop once it was found
+                            break
+
+            elif tag == "tr":
+                for attr in attrs:
+                    if attr[0] == "class" and attr[1].startswith("tl"):
+                        self.current_item = dict()
+                        self.current_item["engine_url"] = self.url
+                        break
+
+            elif self.pending_next_queries:
+                if tag == "a":
+                    params = dict(attrs)
+                    #every page is queued only once
+                    if params["title"] in self.next_queries_set:
+                        return
+                    self.list_searches.append(params['href'])
+                    self.next_queries_set.add(params["title"])
+                    #page 10 is the last one that is followed
+                    if params["title"] == "10":
+                        self.pending_next_queries = False
+                else:
+                    #any other tag ends the pager
+                    self.pending_next_queries = False
+
+            elif self.next_queries:
+                #marker of the current page, pager links come right after it
+                if tag == "b" and ("class", "pager_no_link") in attrs:
+                    self.next_queries = False
+                    self.pending_next_queries = True

        def handle_data(self, data):
-      if self.td_counter == 2:
-        if 'name' not in self.current_item and self.start_name:
-          self.current_item['name'] = data.strip()
-      elif self.td_counter == 3:
-        if 'size' not in self.current_item:
-          self.current_item['size'] = ''
-        self.current_item['size']+= data.replace("&nbsp;", " ").strip()
-      elif self.td_counter == 4:
-        if 'seeds' not in self.current_item:
-          self.current_item['seeds'] = ''
-        self.current_item['seeds']+= data.strip()
-      elif self.td_counter == 5:
-        if 'leech' not in self.current_item:
-          self.current_item['leech'] = ''
-        self.current_item['leech']+= data.strip()
-      
-    def start_td(self,attr):
-        if isinstance(self.td_counter,int):
-          self.td_counter += 1
-          if self.td_counter > 5:
-            self.td_counter = None
-            # Display item
+            if self.cur_item_name:
+                temp = self.current_item[self.cur_item_name]
+                self.current_item[self.cur_item_name] = " ".join((temp, data))
+                #Due to utf-8 we need to handle data two times if there is space
+                if not self.cur_item_name == "size":
+                    self.cur_item_name = None
+
+        def handle_endtag(self, tag):
            if self.current_item:
-              self.current_item['engine_url'] = self.url
-              if not self.current_item['seeds'].isdigit():
-                self.current_item['seeds'] = 0
-              if not self.current_item['leech'].isdigit():
-                self.current_item['leech'] = 0
+                if tag == "tr":
                    prettyPrinter(self.current_item)
-              self.results.append('a')
-
-  def search(self, what, cat='all'):
-    ret = []
-    i = 1
-    while True and i<11:
-      results = []
-      parser = self.SimpleSGMLParser(results, self.url)
-      dat = retrieve_url(self.url+'/advanced_search/?with=%s&s_cat=%s&page=%d'%(what, self.supported_categories[cat], i))
-      results_re = re.compile('(?s)<table class="tl"><thead>.*')
-      for match in results_re.finditer(dat):
-        res_tab = match.group(0)
-        parser.feed(res_tab)
+                    self.current_item = None
+
+    def search(self, what, cat="all"):
+        """ Performs search """
+        connection = http("extratorrent.cc")
+
+        query = "".join(("/search/?new=1&search=", what, "&s_cat=", self.supported_categories[cat]))
+
+        connection.request("GET", query)
+        response = connection.getresponse()
+        if response.status != 200:
+            return
+
+        list_searches = []
+        parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
+        parser.feed(response.read().decode('utf-8'))
+        parser.close()
+
+        for search_query in list_searches:
+            connection.request("GET", search_query)
+            response = connection.getresponse()
+            parser.feed(response.read().decode('utf-8'))
            parser.close()
-        break
-      if len(results) <= 0:
-        break
-      i += 1

+        connection.close()
+        return
--- a/src/searchengine/nova3/engines/legittorrents.py
+++ b/src/searchengine/nova3/engines/legittorrents.py
@ -1,4 +1,4 @@
				@@ -1,4 +1,4 @@
-#VERSION: 1.03
+#VERSION: 1.04
 #AUTHORS: Christophe Dumez (chris@qbittorrent.org)

 # Redistribution and use in source and binary forms, with or without
@ -36,10 +36,6 @@ class legittorrents(object):
				@@ -36,10 +36,6 @@ class legittorrents(object):
  name = 'legittorrents'
  supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'}

-  def __init__(self):
-    self.results = []
-    self.parser = self.SimpleSGMLParser(self.results, self.url)
-
  def download_torrent(self, info):
    print(download_file(info))

--- a/src/searchengine/nova3/engines/mininova.py
+++ b/src/searchengine/nova3/engines/mininova.py
@ -1,4 +1,4 @@
				@@ -1,4 +1,4 @@
-#VERSION: 1.51
+#VERSION: 2.00
 #AUTHORS: Christophe Dumez (chris@qbittorrent.org)
 #CONTRIBUTORS: Diego de las Heras (diegodelasheras@gmail.com)

@ -26,90 +26,123 @@
				@@ -26,90 +26,123 @@
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.

+from html.parser import HTMLParser
+from http.client import HTTPConnection as http
 from novaprinter import prettyPrinter
-from helpers import retrieve_url, download_file
-import sgmllib3
-import re
+from helpers import download_file

 class mininova(object):
-  # Mandatory properties
+    """ Search engine class """
    url = 'http://www.mininova.org'
    name = 'Mininova'
-  supported_categories = {'all': '0', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'pictures': '6', 'books': '2'}
-
-  def __init__(self):
-    self.results = []
-    self.parser = self.SimpleSGMLParser(self.results, self.url)
+    supported_categories = {'all'       : '0',
+                            'movies'    : '4',
+                            'tv'        : '8',
+                            'music'     : '5',
+                            'games'     : '3',
+                            'anime'     : '1',
+                            'software'  : '7',
+                            'pictures'  : '6',
+                            'books'     : '2'}

    def download_torrent(self, info):
        print(download_file(info))

-  class SimpleSGMLParser(sgmllib3.SGMLParser):
-    def __init__(self, results, url, *args):
-      sgmllib3.SGMLParser.__init__(self)
+    class MyHtmlParseWithBlackJack(HTMLParser):
+        """ Parser class """
+        def __init__(self, list_searches, url):
+            HTMLParser.__init__(self)
+            self.list_searches = list_searches
            self.url = url
-      self.td_counter = None
+            self.table_results = False
            self.current_item = None
-      self.results = results
-      
-    def start_a(self, attr):
-      params = dict(attr)
-      #print params
-      if 'href' in params:
-        if params['href'].startswith("/get/"):
-          self.current_item = {}
-          self.td_counter = 0
-          self.current_item['link']=self.url+params['href'].strip()
-        elif params['href'].startswith("/tor/") and self.current_item is not None:
-          self.current_item['desc_link']=self.url+params['href'].strip()
+            self.cur_item_name = None
+            self.next_queries = True

-    def handle_data(self, data):
-      if self.td_counter == 0:
-        if 'name' not in self.current_item:
-          self.current_item['name'] = ''
-        self.current_item['name']+= data
-      elif self.td_counter == 1:
-        if 'size' not in self.current_item:
-          self.current_item['size'] = ''
-        self.current_item['size']+= data.strip()
-      elif self.td_counter == 2:
-        if 'seeds' not in self.current_item:
-          self.current_item['seeds'] = ''
-        self.current_item['seeds']+= data.strip()
-      elif self.td_counter == 3:
-        if 'leech' not in self.current_item:
-          self.current_item['leech'] = ''
-        self.current_item['leech']+= data.strip()
-      
-    def start_td(self,attr):
-        if isinstance(self.td_counter,int):
-          self.td_counter += 1
-          if self.td_counter > 4:
-            self.td_counter = None
-            # Display item
-            if self.current_item:
-              self.current_item['engine_url'] = self.url
-              if not self.current_item['seeds'].isdigit():
-                self.current_item['seeds'] = 0
-              if not self.current_item['leech'].isdigit():
-                self.current_item['leech'] = 0
+        def handle_starttag_tr(self, _):
+            """ Handler of tr start tag
+
+                Every table row starts a fresh, empty result item;
+                attributes of the row are not used.
+            """
+            self.current_item = dict()
+
+        def handle_starttag_a(self, attrs):
+            """ Handler of a start tag
+
+                Links starting with /tor/ describe the torrent of the current row,
+                links starting with /search and titled "Page..." are further
+                result pages and are queued while next_queries is set.
+            """
+            params = dict(attrs)
+            #anchors without href have nothing to offer
+            link = params.get("href")
+            if not link:
+                return
+
+            if link.startswith("/tor/"):
+                #torrent link outside of a table row can not belong to any result
+                if self.current_item is None:
+                    return
+                #description
+                self.current_item["desc_link"] = "".join((self.url, link))
+                #get download link from description by id
+                self.current_item["link"] = "".join((self.url, "/get/", link[5:-2]))
+                #text inside of the anchor is collected as name by handle_data
+                self.cur_item_name = "name"
+                self.current_item["name"] = ""
+            elif self.next_queries and link.startswith("/search"):
+                #title may be missing or valueless
+                if (params.get("title") or "").startswith("Page"):
+                    self.list_searches.append(link)
+
+        def handle_starttag_td(self, attrs):
+            """ Handler of td start tag
+
+                The first right-aligned cell of a row starts collecting "size";
+                further right-aligned cells are ignored as size is already present.
+            """
+            if ("align", "right") in attrs:
+                if not "size" in self.current_item:
+                    self.cur_item_name = "size"
+                    self.current_item["size"] = ""
+
+        def handle_starttag_span(self, attrs):
+            """ Handler of span start tag
+
+                Span with class "g" starts collecting "seeds",
+                span with class "b" starts collecting "leech".
+            """
+            if ("class", "g") in attrs:
+                self.cur_item_name = "seeds"
+                self.current_item["seeds"] = ""
+            elif ("class", "b") in attrs:
+                self.cur_item_name = "leech"
+                self.current_item["leech"] = ""
+
+        def handle_starttag(self, tag, attrs):
+            """ Parser's start tag handler
+
+                Until a table with class "maintable" was seen only table tags are
+                inspected; afterwards each tag is forwarded to the method named
+                handle_starttag_<tag> when the parser defines one.
+            """
+            if self.table_results:
+                #look up handler by tag name, tags without handler are skipped
+                dispatcher = getattr(self, "_".join(("handle_starttag", tag)), None)
+                if dispatcher:
+                    dispatcher(attrs)
+
+            elif tag == "table":
+                #results are located in the table with class maintable
+                self.table_results = ("class", "maintable") in attrs
+
+        def handle_endtag(self, tag):
+            """ Parser's end tag handler """
+            if tag == "tr" and self.current_item:
+                self.current_item["engine_url"] = self.url
                prettyPrinter(self.current_item)
-              self.results.append('a')
-
-  def search(self, what, cat='all'):
-    ret = []
-    i = 1
-    while True and i<11:
-      results = []
-      parser = self.SimpleSGMLParser(results, self.url)
-      dat = retrieve_url(self.url+'/search/%s/%s/seeds/%d'%(what, self.supported_categories[cat], i))
-      results_re = re.compile('(?s)<h1>Search results for.*')
-      for match in results_re.finditer(dat):
-        res_tab = match.group(0)
-        parser.feed(res_tab)
+                self.current_item = None
+            elif self.cur_item_name:
+                if tag == "a" or tag == "td":
+                    self.cur_item_name = None
+
+        def handle_data(self, data):
+            """ Parser's data handler
+
+                Appends text to the field selected by cur_item_name;
+                text is ignored while no field is selected.
+            """
+            if self.cur_item_name:
+                temp = self.current_item[self.cur_item_name]
+                #chunks are joined with space, so the value of a field that
+                #started as "" begins with a space
+                self.current_item[self.cur_item_name] = " ".join((temp, data))
+
+    def search(self, what, cat="all"):
+        """ Performs search """
+        connection = http("www.mininova.org")
+
+        query = "/".join(("/search", what, self.supported_categories[cat], "seeds"))
+
+        connection.request("GET", query)
+        response = connection.getresponse()
+        if response.status != 200:
+            return
+
+        list_searches = []
+        parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
+        parser.feed(response.read().decode('utf-8'))
+        parser.close()
+
+        parser.next_queries = False
+        for search_query in list_searches:
+            connection.request("GET", search_query)
+            response = connection.getresponse()
+            parser.feed(response.read().decode('utf-8'))
            parser.close()
-        break
-      if len(results) <= 0:
-        break
-      i += 1

+        connection.close()
+        return
--- a/src/searchengine/nova3/engines/torrentreactor.png
+++ b/src/searchengine/nova3/engines/torrentreactor.png
--- a/src/searchengine/nova3/engines/torrentreactor.py
+++ b/src/searchengine/nova3/engines/torrentreactor.py
@ -1,4 +1,4 @@
				@@ -1,4 +1,4 @@
-#VERSION: 1.33
+#VERSION: 1.36
 #AUTHORS: Gekko Dam Beer (gekko04@users.sourceforge.net)
 #CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org)
 #              Bruno Barbieri (brunorex@gmail.com)
@ -28,14 +28,14 @@
				@@ -28,14 +28,14 @@
 # POSSIBILITY OF SUCH DAMAGE.

 from novaprinter import prettyPrinter
-from helpers import retrieve_url, download_file
-from urllib import error, parse
+from helpers import download_file, retrieve_url
+from urllib import parse
 from html.parser import HTMLParser
-import re
+from re import compile as re_compile

 class torrentreactor(object):
    url = 'http://www.torrentreactor.net'
-	name = 'TorrentReactor.Net'
+    name = 'TorrentReactor'
    supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'}

    def download_torrent(self, info):
@ -49,6 +49,7 @@ class torrentreactor(object):
				@@ -49,6 +49,7 @@ class torrentreactor(object):
            self.results = results
            self.id = None
            self.url = url
+            self.torrents_matcher = re_compile("/torrents/\d+.*")
            self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td }

        def handle_starttag(self, tag, attrs):
@ -57,7 +58,7 @@ class torrentreactor(object):
				@@ -57,7 +58,7 @@ class torrentreactor(object):

        def start_a(self, attr):
            params = dict(attr)
-			if re.match("/torrents/\d+.*", params['href']):
+            if self.torrents_matcher.match(params['href']):
                self.current_item = {}
                self.current_item['desc_link'] = self.url+params['href'].strip()
            elif 'torrentreactor.net/download.php' in params['href']:
@ -95,22 +96,14 @@ class torrentreactor(object):
				@@ -95,22 +96,14 @@ class torrentreactor(object):
                        self.has_results = True
                        self.results.append('a')

-	def __init__(self):
-		self.results = []
-		self.parser = self.SimpleHTMLParser(self.results, self.url)
-
    def search(self, what, cat='all'):
        i = 0
        dat = ''
-		while True and i<11:
+
+        while i < 11:
            results = []
            parser = self.SimpleHTMLParser(results, self.url)
-
-			try:
-				dat = retrieve_url(self.url+'/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat]))
-			except error.HTTPError:
-				break
-
+            dat = retrieve_url('%s/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(self.url, what, (i*35), self.supported_categories[cat]))
            parser.feed(dat)
            parser.close()
            if len(results) <= 0:
--- a/src/searchengine/nova3/engines/torrentz.py
+++ b/src/searchengine/nova3/engines/torrentz.py
@ -1,4 +1,4 @@
				@@ -1,4 +1,4 @@
-#VERSION: 2.13
+#VERSION: 2.14
 #AUTHORS: Diego de las Heras (diegodelasheras@gmail.com)

 # Redistribution and use in source and binary forms, with or without
--- a/src/searchengine/nova3/engines/versions.txt
+++ b/src/searchengine/nova3/engines/versions.txt
@ -1,8 +1,9 @@
				@@ -1,8 +1,9 @@
-torrentreactor: 1.33
-mininova: 1.51
-piratebay: 2.11
 extratorrent: 1.2
+torrentreactor: 1.36
+mininova: 2.00
+piratebay: 2.11
+extratorrent: 2.0
 kickasstorrents: 1.26
 btdigg: 1.23
-legittorrents: 1.03
-torrentz: 2.13
+torrentz: 2.14
+legittorrents: 1.04
--- a/src/searchengine/nova3/nova2.py
+++ b/src/searchengine/nova3/nova2.py
@ -26,7 +26,7 @@
				@@ -26,7 +26,7 @@
 # POSSIBILITY OF SUCH DAMAGE.


-#VERSION: 1.24
+#VERSION: 1.40

 # Author:
 #  Fabien Devaux <fab AT gnux DOT info>
@ -37,14 +37,14 @@
				@@ -37,14 +37,14 @@
 #
 # Licence: BSD

-import sys
-import threading
-import os
-import glob
 import urllib.parse
+from os import path, cpu_count
+from glob import glob
+from sys import argv
+from multiprocessing import Pool

 THREADED = True
-CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books')
+CATEGORIES = {'all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books'}

 ################################################################################
 # Every engine should have a "search" method taking
@ -54,34 +54,50 @@ CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pic
				@@ -54,34 +54,50 @@ CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pic
 # As a convention, try to list results by decrasing number of seeds or similar
 ################################################################################

-supported_engines = []
+def initialize_engines():
+    """ Import available engines

-engines = glob.glob(os.path.join(os.path.dirname(__file__), 'engines','*.py'))
-for engine in engines:
-	e = engine.split(os.sep)[-1][:-3]
-	if len(e.strip()) == 0: continue
-	if e.startswith('_'): continue
+        Return list of available engines
+    """
+    supported_engines = []
+
+    engines = glob(path.join(path.dirname(__file__), 'engines', '*.py'))
+    for engine in engines:
+        engi = path.basename(engine).split('.')[0].strip()
+        if len(engi) == 0 or engi.startswith('_'):
+            continue
        try:
-		exec("from engines.%s import %s"%(e,e))
-		supported_engines.append(e)
+            #import engines.[engine]
+            engine_module = __import__(".".join(("engines", engi)))
+            #get low-level module
+            engine_module = getattr(engine_module, engi)
+            #bind class name
+            globals()[engi] = getattr(engine_module, engi)
+            supported_engines.append(engi)
        except:
            pass

-def engineToXml(short_name):
-	xml = "<%s>\n"%short_name
-	exec("search_engine = %s()"%short_name, globals())
-	xml += "<name>%s</name>\n"%search_engine.name
-	xml += "<url>%s</url>\n"%search_engine.url
-	xml += "<categories>"
-	if hasattr(search_engine, 'supported_categories'):
-		supported_categories = list(search_engine.supported_categories.keys())
-		supported_categories.remove('all')
-		xml += " ".join(supported_categories)
-	xml += "</categories>\n"
-	xml += "</%s>\n"%short_name
-	return xml
-
-def displayCapabilities():
+    return supported_engines
+
+def engines_to_xml(supported_engines):
+    """ Generates xml for supported engines
+
+        @param supported_engines Names of engine classes bound in module globals
+
+        Yields one xml fragment per engine with its name, url and
+        space separated categories ("all" is not listed).
+    """
+    tab = " " * 4
+
+    for short_name in supported_engines:
+        search_engine = globals()[short_name]()
+
+        supported_categories = ""
+        if hasattr(search_engine, "supported_categories"):
+            #compare by value: identity check against a literal depends on string interning
+            supported_categories = " ".join((key for key in search_engine.supported_categories.keys()
+                                             if key != "all"))
+
+        yield  "".join((tab, "<", short_name, ">\n",
+                        tab, tab, "<name>", search_engine.name, "</name>\n",
+                        tab, tab, "<url>", search_engine.url, "</url>\n",
+                        tab, tab, "<categories>", supported_categories, "</categories>\n",
+                        tab, "</", short_name, ">\n"))
+
+def displayCapabilities(supported_engines):
    """
    Display capabilities in XML format
    <capabilities>
@ -92,67 +108,75 @@ def displayCapabilities():
				@@ -92,67 +108,75 @@ def displayCapabilities():
      </engine_short_name>
    </capabilities>
    """
-	xml = "<capabilities>"
-	for short_name in supported_engines:
-		xml += engineToXml(short_name)
-	xml += "</capabilities>"
+    xml = "".join(("<capabilities>\n",
+                   "".join(engines_to_xml(supported_engines)),
+                   "</capabilities>"))
    print(xml)

-class EngineLauncher(threading.Thread):
-	def __init__(self, engine, what, cat='all'):
-		threading.Thread.__init__(self)
-		self.engine = engine
-		self.what = what
-		self.cat = cat
-	def run(self):
-		if hasattr(self.engine, 'supported_categories'):
-			if self.cat == 'all' or self.cat in list(self.engine.supported_categories.keys()):
-				self.engine.search(self.what, self.cat)
-		elif self.cat == 'all':
-				self.engine.search(self.what)
-
-if __name__ == '__main__':
-	if len(sys.argv) < 2:
-		raise SystemExit('./nova2.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'%
-				(','.join(supported_engines)))
-
-	if len(sys.argv) == 2:
-		if sys.argv[1] == "--capabilities":
-			displayCapabilities()
-			sys.exit(0)
+def run_search(engine_list):
+    """ Run search in engine
+
+        @param engine_list List with engine, query and category
+
+        @retval False if any exceptions occured
+        @retval True  otherwise
+    """
+    engine, what, cat = engine_list
+    try:
+        engine = engine()
+        #avoid exceptions due to invalid category
+        if hasattr(engine, 'supported_categories'):
+            cat = cat if cat in engine.supported_categories else "all"
+            engine.search(what, cat)
        else:
-			raise SystemExit('./nova.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'%
-					(','.join(supported_engines)))
+            engine.search(what)

-	engines_list = [e.lower() for e in sys.argv[1].strip().split(',')]
+        return True
+    except:
+        return False
+
+def main(args):
+    supported_engines = initialize_engines()
+
+    if not args:
+        raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
+                         "available engines: %s" % (','.join(supported_engines)))
+
+    elif args[0] == "--capabilities":
+        displayCapabilities(supported_engines)
+        return
+
+    elif len(args) < 3:
+        raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
+                         "available engines: %s" % (','.join(supported_engines)))
+
+    #get only unique engines with set
+    engines_list = set(e.lower() for e in args[0].strip().split(','))

    if 'all' in engines_list:
        engines_list = supported_engines
+    else:
+        #discard un-supported engines
+        engines_list = [engine for engine in engines_list
+                        if engine in supported_engines]

-	cat = sys.argv[2].lower()
+    if not engines_list:
+        #engine list is empty. Nothing to do here
+        return

-	if cat not in CATEGORIES:
-		raise SystemExit('Invalid category!')
+    cat = args[1].lower()

-	what = urllib.parse.quote(' '.join(sys.argv[3:]))
+    if cat not in CATEGORIES:
+        raise SystemExit(" - ".join(('Invalid category', cat)))

-	threads = []
-	for engine in engines_list:
-		try:
+    what = urllib.parse.quote(' '.join(args[2:]))
    if THREADED:
-				exec("l = EngineLauncher(%s(), what, cat)"%engine)
-				threads.append(l)
-				l.start()
+        #child process spawning is controlled min(number of searches, number of cpu)
+        with Pool(min(len(engines_list), cpu_count())) as pool:
+            pool.map(run_search, ([globals()[engine], what, cat] for engine in engines_list))
    else:
-				exec("e = %s()"%engine)
-				if hasattr(engine, 'supported_categories'):
-					if cat == 'all' or cat in list(e.supported_categories.keys()):
-						e.search(what, cat)
-				elif self.cat == 'all':
-						e.search(what)
-						engine().search(what, cat)
-		except:
-			pass
-	if THREADED:
-		for t in threads:
-			t.join()
+        #py3 note: map is needed to be evaluated for content to be executed
+        all(map(run_search, ([globals()[engine], what, cat] for engine in engines_list)))
+
+if __name__ == "__main__":
+    main(argv[1:])
--- a/src/searchengine/nova3/nova2dl.py
+++ b/src/searchengine/nova3/nova2dl.py
@ -25,7 +25,7 @@
				@@ -25,7 +25,7 @@
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.

-#VERSION: 1.10
+#VERSION: 1.20

 # Author:
 #  Christophe DUMEZ (chris@qbittorrent.org)
--- a/src/searchengine/nova3/novaprinter.py
+++ b/src/searchengine/nova3/novaprinter.py
@ -26,12 +26,10 @@
				@@ -26,12 +26,10 @@


 def prettyPrinter(dictionary):
-	outtext = ''
    dictionary['size'] = anySizeToBytes(dictionary['size'])
+    outtext = "|".join((dictionary["link"], dictionary["name"].replace("|", " "), str(dictionary["size"]), str(dictionary["seeds"]), str(dictionary["leech"]), dictionary["engine_url"]))
    if 'desc_link' in dictionary:
-		outtext = '%s|%s|%s|%s|%s|%s|%s'%(dictionary['link'],dictionary['name'].replace('|',' '),dictionary['size'],dictionary['seeds'],dictionary['leech'],dictionary['engine_url'],dictionary['desc_link'])
-	else:
-		outtext = '%s|%s|%s|%s|%s|%s'%(dictionary['link'],dictionary['name'].replace('|',' '),dictionary['size'],dictionary['seeds'],dictionary['leech'],dictionary['engine_url'])
+        outtext = "|".join((outtext, dictionary["desc_link"]))

    # fd 1 is stdout
    with open(1, 'w', encoding='utf-8', closefd=False) as utf8stdout:
@ -60,7 +58,7 @@ def anySizeToBytes(size_string):
				@@ -60,7 +58,7 @@ def anySizeToBytes(size_string):
    short_unit = unit.upper()[0]

    # convert
-	units_dict = { 'T': 40, 'G': 30, 'M': 20, 'K': 10 }
+    units_dict = {'T': 40, 'G': 30, 'M': 20, 'K': 10}
    if short_unit in units_dict:
        size = size * 2**units_dict[short_unit]
    return int(size)