mirror of
https://github.com/d47081/qBittorrent.git
synced 2025-01-11 07:18:08 +00:00
[Search engine] Update extratorrent plugin. Closes #6261
This commit is contained in:
parent
5b35981c85
commit
a9c5dab0dd
@ -1,4 +1,4 @@
|
||||
#VERSION: 2.06
|
||||
#VERSION: 3.00
|
||||
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
|
||||
#CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
|
||||
|
||||
@ -26,7 +26,7 @@
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from HTMLParser import HTMLParser
|
||||
from xml.dom import minidom
|
||||
#qBt
|
||||
from novaprinter import prettyPrinter
|
||||
from helpers import download_file, retrieve_url
|
||||
@ -49,102 +49,26 @@ class extratorrent(object):
|
||||
""" Downloader """
|
||||
print(download_file(info))
|
||||
|
||||
class MyHtmlParseWithBlackJack(HTMLParser):
    """ Parser for extratorrent HTML search-result pages.

    Builds one result dict per <tr class="tl..."> row and hands every
    completed row to prettyPrinter() when the row's </tr> is seen.  Also
    harvests pagination links (pages up to "10") into *list_searches* so
    the caller can fetch the follow-up result pages.
    """

    def __init__(self, list_searches, url):
        """
        list_searches -- shared list receiving relative URLs of further
                         result pages discovered while parsing
        url           -- engine base URL, used to build absolute links
        """
        HTMLParser.__init__(self)
        self.url = url
        self.list_searches = list_searches
        self.current_item = None           # dict for the row being parsed, or None
        self.cur_item_name = None          # key the next text chunk fills ("size"/"seeds"/"leech")
        self.pending_size = False          # next <td> after the title link holds the size
        self.next_queries = True           # still looking for the pager marker
        self.pending_next_queries = False  # inside the pager: <a> tags are page links
        self.next_queries_set = set()      # page titles already collected (dedup)

    def handle_starttag(self, tag, attrs):
        if self.current_item:
            if tag == "a":
                params = dict(attrs)
                link = params['href']

                if link.startswith("/torrent/"):
                    #description
                    self.current_item["desc_link"] = "".join((self.url, link))
                    # title is "view <name> torrent": strip the 5-char prefix and
                    # 8-char suffix, and undo HTML-escaped ampersands.
                    # (Original had the no-op `.replace("&", "&")` -- a mangled
                    # "&amp;" entity; restored here.)
                    self.current_item["name"] = params["title"][5:-8].replace("&amp;", "&")
                    self.pending_size = True
                elif link.startswith("magnet"):
                    #magnet link
                    self.current_item["link"] = link

            elif tag == "td":
                if self.pending_size:
                    self.cur_item_name = "size"
                    self.current_item["size"] = ""
                    self.pending_size = False

                for attr in attrs:
                    if attr[0] == "class":
                        # cell class starts with "s" for seeds, "l" for leechers
                        if attr[1][0] == "s":
                            self.cur_item_name = "seeds"
                            self.current_item["seeds"] = ""
                        elif attr[1][0] == "l":
                            self.cur_item_name = "leech"
                            self.current_item["leech"] = ""
                        break

        elif tag == "tr":
            # a new result row can only begin while no row is in progress
            for attr in attrs:
                if attr[0] == "class" and attr[1].startswith("tl"):
                    self.current_item = dict()
                    self.current_item["engine_url"] = self.url
                    break

        elif self.pending_next_queries:
            if tag == "a":
                params = dict(attrs)
                if params["title"] in self.next_queries_set:
                    return
                self.list_searches.append(params['href'])
                self.next_queries_set.add(params["title"])
                if params["title"] == "10":
                    # collect at most pages 2..10
                    self.pending_next_queries = False
            else:
                # pager block ended
                self.pending_next_queries = False

        elif self.next_queries:
            # the current-page marker precedes the links to further pages
            if tag == "b" and ("class", "pager_no_link") in attrs:
                self.next_queries = False
                self.pending_next_queries = True

    def handle_data(self, data):
        if self.cur_item_name:
            self.current_item[self.cur_item_name] = data
            # "size" may arrive in several chunks; other fields take one chunk
            if not self.cur_item_name == "size":
                self.cur_item_name = None

    def handle_endtag(self, tag):
        if self.current_item:
            if tag == "tr":
                # row complete -- emit it
                prettyPrinter(self.current_item)
                self.current_item = None
|
||||
|
||||
def search(self, what, cat="all"):
    """ Performs search.

    what -- already URL-encoded search string
    cat  -- category key into self.supported_categories

    Fetches the engine's RSS search feed and prints every result via
    prettyPrinter().  (The HTML-scraping pagination path of v2.06 --
    list_searches / MyHtmlParseWithBlackJack -- is gone in the RSS-based
    v3.00 implementation; feeding the RSS response to the HTML parser was
    dead code.)
    """
    query = "".join((self.url, "/rss.xml?type=search&search=", what,
                     "&cid=", self.supported_categories[cat]))
    response = retrieve_url(query)

    xmldoc = minidom.parseString(response)
    for item in xmldoc.getElementsByTagName('item'):
        current_item = {"engine_url": self.url}
        current_item['name'] = item.getElementsByTagName('title')[0].childNodes[0].data
        current_item["link"] = item.getElementsByTagName('enclosure')[0].attributes['url'].value
        current_item["desc_link"] = item.getElementsByTagName('link')[0].childNodes[0].data
        current_item["size"] = item.getElementsByTagName('size')[0].childNodes[0].data
        # the feed may report non-numeric placeholders; keep only digit counts
        current_item["leech"] = item.getElementsByTagName('leechers')[0].childNodes[0].data
        if not current_item["leech"].isdigit():
            current_item["leech"] = ''
        current_item["seeds"] = item.getElementsByTagName('seeders')[0].childNodes[0].data
        if not current_item["seeds"].isdigit():
            current_item["seeds"] = ''
        prettyPrinter(current_item)
    return
|
||||
|
@ -1,5 +1,5 @@
|
||||
demonoid: 1.22
|
||||
extratorrent: 2.06
|
||||
extratorrent: 3.00
|
||||
legittorrents: 2.01
|
||||
mininova: 2.02
|
||||
piratebay: 2.15
|
||||
|
@ -1,4 +1,4 @@
|
||||
#VERSION: 2.06
|
||||
#VERSION: 3.00
|
||||
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
|
||||
#CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
|
||||
|
||||
@ -26,7 +26,7 @@
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from html.parser import HTMLParser
|
||||
from xml.dom import minidom
|
||||
#qBt
|
||||
from novaprinter import prettyPrinter
|
||||
from helpers import download_file, retrieve_url
|
||||
@ -49,102 +49,26 @@ class extratorrent(object):
|
||||
""" Downloader """
|
||||
print(download_file(info))
|
||||
|
||||
class MyHtmlParseWithBlackJack(HTMLParser):
    """ Parser for extratorrent HTML search-result pages.

    Builds one result dict per <tr class="tl..."> row and hands every
    completed row to prettyPrinter() when the row's </tr> is seen.  Also
    harvests pagination links (pages up to "10") into *list_searches* so
    the caller can fetch the follow-up result pages.
    """

    def __init__(self, list_searches, url):
        """
        list_searches -- shared list receiving relative URLs of further
                         result pages discovered while parsing
        url           -- engine base URL, used to build absolute links
        """
        HTMLParser.__init__(self)
        self.url = url
        self.list_searches = list_searches
        self.current_item = None           # dict for the row being parsed, or None
        self.cur_item_name = None          # key the next text chunk fills ("size"/"seeds"/"leech")
        self.pending_size = False          # next <td> after the title link holds the size
        self.next_queries = True           # still looking for the pager marker
        self.pending_next_queries = False  # inside the pager: <a> tags are page links
        self.next_queries_set = set()      # page titles already collected (dedup)

    def handle_starttag(self, tag, attrs):
        if self.current_item:
            if tag == "a":
                params = dict(attrs)
                link = params['href']

                if link.startswith("/torrent/"):
                    #description
                    self.current_item["desc_link"] = "".join((self.url, link))
                    # title is "view <name> torrent": strip the 5-char prefix and
                    # 8-char suffix, and undo HTML-escaped ampersands.
                    # (Original had the no-op `.replace("&", "&")` -- a mangled
                    # "&amp;" entity; restored here.)
                    self.current_item["name"] = params["title"][5:-8].replace("&amp;", "&")
                    self.pending_size = True
                elif link.startswith("magnet"):
                    #magnet link
                    self.current_item["link"] = link

            elif tag == "td":
                if self.pending_size:
                    self.cur_item_name = "size"
                    self.current_item["size"] = ""
                    self.pending_size = False

                for attr in attrs:
                    if attr[0] == "class":
                        # cell class starts with "s" for seeds, "l" for leechers
                        if attr[1][0] == "s":
                            self.cur_item_name = "seeds"
                            self.current_item["seeds"] = ""
                        elif attr[1][0] == "l":
                            self.cur_item_name = "leech"
                            self.current_item["leech"] = ""
                        break

        elif tag == "tr":
            # a new result row can only begin while no row is in progress
            for attr in attrs:
                if attr[0] == "class" and attr[1].startswith("tl"):
                    self.current_item = dict()
                    self.current_item["engine_url"] = self.url
                    break

        elif self.pending_next_queries:
            if tag == "a":
                params = dict(attrs)
                if params["title"] in self.next_queries_set:
                    return
                self.list_searches.append(params['href'])
                self.next_queries_set.add(params["title"])
                if params["title"] == "10":
                    # collect at most pages 2..10
                    self.pending_next_queries = False
            else:
                # pager block ended
                self.pending_next_queries = False

        elif self.next_queries:
            # the current-page marker precedes the links to further pages
            if tag == "b" and ("class", "pager_no_link") in attrs:
                self.next_queries = False
                self.pending_next_queries = True

    def handle_data(self, data):
        if self.cur_item_name:
            self.current_item[self.cur_item_name] = data
            # "size" may arrive in several chunks; other fields take one chunk
            if not self.cur_item_name == "size":
                self.cur_item_name = None

    def handle_endtag(self, tag):
        if self.current_item:
            if tag == "tr":
                # row complete -- emit it
                prettyPrinter(self.current_item)
                self.current_item = None
|
||||
|
||||
def search(self, what, cat="all"):
    """ Performs search.

    what -- already URL-encoded search string
    cat  -- category key into self.supported_categories

    Fetches the engine's RSS search feed and prints every result via
    prettyPrinter().  (The HTML-scraping pagination path of v2.06 --
    list_searches / MyHtmlParseWithBlackJack -- is gone in the RSS-based
    v3.00 implementation; feeding the RSS response to the HTML parser was
    dead code.)
    """
    query = "".join((self.url, "/rss.xml?type=search&search=", what,
                     "&cid=", self.supported_categories[cat]))
    response = retrieve_url(query)

    xmldoc = minidom.parseString(response)
    for item in xmldoc.getElementsByTagName('item'):
        current_item = {"engine_url": self.url}
        current_item['name'] = item.getElementsByTagName('title')[0].childNodes[0].data
        current_item["link"] = item.getElementsByTagName('enclosure')[0].attributes['url'].value
        current_item["desc_link"] = item.getElementsByTagName('link')[0].childNodes[0].data
        current_item["size"] = item.getElementsByTagName('size')[0].childNodes[0].data
        # the feed may report non-numeric placeholders; keep only digit counts
        current_item["leech"] = item.getElementsByTagName('leechers')[0].childNodes[0].data
        if not current_item["leech"].isdigit():
            current_item["leech"] = ''
        current_item["seeds"] = item.getElementsByTagName('seeders')[0].childNodes[0].data
        if not current_item["seeds"].isdigit():
            current_item["seeds"] = ''
        prettyPrinter(current_item)
    return
|
||||
|
@ -1,5 +1,5 @@
|
||||
demonoid: 1.22
|
||||
extratorrent: 2.06
|
||||
extratorrent: 3.00
|
||||
legittorrents: 2.01
|
||||
mininova: 2.02
|
||||
piratebay: 2.15
|
||||
|
Loading…
Reference in New Issue
Block a user