From a9c5dab0ddd2a7ab1162db6fce89f50da985fb01 Mon Sep 17 00:00:00 2001
From: ngosang
Date: Sat, 21 Jan 2017 19:22:04 +0100
Subject: [PATCH] [Search engine] Update extratorrent plugin. Closes #6261

---
 src/searchengine/nova/engines/extratorrent.py | 114 +++---------------
 src/searchengine/nova/engines/versions.txt    |   2 +-
 .../nova3/engines/extratorrent.py             | 114 +++---------------
 src/searchengine/nova3/engines/versions.txt   |   2 +-
 4 files changed, 40 insertions(+), 192 deletions(-)

diff --git a/src/searchengine/nova/engines/extratorrent.py b/src/searchengine/nova/engines/extratorrent.py
index 07db8b5ed..9469eac86 100644
--- a/src/searchengine/nova/engines/extratorrent.py
+++ b/src/searchengine/nova/engines/extratorrent.py
@@ -1,4 +1,4 @@
-#VERSION: 2.06
+#VERSION: 3.00
 #AUTHORS: Christophe Dumez (chris@qbittorrent.org)
 
 #CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
@@ -26,7 +26,7 @@
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
 
-from HTMLParser import HTMLParser
+from xml.dom import minidom
 #qBt
 from novaprinter import prettyPrinter
 from helpers import download_file, retrieve_url
@@ -49,102 +49,26 @@ class extratorrent(object):
         """ Downloader """
         print(download_file(info))
 
-    class MyHtmlParseWithBlackJack(HTMLParser):
-        """ Parser class """
-        def __init__(self, list_searches, url):
-            HTMLParser.__init__(self)
-            self.url = url
-            self.list_searches = list_searches
-            self.current_item = None
-            self.cur_item_name = None
-            self.pending_size = False
-            self.next_queries = True
-            self.pending_next_queries = False
-            self.next_queries_set = set()
-
-        def handle_starttag(self, tag, attrs):
-            if self.current_item:
-                if tag == "a":
-                    params = dict(attrs)
-                    link = params['href']
-
-                    if link.startswith("/torrent/"):
-                        #description
-                        self.current_item["desc_link"] = "".join((self.url, link))
-                        #remove view at the beginning
-                        self.current_item["name"] = params["title"][5:-8].replace("&amp;", "&")
-                        self.pending_size = True
-                    elif link.startswith("magnet"):
-                        #magnet link
-                        self.current_item["link"] = link
-
-                elif tag == "td":
-                    if self.pending_size:
-                        self.cur_item_name = "size"
-                        self.current_item["size"] = ""
-                        self.pending_size = False
-
-                    for attr in attrs:
-                        if attr[0] == "class":
-                            if attr[1][0] == "s":
-                                self.cur_item_name = "seeds"
-                                self.current_item["seeds"] = ""
-                            elif attr[1][0] == "l":
-                                self.cur_item_name = "leech"
-                                self.current_item["leech"] = ""
-                            break
-
-
-                elif tag == "tr":
-                    for attr in attrs:
-                        if attr[0] == "class" and attr[1].startswith("tl"):
-                            self.current_item = dict()
-                            self.current_item["engine_url"] = self.url
-                            break
-
-            elif self.pending_next_queries:
-                if tag == "a":
-                    params = dict(attrs)
-                    if params["title"] in self.next_queries_set:
-                        return
-                    self.list_searches.append(params['href'])
-                    self.next_queries_set.add(params["title"])
-                    if params["title"] == "10":
-                        self.pending_next_queries = False
-                else:
-                    self.pending_next_queries = False
-
-            elif self.next_queries:
-                if tag == "b" and ("class", "pager_no_link") in attrs:
-                    self.next_queries = False
-                    self.pending_next_queries = True
-
-        def handle_data(self, data):
-            if self.cur_item_name:
-                self.current_item[self.cur_item_name] = data
-                if not self.cur_item_name == "size":
-                    self.cur_item_name = None
-
-        def handle_endtag(self, tag):
-            if self.current_item:
-                if tag == "tr":
-                    prettyPrinter(self.current_item)
-                    self.current_item = None
-
     def search(self, what, cat="all"):
         """ Performs search """
-        query = "".join((self.url, "/advanced_search/?with=", what, "&s_cat=", self.supported_categories[cat]))
-
+        query = "".join((self.url, "/rss.xml?type=search&search=", what, "&cid=", self.supported_categories[cat]))
         response = retrieve_url(query)
-        list_searches = []
-        parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
-        parser.feed(response)
-        parser.close()
-
-        for search_query in list_searches:
-            response = retrieve_url(self.url + search_query)
-            parser.feed(response)
-            parser.close()
+        xmldoc = minidom.parseString(response)
+        itemlist = xmldoc.getElementsByTagName('item')
+        for item in itemlist:
+            current_item = {"engine_url" : self.url}
+            current_item['name'] = item.getElementsByTagName('title')[0].childNodes[0].data
+            current_item["link"] = item.getElementsByTagName('enclosure')[0].attributes['url'].value
+            current_item["desc_link"] = item.getElementsByTagName('link')[0].childNodes[0].data
+            current_item["size"] = item.getElementsByTagName('size')[0].childNodes[0].data
+            current_item["leech"] = item.getElementsByTagName('leechers')[0].childNodes[0].data
+            if not current_item["leech"].isdigit():
+                current_item["leech"] = ''
+            current_item["seeds"] = item.getElementsByTagName('seeders')[0].childNodes[0].data
+            if not current_item["seeds"].isdigit():
+                current_item["seeds"] = ''
+
+            prettyPrinter(current_item)
 
         return

diff --git a/src/searchengine/nova/engines/versions.txt b/src/searchengine/nova/engines/versions.txt
index 034b795b3..c59347d3e 100644
--- a/src/searchengine/nova/engines/versions.txt
+++ b/src/searchengine/nova/engines/versions.txt
@@ -1,5 +1,5 @@
 demonoid: 1.22
-extratorrent: 2.06
+extratorrent: 3.00
 legittorrents: 2.01
 mininova: 2.02
 piratebay: 2.15
diff --git a/src/searchengine/nova3/engines/extratorrent.py b/src/searchengine/nova3/engines/extratorrent.py
index dbe71c535..9469eac86 100644
--- a/src/searchengine/nova3/engines/extratorrent.py
+++ b/src/searchengine/nova3/engines/extratorrent.py
@@ -1,4 +1,4 @@
-#VERSION: 2.06
+#VERSION: 3.00
 #AUTHORS: Christophe Dumez (chris@qbittorrent.org)
 
 #CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
@@ -26,7 +26,7 @@
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
 
-from html.parser import HTMLParser
+from xml.dom import minidom
 #qBt
 from novaprinter import prettyPrinter
 from helpers import download_file, retrieve_url
@@ -49,102 +49,26 @@ class extratorrent(object):
         """ Downloader """
         print(download_file(info))
 
-    class MyHtmlParseWithBlackJack(HTMLParser):
-        """ Parser class """
-        def __init__(self, list_searches, url):
-            HTMLParser.__init__(self)
-            self.url = url
-            self.list_searches = list_searches
-            self.current_item = None
-            self.cur_item_name = None
-            self.pending_size = False
-            self.next_queries = True
-            self.pending_next_queries = False
-            self.next_queries_set = set()
-
-        def handle_starttag(self, tag, attrs):
-            if self.current_item:
-                if tag == "a":
-                    params = dict(attrs)
-                    link = params['href']
-
-                    if link.startswith("/torrent/"):
-                        #description
-                        self.current_item["desc_link"] = "".join((self.url, link))
-                        #remove view at the beginning
-                        self.current_item["name"] = params["title"][5:-8].replace("&amp;", "&")
-                        self.pending_size = True
-                    elif link.startswith("magnet"):
-                        #magnet link
-                        self.current_item["link"] = link
-
-                elif tag == "td":
-                    if self.pending_size:
-                        self.cur_item_name = "size"
-                        self.current_item["size"] = ""
-                        self.pending_size = False
-
-                    for attr in attrs:
-                        if attr[0] == "class":
-                            if attr[1][0] == "s":
-                                self.cur_item_name = "seeds"
-                                self.current_item["seeds"] = ""
-                            elif attr[1][0] == "l":
-                                self.cur_item_name = "leech"
-                                self.current_item["leech"] = ""
-                            break
-
-
-                elif tag == "tr":
-                    for attr in attrs:
-                        if attr[0] == "class" and attr[1].startswith("tl"):
-                            self.current_item = dict()
-                            self.current_item["engine_url"] = self.url
-                            break
-
-            elif self.pending_next_queries:
-                if tag == "a":
-                    params = dict(attrs)
-                    if params["title"] in self.next_queries_set:
-                        return
-                    self.list_searches.append(params['href'])
-                    self.next_queries_set.add(params["title"])
-                    if params["title"] == "10":
-                        self.pending_next_queries = False
-                else:
-                    self.pending_next_queries = False
-
-            elif self.next_queries:
-                if tag == "b" and ("class", "pager_no_link") in attrs:
-                    self.next_queries = False
-                    self.pending_next_queries = True
-
-        def handle_data(self, data):
-            if self.cur_item_name:
-                self.current_item[self.cur_item_name] = data
-                if not self.cur_item_name == "size":
-                    self.cur_item_name = None
-
-        def handle_endtag(self, tag):
-            if self.current_item:
-                if tag == "tr":
-                    prettyPrinter(self.current_item)
-                    self.current_item = None
-
     def search(self, what, cat="all"):
         """ Performs search """
-        query = "".join((self.url, "/advanced_search/?with=", what, "&s_cat=", self.supported_categories[cat]))
-
+        query = "".join((self.url, "/rss.xml?type=search&search=", what, "&cid=", self.supported_categories[cat]))
         response = retrieve_url(query)
-        list_searches = []
-        parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
-        parser.feed(response)
-        parser.close()
-
-        for search_query in list_searches:
-            response = retrieve_url(self.url + search_query)
-            parser.feed(response)
-            parser.close()
+        xmldoc = minidom.parseString(response)
+        itemlist = xmldoc.getElementsByTagName('item')
+        for item in itemlist:
+            current_item = {"engine_url" : self.url}
+            current_item['name'] = item.getElementsByTagName('title')[0].childNodes[0].data
+            current_item["link"] = item.getElementsByTagName('enclosure')[0].attributes['url'].value
+            current_item["desc_link"] = item.getElementsByTagName('link')[0].childNodes[0].data
+            current_item["size"] = item.getElementsByTagName('size')[0].childNodes[0].data
+            current_item["leech"] = item.getElementsByTagName('leechers')[0].childNodes[0].data
+            if not current_item["leech"].isdigit():
+                current_item["leech"] = ''
+            current_item["seeds"] = item.getElementsByTagName('seeders')[0].childNodes[0].data
+            if not current_item["seeds"].isdigit():
+                current_item["seeds"] = ''
+
+            prettyPrinter(current_item)
 
         return

diff --git a/src/searchengine/nova3/engines/versions.txt b/src/searchengine/nova3/engines/versions.txt
index 034b795b3..c59347d3e 100644
--- a/src/searchengine/nova3/engines/versions.txt
+++ b/src/searchengine/nova3/engines/versions.txt
@@ -1,5 +1,5 @@
 demonoid: 1.22
-extratorrent: 2.06
+extratorrent: 3.00
 legittorrents: 2.01
 mininova: 2.02
 piratebay: 2.15
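
Note for reviewers: below is a minimal standalone sketch of the new RSS parsing path, handy for
checking the field mapping without running qBittorrent. The sample <item>, the base URL, and the
helper name parse_items are invented for illustration; only the element names (title, link,
enclosure, size, seeders, leechers), the attribute lookup on <enclosure>, and the isdigit() guards
are taken from the patch above.

# Hypothetical sample feed: the shape is assumed from the fields read in search().
from xml.dom import minidom

SAMPLE_RSS = """<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0"><channel>
  <item>
    <title>Example.Torrent.Name</title>
    <link>http://torrent-site.example/torrent/123/example.html</link>
    <enclosure url="http://torrent-site.example/download/123.torrent" type="application/x-bittorrent"/>
    <size>123456789</size>
    <seeders>42</seeders>
    <leechers>7</leechers>
  </item>
</channel></rss>"""

def parse_items(xml_text, engine_url="http://torrent-site.example"):
    """Mirror of the loop added to search(): builds one result dict per <item>."""
    results = []
    for item in minidom.parseString(xml_text).getElementsByTagName('item'):
        entry = {"engine_url": engine_url}
        entry["name"] = item.getElementsByTagName('title')[0].childNodes[0].data
        entry["link"] = item.getElementsByTagName('enclosure')[0].attributes['url'].value
        entry["desc_link"] = item.getElementsByTagName('link')[0].childNodes[0].data
        entry["size"] = item.getElementsByTagName('size')[0].childNodes[0].data
        # Non-numeric seeders/leechers are blanked out, matching the isdigit() guards in the plugin.
        for key, tag in (("seeds", "seeders"), ("leech", "leechers")):
            value = item.getElementsByTagName(tag)[0].childNodes[0].data
            entry[key] = value if value.isdigit() else ''
        results.append(entry)
    return results

for entry in parse_items(SAMPLE_RSS):
    print(entry)

Running this prints one dict per feed item with the same keys the plugin hands to prettyPrinter,
which is the whole contract the RSS rewrite has to honour.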