From b86f92212f0cc675b4fc0159e9fc038674e5365f Mon Sep 17 00:00:00 2001 From: ngosang Date: Sat, 26 Mar 2016 16:34:17 +0100 Subject: [PATCH] [Search engine] Added TorLock search engine --- src/searchengine/nova/engines/torlock.png | Bin 0 -> 561 bytes src/searchengine/nova/engines/torlock.py | 97 ++++++++++++++++++++ src/searchengine/nova/engines/versions.txt | 1 + src/searchengine/nova3/engines/torlock.png | Bin 0 -> 561 bytes src/searchengine/nova3/engines/torlock.py | 97 ++++++++++++++++++++ src/searchengine/nova3/engines/versions.txt | 1 + 6 files changed, 196 insertions(+) create mode 100644 src/searchengine/nova/engines/torlock.png create mode 100644 src/searchengine/nova/engines/torlock.py create mode 100644 src/searchengine/nova3/engines/torlock.png create mode 100644 src/searchengine/nova3/engines/torlock.py diff --git a/src/searchengine/nova/engines/torlock.png b/src/searchengine/nova/engines/torlock.png new file mode 100644 index 0000000000000000000000000000000000000000..c6ee6a55b006d986435105f885e7dc4a91742214 GIT binary patch literal 561 zcmV-10?z%3P)|WK~y-)t&&Yh6k!<0f6wf=EW0iiWD!Ahu<+}$+93k#Qs7(BCFtxx zK}Qcl&_G}|fk+Z06%|c~)NWZ&(-(a}c1w$*T8gcx2wBjL#X)qor^^_zm)hueecu27 zeLnt(dpRMlU&lXU11%t1#P#RH>4T}1Mo%Q*-QOBmpv~&_T-;rH-j~i@*!4QKwrjbl z-H00OKXnj_>yN%zoc(mD_+V!tx%w`ALk%l|USLiQ3XDiVrP@Hm_4ffEfRBxf-=EIa zW#%KXxBi%NZib{yCJ2|*&;hgp&wvClBLT&D1PrhPxL!;)oQ)*$W_$T<;Zc3_L?#HA z;*TQW6tD-_2Ha3X@A3#~V6%Eo7l63_vG#?p*Cy+mM#MdQr84D+!kECA1PsN7XVh?2 z4LOk`mwZHKfQJI>6|(9%!*}+#jW1ZqFMCVVvk0xgaS6!(TCkxBa4ba9wpPvqd4&n} zoMC|OjeVzYHRMV_hr+qv#W7`Ioekt8Nm%MRg{lW_Wh?m#NDH(8lU4Oqz&<|_?6u1v~>N(|DumU(5U!Vef z2Kpu7WlgzPgQJFlswme)7D!b`xfX86CjXD0Q-7-st*MN500000NkvXXu0mjfG-Uu@ literal 0 HcmV?d00001 diff --git a/src/searchengine/nova/engines/torlock.py b/src/searchengine/nova/engines/torlock.py new file mode 100644 index 000000000..f154bf032 --- /dev/null +++ b/src/searchengine/nova/engines/torlock.py @@ -0,0 +1,97 @@ +#VERSION: 2.0 +#AUTHORS: Douman (custparasite@gmx.se) +#CONTRIBUTORS: 
class torlock(object):
    """Search plugin that scrapes torrent listings from torlock.com.

    ``search`` downloads the first result page, then up to three further
    result pages linked from it, and feeds each one to ``MyHtmlParser``,
    which passes every parsed row to ``prettyPrinter``.
    """
    url = "https://www.torlock.com"
    name = "TorLock"
    # Category name accepted by search() -> path segment used in the site URL.
    supported_categories = {'all': 'all',
                            'anime': 'anime',
                            'software': 'software',
                            'games': 'game',
                            'movies': 'movie',
                            'music': 'music',
                            'tv': 'television',
                            'books': 'ebooks'}

    def download_torrent(self, info):
        """Print whatever ``download_file`` returns for ``info``."""
        print(download_file(info))

    class MyHtmlParser(HTMLParser):
        """Sub-class for parsing results.

        Builds one dict per table row found inside an <article> element and
        passes it to ``prettyPrinter`` when the row's </tr> is reached.
        """

        def __init__(self, url):
            """Create a parser; ``url`` is prepended to every relative link."""
            HTMLParser.__init__(self)
            self.url = url
            self.article_found = False  # True while inside the <article> with results
            self.item_found = False  # True from a row's torrent link until its </tr>
            self.item_bad = False  # set to True for malicious links (rel="nofollow")
            self.current_item = None  # dict for found item
            self.item_name = None  # key in current_item that handle_data appends to
            # CSS class of a <td> -> key it fills in current_item.
            self.parser_class = {"ts": "size",
                                 "tul": "seeds",
                                 "tdl": "leech"}

        def handle_starttag(self, tag, attrs):
            """Track <article>, torrent links and the size/seeds/leech cells."""
            params = dict(attrs)
            if self.item_found:
                if tag == "td" and "class" in params:
                    self.item_name = self.parser_class.get(params["class"])
                    if self.item_name:
                        self.current_item[self.item_name] = ""

            elif self.article_found and tag == "a":
                # A valueless attribute (<a href>) is reported as None.
                link = params.get("href") or ""
                segments = link.split("/")
                # segments[2] is spliced into the .torrent URL below, so a
                # link without it (e.g. "/torrents.html") is not a result row.
                if link.startswith("/torrent") and len(segments) > 2:
                    self.current_item["desc_link"] = "".join((self.url, link))
                    self.current_item["link"] = "".join((self.url, "/tor/", segments[2], ".torrent"))
                    self.current_item["engine_url"] = self.url
                    self.item_found = True
                    self.item_name = "name"
                    self.current_item["name"] = ""
                    self.item_bad = params.get("rel") == "nofollow"

            elif tag == "article":
                self.article_found = True
                self.current_item = {}

        def handle_data(self, data):
            """Append text to the field currently being collected, if any."""
            if self.item_name:
                self.current_item[self.item_name] += data

        def handle_endtag(self, tag):
            """Close the current field, row or article."""
            if tag == "article":
                self.article_found = False
            elif self.item_name and tag in ("a", "td"):
                self.item_name = None
            elif self.item_found and tag == "tr":
                self.item_found = False
                if not self.item_bad:
                    prettyPrinter(self.current_item)
                self.current_item = {}

    @staticmethod
    def _additional_pages(html, category, query, limit=3):
        """Return up to ``limit`` distinct result-page paths linked in ``html``.

        ``category`` and ``query`` are matched literally. The last link found
        is dropped (it is the "next" link), and the first page and repeated
        links are skipped so no page is downloaded twice.
        """
        from re import escape as re_escape

        pattern = re_compile(r"/{0}/torrents/{1}\.html\?sort=seeds&page=[0-9]+".format(
            re_escape(category), re_escape(query)))
        links = pattern.findall(html)[:-1]  # last link is next (i.e. second)

        # The first page has already been fetched by the caller.
        seen = set(["/{0}/torrents/{1}.html?sort=seeds&page=1".format(category, query)])
        pages = []
        for link in links:
            if link not in seen:
                seen.add(link)
                pages.append(link)
        return pages[:limit]

    def search(self, query, cat='all'):
        """Performs search.

        ``query`` has "%20" replaced by "-" before being placed in the URL.
        ``cat`` must be a key of ``supported_categories`` (KeyError otherwise).
        Results are emitted through ``prettyPrinter``; nothing is returned.
        """
        query = query.replace("%20", "-")
        category = self.supported_categories[cat]

        parser = self.MyHtmlParser(self.url)
        page = "".join((self.url, "/", category, "/torrents/", query, ".html?sort=seeds&page=1"))
        html = retrieve_url(page)
        parser.feed(html)

        for path in self._additional_pages(html, category, query):
            parser.feed(retrieve_url("".join((self.url, path))))
        parser.close()
class torlock(object):
    """Search plugin that scrapes torrent listings from torlock.com.

    ``search`` downloads the first result page, then up to three further
    result pages linked from it, and feeds each one to ``MyHtmlParser``,
    which passes every parsed row to ``prettyPrinter``.
    """
    url = "https://www.torlock.com"
    name = "TorLock"
    # Category name accepted by search() -> path segment used in the site URL.
    supported_categories = {'all': 'all',
                            'anime': 'anime',
                            'software': 'software',
                            'games': 'game',
                            'movies': 'movie',
                            'music': 'music',
                            'tv': 'television',
                            'books': 'ebooks'}

    def download_torrent(self, info):
        """Print whatever ``download_file`` returns for ``info``."""
        print(download_file(info))

    class MyHtmlParser(HTMLParser):
        """Sub-class for parsing results.

        Builds one dict per table row found inside an <article> element and
        passes it to ``prettyPrinter`` when the row's </tr> is reached.
        """

        def __init__(self, url):
            """Create a parser; ``url`` is prepended to every relative link."""
            HTMLParser.__init__(self)
            self.url = url
            self.article_found = False  # True while inside the <article> with results
            self.item_found = False  # True from a row's torrent link until its </tr>
            self.item_bad = False  # set to True for malicious links (rel="nofollow")
            self.current_item = None  # dict for found item
            self.item_name = None  # key in current_item that handle_data appends to
            # CSS class of a <td> -> key it fills in current_item.
            self.parser_class = {"ts": "size",
                                 "tul": "seeds",
                                 "tdl": "leech"}

        def handle_starttag(self, tag, attrs):
            """Track <article>, torrent links and the size/seeds/leech cells."""
            params = dict(attrs)
            if self.item_found:
                if tag == "td" and "class" in params:
                    self.item_name = self.parser_class.get(params["class"])
                    if self.item_name:
                        self.current_item[self.item_name] = ""

            elif self.article_found and tag == "a":
                # A valueless attribute (<a href>) is reported as None.
                link = params.get("href") or ""
                segments = link.split("/")
                # segments[2] is spliced into the .torrent URL below, so a
                # link without it (e.g. "/torrents.html") is not a result row.
                if link.startswith("/torrent") and len(segments) > 2:
                    self.current_item["desc_link"] = "".join((self.url, link))
                    self.current_item["link"] = "".join((self.url, "/tor/", segments[2], ".torrent"))
                    self.current_item["engine_url"] = self.url
                    self.item_found = True
                    self.item_name = "name"
                    self.current_item["name"] = ""
                    self.item_bad = params.get("rel") == "nofollow"

            elif tag == "article":
                self.article_found = True
                self.current_item = {}

        def handle_data(self, data):
            """Append text to the field currently being collected, if any."""
            if self.item_name:
                self.current_item[self.item_name] += data

        def handle_endtag(self, tag):
            """Close the current field, row or article."""
            if tag == "article":
                self.article_found = False
            elif self.item_name and tag in ("a", "td"):
                self.item_name = None
            elif self.item_found and tag == "tr":
                self.item_found = False
                if not self.item_bad:
                    prettyPrinter(self.current_item)
                self.current_item = {}

    @staticmethod
    def _additional_pages(html, category, query, limit=3):
        """Return up to ``limit`` distinct result-page paths linked in ``html``.

        ``category`` and ``query`` are matched literally. The last link found
        is dropped (it is the "next" link), and the first page and repeated
        links are skipped so no page is downloaded twice.
        """
        from re import escape as re_escape

        pattern = re_compile(r"/{0}/torrents/{1}\.html\?sort=seeds&page=[0-9]+".format(
            re_escape(category), re_escape(query)))
        links = pattern.findall(html)[:-1]  # last link is next (i.e. second)

        # The first page has already been fetched by the caller.
        seen = set(["/{0}/torrents/{1}.html?sort=seeds&page=1".format(category, query)])
        pages = []
        for link in links:
            if link not in seen:
                seen.add(link)
                pages.append(link)
        return pages[:limit]

    def search(self, query, cat='all'):
        """Performs search.

        ``query`` has "%20" replaced by "-" before being placed in the URL.
        ``cat`` must be a key of ``supported_categories`` (KeyError otherwise).
        Results are emitted through ``prettyPrinter``; nothing is returned.
        """
        query = query.replace("%20", "-")
        category = self.supported_categories[cat]

        parser = self.MyHtmlParser(self.url)
        page = "".join((self.url, "/", category, "/torrents/", query, ".html?sort=seeds&page=1"))
        html = retrieve_url(page)
        parser.feed(html)

        for path in self._additional_pages(html, category, query):
            parser.feed(retrieve_url("".join((self.url, path))))
        parser.close()