mirror of
https://github.com/d47081/qBittorrent.git
synced 2025-01-25 22:14:32 +00:00
[searchengine] Fix piratebay. Closes #2270
This commit is contained in:
parent
b7898cccd0
commit
503626bde8
@ -1,4 +1,4 @@
|
|||||||
#VERSION: 2.01
|
#VERSION: 2.10
|
||||||
#AUTHORS: Fabien Devaux (fab@gnux.info)
|
#AUTHORS: Fabien Devaux (fab@gnux.info)
|
||||||
#CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org)
|
#CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org)
|
||||||
# Arthur (custparasite@gmx.se)
|
# Arthur (custparasite@gmx.se)
|
||||||
@ -27,113 +27,149 @@
|
|||||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
# POSSIBILITY OF SUCH DAMAGE.
|
# POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
from novaprinter import prettyPrinter
|
|
||||||
from HTMLParser import HTMLParser
|
from HTMLParser import HTMLParser
|
||||||
|
from httplib import HTTPSConnection as https
|
||||||
|
#qBt
|
||||||
|
from novaprinter import prettyPrinter
|
||||||
from helpers import download_file
|
from helpers import download_file
|
||||||
import urllib2
|
|
||||||
|
|
||||||
PREVIOUS_IDS = set()
|
|
||||||
|
|
||||||
class piratebay(object):
|
class piratebay(object):
|
||||||
|
""" Search engine class """
|
||||||
url = 'https://thepiratebay.se'
|
url = 'https://thepiratebay.se'
|
||||||
name = 'The Pirate Bay'
|
name = 'The Pirate Bay'
|
||||||
supported_categories = {'all': '0', 'music': '100', 'movies': '200', 'games': '400', 'software': '300'}
|
supported_categories = {'all': '0', 'music': '100', 'movies': '200', 'games': '400', 'software': '300'}
|
||||||
|
|
||||||
def download_torrent(self, info):
|
def download_torrent(self, info):
|
||||||
|
""" Downloader """
|
||||||
print(download_file(info))
|
print(download_file(info))
|
||||||
|
|
||||||
class MyHtmlParseWithBlackJack(HTMLParser):
|
class MyHtmlParseWithBlackJack(HTMLParser):
|
||||||
def __init__(self, results, url):
|
""" Parser class """
|
||||||
|
def __init__(self, list_searches, url):
|
||||||
HTMLParser.__init__(self)
|
HTMLParser.__init__(self)
|
||||||
|
self.list_searches = list_searches
|
||||||
self.url = url
|
self.url = url
|
||||||
self.results = results
|
|
||||||
self.current_item = None
|
self.current_item = None
|
||||||
self.size_found = False
|
self.save_item = None
|
||||||
self.unit_found = False
|
self.result_table = False #table with results is found
|
||||||
self.seed_found = False
|
self.result_tbody = False
|
||||||
self.skip_td = False
|
self.add_query = True
|
||||||
self.leech_found = False
|
self.result_query = False
|
||||||
self.dispatcher = {'a' : self.handle_tag_a_ref,
|
|
||||||
'font' : self.handle_tag_font_size,
|
|
||||||
'td' : self.handle_tag_td_sl }
|
|
||||||
|
|
||||||
def handle_tag_a_ref(self, attrs):
|
def handle_start_tag_default(self, attrs):
|
||||||
params = dict(attrs)
|
""" Default handler for start tag dispatcher """
|
||||||
#1
|
pass
|
||||||
if params['href'].startswith('/torrent/'):
|
|
||||||
get_id = params['href'].split('/')[2]
|
|
||||||
if not get_id in PREVIOUS_IDS:
|
|
||||||
self.current_item = {}
|
|
||||||
self.current_item['desc_link'] = self.url + params['href'].strip()
|
|
||||||
self.current_item['name'] = params['title'][12:].strip()
|
|
||||||
self.current_item['id'] = get_id
|
|
||||||
#2
|
|
||||||
elif (not self.current_item is None) and (params['href'].startswith('magnet:')):
|
|
||||||
self.current_item['link'] = params['href'].strip()
|
|
||||||
|
|
||||||
def handle_tag_font_size(self, attrs):
|
def handle_start_tag_a(self, attrs):
|
||||||
if not self.current_item is None:
|
""" Handler for start tag a """
|
||||||
params = dict(attrs)
|
params = dict(attrs)
|
||||||
#3
|
link = params["href"]
|
||||||
if params['class'] == "detDesc":
|
if link.startswith("/torrent"):
|
||||||
self.size_found = True
|
self.current_item["desc_link"] = "".join((self.url, link))
|
||||||
|
self.save_item = "name"
|
||||||
|
elif link.startswith("magnet"):
|
||||||
|
self.current_item["link"] = link
|
||||||
|
|
||||||
def handle_tag_td_sl(self, attrs):
|
def handle_start_tag_font(self, attrs):
|
||||||
if not self.current_item is None:
|
""" Handler for start tag font """
|
||||||
params = dict(attrs)
|
for attr in attrs:
|
||||||
if not self.current_item is None:
|
if attr[1] == "detDesc":
|
||||||
if self.seed_found:
|
self.save_item = "size"
|
||||||
#5
|
break
|
||||||
self.current_item['leech'] = ''
|
|
||||||
self.leech_found = True
|
def handle_start_tag_td(self, attrs):
|
||||||
self.seed_found = False
|
""" Handler for start tag td """
|
||||||
|
for attr in attrs:
|
||||||
|
if attr[1] == "right":
|
||||||
|
if "seeds" in self.current_item.keys():
|
||||||
|
self.save_item = "leech"
|
||||||
else:
|
else:
|
||||||
#4
|
self.save_item = "seeds"
|
||||||
self.current_item['seeds'] = ''
|
break
|
||||||
self.seed_found = True
|
|
||||||
|
|
||||||
def handle_starttag(self, tag, attrs):
|
def handle_starttag(self, tag, attrs):
|
||||||
if tag in self.dispatcher:
|
""" Parser's start tag handler """
|
||||||
self.dispatcher[tag](attrs)
|
if self.current_item:
|
||||||
|
dispatcher = getattr(self, "_".join(("handle_start_tag", tag)), self.handle_start_tag_default)
|
||||||
|
dispatcher(attrs)
|
||||||
|
|
||||||
|
elif self.result_tbody:
|
||||||
|
if tag == "tr":
|
||||||
|
self.current_item = {"engine_url" : self.url}
|
||||||
|
|
||||||
|
elif tag == "table":
|
||||||
|
self.result_table = "searchResult" == attrs[0][1]
|
||||||
|
|
||||||
|
elif self.add_query:
|
||||||
|
if self.result_query and tag == "a":
|
||||||
|
if len(self.list_searches) < 10:
|
||||||
|
self.list_searches.append(attrs[0][1])
|
||||||
|
else:
|
||||||
|
self.add_query = False
|
||||||
|
self.result_query = False
|
||||||
|
elif tag == "div":
|
||||||
|
self.result_query = "center" == attrs[0][1]
|
||||||
|
|
||||||
|
|
||||||
|
def handle_endtag(self, tag):
|
||||||
|
""" Parser's end tag handler """
|
||||||
|
if self.result_tbody:
|
||||||
|
if tag == "tr":
|
||||||
|
prettyPrinter(self.current_item)
|
||||||
|
self.current_item = None
|
||||||
|
elif tag == "font":
|
||||||
|
self.save_item = None
|
||||||
|
elif tag == "table":
|
||||||
|
self.result_table = self.result_tbody = False
|
||||||
|
|
||||||
|
elif self.result_table:
|
||||||
|
if tag == "thead":
|
||||||
|
self.result_tbody = True
|
||||||
|
elif tag == "table":
|
||||||
|
self.result_table = self.result_tbody = False
|
||||||
|
|
||||||
|
elif self.add_query and self.result_query:
|
||||||
|
if tag == "div":
|
||||||
|
self.add_query = self.result_query = False
|
||||||
|
|
||||||
def handle_data(self, data):
|
def handle_data(self, data):
|
||||||
if not self.current_item is None:
|
""" Parser's data handler """
|
||||||
if self.size_found:
|
if self.save_item == "size":
|
||||||
#with utf-8 you're going to have something like that: ['Uploaded', '10-02'], ['15:31,', 'Size', '240.34'], ['MiB,', 'ULed', 'by']
|
temp_data = data.split()
|
||||||
temp = data.split()
|
if "Size" in temp_data:
|
||||||
if 'Size' in temp:
|
self.current_item[self.save_item] = temp_data[2]
|
||||||
sizeIn = temp.index('Size')
|
elif "ULed" in temp_data:
|
||||||
self.current_item['size'] = temp[sizeIn + 1]
|
temp_string = self.current_item[self.save_item]
|
||||||
self.size_found = False
|
self.current_item[self.save_item] = " ".join((temp_string, temp_data[0][:-1]))
|
||||||
self.unit_found = True
|
elif self.save_item:
|
||||||
elif self.unit_found:
|
self.current_item[self.save_item] = data
|
||||||
temp = data.split()
|
self.save_item = None
|
||||||
self.current_item['size'] = ' '.join((self.current_item['size'], temp[0]))
|
|
||||||
self.unit_found = False
|
|
||||||
elif self.seed_found:
|
|
||||||
self.current_item['seeds'] += data.rstrip()
|
|
||||||
elif self.leech_found:
|
|
||||||
self.current_item['leech'] += data.rstrip()
|
|
||||||
self.current_item['engine_url'] = self.url
|
|
||||||
prettyPrinter(self.current_item)
|
|
||||||
PREVIOUS_IDS.add(self.current_item['id'])
|
|
||||||
self.results.append('a')
|
|
||||||
self.current_item = None
|
|
||||||
self.size_found = False
|
|
||||||
self.unit_found = False
|
|
||||||
self.seed_found = False
|
|
||||||
self.leech_found = False
|
|
||||||
|
|
||||||
def search(self, what, cat='all'):
|
def search(self, what, cat='all'):
|
||||||
ret = []
|
""" Performs search """
|
||||||
i = 0
|
connection = https("thepiratebay.se")
|
||||||
while i < 11:
|
|
||||||
results = []
|
#prepare query. 7 is filtering by seeders
|
||||||
parser = self.MyHtmlParseWithBlackJack(results, self.url)
|
cat = cat.lower()
|
||||||
query = '%s/search/%s/%d/99/%s' % (self.url, what, i, self.supported_categories[cat])
|
query = "/".join(("/search", what, "0", "7", self.supported_categories[cat]))
|
||||||
dat = urllib2.urlopen(query)
|
|
||||||
parser.feed(dat.read().decode('utf-8'))
|
connection.request("GET", query)
|
||||||
|
response = connection.getresponse()
|
||||||
|
if response.status != 200:
|
||||||
|
return
|
||||||
|
|
||||||
|
list_searches = []
|
||||||
|
parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
|
||||||
|
parser.feed(response.read().decode('utf-8'))
|
||||||
parser.close()
|
parser.close()
|
||||||
if len(results) <= 0:
|
|
||||||
break
|
parser.add_query = False
|
||||||
i += 1
|
for search_query in list_searches:
|
||||||
|
connection.request("GET", search_query)
|
||||||
|
response = connection.getresponse()
|
||||||
|
parser.feed(response.read().decode('utf-8'))
|
||||||
|
parser.close()
|
||||||
|
|
||||||
|
connection.close()
|
||||||
|
return
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
torrentreactor: 1.33
|
torrentreactor: 1.33
|
||||||
mininova: 1.51
|
mininova: 1.51
|
||||||
piratebay: 2.01
|
piratebay: 2.10
|
||||||
extratorrent: 1.2
|
extratorrent: 1.2
|
||||||
kickasstorrents: 1.25
|
kickasstorrents: 1.25
|
||||||
btdigg: 1.23
|
btdigg: 1.23
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
#VERSION: 2.01
|
#VERSION: 2.10
|
||||||
#AUTHORS: Fabien Devaux (fab@gnux.info)
|
#AUTHORS: Fabien Devaux (fab@gnux.info)
|
||||||
#CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org)
|
#CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org)
|
||||||
# Arthur (custparasite@gmx.se)
|
# Arthur (custparasite@gmx.se)
|
||||||
@ -27,113 +27,149 @@
|
|||||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
# POSSIBILITY OF SUCH DAMAGE.
|
# POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
from novaprinter import prettyPrinter
|
|
||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
|
from http.client import HTTPSConnection as https
|
||||||
|
#qBt
|
||||||
|
from novaprinter import prettyPrinter
|
||||||
from helpers import download_file
|
from helpers import download_file
|
||||||
import urllib.request
|
|
||||||
|
|
||||||
PREVIOUS_IDS = set()
|
|
||||||
|
|
||||||
class piratebay(object):
|
class piratebay(object):
|
||||||
|
""" Search engine class """
|
||||||
url = 'https://thepiratebay.se'
|
url = 'https://thepiratebay.se'
|
||||||
name = 'The Pirate Bay'
|
name = 'The Pirate Bay'
|
||||||
supported_categories = {'all': '0', 'music': '100', 'movies': '200', 'games': '400', 'software': '300'}
|
supported_categories = {'all': '0', 'music': '100', 'movies': '200', 'games': '400', 'software': '300'}
|
||||||
|
|
||||||
def download_torrent(self, info):
|
def download_torrent(self, info):
|
||||||
|
""" Downloader """
|
||||||
print(download_file(info))
|
print(download_file(info))
|
||||||
|
|
||||||
class MyHtmlParseWithBlackJack(HTMLParser):
|
class MyHtmlParseWithBlackJack(HTMLParser):
|
||||||
def __init__(self, results, url):
|
""" Parser class """
|
||||||
super().__init__()
|
def __init__(self, list_searches, url):
|
||||||
|
HTMLParser.__init__(self)
|
||||||
|
self.list_searches = list_searches
|
||||||
self.url = url
|
self.url = url
|
||||||
self.results = results
|
|
||||||
self.current_item = None
|
self.current_item = None
|
||||||
self.size_found = False
|
self.save_item = None
|
||||||
self.unit_found = False
|
self.result_table = False #table with results is found
|
||||||
self.seed_found = False
|
self.result_tbody = False
|
||||||
self.skip_td = False
|
self.add_query = True
|
||||||
self.leech_found = False
|
self.result_query = False
|
||||||
self.dispatcher = {'a' : self.handle_tag_a_ref,
|
|
||||||
'font' : self.handle_tag_font_size,
|
|
||||||
'td' : self.handle_tag_td_sl }
|
|
||||||
|
|
||||||
def handle_tag_a_ref(self, attrs):
|
def handle_start_tag_default(self, attrs):
|
||||||
params = dict(attrs)
|
""" Default handler for start tag dispatcher """
|
||||||
#1
|
pass
|
||||||
if params['href'].startswith('/torrent/'):
|
|
||||||
get_id = params['href'].split('/')[2]
|
|
||||||
if not get_id in PREVIOUS_IDS:
|
|
||||||
self.current_item = {}
|
|
||||||
self.current_item['desc_link'] = self.url + params['href'].strip()
|
|
||||||
self.current_item['name'] = params['title'][12:].strip()
|
|
||||||
self.current_item['id'] = get_id
|
|
||||||
#2
|
|
||||||
elif (not self.current_item is None) and (params['href'].startswith('magnet:')):
|
|
||||||
self.current_item['link'] = params['href'].strip()
|
|
||||||
|
|
||||||
def handle_tag_font_size(self, attrs):
|
def handle_start_tag_a(self, attrs):
|
||||||
if not self.current_item is None:
|
""" Handler for start tag a """
|
||||||
params = dict(attrs)
|
params = dict(attrs)
|
||||||
#3
|
link = params["href"]
|
||||||
if params['class'] == "detDesc":
|
if link.startswith("/torrent"):
|
||||||
self.size_found = True
|
self.current_item["desc_link"] = "".join((self.url, link))
|
||||||
|
self.save_item = "name"
|
||||||
|
elif link.startswith("magnet"):
|
||||||
|
self.current_item["link"] = link
|
||||||
|
|
||||||
def handle_tag_td_sl(self, attrs):
|
def handle_start_tag_font(self, attrs):
|
||||||
if not self.current_item is None:
|
""" Handler for start tag font """
|
||||||
params = dict(attrs)
|
for attr in attrs:
|
||||||
if not self.current_item is None:
|
if attr[1] == "detDesc":
|
||||||
if self.seed_found:
|
self.save_item = "size"
|
||||||
#5
|
break
|
||||||
self.current_item['leech'] = ''
|
|
||||||
self.leech_found = True
|
def handle_start_tag_td(self, attrs):
|
||||||
self.seed_found = False
|
""" Handler for start tag td """
|
||||||
|
for attr in attrs:
|
||||||
|
if attr[1] == "right":
|
||||||
|
if "seeds" in self.current_item.keys():
|
||||||
|
self.save_item = "leech"
|
||||||
else:
|
else:
|
||||||
#4
|
self.save_item = "seeds"
|
||||||
self.current_item['seeds'] = ''
|
break
|
||||||
self.seed_found = True
|
|
||||||
|
|
||||||
def handle_starttag(self, tag, attrs):
|
def handle_starttag(self, tag, attrs):
|
||||||
if tag in self.dispatcher:
|
""" Parser's start tag handler """
|
||||||
self.dispatcher[tag](attrs)
|
if self.current_item:
|
||||||
|
dispatcher = getattr(self, "_".join(("handle_start_tag", tag)), self.handle_start_tag_default)
|
||||||
|
dispatcher(attrs)
|
||||||
|
|
||||||
|
elif self.result_tbody:
|
||||||
|
if tag == "tr":
|
||||||
|
self.current_item = {"engine_url" : self.url}
|
||||||
|
|
||||||
|
elif tag == "table":
|
||||||
|
self.result_table = "searchResult" == attrs[0][1]
|
||||||
|
|
||||||
|
elif self.add_query:
|
||||||
|
if self.result_query and tag == "a":
|
||||||
|
if len(self.list_searches) < 10:
|
||||||
|
self.list_searches.append(attrs[0][1])
|
||||||
|
else:
|
||||||
|
self.add_query = False
|
||||||
|
self.result_query = False
|
||||||
|
elif tag == "div":
|
||||||
|
self.result_query = "center" == attrs[0][1]
|
||||||
|
|
||||||
|
|
||||||
|
def handle_endtag(self, tag):
|
||||||
|
""" Parser's end tag handler """
|
||||||
|
if self.result_tbody:
|
||||||
|
if tag == "tr":
|
||||||
|
prettyPrinter(self.current_item)
|
||||||
|
self.current_item = None
|
||||||
|
elif tag == "font":
|
||||||
|
self.save_item = None
|
||||||
|
elif tag == "table":
|
||||||
|
self.result_table = self.result_tbody = False
|
||||||
|
|
||||||
|
elif self.result_table:
|
||||||
|
if tag == "thead":
|
||||||
|
self.result_tbody = True
|
||||||
|
elif tag == "table":
|
||||||
|
self.result_table = self.result_tbody = False
|
||||||
|
|
||||||
|
elif self.add_query and self.result_query:
|
||||||
|
if tag == "div":
|
||||||
|
self.add_query = self.result_query = False
|
||||||
|
|
||||||
def handle_data(self, data):
|
def handle_data(self, data):
|
||||||
if not self.current_item is None:
|
""" Parser's data handler """
|
||||||
if self.size_found:
|
if self.save_item == "size":
|
||||||
#with utf-8 you're going to have something like that: ['Uploaded', '10-02'], ['15:31,', 'Size', '240.34'], ['MiB,', 'ULed', 'by']
|
temp_data = data.split()
|
||||||
temp = data.split()
|
if "Size" in temp_data:
|
||||||
if 'Size' in temp:
|
self.current_item[self.save_item] = temp_data[2]
|
||||||
sizeIn = temp.index('Size')
|
elif "ULed" in temp_data:
|
||||||
self.current_item['size'] = temp[sizeIn + 1]
|
temp_string = self.current_item[self.save_item]
|
||||||
self.size_found = False
|
self.current_item[self.save_item] = " ".join((temp_string, temp_data[0][:-1]))
|
||||||
self.unit_found = True
|
elif self.save_item:
|
||||||
elif self.unit_found:
|
self.current_item[self.save_item] = data
|
||||||
temp = data.split()
|
self.save_item = None
|
||||||
self.current_item['size'] = ' '.join((self.current_item['size'], temp[0]))
|
|
||||||
self.unit_found = False
|
|
||||||
elif self.seed_found:
|
|
||||||
self.current_item['seeds'] += data.rstrip()
|
|
||||||
elif self.leech_found:
|
|
||||||
self.current_item['leech'] += data.rstrip()
|
|
||||||
self.current_item['engine_url'] = self.url
|
|
||||||
prettyPrinter(self.current_item)
|
|
||||||
PREVIOUS_IDS.add(self.current_item['id'])
|
|
||||||
self.results.append('a')
|
|
||||||
self.current_item = None
|
|
||||||
self.size_found = False
|
|
||||||
self.unit_found = False
|
|
||||||
self.seed_found = False
|
|
||||||
self.leech_found = False
|
|
||||||
|
|
||||||
def search(self, what, cat='all'):
|
def search(self, what, cat='all'):
|
||||||
ret = []
|
""" Performs search """
|
||||||
i = 0
|
connection = https("thepiratebay.se")
|
||||||
while i < 11:
|
|
||||||
results = []
|
#prepare query. 7 is filtering by seeders
|
||||||
parser = self.MyHtmlParseWithBlackJack(results, self.url)
|
cat = cat.lower()
|
||||||
query = '%s/search/%s/%d/99/%s' % (self.url, what, i, self.supported_categories[cat])
|
query = "/".join(("/search", what, "0", "7", self.supported_categories[cat]))
|
||||||
dat = urllib.request.urlopen(query)
|
|
||||||
parser.feed(dat.read().decode('utf-8'))
|
connection.request("GET", query)
|
||||||
|
response = connection.getresponse()
|
||||||
|
if response.status != 200:
|
||||||
|
return
|
||||||
|
|
||||||
|
list_searches = []
|
||||||
|
parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
|
||||||
|
parser.feed(response.read().decode('utf-8'))
|
||||||
parser.close()
|
parser.close()
|
||||||
if len(results) <= 0:
|
|
||||||
break
|
parser.add_query = False
|
||||||
i += 1
|
for search_query in list_searches:
|
||||||
|
connection.request("GET", search_query)
|
||||||
|
response = connection.getresponse()
|
||||||
|
parser.feed(response.read().decode('utf-8'))
|
||||||
|
parser.close()
|
||||||
|
|
||||||
|
connection.close()
|
||||||
|
return
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
torrentreactor: 1.33
|
torrentreactor: 1.33
|
||||||
mininova: 1.51
|
mininova: 1.51
|
||||||
piratebay: 2.01
|
piratebay: 2.10
|
||||||
extratorrent: 1.2
|
extratorrent: 1.2
|
||||||
kickasstorrents: 1.25
|
kickasstorrents: 1.25
|
||||||
btdigg: 1.23
|
btdigg: 1.23
|
||||||
|
Loading…
x
Reference in New Issue
Block a user