mirror of
https://github.com/d47081/qBittorrent.git
synced 2025-01-25 22:14:32 +00:00
Pirate bay search engine update
This commit is contained in:
parent
0e0e8f7c27
commit
a62e30ea88
@ -1,6 +1,7 @@
|
||||
#VERSION: 1.53
|
||||
#VERSION: 2.00
|
||||
#AUTHORS: Fabien Devaux (fab@gnux.info)
|
||||
#CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org)
|
||||
# Arthur (custparasite@gmx.se)
|
||||
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
@ -27,94 +28,112 @@
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from novaprinter import prettyPrinter
|
||||
import sgmllib
|
||||
from helpers import retrieve_url, download_file
|
||||
from HTMLParser import HTMLParser
|
||||
from helpers import download_file
|
||||
import urllib2
|
||||
|
||||
PREVIOUS_IDS = set()
|
||||
|
||||
class piratebay(object):
|
||||
url = 'https://thepiratebay.se'
|
||||
name = 'The Pirate Bay'
|
||||
supported_categories = {'all': '0', 'movies': '200', 'music': '100', 'games': '400', 'software': '300'}
|
||||
url = 'http://thepiratebay.se'
|
||||
name = 'The Pirate Bay'
|
||||
supported_categories = {'all': '0', 'music': '100', 'movies': '200', 'games': '400', 'software': '300'}
|
||||
|
||||
def __init__(self):
|
||||
self.results = []
|
||||
self.parser = self.SimpleSGMLParser(self.results, self.url)
|
||||
def download_torrent(self, info):
|
||||
print(download_file(info))
|
||||
|
||||
def download_torrent(self, info):
|
||||
print download_file(info)
|
||||
class MyHtmlParseWithBlackJack(HTMLParser):
|
||||
def __init__(self, results, url):
|
||||
HTMLParser.__init__(self)
|
||||
self.url = url
|
||||
self.results = results
|
||||
self.current_item = None
|
||||
self.size_found = False
|
||||
self.unit_found = False
|
||||
self.seed_found = False
|
||||
self.skip_td = False
|
||||
self.leech_found = False
|
||||
self.dispatcher = {'a' : self.handle_tag_a_ref,
|
||||
'font' : self.handle_tag_font_size,
|
||||
'td' : self.handle_tag_td_sl }
|
||||
|
||||
class SimpleSGMLParser(sgmllib.SGMLParser):
|
||||
def __init__(self, results, url, *args):
|
||||
sgmllib.SGMLParser.__init__(self)
|
||||
self.td_counter = None
|
||||
self.current_item = None
|
||||
self.results = results
|
||||
self.url = url
|
||||
self.code = 0
|
||||
self.in_name = None
|
||||
def handle_tag_a_ref(self, attrs):
|
||||
params = dict(attrs)
|
||||
#1
|
||||
if params['href'].startswith('/torrent/'):
|
||||
get_id = params['href'].split('/')[2]
|
||||
if not get_id in PREVIOUS_IDS:
|
||||
self.current_item = {}
|
||||
self.current_item['desc_link'] = self.url + params['href'].strip()
|
||||
self.current_item['name'] = params['title'][12:].strip()
|
||||
self.current_item['id'] = get_id
|
||||
#2
|
||||
elif (not self.current_item is None) and (params['href'].startswith('magnet:')):
|
||||
self.current_item['link'] = params['href'].strip()
|
||||
|
||||
def start_a(self, attr):
|
||||
params = dict(attr)
|
||||
if params['href'].startswith('/torrent/'):
|
||||
self.current_item = {}
|
||||
self.td_counter = 0
|
||||
self.current_item['desc_link'] = self.url + params['href'].strip()
|
||||
self.in_name = True
|
||||
self.current_item['id'] = params['href'].split('/')[2]
|
||||
elif params['href'].startswith('magnet:'):
|
||||
self.current_item['link']=params['href'].strip()
|
||||
self.in_name = False
|
||||
def handle_tag_font_size(self, attrs):
|
||||
if not self.current_item is None:
|
||||
params = dict(attrs)
|
||||
#3
|
||||
if params['class'] == "detDesc":
|
||||
self.size_found = True
|
||||
|
||||
def handle_data(self, data):
|
||||
if self.td_counter == 0:
|
||||
if self.in_name:
|
||||
if not self.current_item.has_key('name'):
|
||||
self.current_item['name'] = ''
|
||||
self.current_item['name']+= data.strip()
|
||||
else:
|
||||
#Parse size
|
||||
if 'Size' in data:
|
||||
self.current_item['size'] = data[data.index("Size")+5:]
|
||||
self.current_item['size'] = self.current_item['size'][:self.current_item['size'].index(',')]
|
||||
elif self.td_counter == 1:
|
||||
if not self.current_item.has_key('seeds'):
|
||||
self.current_item['seeds'] = ''
|
||||
self.current_item['seeds']+= data.strip()
|
||||
elif self.td_counter == 2:
|
||||
if not self.current_item.has_key('leech'):
|
||||
self.current_item['leech'] = ''
|
||||
self.current_item['leech']+= data.strip()
|
||||
def handle_tag_td_sl(self, attrs):
|
||||
if not self.current_item is None:
|
||||
params = dict(attrs)
|
||||
if not self.current_item is None:
|
||||
if self.seed_found:
|
||||
#5
|
||||
self.current_item['leech'] = ''
|
||||
self.leech_found = True
|
||||
self.seed_found = False
|
||||
else:
|
||||
#4
|
||||
self.current_item['seeds'] = ''
|
||||
self.seed_found = True
|
||||
|
||||
def start_td(self,attr):
|
||||
if isinstance(self.td_counter,int):
|
||||
self.td_counter += 1
|
||||
if self.td_counter > 3:
|
||||
self.td_counter = None
|
||||
# Display item
|
||||
if self.current_item:
|
||||
if self.current_item['id'] in PREVIOUS_IDS:
|
||||
self.results = []
|
||||
self.reset()
|
||||
return
|
||||
self.current_item['engine_url'] = self.url
|
||||
if not self.current_item['seeds'].isdigit():
|
||||
self.current_item['seeds'] = 0
|
||||
if not self.current_item['leech'].isdigit():
|
||||
self.current_item['leech'] = 0
|
||||
prettyPrinter(self.current_item)
|
||||
PREVIOUS_IDS.add(self.current_item['id'])
|
||||
self.results.append('a')
|
||||
def search(self, what, cat='all'):
|
||||
ret = []
|
||||
i = 0
|
||||
order = 'se'
|
||||
while True and i<11:
|
||||
results = []
|
||||
parser = self.SimpleSGMLParser(results, self.url)
|
||||
dat = retrieve_url(self.url+'/search/%s/%d/7/%s' % (what, i, self.supported_categories[cat]))
|
||||
parser.feed(dat)
|
||||
parser.close()
|
||||
if len(results) <= 0:
|
||||
break
|
||||
i += 1
|
||||
def handle_starttag(self, tag, attrs):
|
||||
if tag in self.dispatcher:
|
||||
self.dispatcher[tag](attrs)
|
||||
|
||||
def handle_data(self, data):
|
||||
if not self.current_item is None:
|
||||
if self.size_found:
|
||||
#with utf-8 you're going to have something like that: ['Uploaded', '10-02'], ['15:31,', 'Size', '240.34'], ['MiB,', 'ULed', 'by']
|
||||
temp = data.split()
|
||||
if 'Size' in temp:
|
||||
sizeIn = temp.index('Size')
|
||||
self.current_item['size'] = temp[sizeIn + 1]
|
||||
self.size_found = False
|
||||
self.unit_found = True
|
||||
elif self.unit_found:
|
||||
temp = data.split()
|
||||
self.current_item['size'] = ' '.join((self.current_item['size'], temp[0]))
|
||||
self.unit_found = False
|
||||
elif self.seed_found:
|
||||
self.current_item['seeds'] += data.rstrip()
|
||||
elif self.leech_found:
|
||||
self.current_item['leech'] += data.rstrip()
|
||||
self.current_item['engine_url'] = self.url
|
||||
prettyPrinter(self.current_item)
|
||||
PREVIOUS_IDS.add(self.current_item['id'])
|
||||
self.results.append('a')
|
||||
self.current_item = None
|
||||
self.size_found = False
|
||||
self.unit_found = False
|
||||
self.seed_found = False
|
||||
self.leech_found = False
|
||||
|
||||
def search(self, what, cat='all'):
|
||||
ret = []
|
||||
i = 0
|
||||
while i < 11:
|
||||
results = []
|
||||
parser = self.MyHtmlParseWithBlackJack(results, self.url)
|
||||
query = '%s/search/%s/%d/99/%s' % (self.url, what, i, self.supported_categories[cat])
|
||||
dat = urllib2.urlopen(query)
|
||||
parser.feed(dat.read().decode('utf-8'))
|
||||
parser.close()
|
||||
if len(results) <= 0:
|
||||
break
|
||||
i += 1
|
||||
|
@ -1,6 +1,6 @@
|
||||
torrentreactor: 1.33
|
||||
mininova: 1.50
|
||||
piratebay: 1.53
|
||||
piratebay: 2.00
|
||||
vertor: 1.3
|
||||
extratorrent: 1.2
|
||||
kickasstorrents: 1.24
|
||||
|
@ -1,6 +1,7 @@
|
||||
#VERSION: 1.53
|
||||
#VERSION: 2.00
|
||||
#AUTHORS: Fabien Devaux (fab@gnux.info)
|
||||
#CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org)
|
||||
# Arthur (custparasite@gmx.se)
|
||||
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
@ -27,94 +28,112 @@
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from novaprinter import prettyPrinter
|
||||
import sgmllib3
|
||||
from helpers import retrieve_url, download_file
|
||||
from html.parser import HTMLParser
|
||||
from helpers import download_file
|
||||
import urllib.request
|
||||
|
||||
PREVIOUS_IDS = set()
|
||||
|
||||
class piratebay(object):
|
||||
url = 'https://thepiratebay.se'
|
||||
name = 'The Pirate Bay'
|
||||
supported_categories = {'all': '0', 'movies': '200', 'music': '100', 'games': '400', 'software': '300'}
|
||||
url = 'http://thepiratebay.se'
|
||||
name = 'The Pirate Bay'
|
||||
supported_categories = {'all': '0', 'music': '100', 'movies': '200', 'games': '400', 'software': '300'}
|
||||
|
||||
def __init__(self):
|
||||
self.results = []
|
||||
self.parser = self.SimpleSGMLParser(self.results, self.url)
|
||||
def download_torrent(self, info):
|
||||
print(download_file(info))
|
||||
|
||||
def download_torrent(self, info):
|
||||
print(download_file(info))
|
||||
class MyHtmlParseWithBlackJack(HTMLParser):
|
||||
def __init__(self, results, url):
|
||||
super().__init__()
|
||||
self.url = url
|
||||
self.results = results
|
||||
self.current_item = None
|
||||
self.size_found = False
|
||||
self.unit_found = False
|
||||
self.seed_found = False
|
||||
self.skip_td = False
|
||||
self.leech_found = False
|
||||
self.dispatcher = {'a' : self.handle_tag_a_ref,
|
||||
'font' : self.handle_tag_font_size,
|
||||
'td' : self.handle_tag_td_sl }
|
||||
|
||||
class SimpleSGMLParser(sgmllib3.SGMLParser):
|
||||
def __init__(self, results, url, *args):
|
||||
sgmllib3.SGMLParser.__init__(self)
|
||||
self.td_counter = None
|
||||
self.current_item = None
|
||||
self.results = results
|
||||
self.url = url
|
||||
self.code = 0
|
||||
self.in_name = None
|
||||
def handle_tag_a_ref(self, attrs):
|
||||
params = dict(attrs)
|
||||
#1
|
||||
if params['href'].startswith('/torrent/'):
|
||||
get_id = params['href'].split('/')[2]
|
||||
if not get_id in PREVIOUS_IDS:
|
||||
self.current_item = {}
|
||||
self.current_item['desc_link'] = self.url + params['href'].strip()
|
||||
self.current_item['name'] = params['title'][12:].strip()
|
||||
self.current_item['id'] = get_id
|
||||
#2
|
||||
elif (not self.current_item is None) and (params['href'].startswith('magnet:')):
|
||||
self.current_item['link'] = params['href'].strip()
|
||||
|
||||
def start_a(self, attr):
|
||||
params = dict(attr)
|
||||
if params['href'].startswith('/torrent/'):
|
||||
self.current_item = {}
|
||||
self.td_counter = 0
|
||||
self.current_item['desc_link'] = self.url + params['href'].strip()
|
||||
self.in_name = True
|
||||
self.current_item['id'] = params['href'].split('/')[2]
|
||||
elif params['href'].startswith('magnet:'):
|
||||
self.current_item['link']=params['href'].strip()
|
||||
self.in_name = False
|
||||
def handle_tag_font_size(self, attrs):
|
||||
if not self.current_item is None:
|
||||
params = dict(attrs)
|
||||
#3
|
||||
if params['class'] == "detDesc":
|
||||
self.size_found = True
|
||||
|
||||
def handle_data(self, data):
|
||||
if self.td_counter == 0:
|
||||
if self.in_name:
|
||||
if 'name' not in self.current_item:
|
||||
self.current_item['name'] = ''
|
||||
self.current_item['name']+= data.strip()
|
||||
else:
|
||||
#Parse size
|
||||
if 'Size' in data:
|
||||
self.current_item['size'] = data[data.index("Size")+5:]
|
||||
self.current_item['size'] = self.current_item['size'][:self.current_item['size'].index(',')]
|
||||
elif self.td_counter == 1:
|
||||
if 'seeds' not in self.current_item:
|
||||
self.current_item['seeds'] = ''
|
||||
self.current_item['seeds']+= data.strip()
|
||||
elif self.td_counter == 2:
|
||||
if 'leech' not in self.current_item:
|
||||
self.current_item['leech'] = ''
|
||||
self.current_item['leech']+= data.strip()
|
||||
def handle_tag_td_sl(self, attrs):
|
||||
if not self.current_item is None:
|
||||
params = dict(attrs)
|
||||
if not self.current_item is None:
|
||||
if self.seed_found:
|
||||
#5
|
||||
self.current_item['leech'] = ''
|
||||
self.leech_found = True
|
||||
self.seed_found = False
|
||||
else:
|
||||
#4
|
||||
self.current_item['seeds'] = ''
|
||||
self.seed_found = True
|
||||
|
||||
def start_td(self,attr):
|
||||
if isinstance(self.td_counter,int):
|
||||
self.td_counter += 1
|
||||
if self.td_counter > 3:
|
||||
self.td_counter = None
|
||||
# Display item
|
||||
if self.current_item:
|
||||
if self.current_item['id'] in PREVIOUS_IDS:
|
||||
self.results = []
|
||||
self.reset()
|
||||
return
|
||||
self.current_item['engine_url'] = self.url
|
||||
if not self.current_item['seeds'].isdigit():
|
||||
self.current_item['seeds'] = 0
|
||||
if not self.current_item['leech'].isdigit():
|
||||
self.current_item['leech'] = 0
|
||||
prettyPrinter(self.current_item)
|
||||
PREVIOUS_IDS.add(self.current_item['id'])
|
||||
self.results.append('a')
|
||||
def search(self, what, cat='all'):
|
||||
ret = []
|
||||
i = 0
|
||||
order = 'se'
|
||||
while True and i<11:
|
||||
results = []
|
||||
parser = self.SimpleSGMLParser(results, self.url)
|
||||
dat = retrieve_url(self.url+'/search/%s/%d/7/%s' % (what, i, self.supported_categories[cat]))
|
||||
parser.feed(dat)
|
||||
parser.close()
|
||||
if len(results) <= 0:
|
||||
break
|
||||
i += 1
|
||||
def handle_starttag(self, tag, attrs):
|
||||
if tag in self.dispatcher:
|
||||
self.dispatcher[tag](attrs)
|
||||
|
||||
def handle_data(self, data):
|
||||
if not self.current_item is None:
|
||||
if self.size_found:
|
||||
#with utf-8 you're going to have something like that: ['Uploaded', '10-02'], ['15:31,', 'Size', '240.34'], ['MiB,', 'ULed', 'by']
|
||||
temp = data.split()
|
||||
if 'Size' in temp:
|
||||
sizeIn = temp.index('Size')
|
||||
self.current_item['size'] = temp[sizeIn + 1]
|
||||
self.size_found = False
|
||||
self.unit_found = True
|
||||
elif self.unit_found:
|
||||
temp = data.split()
|
||||
self.current_item['size'] = ' '.join((self.current_item['size'], temp[0]))
|
||||
self.unit_found = False
|
||||
elif self.seed_found:
|
||||
self.current_item['seeds'] += data.rstrip()
|
||||
elif self.leech_found:
|
||||
self.current_item['leech'] += data.rstrip()
|
||||
self.current_item['engine_url'] = self.url
|
||||
prettyPrinter(self.current_item)
|
||||
PREVIOUS_IDS.add(self.current_item['id'])
|
||||
self.results.append('a')
|
||||
self.current_item = None
|
||||
self.size_found = False
|
||||
self.unit_found = False
|
||||
self.seed_found = False
|
||||
self.leech_found = False
|
||||
|
||||
def search(self, what, cat='all'):
|
||||
ret = []
|
||||
i = 0
|
||||
while i < 11:
|
||||
results = []
|
||||
parser = self.MyHtmlParseWithBlackJack(results, self.url)
|
||||
query = '%s/search/%s/%d/99/%s' % (self.url, what, i, self.supported_categories[cat])
|
||||
dat = urllib.request.urlopen(query)
|
||||
parser.feed(dat.read().decode('utf-8'))
|
||||
parser.close()
|
||||
if len(results) <= 0:
|
||||
break
|
||||
i += 1
|
||||
|
@ -1,6 +1,6 @@
|
||||
torrentreactor: 1.33
|
||||
mininova: 1.50
|
||||
piratebay: 1.53
|
||||
piratebay: 2.00
|
||||
vertor: 1.3
|
||||
extratorrent: 1.2
|
||||
kickasstorrents: 1.24
|
||||
|
Loading…
x
Reference in New Issue
Block a user