|
|
#VERSION: 2.10
#AUTHORS: Fabien Devaux (fab@gnux.info)
#CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org)
#              Arthur (custparasite@gmx.se)
|
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
|
|
|
|
|
|
|
from novaprinter import prettyPrinter |
|
|
|
|
|
|
|
from HTMLParser import HTMLParser |
|
|
|
from HTMLParser import HTMLParser |
|
|
|
|
|
|
|
from httplib import HTTPSConnection as https |
|
|
|
|
|
|
|
#qBt |
|
|
|
|
|
|
|
from novaprinter import prettyPrinter |
|
|
|
from helpers import download_file |
|
|
|
from helpers import download_file |
|
|
|
import urllib2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Legacy (v2.01) module-level de-dup set: ids of torrent rows already
# printed, checked/added by the old parser's handlers.  The v2.10 code
# paths visible in this diff never reference it — presumably removable
# once the diff is resolved; TODO confirm against the final file.
PREVIOUS_IDS = set()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class piratebay(object):
    """ Search engine class: queries thepiratebay.se for qBittorrent.

    NOTE(review): SOURCE was an unresolved two-way diff (v2.01 -> v2.10);
    this block is the reconstructed v2.10 ("+") side of that diff.
    """
    url = 'https://thepiratebay.se'
    name = 'The Pirate Bay'
    # qBittorrent category key -> site category id used in the search URL
    supported_categories = {'all': '0', 'music': '100', 'movies': '200', 'games': '400', 'software': '300'}

    def download_torrent(self, info):
        """ Downloader: fetch `info` via helpers.download_file and print the result. """
        print(download_file(info))

    class MyHtmlParseWithBlackJack(HTMLParser):
        """ Parser for the search-result HTML pages.

        While ``add_query`` is True (first page) it also collects the
        pagination hrefs into ``list_searches`` so ``search()`` can fetch
        the remaining result pages with the same parser instance.
        """

        def __init__(self, list_searches, url):
            HTMLParser.__init__(self)
            self.list_searches = list_searches  # out-param: pagination hrefs found on page 1
            self.url = url                      # engine base url, prefixed to relative links
            self.current_item = None            # dict for the torrent row currently being parsed
            self.save_item = None               # current_item key the next data chunk belongs to
            self.result_table = False           # table with results is found
            self.result_tbody = False           # past the table's <thead>, i.e. inside result rows
            self.add_query = True               # still collecting pagination links
            self.result_query = False           # inside the centered pagination <div>

        def handle_start_tag_default(self, attrs):
            """ Default handler for start tag dispatcher """
            pass

        def handle_start_tag_a(self, attrs):
            """ Handler for start tag a """
            params = dict(attrs)
            link = params["href"]
            if link.startswith("/torrent"):
                # description-page link; the tag's text content is the name
                self.current_item["desc_link"] = "".join((self.url, link))
                self.save_item = "name"
            elif link.startswith("magnet"):
                self.current_item["link"] = link

        def handle_start_tag_font(self, attrs):
            """ Handler for start tag font """
            for attr in attrs:
                if attr[1] == "detDesc":
                    # <font class="detDesc"> holds "Uploaded ..., Size ..., ULed by ..."
                    self.save_item = "size"
                    break

        def handle_start_tag_td(self, attrs):
            """ Handler for start tag td """
            for attr in attrs:
                if attr[1] == "right":
                    # the right-aligned cells are seeders first, then leechers
                    if "seeds" in self.current_item.keys():
                        self.save_item = "leech"
                    else:
                        self.save_item = "seeds"
                    break

        def handle_starttag(self, tag, attrs):
            """ Parser's start tag handler """
            if self.current_item:
                # inside a result row: dispatch on the tag name
                dispatcher = getattr(self, "_".join(("handle_start_tag", tag)), self.handle_start_tag_default)
                dispatcher(attrs)

            elif self.result_tbody:
                if tag == "tr":
                    # a new result row starts
                    self.current_item = {"engine_url" : self.url}

            elif tag == "table":
                self.result_table = "searchResult" == attrs[0][1]

            elif self.add_query:
                if self.result_query and tag == "a":
                    # cap at 10 extra pages
                    if len(self.list_searches) < 10:
                        self.list_searches.append(attrs[0][1])
                    else:
                        self.add_query = False
                        self.result_query = False
                elif tag == "div":
                    self.result_query = "center" == attrs[0][1]

        def handle_endtag(self, tag):
            """ Parser's end tag handler """
            if self.result_tbody:
                if tag == "tr":
                    # row finished: emit it in qBittorrent's expected format
                    prettyPrinter(self.current_item)
                    self.current_item = None
                elif tag == "font":
                    self.save_item = None
                elif tag == "table":
                    self.result_table = self.result_tbody = False

            elif self.result_table:
                if tag == "thead":
                    self.result_tbody = True
                elif tag == "table":
                    self.result_table = self.result_tbody = False

            elif self.add_query and self.result_query:
                if tag == "div":
                    self.add_query = self.result_query = False

        def handle_data(self, data):
            """ Parser's data handler """
            if self.save_item == "size":
                # detDesc text arrives in chunks, e.g. ['Uploaded', '10-02'],
                # ['15:31,', 'Size', '240.34'], ['MiB,', 'ULed', 'by']
                temp_data = data.split()
                if "Size" in temp_data:
                    self.current_item[self.save_item] = temp_data[2]
                elif "ULed" in temp_data:
                    # first token is the unit with a trailing comma, e.g. 'MiB,'
                    temp_string = self.current_item[self.save_item]
                    self.current_item[self.save_item] = " ".join((temp_string, temp_data[0][:-1]))
            elif self.save_item:
                self.current_item[self.save_item] = data
                self.save_item = None

    def search(self, what, cat='all'):
        """ Performs search """
        connection = https("thepiratebay.se")

        #prepare query. 7 is filtering by seeders
        cat = cat.lower()
        query = "/".join(("/search", what, "0", "7", self.supported_categories[cat]))

        connection.request("GET", query)
        response = connection.getresponse()
        if response.status != 200:
            return

        # first page: parse results and collect the pagination links
        list_searches = []
        parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
        parser.feed(response.read().decode('utf-8'))
        parser.close()

        # further pages: reuse the parser but stop collecting pagination links
        parser.add_query = False
        for search_query in list_searches:
            connection.request("GET", search_query)
            response = connection.getresponse()
            parser.feed(response.read().decode('utf-8'))
            parser.close()

        connection.close()
        return
|
|
|