|
|
@ -1,6 +1,7 @@ |
|
|
|
#VERSION: 1.53 |
|
|
|
#VERSION: 2.00 |
|
|
|
#AUTHORS: Fabien Devaux (fab@gnux.info) |
|
|
|
#AUTHORS: Fabien Devaux (fab@gnux.info) |
|
|
|
#CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org) |
|
|
|
#CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org) |
|
|
|
|
|
|
|
# Arthur (custparasite@gmx.se) |
|
|
|
|
|
|
|
|
|
|
|
# Redistribution and use in source and binary forms, with or without |
|
|
|
# Redistribution and use in source and binary forms, with or without |
|
|
|
# modification, are permitted provided that the following conditions are met: |
|
|
|
# modification, are permitted provided that the following conditions are met: |
|
|
@ -27,93 +28,111 @@ |
|
|
|
# POSSIBILITY OF SUCH DAMAGE. |
|
|
|
# POSSIBILITY OF SUCH DAMAGE. |
|
|
|
|
|
|
|
|
|
|
|
from novaprinter import prettyPrinter |
|
|
|
from novaprinter import prettyPrinter |
|
|
|
import sgmllib |
|
|
|
from HTMLParser import HTMLParser |
|
|
|
from helpers import retrieve_url, download_file |
|
|
|
from helpers import download_file |
|
|
|
|
|
|
|
import urllib2 |
|
|
|
|
|
|
|
|
|
|
|
PREVIOUS_IDS = set() |
|
|
|
PREVIOUS_IDS = set() |
|
|
|
|
|
|
|
|
|
|
|
class piratebay(object): |
|
|
|
class piratebay(object): |
|
|
|
url = 'https://thepiratebay.se' |
|
|
|
url = 'http://thepiratebay.se' |
|
|
|
name = 'The Pirate Bay' |
|
|
|
name = 'The Pirate Bay' |
|
|
|
supported_categories = {'all': '0', 'movies': '200', 'music': '100', 'games': '400', 'software': '300'} |
|
|
|
supported_categories = {'all': '0', 'music': '100', 'movies': '200', 'games': '400', 'software': '300'} |
|
|
|
|
|
|
|
|
|
|
|
def __init__(self): |
|
|
|
|
|
|
|
self.results = [] |
|
|
|
|
|
|
|
self.parser = self.SimpleSGMLParser(self.results, self.url) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def download_torrent(self, info): |
|
|
|
def download_torrent(self, info): |
|
|
|
print download_file(info) |
|
|
|
print(download_file(info)) |
|
|
|
|
|
|
|
|
|
|
|
class SimpleSGMLParser(sgmllib.SGMLParser): |
|
|
|
class MyHtmlParseWithBlackJack(HTMLParser): |
|
|
|
def __init__(self, results, url, *args): |
|
|
|
def __init__(self, results, url): |
|
|
|
sgmllib.SGMLParser.__init__(self) |
|
|
|
HTMLParser.__init__(self) |
|
|
|
self.td_counter = None |
|
|
|
|
|
|
|
self.current_item = None |
|
|
|
|
|
|
|
self.results = results |
|
|
|
|
|
|
|
self.url = url |
|
|
|
self.url = url |
|
|
|
self.code = 0 |
|
|
|
self.results = results |
|
|
|
self.in_name = None |
|
|
|
self.current_item = None |
|
|
|
|
|
|
|
self.size_found = False |
|
|
|
|
|
|
|
self.unit_found = False |
|
|
|
|
|
|
|
self.seed_found = False |
|
|
|
|
|
|
|
self.skip_td = False |
|
|
|
|
|
|
|
self.leech_found = False |
|
|
|
|
|
|
|
self.dispatcher = {'a' : self.handle_tag_a_ref, |
|
|
|
|
|
|
|
'font' : self.handle_tag_font_size, |
|
|
|
|
|
|
|
'td' : self.handle_tag_td_sl } |
|
|
|
|
|
|
|
|
|
|
|
def start_a(self, attr): |
|
|
|
def handle_tag_a_ref(self, attrs): |
|
|
|
params = dict(attr) |
|
|
|
params = dict(attrs) |
|
|
|
|
|
|
|
#1 |
|
|
|
if params['href'].startswith('/torrent/'): |
|
|
|
if params['href'].startswith('/torrent/'): |
|
|
|
|
|
|
|
get_id = params['href'].split('/')[2] |
|
|
|
|
|
|
|
if not get_id in PREVIOUS_IDS: |
|
|
|
self.current_item = {} |
|
|
|
self.current_item = {} |
|
|
|
self.td_counter = 0 |
|
|
|
|
|
|
|
self.current_item['desc_link'] = self.url + params['href'].strip() |
|
|
|
self.current_item['desc_link'] = self.url + params['href'].strip() |
|
|
|
self.in_name = True |
|
|
|
self.current_item['name'] = params['title'][12:].strip() |
|
|
|
self.current_item['id'] = params['href'].split('/')[2] |
|
|
|
self.current_item['id'] = get_id |
|
|
|
elif params['href'].startswith('magnet:'): |
|
|
|
#2 |
|
|
|
self.current_item['link']=params['href'].strip() |
|
|
|
elif (not self.current_item is None) and (params['href'].startswith('magnet:')): |
|
|
|
self.in_name = False |
|
|
|
self.current_item['link'] = params['href'].strip() |
|
|
|
|
|
|
|
|
|
|
|
def handle_data(self, data): |
|
|
|
def handle_tag_font_size(self, attrs): |
|
|
|
if self.td_counter == 0: |
|
|
|
if not self.current_item is None: |
|
|
|
if self.in_name: |
|
|
|
params = dict(attrs) |
|
|
|
if not self.current_item.has_key('name'): |
|
|
|
#3 |
|
|
|
self.current_item['name'] = '' |
|
|
|
if params['class'] == "detDesc": |
|
|
|
self.current_item['name']+= data.strip() |
|
|
|
self.size_found = True |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def handle_tag_td_sl(self, attrs): |
|
|
|
|
|
|
|
if not self.current_item is None: |
|
|
|
|
|
|
|
params = dict(attrs) |
|
|
|
|
|
|
|
if not self.current_item is None: |
|
|
|
|
|
|
|
if self.seed_found: |
|
|
|
|
|
|
|
#5 |
|
|
|
|
|
|
|
self.current_item['leech'] = '' |
|
|
|
|
|
|
|
self.leech_found = True |
|
|
|
|
|
|
|
self.seed_found = False |
|
|
|
else: |
|
|
|
else: |
|
|
|
#Parse size |
|
|
|
#4 |
|
|
|
if 'Size' in data: |
|
|
|
|
|
|
|
self.current_item['size'] = data[data.index("Size")+5:] |
|
|
|
|
|
|
|
self.current_item['size'] = self.current_item['size'][:self.current_item['size'].index(',')] |
|
|
|
|
|
|
|
elif self.td_counter == 1: |
|
|
|
|
|
|
|
if not self.current_item.has_key('seeds'): |
|
|
|
|
|
|
|
self.current_item['seeds'] = '' |
|
|
|
self.current_item['seeds'] = '' |
|
|
|
self.current_item['seeds']+= data.strip() |
|
|
|
self.seed_found = True |
|
|
|
elif self.td_counter == 2: |
|
|
|
|
|
|
|
if not self.current_item.has_key('leech'): |
|
|
|
def handle_starttag(self, tag, attrs): |
|
|
|
self.current_item['leech'] = '' |
|
|
|
if tag in self.dispatcher: |
|
|
|
self.current_item['leech']+= data.strip() |
|
|
|
self.dispatcher[tag](attrs) |
|
|
|
|
|
|
|
|
|
|
|
def start_td(self,attr): |
|
|
|
def handle_data(self, data): |
|
|
|
if isinstance(self.td_counter,int): |
|
|
|
if not self.current_item is None: |
|
|
|
self.td_counter += 1 |
|
|
|
if self.size_found: |
|
|
|
if self.td_counter > 3: |
|
|
|
#with utf-8 you're going to have something like that: ['Uploaded', '10-02'], ['15:31,', 'Size', '240.34'], ['MiB,', 'ULed', 'by'] |
|
|
|
self.td_counter = None |
|
|
|
temp = data.split() |
|
|
|
# Display item |
|
|
|
if 'Size' in temp: |
|
|
|
if self.current_item: |
|
|
|
sizeIn = temp.index('Size') |
|
|
|
if self.current_item['id'] in PREVIOUS_IDS: |
|
|
|
self.current_item['size'] = temp[sizeIn + 1] |
|
|
|
self.results = [] |
|
|
|
self.size_found = False |
|
|
|
self.reset() |
|
|
|
self.unit_found = True |
|
|
|
return |
|
|
|
elif self.unit_found: |
|
|
|
|
|
|
|
temp = data.split() |
|
|
|
|
|
|
|
self.current_item['size'] = ' '.join((self.current_item['size'], temp[0])) |
|
|
|
|
|
|
|
self.unit_found = False |
|
|
|
|
|
|
|
elif self.seed_found: |
|
|
|
|
|
|
|
self.current_item['seeds'] += data.rstrip() |
|
|
|
|
|
|
|
elif self.leech_found: |
|
|
|
|
|
|
|
self.current_item['leech'] += data.rstrip() |
|
|
|
self.current_item['engine_url'] = self.url |
|
|
|
self.current_item['engine_url'] = self.url |
|
|
|
if not self.current_item['seeds'].isdigit(): |
|
|
|
|
|
|
|
self.current_item['seeds'] = 0 |
|
|
|
|
|
|
|
if not self.current_item['leech'].isdigit(): |
|
|
|
|
|
|
|
self.current_item['leech'] = 0 |
|
|
|
|
|
|
|
prettyPrinter(self.current_item) |
|
|
|
prettyPrinter(self.current_item) |
|
|
|
PREVIOUS_IDS.add(self.current_item['id']) |
|
|
|
PREVIOUS_IDS.add(self.current_item['id']) |
|
|
|
self.results.append('a') |
|
|
|
self.results.append('a') |
|
|
|
|
|
|
|
self.current_item = None |
|
|
|
|
|
|
|
self.size_found = False |
|
|
|
|
|
|
|
self.unit_found = False |
|
|
|
|
|
|
|
self.seed_found = False |
|
|
|
|
|
|
|
self.leech_found = False |
|
|
|
|
|
|
|
|
|
|
|
def search(self, what, cat='all'): |
|
|
|
def search(self, what, cat='all'): |
|
|
|
ret = [] |
|
|
|
ret = [] |
|
|
|
i = 0 |
|
|
|
i = 0 |
|
|
|
order = 'se' |
|
|
|
while i < 11: |
|
|
|
while True and i<11: |
|
|
|
|
|
|
|
results = [] |
|
|
|
results = [] |
|
|
|
parser = self.SimpleSGMLParser(results, self.url) |
|
|
|
parser = self.MyHtmlParseWithBlackJack(results, self.url) |
|
|
|
dat = retrieve_url(self.url+'/search/%s/%d/7/%s' % (what, i, self.supported_categories[cat])) |
|
|
|
query = '%s/search/%s/%d/99/%s' % (self.url, what, i, self.supported_categories[cat]) |
|
|
|
parser.feed(dat) |
|
|
|
dat = urllib2.urlopen(query) |
|
|
|
|
|
|
|
parser.feed(dat.read().decode('utf-8')) |
|
|
|
parser.close() |
|
|
|
parser.close() |
|
|
|
if len(results) <= 0: |
|
|
|
if len(results) <= 0: |
|
|
|
break |
|
|
|
break |
|
|
|