Browse Source

Pirate bay search engine update

adaptive-webui-19844
DoumanAsh 10 years ago
parent
commit
a62e30ea88
  1. 183
      src/searchengine/nova/engines/piratebay.py
  2. 2
      src/searchengine/nova/engines/versions.txt
  3. 183
      src/searchengine/nova3/engines/piratebay.py
  4. 2
      src/searchengine/nova3/engines/versions.txt

183
src/searchengine/nova/engines/piratebay.py

@ -1,6 +1,7 @@
#VERSION: 1.53 #VERSION: 2.00
#AUTHORS: Fabien Devaux (fab@gnux.info) #AUTHORS: Fabien Devaux (fab@gnux.info)
#CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org) #CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org)
# Arthur (custparasite@gmx.se)
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met: # modification, are permitted provided that the following conditions are met:
@ -27,94 +28,112 @@
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
from novaprinter import prettyPrinter from novaprinter import prettyPrinter
import sgmllib from HTMLParser import HTMLParser
from helpers import retrieve_url, download_file from helpers import download_file
import urllib2
PREVIOUS_IDS = set() PREVIOUS_IDS = set()
class piratebay(object): class piratebay(object):
url = 'https://thepiratebay.se' url = 'http://thepiratebay.se'
name = 'The Pirate Bay' name = 'The Pirate Bay'
supported_categories = {'all': '0', 'movies': '200', 'music': '100', 'games': '400', 'software': '300'} supported_categories = {'all': '0', 'music': '100', 'movies': '200', 'games': '400', 'software': '300'}
def __init__(self): def download_torrent(self, info):
self.results = [] print(download_file(info))
self.parser = self.SimpleSGMLParser(self.results, self.url)
def download_torrent(self, info): class MyHtmlParseWithBlackJack(HTMLParser):
print download_file(info) def __init__(self, results, url):
HTMLParser.__init__(self)
self.url = url
self.results = results
self.current_item = None
self.size_found = False
self.unit_found = False
self.seed_found = False
self.skip_td = False
self.leech_found = False
self.dispatcher = {'a' : self.handle_tag_a_ref,
'font' : self.handle_tag_font_size,
'td' : self.handle_tag_td_sl }
class SimpleSGMLParser(sgmllib.SGMLParser): def handle_tag_a_ref(self, attrs):
def __init__(self, results, url, *args): params = dict(attrs)
sgmllib.SGMLParser.__init__(self) #1
self.td_counter = None if params['href'].startswith('/torrent/'):
self.current_item = None get_id = params['href'].split('/')[2]
self.results = results if not get_id in PREVIOUS_IDS:
self.url = url self.current_item = {}
self.code = 0 self.current_item['desc_link'] = self.url + params['href'].strip()
self.in_name = None self.current_item['name'] = params['title'][12:].strip()
self.current_item['id'] = get_id
#2
elif (not self.current_item is None) and (params['href'].startswith('magnet:')):
self.current_item['link'] = params['href'].strip()
def start_a(self, attr): def handle_tag_font_size(self, attrs):
params = dict(attr) if not self.current_item is None:
if params['href'].startswith('/torrent/'): params = dict(attrs)
self.current_item = {} #3
self.td_counter = 0 if params['class'] == "detDesc":
self.current_item['desc_link'] = self.url + params['href'].strip() self.size_found = True
self.in_name = True
self.current_item['id'] = params['href'].split('/')[2]
elif params['href'].startswith('magnet:'):
self.current_item['link']=params['href'].strip()
self.in_name = False
def handle_data(self, data): def handle_tag_td_sl(self, attrs):
if self.td_counter == 0: if not self.current_item is None:
if self.in_name: params = dict(attrs)
if not self.current_item.has_key('name'): if not self.current_item is None:
self.current_item['name'] = '' if self.seed_found:
self.current_item['name']+= data.strip() #5
else: self.current_item['leech'] = ''
#Parse size self.leech_found = True
if 'Size' in data: self.seed_found = False
self.current_item['size'] = data[data.index("Size")+5:] else:
self.current_item['size'] = self.current_item['size'][:self.current_item['size'].index(',')] #4
elif self.td_counter == 1: self.current_item['seeds'] = ''
if not self.current_item.has_key('seeds'): self.seed_found = True
self.current_item['seeds'] = ''
self.current_item['seeds']+= data.strip()
elif self.td_counter == 2:
if not self.current_item.has_key('leech'):
self.current_item['leech'] = ''
self.current_item['leech']+= data.strip()
def start_td(self,attr): def handle_starttag(self, tag, attrs):
if isinstance(self.td_counter,int): if tag in self.dispatcher:
self.td_counter += 1 self.dispatcher[tag](attrs)
if self.td_counter > 3:
self.td_counter = None def handle_data(self, data):
# Display item if not self.current_item is None:
if self.current_item: if self.size_found:
if self.current_item['id'] in PREVIOUS_IDS: #with utf-8 you're going to have something like that: ['Uploaded', '10-02'], ['15:31,', 'Size', '240.34'], ['MiB,', 'ULed', 'by']
self.results = [] temp = data.split()
self.reset() if 'Size' in temp:
return sizeIn = temp.index('Size')
self.current_item['engine_url'] = self.url self.current_item['size'] = temp[sizeIn + 1]
if not self.current_item['seeds'].isdigit(): self.size_found = False
self.current_item['seeds'] = 0 self.unit_found = True
if not self.current_item['leech'].isdigit(): elif self.unit_found:
self.current_item['leech'] = 0 temp = data.split()
prettyPrinter(self.current_item) self.current_item['size'] = ' '.join((self.current_item['size'], temp[0]))
PREVIOUS_IDS.add(self.current_item['id']) self.unit_found = False
self.results.append('a') elif self.seed_found:
def search(self, what, cat='all'): self.current_item['seeds'] += data.rstrip()
ret = [] elif self.leech_found:
i = 0 self.current_item['leech'] += data.rstrip()
order = 'se' self.current_item['engine_url'] = self.url
while True and i<11: prettyPrinter(self.current_item)
results = [] PREVIOUS_IDS.add(self.current_item['id'])
parser = self.SimpleSGMLParser(results, self.url) self.results.append('a')
dat = retrieve_url(self.url+'/search/%s/%d/7/%s' % (what, i, self.supported_categories[cat])) self.current_item = None
parser.feed(dat) self.size_found = False
parser.close() self.unit_found = False
if len(results) <= 0: self.seed_found = False
break self.leech_found = False
i += 1
def search(self, what, cat='all'):
ret = []
i = 0
while i < 11:
results = []
parser = self.MyHtmlParseWithBlackJack(results, self.url)
query = '%s/search/%s/%d/99/%s' % (self.url, what, i, self.supported_categories[cat])
dat = urllib2.urlopen(query)
parser.feed(dat.read().decode('utf-8'))
parser.close()
if len(results) <= 0:
break
i += 1

2
src/searchengine/nova/engines/versions.txt

@ -1,6 +1,6 @@
torrentreactor: 1.33 torrentreactor: 1.33
mininova: 1.50 mininova: 1.50
piratebay: 1.53 piratebay: 2.00
vertor: 1.3 vertor: 1.3
extratorrent: 1.2 extratorrent: 1.2
kickasstorrents: 1.24 kickasstorrents: 1.24

183
src/searchengine/nova3/engines/piratebay.py

@ -1,6 +1,7 @@
#VERSION: 1.53 #VERSION: 2.00
#AUTHORS: Fabien Devaux (fab@gnux.info) #AUTHORS: Fabien Devaux (fab@gnux.info)
#CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org) #CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org)
# Arthur (custparasite@gmx.se)
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met: # modification, are permitted provided that the following conditions are met:
@ -27,94 +28,112 @@
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
from novaprinter import prettyPrinter from novaprinter import prettyPrinter
import sgmllib3 from html.parser import HTMLParser
from helpers import retrieve_url, download_file from helpers import download_file
import urllib.request
PREVIOUS_IDS = set() PREVIOUS_IDS = set()
class piratebay(object): class piratebay(object):
url = 'https://thepiratebay.se' url = 'http://thepiratebay.se'
name = 'The Pirate Bay' name = 'The Pirate Bay'
supported_categories = {'all': '0', 'movies': '200', 'music': '100', 'games': '400', 'software': '300'} supported_categories = {'all': '0', 'music': '100', 'movies': '200', 'games': '400', 'software': '300'}
def __init__(self): def download_torrent(self, info):
self.results = [] print(download_file(info))
self.parser = self.SimpleSGMLParser(self.results, self.url)
def download_torrent(self, info): class MyHtmlParseWithBlackJack(HTMLParser):
print(download_file(info)) def __init__(self, results, url):
super().__init__()
self.url = url
self.results = results
self.current_item = None
self.size_found = False
self.unit_found = False
self.seed_found = False
self.skip_td = False
self.leech_found = False
self.dispatcher = {'a' : self.handle_tag_a_ref,
'font' : self.handle_tag_font_size,
'td' : self.handle_tag_td_sl }
class SimpleSGMLParser(sgmllib3.SGMLParser): def handle_tag_a_ref(self, attrs):
def __init__(self, results, url, *args): params = dict(attrs)
sgmllib3.SGMLParser.__init__(self) #1
self.td_counter = None if params['href'].startswith('/torrent/'):
self.current_item = None get_id = params['href'].split('/')[2]
self.results = results if not get_id in PREVIOUS_IDS:
self.url = url self.current_item = {}
self.code = 0 self.current_item['desc_link'] = self.url + params['href'].strip()
self.in_name = None self.current_item['name'] = params['title'][12:].strip()
self.current_item['id'] = get_id
#2
elif (not self.current_item is None) and (params['href'].startswith('magnet:')):
self.current_item['link'] = params['href'].strip()
def start_a(self, attr): def handle_tag_font_size(self, attrs):
params = dict(attr) if not self.current_item is None:
if params['href'].startswith('/torrent/'): params = dict(attrs)
self.current_item = {} #3
self.td_counter = 0 if params['class'] == "detDesc":
self.current_item['desc_link'] = self.url + params['href'].strip() self.size_found = True
self.in_name = True
self.current_item['id'] = params['href'].split('/')[2]
elif params['href'].startswith('magnet:'):
self.current_item['link']=params['href'].strip()
self.in_name = False
def handle_data(self, data): def handle_tag_td_sl(self, attrs):
if self.td_counter == 0: if not self.current_item is None:
if self.in_name: params = dict(attrs)
if 'name' not in self.current_item: if not self.current_item is None:
self.current_item['name'] = '' if self.seed_found:
self.current_item['name']+= data.strip() #5
else: self.current_item['leech'] = ''
#Parse size self.leech_found = True
if 'Size' in data: self.seed_found = False
self.current_item['size'] = data[data.index("Size")+5:] else:
self.current_item['size'] = self.current_item['size'][:self.current_item['size'].index(',')] #4
elif self.td_counter == 1: self.current_item['seeds'] = ''
if 'seeds' not in self.current_item: self.seed_found = True
self.current_item['seeds'] = ''
self.current_item['seeds']+= data.strip()
elif self.td_counter == 2:
if 'leech' not in self.current_item:
self.current_item['leech'] = ''
self.current_item['leech']+= data.strip()
def start_td(self,attr): def handle_starttag(self, tag, attrs):
if isinstance(self.td_counter,int): if tag in self.dispatcher:
self.td_counter += 1 self.dispatcher[tag](attrs)
if self.td_counter > 3:
self.td_counter = None def handle_data(self, data):
# Display item if not self.current_item is None:
if self.current_item: if self.size_found:
if self.current_item['id'] in PREVIOUS_IDS: #with utf-8 you're going to have something like that: ['Uploaded', '10-02'], ['15:31,', 'Size', '240.34'], ['MiB,', 'ULed', 'by']
self.results = [] temp = data.split()
self.reset() if 'Size' in temp:
return sizeIn = temp.index('Size')
self.current_item['engine_url'] = self.url self.current_item['size'] = temp[sizeIn + 1]
if not self.current_item['seeds'].isdigit(): self.size_found = False
self.current_item['seeds'] = 0 self.unit_found = True
if not self.current_item['leech'].isdigit(): elif self.unit_found:
self.current_item['leech'] = 0 temp = data.split()
prettyPrinter(self.current_item) self.current_item['size'] = ' '.join((self.current_item['size'], temp[0]))
PREVIOUS_IDS.add(self.current_item['id']) self.unit_found = False
self.results.append('a') elif self.seed_found:
def search(self, what, cat='all'): self.current_item['seeds'] += data.rstrip()
ret = [] elif self.leech_found:
i = 0 self.current_item['leech'] += data.rstrip()
order = 'se' self.current_item['engine_url'] = self.url
while True and i<11: prettyPrinter(self.current_item)
results = [] PREVIOUS_IDS.add(self.current_item['id'])
parser = self.SimpleSGMLParser(results, self.url) self.results.append('a')
dat = retrieve_url(self.url+'/search/%s/%d/7/%s' % (what, i, self.supported_categories[cat])) self.current_item = None
parser.feed(dat) self.size_found = False
parser.close() self.unit_found = False
if len(results) <= 0: self.seed_found = False
break self.leech_found = False
i += 1
def search(self, what, cat='all'):
ret = []
i = 0
while i < 11:
results = []
parser = self.MyHtmlParseWithBlackJack(results, self.url)
query = '%s/search/%s/%d/99/%s' % (self.url, what, i, self.supported_categories[cat])
dat = urllib.request.urlopen(query)
parser.feed(dat.read().decode('utf-8'))
parser.close()
if len(results) <= 0:
break
i += 1

2
src/searchengine/nova3/engines/versions.txt

@ -1,6 +1,6 @@
torrentreactor: 1.33 torrentreactor: 1.33
mininova: 1.50 mininova: 1.50
piratebay: 1.53 piratebay: 2.00
vertor: 1.3 vertor: 1.3
extratorrent: 1.2 extratorrent: 1.2
kickasstorrents: 1.24 kickasstorrents: 1.24

Loading…
Cancel
Save