Browse Source

Merge pull request #2550 from DoumanAsh/multiprocessor_search

[search engine] Replace threading with multiprocessing
adaptive-webui-19844
sledgehammer999 10 years ago
parent
commit
3d40834c57
  1. 217
      src/searchengine/nova/engines/extratorrent.py
  2. 6
      src/searchengine/nova/engines/legittorrents.py
  3. 201
      src/searchengine/nova/engines/mininova.py
  4. BIN
      src/searchengine/nova/engines/torrentreactor.png
  5. 150
      src/searchengine/nova/engines/torrentreactor.py
  6. 4
      src/searchengine/nova/engines/torrentz.py
  7. 11
      src/searchengine/nova/engines/versions.txt
  8. 246
      src/searchengine/nova/nova2.py
  9. 44
      src/searchengine/nova/nova2dl.py
  10. 67
      src/searchengine/nova/novaprinter.py
  11. 652
      src/searchengine/nova/socks.py
  12. 217
      src/searchengine/nova3/engines/extratorrent.py
  13. 6
      src/searchengine/nova3/engines/legittorrents.py
  14. 201
      src/searchengine/nova3/engines/mininova.py
  15. BIN
      src/searchengine/nova3/engines/torrentreactor.png
  16. 149
      src/searchengine/nova3/engines/torrentreactor.py
  17. 2
      src/searchengine/nova3/engines/torrentz.py
  18. 11
      src/searchengine/nova3/engines/versions.txt
  19. 240
      src/searchengine/nova3/nova2.py
  20. 44
      src/searchengine/nova3/nova2dl.py
  21. 66
      src/searchengine/nova3/novaprinter.py
  22. 652
      src/searchengine/nova3/socks.py

217
src/searchengine/nova/engines/extratorrent.py

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
#VERSION: 1.2
#VERSION: 2.0
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
# Redistribution and use in source and binary forms, with or without
@ -25,92 +25,139 @@ @@ -25,92 +25,139 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from HTMLParser import HTMLParser
from httplib import HTTPConnection as http
#qBt
from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file
import sgmllib
import re
from helpers import download_file
class extratorrent(object):
url = 'http://extratorrent.cc'
name = 'extratorrent'
supported_categories = {'all': '', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'books': '2', 'pictures': '6'}
def __init__(self):
self.results = []
self.parser = self.SimpleSGMLParser(self.results, self.url)
def download_torrent(self, info):
print download_file(info)
class SimpleSGMLParser(sgmllib.SGMLParser):
def __init__(self, results, url, *args):
sgmllib.SGMLParser.__init__(self)
self.url = url
self.td_counter = None
self.current_item = None
self.start_name = False
self.results = results
def start_a(self, attr):
params = dict(attr)
#print params
if params.has_key('href') and params['href'].startswith("/torrent_download/"):
self.current_item = {}
self.td_counter = 0
self.start_name = False
torrent_id = '/'.join(params['href'].split('/')[2:])
self.current_item['link']=self.url+'/download/'+torrent_id
elif params.has_key('href') and params['href'].startswith("/torrent/") and params['href'].endswith(".html"):
self.current_item['desc_link'] = self.url + params['href'].strip()
self.start_name = True
def handle_data(self, data):
if self.td_counter == 2:
if not self.current_item.has_key('name') and self.start_name:
self.current_item['name'] = data.strip()
elif self.td_counter == 3:
if not self.current_item.has_key('size'):
self.current_item['size'] = ''
self.current_item['size']+= data.replace(" ", " ").strip()
elif self.td_counter == 4:
if not self.current_item.has_key('seeds'):
self.current_item['seeds'] = ''
self.current_item['seeds']+= data.strip()
elif self.td_counter == 5:
if not self.current_item.has_key('leech'):
self.current_item['leech'] = ''
self.current_item['leech']+= data.strip()
def start_td(self,attr):
if isinstance(self.td_counter,int):
self.td_counter += 1
if self.td_counter > 5:
self.td_counter = None
# Display item
""" Search engine class """
url = 'http://extratorrent.cc'
name = 'ExtraTorrent'
supported_categories = {'all' : '0',
'movies' : '4',
'tv' : '8',
'music' : '5',
'games' : '3',
'anime' : '1',
'software' : '7',
'books' : '2',
'pictures' : '6'}
def download_torrent(self, info):
""" Downloader """
print(download_file(info))
class MyHtmlParseWithBlackJack(HTMLParser):
""" Parser class """
def __init__(self, list_searches, url):
HTMLParser.__init__(self)
self.url = url
self.list_searches = list_searches
self.current_item = None
self.cur_item_name = None
self.pending_size = False
self.next_queries = True
self.pending_next_queries = False
self.next_queries_set = set()
def handle_starttag(self, tag, attrs):
if self.current_item:
self.current_item['engine_url'] = self.url
if not self.current_item['seeds'].isdigit():
self.current_item['seeds'] = 0
if not self.current_item['leech'].isdigit():
self.current_item['leech'] = 0
prettyPrinter(self.current_item)
self.results.append('a')
def search(self, what, cat='all'):
ret = []
i = 1
while True and i<11:
results = []
parser = self.SimpleSGMLParser(results, self.url)
dat = retrieve_url(self.url+'/advanced_search/?with=%s&s_cat=%s&page=%d'%(what, self.supported_categories[cat], i))
results_re = re.compile('(?s)<table class="tl"><thead>.*')
for match in results_re.finditer(dat):
res_tab = match.group(0)
parser.feed(res_tab)
if tag == "a":
params = dict(attrs)
link = params['href']
if not link.startswith("/torrent"):
return
if link[8] == "/":
#description
self.current_item["desc_link"] = "".join((self.url, link))
#remove view at the beginning
self.current_item["name"] = params["title"][5:].replace("&amp;", "&")
self.pending_size = True
elif link[8] == "_":
#download link
link = link.replace("torrent_", "", 1)
self.current_item["link"] = "".join((self.url, link))
elif tag == "td":
if self.pending_size:
self.cur_item_name = "size"
self.current_item["size"] = ""
self.pending_size = False
for attr in attrs:
if attr[0] == "class":
if attr[1][0] == "s":
self.cur_item_name = "seeds"
self.current_item["seeds"] = ""
elif attr[1][0] == "l":
self.cur_item_name = "leech"
self.current_item["leech"] = ""
break
elif tag == "tr":
for attr in attrs:
if attr[0] == "class" and attr[1].startswith("tl"):
self.current_item = dict()
self.current_item["engine_url"] = self.url
break
elif self.pending_next_queries:
if tag == "a":
params = dict(attrs)
if params["title"] in self.next_queries_set:
return
self.list_searches.append(params['href'])
self.next_queries_set.add(params["title"])
if params["title"] == "10":
self.pending_next_queries = False
else:
self.pending_next_queries = False
elif self.next_queries:
if tag == "b" and ("class", "pager_no_link") in attrs:
self.next_queries = False
self.pending_next_queries = True
def handle_data(self, data):
if self.cur_item_name:
temp = self.current_item[self.cur_item_name]
self.current_item[self.cur_item_name] = " ".join((temp, data))
#Due to utf-8 we need to handle data two times if there is space
if not self.cur_item_name == "size":
self.cur_item_name = None
def handle_endtag(self, tag):
if self.current_item:
if tag == "tr":
prettyPrinter(self.current_item)
self.current_item = None
def search(self, what, cat="all"):
""" Performs search """
connection = http("extratorrent.cc")
query = "".join(("/search/?new=1&search=", what, "&s_cat=", self.supported_categories[cat]))
connection.request("GET", query)
response = connection.getresponse()
if response.status != 200:
return
list_searches = []
parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
parser.feed(response.read().decode('utf-8'))
parser.close()
break
if len(results) <= 0:
break
i += 1
for search_query in list_searches:
connection.request("GET", search_query)
response = connection.getresponse()
parser.feed(response.read().decode('utf-8'))
parser.close()
connection.close()
return

6
src/searchengine/nova/engines/legittorrents.py

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
#VERSION: 1.02
#VERSION: 1.03
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
# Redistribution and use in source and binary forms, with or without
@ -36,10 +36,6 @@ class legittorrents(object): @@ -36,10 +36,6 @@ class legittorrents(object):
name = 'legittorrents'
supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'}
def __init__(self):
self.results = []
self.parser = self.SimpleSGMLParser(self.results, self.url)
def download_torrent(self, info):
print download_file(info)

201
src/searchengine/nova/engines/mininova.py

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
#VERSION: 1.51
#VERSION: 2.00
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
#CONTRIBUTORS: Diego de las Heras (diegodelasheras@gmail.com)
@ -26,90 +26,123 @@ @@ -26,90 +26,123 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from HTMLParser import HTMLParser
from httplib import HTTPConnection as http
from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file
import sgmllib
import re
from helpers import download_file
class mininova(object):
# Mandatory properties
url = 'http://www.mininova.org'
name = 'Mininova'
supported_categories = {'all': '0', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'pictures': '6', 'books': '2'}
def __init__(self):
self.results = []
self.parser = self.SimpleSGMLParser(self.results, self.url)
def download_torrent(self, info):
print download_file(info)
class SimpleSGMLParser(sgmllib.SGMLParser):
def __init__(self, results, url, *args):
sgmllib.SGMLParser.__init__(self)
self.url = url
self.td_counter = None
self.current_item = None
self.results = results
def start_a(self, attr):
params = dict(attr)
#print params
if params.has_key('href'):
if params['href'].startswith("/get/"):
self.current_item = {}
self.td_counter = 0
self.current_item['link']=self.url+params['href'].strip()
elif params['href'].startswith("/tor/") and self.current_item is not None:
self.current_item['desc_link']=self.url+params['href'].strip()
def handle_data(self, data):
if self.td_counter == 0:
if not self.current_item.has_key('name'):
self.current_item['name'] = ''
self.current_item['name']+= data
elif self.td_counter == 1:
if not self.current_item.has_key('size'):
self.current_item['size'] = ''
self.current_item['size']+= data.strip()
elif self.td_counter == 2:
if not self.current_item.has_key('seeds'):
self.current_item['seeds'] = ''
self.current_item['seeds']+= data.strip()
elif self.td_counter == 3:
if not self.current_item.has_key('leech'):
self.current_item['leech'] = ''
self.current_item['leech']+= data.strip()
def start_td(self,attr):
if isinstance(self.td_counter,int):
self.td_counter += 1
if self.td_counter > 4:
self.td_counter = None
# Display item
if self.current_item:
self.current_item['engine_url'] = self.url
if not self.current_item['seeds'].isdigit():
self.current_item['seeds'] = 0
if not self.current_item['leech'].isdigit():
self.current_item['leech'] = 0
prettyPrinter(self.current_item)
self.results.append('a')
def search(self, what, cat='all'):
ret = []
i = 1
while True and i<11:
results = []
parser = self.SimpleSGMLParser(results, self.url)
dat = retrieve_url(self.url+'/search/%s/%s/seeds/%d'%(what, self.supported_categories[cat], i))
results_re = re.compile('(?s)<h1>Search results for.*')
for match in results_re.finditer(dat):
res_tab = match.group(0)
parser.feed(res_tab)
""" Search engine class """
url = 'http://www.mininova.org'
name = 'Mininova'
supported_categories = {'all' : '0',
'movies' : '4',
'tv' : '8',
'music' : '5',
'games' : '3',
'anime' : '1',
'software' : '7',
'pictures' : '6',
'books' : '2'}
def download_torrent(self, info):
print(download_file(info))
class MyHtmlParseWithBlackJack(HTMLParser):
""" Parser class """
def __init__(self, list_searches, url):
HTMLParser.__init__(self)
self.list_searches = list_searches
self.url = url
self.table_results = False
self.current_item = None
self.cur_item_name = None
self.next_queries = True
def handle_starttag_tr(self, _):
""" Handler of tr start tag """
self.current_item = dict()
def handle_starttag_a(self, attrs):
""" Handler of a start tag """
params = dict(attrs)
link = params["href"]
if link.startswith("/tor/"):
#description
self.current_item["desc_link"] = "".join((self.url, link))
#get download link from description by id
self.current_item["link"] = "".join((self.url, "/get/", link[5:-2]))
self.cur_item_name = "name"
self.current_item["name"] = ""
elif self.next_queries and link.startswith("/search"):
if params["title"].startswith("Page"):
self.list_searches.append(link)
def handle_starttag_td(self, attrs):
""" Handler of td start tag """
if ("align", "right") in attrs:
if not "size" in self.current_item:
self.cur_item_name = "size"
self.current_item["size"] = ""
def handle_starttag_span(self, attrs):
""" Handler of span start tag """
if ("class", "g") in attrs:
self.cur_item_name = "seeds"
self.current_item["seeds"] = ""
elif ("class", "b") in attrs:
self.cur_item_name = "leech"
self.current_item["leech"] = ""
def handle_starttag(self, tag, attrs):
""" Parser's start tag handler """
if self.table_results:
dispatcher = getattr(self, "_".join(("handle_starttag", tag)), None)
if dispatcher:
dispatcher(attrs)
elif tag == "table":
self.table_results = ("class", "maintable") in attrs
def handle_endtag(self, tag):
""" Parser's end tag handler """
if tag == "tr" and self.current_item:
self.current_item["engine_url"] = self.url
prettyPrinter(self.current_item)
self.current_item = None
elif self.cur_item_name:
if tag == "a" or tag == "td":
self.cur_item_name = None
def handle_data(self, data):
""" Parser's data handler """
if self.cur_item_name:
temp = self.current_item[self.cur_item_name]
self.current_item[self.cur_item_name] = " ".join((temp, data))
def search(self, what, cat="all"):
""" Performs search """
connection = http("www.mininova.org")
query = "/".join(("/search", what, self.supported_categories[cat], "seeds"))
connection.request("GET", query)
response = connection.getresponse()
if response.status != 200:
return
list_searches = []
parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
parser.feed(response.read().decode('utf-8'))
parser.close()
break
if len(results) <= 0:
break
i += 1
parser.next_queries = False
for search_query in list_searches:
connection.request("GET", search_query)
response = connection.getresponse()
parser.feed(response.read().decode('utf-8'))
parser.close()
connection.close()
return

BIN
src/searchengine/nova/engines/torrentreactor.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 252 B

After

Width:  |  Height:  |  Size: 951 B

150
src/searchengine/nova/engines/torrentreactor.py

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
#VERSION: 1.33
#VERSION: 1.36
#AUTHORS: Gekko Dam Beer (gekko04@users.sourceforge.net)
#CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org)
# Bruno Barbieri (brunorex@gmail.com)
@ -28,92 +28,84 @@ @@ -28,92 +28,84 @@
# POSSIBILITY OF SUCH DAMAGE.
from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file
from urllib2 import HTTPError
from HTMLParser import HTMLParser
from helpers import download_file, retrieve_url
import urllib
import re
from HTMLParser import HTMLParser
from re import compile as re_compile
class torrentreactor(object):
url = 'http://www.torrentreactor.net'
name = 'TorrentReactor.Net'
supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'}
def download_torrent(self, info):
print download_file(info)
class SimpleHTMLParser(HTMLParser):
def __init__(self, results, url, *args):
HTMLParser.__init__(self)
self.td_counter = None
self.current_item = None
self.results = results
self.id = None
self.url = url
self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td }
url = 'http://www.torrentreactor.net'
name = 'TorrentReactor'
supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'}
def handle_starttag(self, tag, attrs):
if tag in self.dispatcher:
self.dispatcher[tag](attrs)
def download_torrent(self, info):
print(download_file(info))
def start_a(self, attr):
params = dict(attr)
if re.match("/torrents/\d+.*", params['href']):
self.current_item = {}
self.current_item['desc_link'] = self.url+params['href'].strip()
elif 'torrentreactor.net/download.php' in params['href']:
self.td_counter = 0
self.current_item['link'] = params['href'].strip()
self.current_item['name'] = urllib.unquote_plus(params['href'].split('&')[1].split('name=')[1])
class SimpleHTMLParser(HTMLParser):
def __init__(self, results, url, *args):
HTMLParser.__init__(self)
self.td_counter = None
self.current_item = None
self.results = results
self.id = None
self.url = url
self.torrents_matcher = re_compile("/torrents/\d+.*")
self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td }
def handle_data(self, data):
if self.td_counter == 1:
if not self.current_item.has_key('size'):
self.current_item['size'] = ''
self.current_item['size']+= data.strip()
elif self.td_counter == 2:
if not self.current_item.has_key('seeds'):
self.current_item['seeds'] = ''
self.current_item['seeds']+= data.strip()
elif self.td_counter == 3:
if not self.current_item.has_key('leech'):
self.current_item['leech'] = ''
self.current_item['leech']+= data.strip()
def handle_starttag(self, tag, attrs):
if tag in self.dispatcher:
self.dispatcher[tag](attrs)
def start_td(self,attr):
if isinstance(self.td_counter,int):
self.td_counter += 1
if self.td_counter > 3:
self.td_counter = None
# add item to results
if self.current_item:
self.current_item['engine_url'] = self.url
if not self.current_item['seeds'].isdigit():
self.current_item['seeds'] = 0
if not self.current_item['leech'].isdigit():
self.current_item['leech'] = 0
prettyPrinter(self.current_item)
self.has_results = True
self.results.append('a')
def start_a(self, attr):
params = dict(attr)
if self.torrents_matcher.match(params['href']):
self.current_item = {}
self.current_item['desc_link'] = self.url+params['href'].strip()
elif 'torrentreactor.net/download.php' in params['href']:
self.td_counter = 0
self.current_item['link'] = params['href'].strip()
self.current_item['name'] = urllib.unquote_plus(params['href'].split('&')[1].split('name=')[1])
def __init__(self):
self.results = []
self.parser = self.SimpleHTMLParser(self.results, self.url)
def handle_data(self, data):
if self.td_counter == 1:
if 'size' not in self.current_item:
self.current_item['size'] = ''
self.current_item['size']+= data.strip()
elif self.td_counter == 2:
if 'seeds' not in self.current_item:
self.current_item['seeds'] = ''
self.current_item['seeds']+= data.strip()
elif self.td_counter == 3:
if 'leech' not in self.current_item:
self.current_item['leech'] = ''
self.current_item['leech']+= data.strip()
def search(self, what, cat='all'):
i = 0
dat = ''
while True and i<11:
results = []
parser = self.SimpleHTMLParser(results, self.url)
def start_td(self,attr):
if isinstance(self.td_counter,int):
self.td_counter += 1
if self.td_counter > 3:
self.td_counter = None
# add item to results
if self.current_item:
self.current_item['engine_url'] = self.url
if not self.current_item['seeds'].isdigit():
self.current_item['seeds'] = 0
if not self.current_item['leech'].isdigit():
self.current_item['leech'] = 0
prettyPrinter(self.current_item)
self.has_results = True
self.results.append('a')
try:
dat = retrieve_url(self.url+'/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat]))
except HTTPError:
break
def search(self, what, cat='all'):
i = 0
dat = ''
parser.feed(dat)
parser.close()
if len(results) <= 0:
break
i += 1
while i < 11:
results = []
parser = self.SimpleHTMLParser(results, self.url)
dat = retrieve_url('%s/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(self.url, what, (i*35), self.supported_categories[cat]))
parser.feed(dat)
parser.close()
if len(results) <= 0:
break
i += 1

4
src/searchengine/nova/engines/torrentz.py

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
#VERSION: 2.13
#VERSION: 2.14
#AUTHORS: Diego de las Heras (diegodelasheras@gmail.com)
# Redistribution and use in source and binary forms, with or without
@ -105,7 +105,7 @@ class torrentz(object): @@ -105,7 +105,7 @@ class torrentz(object):
while i < 6:
results_list = []
# "what" is already urlencoded
html = retrieve_url(self.url + '/any?f=%s&p=%d' % (what, i))
html = retrieve_url('%s/any?f=%s&p=%d' % (self.url, what, i))
parser = self.MyHtmlParser(results_list, self.url, trackers)
parser.feed(html)
parser.close()

11
src/searchengine/nova/engines/versions.txt

@ -1,8 +1,9 @@ @@ -1,8 +1,9 @@
torrentreactor: 1.33
mininova: 1.51
piratebay: 2.11
extratorrent: 1.2
torrentreactor: 1.36
mininova: 2.00
piratebay: 2.11
extratorrent: 2.0
kickasstorrents: 1.26
btdigg: 1.24
legittorrents: 1.02
torrentz: 2.13
torrentz: 2.14
legittorrents: 1.03

246
src/searchengine/nova/nova2.py

@ -26,7 +26,7 @@ @@ -26,7 +26,7 @@
# POSSIBILITY OF SUCH DAMAGE.
#VERSION: 1.32
#VERSION: 1.40
# Author:
# Fabien Devaux <fab AT gnux DOT info>
@ -37,16 +37,15 @@ @@ -37,16 +37,15 @@
#
# Licence: BSD
import sys
import threading
import os
import glob
import urllib
import fix_encoding
from os import path
from glob import glob
from sys import argv
from multiprocessing import Pool, cpu_count
from fix_encoding import fix_encoding
THREADED = True
CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books')
CATEGORIES = {'all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books'}
################################################################################
# Every engine should have a "search" method taking
@ -56,108 +55,129 @@ CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pic @@ -56,108 +55,129 @@ CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pic
# As a convention, try to list results by decreasing number of seeds or similar
################################################################################
supported_engines = []
engines = glob.glob(os.path.join(os.path.dirname(__file__), 'engines','*.py'))
for engine in engines:
e = engine.split(os.sep)[-1][:-3]
if len(e.strip()) == 0: continue
if e.startswith('_'): continue
try:
exec "from engines.%s import %s"%(e,e)
supported_engines.append(e)
except:
pass
def engineToXml(short_name):
xml = "<%s>\n"%short_name
exec "engine = %s()"%short_name
xml += "<name>%s</name>\n"%engine.name
xml += "<url>%s</url>\n"%engine.url
xml += "<categories>"
if hasattr(engine, 'supported_categories'):
supported_categories = engine.supported_categories.keys()
supported_categories.remove('all')
xml += " ".join(supported_categories)
xml += "</categories>\n"
xml += "</%s>\n"%short_name
return xml
def displayCapabilities():
"""
Display capabilities in XML format
<capabilities>
<engine_short_name>
<name>long name</name>
<url>http://example.com</url>
<categories>movies music games</categories>
</engine_short_name>
</capabilities>
"""
xml = "<capabilities>"
for short_name in supported_engines:
xml += engineToXml(short_name)
xml += "</capabilities>"
print xml
class EngineLauncher(threading.Thread):
def __init__(self, engine, what, cat='all'):
threading.Thread.__init__(self)
self.engine = engine
self.what = what
self.cat = cat
def run(self):
if hasattr(self.engine, 'supported_categories'):
if self.cat == 'all' or self.cat in self.engine.supported_categories.keys():
self.engine.search(self.what, self.cat)
elif self.cat == 'all':
self.engine.search(self.what)
if __name__ == '__main__':
# Make sure we enforce utf-8 encoding
fix_encoding.fix_encoding()
if len(sys.argv) < 2:
raise SystemExit('./nova2.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'%
(','.join(supported_engines)))
if len(sys.argv) == 2:
if sys.argv[1] == "--capabilities":
displayCapabilities()
sys.exit(0)
else:
raise SystemExit('./nova.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'%
(','.join(supported_engines)))
engines_list = [e.lower() for e in sys.argv[1].strip().split(',')]
if 'all' in engines_list:
engines_list = supported_engines
cat = sys.argv[2].lower()
if cat not in CATEGORIES:
raise SystemExit('Invalid category!')
what = urllib.quote(' '.join(sys.argv[3:]))
threads = []
for engine in engines_list:
try:
if THREADED:
exec "l = EngineLauncher(%s(), what, cat)"%engine
threads.append(l)
l.start()
else:
exec "e = %s()"%engine
if hasattr(engine, 'supported_categories'):
if cat == 'all' or cat in e.supported_categories.keys():
e.search(what, cat)
elif self.cat == 'all':
e.search(what)
engine().search(what, cat)
except:
pass
if THREADED:
for t in threads:
t.join()
def initialize_engines():
    """Import the available search engine plugins.

    Looks for ``*.py`` files in the ``engines`` directory located next to
    this file. For each file whose base name is non-empty and does not start
    with an underscore, imports ``engines.<name>`` and binds the attribute
    called ``<name>`` from that module into this module's globals, so that it
    can later be looked up by its short name.

    Returns:
        list of short names of the engines that were imported successfully.
    """
    supported_engines = []
    engines = glob(path.join(path.dirname(__file__), 'engines', '*.py'))
    for engine in engines:
        engi = path.basename(engine).split('.')[0].strip()
        if not engi or engi.startswith('_'):
            continue
        try:
            # __import__("engines.x") returns the top-level "engines" package
            engine_module = __import__(".".join(("engines", engi)))
            # get the low-level module engines.<engi>
            engine_module = getattr(engine_module, engi)
            # bind the class under its short name
            globals()[engi] = getattr(engine_module, engi)
        except Exception:
            # Best effort: a broken plugin must not prevent the others from
            # loading. KeyboardInterrupt/SystemExit are left to propagate.
            continue
        supported_engines.append(engi)
    return supported_engines
def engines_to_xml(supported_engines):
    """Generate one XML fragment per supported engine.

    Each short name is looked up in this module's globals and instantiated.
    The fragment contains the engine's ``name``, ``url`` and, when the engine
    has a ``supported_categories`` attribute, a space-separated list of its
    keys with the ``"all"`` entry left out.

    Args:
        supported_engines: iterable of engine short names bound in globals.

    Yields:
        str: the ``<short_name>...</short_name>`` fragment for each engine.
    """
    tab = " " * 4
    for short_name in supported_engines:
        search_engine = globals()[short_name]()

        supported_categories = ""
        if hasattr(search_engine, "supported_categories"):
            # Compare by value: an identity test against the literal "all"
            # only works when the key happens to be the same interned object.
            supported_categories = " ".join(
                key for key in search_engine.supported_categories
                if key != "all")

        yield "".join((tab, "<", short_name, ">\n",
                       tab, tab, "<name>", search_engine.name, "</name>\n",
                       tab, tab, "<url>", search_engine.url, "</url>\n",
                       tab, tab, "<categories>", supported_categories, "</categories>\n",
                       tab, "</", short_name, ">\n"))
def displayCapabilities(supported_engines):
    """Print the capabilities of the given engines as XML.

    The per-engine fragments come from engines_to_xml() and are wrapped in a
    single root element, e.g.:

    <capabilities>
      <engine_short_name>
        <name>long name</name>
        <url>http://example.com</url>
        <categories>movies music games</categories>
      </engine_short_name>
    </capabilities>
    """
    engine_fragments = "".join(engines_to_xml(supported_engines))
    print("<capabilities>\n" + engine_fragments + "</capabilities>")
def run_search(engine_list):
    """Run one search on one engine.

    @param engine_list Sequence of (engine class, query, category)
    @retval False if any exception occurred
    @retval True otherwise
    """
    engine_class, what, cat = engine_list
    try:
        searcher = engine_class()
        if not hasattr(searcher, 'supported_categories'):
            # engine has no notion of categories: search with the query only
            searcher.search(what)
            return True
        # avoid exceptions due to invalid category
        if cat not in searcher.supported_categories:
            cat = "all"
        searcher.search(what, cat)
    except:
        # Deliberately catches everything: a failing engine only reports
        # False to the caller instead of raising.
        return False
    return True
def main(args):
    """Command-line entry point of the search launcher.

    Args:
        args: command-line arguments without the program name. Either
            ``["--capabilities"]`` to print the engines' capabilities, or
            ``[engines, category, keyword, ...]`` where ``engines`` is
            ``all`` or a comma-separated list of engine short names.

    Raises:
        SystemExit: with a usage message when too few arguments are given,
            or with an error message when the category is not in CATEGORIES.
    """
    fix_encoding()
    supported_engines = initialize_engines()

    usage = ("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
             "available engines: %s" % (','.join(supported_engines)))

    if not args:
        raise SystemExit(usage)
    elif args[0] == "--capabilities":
        displayCapabilities(supported_engines)
        return
    elif len(args) < 3:
        raise SystemExit(usage)

    # get only unique engines with set
    engines_list = set(e.lower() for e in args[0].strip().split(','))

    if 'all' in engines_list:
        engines_list = supported_engines
    else:
        # discard un-supported engines
        engines_list = [engine for engine in engines_list
                        if engine in supported_engines]

    if not engines_list:
        # engine list is empty. Nothing to do here
        return

    cat = args[1].lower()

    if cat not in CATEGORIES:
        raise SystemExit(" - ".join(('Invalid category', cat)))

    what = urllib.quote(' '.join(args[2:]))

    # one [engine class, query, category] job per engine, as run_search expects
    jobs = [[globals()[engine], what, cat] for engine in engines_list]

    if THREADED:
        # child process spawning is controlled min(number of searches, number of cpu)
        pool = Pool(min(len(engines_list), cpu_count()))
        try:
            pool.map(run_search, jobs)
        finally:
            # release the worker processes explicitly instead of relying on
            # interpreter shutdown to clean the pool up
            pool.close()
            pool.join()
    else:
        # explicit loop: map() is only a side-effect trigger here and would
        # run nothing on interpreters where it is lazy
        for job in jobs:
            run_search(job)


if __name__ == "__main__":
    main(argv[1:])

44
src/searchengine/nova/nova2dl.py

@ -25,7 +25,7 @@ @@ -25,7 +25,7 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#VERSION: 1.10
#VERSION: 1.20
# Author:
# Christophe DUMEZ (chris@qbittorrent.org)
@ -39,26 +39,26 @@ supported_engines = dict() @@ -39,26 +39,26 @@ supported_engines = dict()
engines = glob.glob(os.path.join(os.path.dirname(__file__), 'engines','*.py'))
for engine in engines:
e = engine.split(os.sep)[-1][:-3]
if len(e.strip()) == 0: continue
if e.startswith('_'): continue
try:
exec "from engines.%s import %s"%(e,e)
exec "engine_url = %s.url"%e
supported_engines[engine_url] = e
except:
pass
e = engine.split(os.sep)[-1][:-3]
if len(e.strip()) == 0: continue
if e.startswith('_'): continue
try:
exec("from engines.%s import %s"%(e,e))
exec("engine_url = %s.url"%e)
supported_engines[engine_url] = e
except:
pass
if __name__ == '__main__':
if len(sys.argv) < 3:
raise SystemExit('./nova2dl.py engine_url download_parameter')
engine_url = sys.argv[1].strip()
download_param = sys.argv[2].strip()
if engine_url not in supported_engines.keys():
raise SystemExit('./nova2dl.py: this engine_url was not recognized')
exec "engine = %s()"%supported_engines[engine_url]
if hasattr(engine, 'download_torrent'):
engine.download_torrent(download_param)
else:
print download_file(download_param)
sys.exit(0)
if len(sys.argv) < 3:
raise SystemExit('./nova2dl.py engine_url download_parameter')
engine_url = sys.argv[1].strip()
download_param = sys.argv[2].strip()
if engine_url not in list(supported_engines.keys()):
raise SystemExit('./nova2dl.py: this engine_url was not recognized')
exec("engine = %s()"%supported_engines[engine_url])
if hasattr(engine, 'download_torrent'):
engine.download_torrent(download_param)
else:
print(download_file(download_param))
sys.exit(0)

67
src/searchengine/nova/novaprinter.py

@ -25,45 +25,44 @@ @@ -25,45 +25,44 @@
# POSSIBILITY OF SUCH DAMAGE.
import sys, codecs
from io import open
# Force UTF-8 printing
sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
def prettyPrinter(dictionary):
# Convert everything to unicode for safe printing
for key,value in dictionary.items():
if isinstance(dictionary[key], str):
dictionary[key] = unicode(dictionary[key], 'utf-8')
dictionary['size'] = anySizeToBytes(dictionary['size'])
if dictionary.has_key('desc_link'):
print u"%s|%s|%s|%s|%s|%s|%s"%(dictionary['link'],dictionary['name'].replace('|',' '),dictionary['size'],dictionary['seeds'],dictionary['leech'],dictionary['engine_url'],dictionary['desc_link'])
else:
print u"%s|%s|%s|%s|%s|%s"%(dictionary['link'],dictionary['name'].replace('|',' '),dictionary['size'],dictionary['seeds'],dictionary['leech'],dictionary['engine_url'])
dictionary['size'] = anySizeToBytes(dictionary['size'])
outtext = "|".join((dictionary["link"], dictionary["name"].replace("|", " "), str(dictionary["size"]), str(dictionary["seeds"]), str(dictionary["leech"]), dictionary["engine_url"]))
if 'desc_link' in dictionary:
outtext = "|".join((outtext, dictionary["desc_link"]))
with open(1, 'w', encoding='utf-8', closefd=False) as utf8_stdout:
utf8_stdout.write(unicode("".join((outtext, "\n"))))
def anySizeToBytes(size_string):
"""
Convert a string like '1 KB' to '1024' (bytes)
"""
# separate integer from unit
try:
size, unit = size_string.split()
except:
try:
size = size_string.strip()
unit = ''.join([c for c in size if c.isalpha()])
if len(unit) > 0:
size = size[:-len(unit)]
except:
return -1
if len(size) == 0:
return -1
size = float(size)
if len(unit) == 0:
return int(size)
short_unit = unit.upper()[0]
"""
Convert a string like '1 KB' to '1024' (bytes)
"""
# separate integer from unit
try:
size, unit = size_string.split()
except:
try:
size = size_string.strip()
unit = ''.join([c for c in size if c.isalpha()])
if len(unit) > 0:
size = size[:-len(unit)]
except:
return -1
if len(size) == 0:
return -1
size = float(size)
if len(unit) == 0:
return int(size)
short_unit = unit.upper()[0]
# convert
units_dict = { 'T': 40, 'G': 30, 'M': 20, 'K': 10 }
if units_dict.has_key( short_unit ):
size = size * 2**units_dict[short_unit]
return int(size)
# convert
units_dict = {'T': 40, 'G': 30, 'M': 20, 'K': 10}
if units_dict.has_key(short_unit):
size = size * 2**units_dict[short_unit]
return int(size)

652
src/searchengine/nova/socks.py

@ -42,350 +42,350 @@ _defaultproxy = None @@ -42,350 +42,350 @@ _defaultproxy = None
_orgsocket = socket.socket
class ProxyError(Exception):
    """Base class of all proxy errors raised by this module.

    The constructor argument is stored in ``value``; ``str()`` of the
    exception is ``repr(value)``.
    """
    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)


# The subclasses below only add a distinct type for callers to catch;
# storage and formatting of ``value`` are inherited from ProxyError.

class GeneralProxyError(ProxyError):
    """Proxy error that is not specific to one proxy protocol."""


class Socks5AuthError(ProxyError):
    """SOCKS5 authentication negotiation failed."""


class Socks5Error(ProxyError):
    """The SOCKS5 server answered the connection request with an error."""


class Socks4Error(ProxyError):
    """The SOCKS4 server answered the connection request with an error."""


class HTTPError(ProxyError):
    """The HTTP proxy answered CONNECT with a non-200 status."""
# Message tables. Exceptions raised by this module carry a
# (code, message) tuple whose message is looked up here by index.

# Used with GeneralProxyError.
_generalerrors = ("success",
                  "invalid data",
                  "not connected",
                  "not available",
                  "bad proxy type",
                  "bad input")

# Ten entries, indices 0-9; the last one is the catch-all.
_socks5errors = ("succeeded",
                 "general SOCKS server failure",
                 "connection not allowed by ruleset",
                 "Network unreachable",
                 "Host unreachable",
                 "Connection refused",
                 "TTL expired",
                 "Command not supported",
                 "Address type not supported",
                 "Unknown error")

# Used with Socks5AuthError.
_socks5autherrors = ("succeeded",
                     "authentication is required",
                     "all offered authentication methods were rejected",
                     "unknown username or invalid password",
                     "unknown error")

# Indexed by (SOCKS4 reply code - 90); index 4 is the catch-all.
_socks4errors = ("request granted",
                 "request rejected or failed",
                 "request rejected because SOCKS server cannot connect to identd on the client",
                 "request rejected because the client program and identd report different user-ids",
                 "unknown error")
def setdefaultproxy(proxytype=None,addr=None,port=None,rdns=True,username=None,password=None):
    """setdefaultproxy(proxytype, addr[, port[, rdns[, username[, password]]]])
    Sets a default proxy which all further socksocket objects will use,
    unless explicitly changed.

    The six arguments are stored, in order, as a tuple in the module-level
    _defaultproxy. Calling with no arguments stores a tuple whose proxy
    type is None.
    """
    global _defaultproxy
    _defaultproxy = (proxytype,addr,port,rdns,username,password)
class socksocket(socket.socket):
    """socksocket([family[, type[, proto]]]) -> socket object
    Open a SOCKS enabled socket. The parameters are the same as
    those of the standard socket init. In order for SOCKS to work,
    you must specify family=AF_INET, type=SOCK_STREAM and proto=0.

    The proxy settings are held as a 6-tuple
    (proxytype, addr, port, rdns, username, password), taken from the
    module default at construction time and replaceable with setproxy().
    """

    def __init__(self, family=socket.AF_INET, type=socket.SOCK_STREAM, proto=0, _sock=None):
        _orgsocket.__init__(self,family,type,proto,_sock)
        if _defaultproxy is not None:
            self.__proxy = _defaultproxy
        else:
            self.__proxy = (None, None, None, None, None, None)
        self.__proxysockname = None
        self.__proxypeername = None

    def __recvall(self, count):
        """__recvall(count) -> data
        Receive EXACTLY the number of bytes requested from the socket.
        Blocks until the required number of bytes have been received.
        Raises GeneralProxyError if the peer closes the connection first.
        """
        data = ""
        while len(data) < count:
            d = self.recv(count-len(data))
            if not d:
                raise GeneralProxyError("connection closed unexpectedly")
            data = data + d
        return data

    def setproxy(self,proxytype=None,addr=None,port=None,rdns=True,username=None,password=None):
        """setproxy(proxytype, addr[, port[, rdns[, username[, password]]]])
        Sets the proxy to be used.
        proxytype - The type of the proxy to be used. Three types
                are supported: PROXY_TYPE_SOCKS4 (including socks4a),
                PROXY_TYPE_SOCKS5 and PROXY_TYPE_HTTP
        addr -      The address of the server (IP or DNS).
        port -      The port of the server. Defaults to 1080 for SOCKS
                servers and 8080 for HTTP proxy servers.
        rdns -      Should DNS queries be performed on the remote side
                (rather than the local side). The default is True.
                Note: This has no effect with SOCKS4 servers.
        username -  Username to authenticate with to the server.
                The default is no authentication.
        password -  Password to authenticate with to the server.
                Only relevant when username is also provided.
        """
        self.__proxy = (proxytype,addr,port,rdns,username,password)

    def __negotiatesocks5(self,destaddr,destport):
        """__negotiatesocks5(self,destaddr,destport)
        Negotiates a connection through a SOCKS5 server.
        Raises GeneralProxyError on a malformed reply, Socks5AuthError
        when authentication fails and Socks5Error when the server
        refuses the connection. The socket is closed before raising.
        """
        # First we'll send the authentication packages we support.
        if (self.__proxy[4] is not None) and (self.__proxy[5] is not None):
            # The username/password details were supplied to the
            # setproxy method so we support the USERNAME/PASSWORD
            # authentication (in addition to the standard none).
            self.sendall("\x05\x02\x00\x02")
        else:
            # No username/password were entered, therefore we
            # only support connections with no authentication.
            self.sendall("\x05\x01\x00")
        # We'll receive the server's response to determine which
        # method was selected
        chosenauth = self.__recvall(2)
        if chosenauth[0] != "\x05":
            self.close()
            raise GeneralProxyError((1,_generalerrors[1]))
        # Check the chosen authentication method
        if chosenauth[1] == "\x00":
            # No authentication is required
            pass
        elif chosenauth[1] == "\x02":
            # Okay, we need to perform a basic username/password
            # authentication.
            self.sendall("\x01" + chr(len(self.__proxy[4])) + self.__proxy[4] + chr(len(self.__proxy[5])) + self.__proxy[5])
            authstat = self.__recvall(2)
            if authstat[0] != "\x01":
                # Bad response
                self.close()
                raise GeneralProxyError((1,_generalerrors[1]))
            if authstat[1] != "\x00":
                # Authentication failed
                self.close()
                raise Socks5AuthError((3,_socks5autherrors[3]))
            # Authentication succeeded
        else:
            # Reaching here is always bad
            self.close()
            if chosenauth[1] == "\xFF":
                raise Socks5AuthError((2,_socks5autherrors[2]))
            else:
                raise GeneralProxyError((1,_generalerrors[1]))
        # Now we can request the actual connection
        req = "\x05\x01\x00"
        # If the given destination address is an IP address, we'll
        # use the IPv4 address request even if remote resolving was specified.
        try:
            ipaddr = socket.inet_aton(destaddr)
            req = req + "\x01" + ipaddr
        except socket.error:
            # Well it's not an IP number, so it's probably a DNS name.
            if self.__proxy[3]==True:
                # Resolve remotely
                ipaddr = None
                req = req + "\x03" + chr(len(destaddr)) + destaddr
            else:
                # Resolve locally
                ipaddr = socket.inet_aton(socket.gethostbyname(destaddr))
                req = req + "\x01" + ipaddr
        req = req + struct.pack(">H",destport)
        self.sendall(req)
        # Get the response
        resp = self.__recvall(4)
        if resp[0] != "\x05":
            self.close()
            raise GeneralProxyError((1,_generalerrors[1]))
        elif resp[1] != "\x00":
            # Connection failed. The message must come from the SOCKS5
            # table: _generalerrors has only six entries, so indexing it
            # with a reply code would report the wrong text or fail.
            self.close()
            if ord(resp[1])<=8:
                raise Socks5Error((ord(resp[1]),_socks5errors[ord(resp[1])]))
            else:
                raise Socks5Error((9,_socks5errors[9]))
        # Get the bound address/port
        elif resp[3] == "\x01":
            boundaddr = self.__recvall(4)
        elif resp[3] == "\x03":
            # One length byte, then that many bytes of host name
            resp = resp + self.__recvall(1)
            boundaddr = self.__recvall(ord(resp[4]))
        else:
            self.close()
            raise GeneralProxyError((1,_generalerrors[1]))
        boundport = struct.unpack(">H",self.__recvall(2))[0]
        self.__proxysockname = (boundaddr,boundport)
        if ipaddr is not None:
            self.__proxypeername = (socket.inet_ntoa(ipaddr),destport)
        else:
            self.__proxypeername = (destaddr,destport)

    def getproxysockname(self):
        """getproxysockname() -> address info
        Returns the bound IP address and port number at the proxy.
        """
        return self.__proxysockname

    def getproxypeername(self):
        """getproxypeername() -> address info
        Returns the IP and port number of the proxy.
        """
        return _orgsocket.getpeername(self)

    def getpeername(self):
        """getpeername() -> address info
        Returns the IP address and port number of the destination
        machine (note: getproxypeername returns the proxy)
        """
        return self.__proxypeername

    def __negotiatesocks4(self,destaddr,destport):
        """__negotiatesocks4(self,destaddr,destport)
        Negotiates a connection through a SOCKS4 server.
        Raises GeneralProxyError on a malformed reply and Socks4Error
        when the server rejects the request. The socket is closed
        before raising.
        """
        # Check if the destination address provided is an IP address
        rmtrslv = False
        try:
            ipaddr = socket.inet_aton(destaddr)
        except socket.error:
            # It's a DNS name. Check where it should be resolved.
            if self.__proxy[3]==True:
                ipaddr = "\x00\x00\x00\x01"
                rmtrslv = True
            else:
                ipaddr = socket.inet_aton(socket.gethostbyname(destaddr))
        # Construct the request packet
        req = "\x04\x01" + struct.pack(">H",destport) + ipaddr
        # The username parameter is considered userid for SOCKS4
        if self.__proxy[4] is not None:
            req = req + self.__proxy[4]
        req = req + "\x00"
        # DNS name if remote resolving is required
        # NOTE: This is actually an extension to the SOCKS4 protocol
        # called SOCKS4A and may not be supported in all cases.
        if rmtrslv:
            req = req + destaddr + "\x00"
        self.sendall(req)
        # Get the response from the server
        resp = self.__recvall(8)
        if resp[0] != "\x00":
            # Bad data
            self.close()
            raise GeneralProxyError((1,_generalerrors[1]))
        if resp[1] != "\x5A":
            # Server returned an error
            self.close()
            if ord(resp[1]) in (91,92,93):
                raise Socks4Error((ord(resp[1]),_socks4errors[ord(resp[1])-90]))
            else:
                raise Socks4Error((94,_socks4errors[4]))
        # Get the bound address/port
        self.__proxysockname = (socket.inet_ntoa(resp[4:]),struct.unpack(">H",resp[2:4])[0])
        if rmtrslv:
            # The proxy resolved the name, so ipaddr is only the SOCKS4A
            # placeholder 0.0.0.1; report the host name we asked for.
            self.__proxypeername = (destaddr,destport)
        else:
            self.__proxypeername = (socket.inet_ntoa(ipaddr),destport)

    def __negotiatehttp(self,destaddr,destport):
        """__negotiatehttp(self,destaddr,destport)
        Negotiates a connection through an HTTP server.
        Raises GeneralProxyError on a malformed or truncated reply and
        HTTPError when the status code is not 200. The socket is closed
        before raising.
        """
        # If we need to resolve locally, we do this now
        if self.__proxy[3] == False:
            addr = socket.gethostbyname(destaddr)
        else:
            addr = destaddr
        self.sendall("CONNECT " + addr + ":" + str(destport) + " HTTP/1.1\r\n" + "Host: " + destaddr + "\r\n\r\n")
        # We read the response until we get the string "\r\n\r\n"
        resp = ""
        while resp.find("\r\n\r\n")==-1:
            d = self.recv(1)
            if not d:
                # Peer closed before the header ended; without this check
                # the loop would spin forever on empty reads.
                self.close()
                raise GeneralProxyError("connection closed unexpectedly")
            resp = resp + d
        # We just need the first line to check if the connection
        # was successful
        statusline = resp.splitlines()[0].split(" ",2)
        if statusline[0] not in ("HTTP/1.0","HTTP/1.1"):
            self.close()
            raise GeneralProxyError((1,_generalerrors[1]))
        try:
            statuscode = int(statusline[1])
        except (ValueError, IndexError):
            # Non-numeric or missing status code
            self.close()
            raise GeneralProxyError((1,_generalerrors[1]))
        if statuscode != 200:
            self.close()
            # The reason phrase may be absent from the status line
            if len(statusline) > 2:
                reason = statusline[2]
            else:
                reason = ""
            raise HTTPError((statuscode,reason))
        self.__proxysockname = ("0.0.0.0",0)
        self.__proxypeername = (addr,destport)

    def connect(self,destpair):
        """connect(self,destpair)
        Connects to the specified destination through a proxy.
        destpair - A tuple of the IP/DNS address and the port number.
        (identical to socket's connect).
        To select the proxy server use setproxy().
        Raises GeneralProxyError for a malformed destpair or an unknown
        proxy type.
        """
        # Do a minimal input check first.
        # NOTE: the sequence check is written with isinstance because
        # `type(x) in (list,tuple)==False` is a chained comparison that
        # is always false and therefore never rejected anything.
        if (not isinstance(destpair,(list,tuple))) or (len(destpair)<2) or (type(destpair[0])!=str) or (type(destpair[1])!=int):
            raise GeneralProxyError((5,_generalerrors[5]))
        if self.__proxy[0] == PROXY_TYPE_SOCKS5:
            if self.__proxy[2] is not None:
                portnum = self.__proxy[2]
            else:
                portnum = 1080
            _orgsocket.connect(self,(self.__proxy[1],portnum))
            self.__negotiatesocks5(destpair[0],destpair[1])
        elif self.__proxy[0] == PROXY_TYPE_SOCKS4:
            if self.__proxy[2] is not None:
                portnum = self.__proxy[2]
            else:
                portnum = 1080
            _orgsocket.connect(self,(self.__proxy[1],portnum))
            self.__negotiatesocks4(destpair[0],destpair[1])
        elif self.__proxy[0] == PROXY_TYPE_HTTP:
            if self.__proxy[2] is not None:
                portnum = self.__proxy[2]
            else:
                portnum = 8080
            _orgsocket.connect(self,(self.__proxy[1],portnum))
            self.__negotiatehttp(destpair[0],destpair[1])
        elif self.__proxy[0] is None:
            # No proxy configured: plain direct connection
            _orgsocket.connect(self,(destpair[0],destpair[1]))
        else:
            raise GeneralProxyError((4,_generalerrors[4]))

217
src/searchengine/nova3/engines/extratorrent.py

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
#VERSION: 1.2
#VERSION: 2.0
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
# Redistribution and use in source and binary forms, with or without
@ -25,92 +25,139 @@ @@ -25,92 +25,139 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from html.parser import HTMLParser
from http.client import HTTPConnection as http
#qBt
from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file
import sgmllib3
import re
from helpers import download_file
class extratorrent(object):
url = 'http://extratorrent.cc'
name = 'extratorrent'
supported_categories = {'all': '', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'books': '2', 'pictures': '6'}
def __init__(self):
self.results = []
self.parser = self.SimpleSGMLParser(self.results, self.url)
def download_torrent(self, info):
print(download_file(info))
class SimpleSGMLParser(sgmllib3.SGMLParser):
def __init__(self, results, url, *args):
sgmllib3.SGMLParser.__init__(self)
self.url = url
self.td_counter = None
self.current_item = None
self.start_name = False
self.results = results
def start_a(self, attr):
params = dict(attr)
#print params
if 'href' in params and params['href'].startswith("/torrent_download/"):
self.current_item = {}
self.td_counter = 0
self.start_name = False
torrent_id = '/'.join(params['href'].split('/')[2:])
self.current_item['link']=self.url+'/download/'+torrent_id
elif 'href' in params and params['href'].startswith("/torrent/") and params['href'].endswith(".html"):
self.current_item['desc_link'] = self.url + params['href'].strip()
self.start_name = True
def handle_data(self, data):
if self.td_counter == 2:
if 'name' not in self.current_item and self.start_name:
self.current_item['name'] = data.strip()
elif self.td_counter == 3:
if 'size' not in self.current_item:
self.current_item['size'] = ''
self.current_item['size']+= data.replace("&nbsp;", " ").strip()
elif self.td_counter == 4:
if 'seeds' not in self.current_item:
self.current_item['seeds'] = ''
self.current_item['seeds']+= data.strip()
elif self.td_counter == 5:
if 'leech' not in self.current_item:
self.current_item['leech'] = ''
self.current_item['leech']+= data.strip()
def start_td(self,attr):
if isinstance(self.td_counter,int):
self.td_counter += 1
if self.td_counter > 5:
self.td_counter = None
# Display item
""" Search engine class """
url = 'http://extratorrent.cc'
name = 'ExtraTorrent'
supported_categories = {'all' : '0',
'movies' : '4',
'tv' : '8',
'music' : '5',
'games' : '3',
'anime' : '1',
'software' : '7',
'books' : '2',
'pictures' : '6'}
def download_torrent(self, info):
""" Downloader """
print(download_file(info))
class MyHtmlParseWithBlackJack(HTMLParser):
""" Parser class """
def __init__(self, list_searches, url):
HTMLParser.__init__(self)
self.url = url
self.list_searches = list_searches
self.current_item = None
self.cur_item_name = None
self.pending_size = False
self.next_queries = True
self.pending_next_queries = False
self.next_queries_set = set()
def handle_starttag(self, tag, attrs):
if self.current_item:
self.current_item['engine_url'] = self.url
if not self.current_item['seeds'].isdigit():
self.current_item['seeds'] = 0
if not self.current_item['leech'].isdigit():
self.current_item['leech'] = 0
prettyPrinter(self.current_item)
self.results.append('a')
def search(self, what, cat='all'):
ret = []
i = 1
while True and i<11:
results = []
parser = self.SimpleSGMLParser(results, self.url)
dat = retrieve_url(self.url+'/advanced_search/?with=%s&s_cat=%s&page=%d'%(what, self.supported_categories[cat], i))
results_re = re.compile('(?s)<table class="tl"><thead>.*')
for match in results_re.finditer(dat):
res_tab = match.group(0)
parser.feed(res_tab)
if tag == "a":
params = dict(attrs)
link = params['href']
if not link.startswith("/torrent"):
return
if link[8] == "/":
#description
self.current_item["desc_link"] = "".join((self.url, link))
#remove view at the beginning
self.current_item["name"] = params["title"][5:].replace("&amp;", "&")
self.pending_size = True
elif link[8] == "_":
#download link
link = link.replace("torrent_", "", 1)
self.current_item["link"] = "".join((self.url, link))
elif tag == "td":
if self.pending_size:
self.cur_item_name = "size"
self.current_item["size"] = ""
self.pending_size = False
for attr in attrs:
if attr[0] == "class":
if attr[1][0] == "s":
self.cur_item_name = "seeds"
self.current_item["seeds"] = ""
elif attr[1][0] == "l":
self.cur_item_name = "leech"
self.current_item["leech"] = ""
break
elif tag == "tr":
for attr in attrs:
if attr[0] == "class" and attr[1].startswith("tl"):
self.current_item = dict()
self.current_item["engine_url"] = self.url
break
elif self.pending_next_queries:
if tag == "a":
params = dict(attrs)
if params["title"] in self.next_queries_set:
return
self.list_searches.append(params['href'])
self.next_queries_set.add(params["title"])
if params["title"] == "10":
self.pending_next_queries = False
else:
self.pending_next_queries = False
elif self.next_queries:
if tag == "b" and ("class", "pager_no_link") in attrs:
self.next_queries = False
self.pending_next_queries = True
def handle_data(self, data):
if self.cur_item_name:
temp = self.current_item[self.cur_item_name]
self.current_item[self.cur_item_name] = " ".join((temp, data))
#Due to utf-8 we need to handle data two times if there is space
if not self.cur_item_name == "size":
self.cur_item_name = None
def handle_endtag(self, tag):
if self.current_item:
if tag == "tr":
prettyPrinter(self.current_item)
self.current_item = None
def search(self, what, cat="all"):
""" Performs search """
connection = http("extratorrent.cc")
query = "".join(("/search/?new=1&search=", what, "&s_cat=", self.supported_categories[cat]))
connection.request("GET", query)
response = connection.getresponse()
if response.status != 200:
return
list_searches = []
parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
parser.feed(response.read().decode('utf-8'))
parser.close()
break
if len(results) <= 0:
break
i += 1
for search_query in list_searches:
connection.request("GET", search_query)
response = connection.getresponse()
parser.feed(response.read().decode('utf-8'))
parser.close()
connection.close()
return

6
src/searchengine/nova3/engines/legittorrents.py

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
#VERSION: 1.03
#VERSION: 1.04
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
# Redistribution and use in source and binary forms, with or without
@ -36,10 +36,6 @@ class legittorrents(object): @@ -36,10 +36,6 @@ class legittorrents(object):
name = 'legittorrents'
supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'}
def __init__(self):
self.results = []
self.parser = self.SimpleSGMLParser(self.results, self.url)
def download_torrent(self, info):
print(download_file(info))

201
src/searchengine/nova3/engines/mininova.py

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
#VERSION: 1.51
#VERSION: 2.00
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
#CONTRIBUTORS: Diego de las Heras (diegodelasheras@gmail.com)
@ -26,90 +26,123 @@ @@ -26,90 +26,123 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from html.parser import HTMLParser
from http.client import HTTPConnection as http
from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file
import sgmllib3
import re
from helpers import download_file
class mininova(object):
# Mandatory properties
url = 'http://www.mininova.org'
name = 'Mininova'
supported_categories = {'all': '0', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'pictures': '6', 'books': '2'}
def __init__(self):
self.results = []
self.parser = self.SimpleSGMLParser(self.results, self.url)
def download_torrent(self, info):
print(download_file(info))
class SimpleSGMLParser(sgmllib3.SGMLParser):
def __init__(self, results, url, *args):
sgmllib3.SGMLParser.__init__(self)
self.url = url
self.td_counter = None
self.current_item = None
self.results = results
def start_a(self, attr):
params = dict(attr)
#print params
if 'href' in params:
if params['href'].startswith("/get/"):
self.current_item = {}
self.td_counter = 0
self.current_item['link']=self.url+params['href'].strip()
elif params['href'].startswith("/tor/") and self.current_item is not None:
self.current_item['desc_link']=self.url+params['href'].strip()
def handle_data(self, data):
if self.td_counter == 0:
if 'name' not in self.current_item:
self.current_item['name'] = ''
self.current_item['name']+= data
elif self.td_counter == 1:
if 'size' not in self.current_item:
self.current_item['size'] = ''
self.current_item['size']+= data.strip()
elif self.td_counter == 2:
if 'seeds' not in self.current_item:
self.current_item['seeds'] = ''
self.current_item['seeds']+= data.strip()
elif self.td_counter == 3:
if 'leech' not in self.current_item:
self.current_item['leech'] = ''
self.current_item['leech']+= data.strip()
def start_td(self,attr):
if isinstance(self.td_counter,int):
self.td_counter += 1
if self.td_counter > 4:
self.td_counter = None
# Display item
if self.current_item:
self.current_item['engine_url'] = self.url
if not self.current_item['seeds'].isdigit():
self.current_item['seeds'] = 0
if not self.current_item['leech'].isdigit():
self.current_item['leech'] = 0
prettyPrinter(self.current_item)
self.results.append('a')
def search(self, what, cat='all'):
ret = []
i = 1
while True and i<11:
results = []
parser = self.SimpleSGMLParser(results, self.url)
dat = retrieve_url(self.url+'/search/%s/%s/seeds/%d'%(what, self.supported_categories[cat], i))
results_re = re.compile('(?s)<h1>Search results for.*')
for match in results_re.finditer(dat):
res_tab = match.group(0)
parser.feed(res_tab)
""" Search engine class """
url = 'http://www.mininova.org'
name = 'Mininova'
supported_categories = {'all' : '0',
'movies' : '4',
'tv' : '8',
'music' : '5',
'games' : '3',
'anime' : '1',
'software' : '7',
'pictures' : '6',
'books' : '2'}
def download_torrent(self, info):
print(download_file(info))
class MyHtmlParseWithBlackJack(HTMLParser):
    """Parser for a Mininova search-results page.

    Nothing is collected until a ``<table class="maintable">`` start tag has
    been seen.  After that every ``<tr>`` opens a fresh result item, the
    ``a``/``td``/``span`` handlers select which field the following text
    belongs to, and the finished item is passed to ``prettyPrinter`` when the
    row closes.  Links to further result pages are appended to
    ``list_searches`` for as long as ``next_queries`` is true.
    """

    # Keys prettyPrinter reads besides engine_url; rows lacking any are dropped.
    _REQUIRED_FIELDS = ("link", "name", "size", "seeds", "leech")

    def __init__(self, list_searches, url):
        """Store the output list for pagination links and the site base url."""
        HTMLParser.__init__(self)
        self.list_searches = list_searches
        self.url = url
        self.table_results = False   # True once the results table was opened
        self.current_item = None     # dict for the row being parsed, if any
        self.cur_item_name = None    # field that handle_data appends to
        self.next_queries = True     # collect "/search" pagination links

    def handle_starttag_tr(self, _):
        """ Handler of tr start tag: begin a new, empty result item """
        self.current_item = dict()
        # a field left open by a malformed previous row must not leak in
        self.cur_item_name = None

    def handle_starttag_a(self, attrs):
        """ Handler of a start tag: torrent link or pagination link """
        params = dict(attrs)
        link = params.get("href")
        if not link:
            # anchors without a usable href carry nothing we can use
            return

        if link.startswith("/tor/"):
            if self.current_item is None:
                # torrent link outside of any row: nothing to attach it to
                return
            #description
            self.current_item["desc_link"] = "".join((self.url, link))
            #get download link from description by id
            self.current_item["link"] = "".join((self.url, "/get/", link[5:-2]))
            self.cur_item_name = "name"
            self.current_item["name"] = ""
        elif self.next_queries and link.startswith("/search"):
            title = params.get("title") or ""
            if title.startswith("Page"):
                self.list_searches.append(link)

    def handle_starttag_td(self, attrs):
        """ Handler of td start tag: first right-aligned cell holds the size """
        if self.current_item is None:
            return
        if ("align", "right") in attrs and "size" not in self.current_item:
            self.cur_item_name = "size"
            self.current_item["size"] = ""

    def handle_starttag_span(self, attrs):
        """ Handler of span start tag: class g -> seeds, class b -> leech """
        if self.current_item is None:
            return
        if ("class", "g") in attrs:
            self.cur_item_name = "seeds"
            self.current_item["seeds"] = ""
        elif ("class", "b") in attrs:
            self.cur_item_name = "leech"
            self.current_item["leech"] = ""

    def handle_starttag(self, tag, attrs):
        """ Parser's start tag handler """
        if self.table_results:
            # dispatch to handle_starttag_<tag> when such a method exists
            dispatcher = getattr(self, "_".join(("handle_starttag", tag)), None)
            if dispatcher:
                dispatcher(attrs)
        elif tag == "table":
            self.table_results = ("class", "maintable") in attrs

    def handle_endtag(self, tag):
        """ Parser's end tag handler: emit the item when its row closes """
        if tag == "tr" and self.current_item:
            item = self.current_item
            self.current_item = None
            self.cur_item_name = None
            # incomplete rows would make prettyPrinter fail on a missing key
            if all(key in item for key in self._REQUIRED_FIELDS):
                item["engine_url"] = self.url
                prettyPrinter(item)
        elif self.cur_item_name:
            if tag == "a" or tag == "td":
                self.cur_item_name = None

    def handle_data(self, data):
        """ Parser's data handler: append text to the currently open field """
        if self.cur_item_name and self.current_item is not None:
            temp = self.current_item.get(self.cur_item_name, "")
            self.current_item[self.cur_item_name] = " ".join((temp, data))
def search(self, what, cat="all"):
""" Performs search """
connection = http("www.mininova.org")
query = "/".join(("/search", what, self.supported_categories[cat], "seeds"))
connection.request("GET", query)
response = connection.getresponse()
if response.status != 200:
return
list_searches = []
parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
parser.feed(response.read().decode('utf-8'))
parser.close()
break
if len(results) <= 0:
break
i += 1
parser.next_queries = False
for search_query in list_searches:
connection.request("GET", search_query)
response = connection.getresponse()
parser.feed(response.read().decode('utf-8'))
parser.close()
connection.close()
return

BIN
src/searchengine/nova3/engines/torrentreactor.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 252 B

After

Width:  |  Height:  |  Size: 951 B

149
src/searchengine/nova3/engines/torrentreactor.py

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
#VERSION: 1.33
#VERSION: 1.36
#AUTHORS: Gekko Dam Beer (gekko04@users.sourceforge.net)
#CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org)
# Bruno Barbieri (brunorex@gmail.com)
@ -28,91 +28,84 @@ @@ -28,91 +28,84 @@
# POSSIBILITY OF SUCH DAMAGE.
from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file
from urllib import error, parse
from helpers import download_file, retrieve_url
from urllib import parse
from html.parser import HTMLParser
import re
from re import compile as re_compile
class torrentreactor(object):
url = 'http://www.torrentreactor.net'
name = 'TorrentReactor.Net'
supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'}
url = 'http://www.torrentreactor.net'
name = 'TorrentReactor'
supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'}
def download_torrent(self, info):
print(download_file(info))
def download_torrent(self, info):
print(download_file(info))
class SimpleHTMLParser(HTMLParser):
def __init__(self, results, url, *args):
HTMLParser.__init__(self)
self.td_counter = None
self.current_item = None
self.results = results
self.id = None
self.url = url
self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td }
class SimpleHTMLParser(HTMLParser):
def __init__(self, results, url, *args):
HTMLParser.__init__(self)
self.td_counter = None
self.current_item = None
self.results = results
self.id = None
self.url = url
self.torrents_matcher = re_compile("/torrents/\d+.*")
self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td }
def handle_starttag(self, tag, attrs):
if tag in self.dispatcher:
self.dispatcher[tag](attrs)
def handle_starttag(self, tag, attrs):
if tag in self.dispatcher:
self.dispatcher[tag](attrs)
def start_a(self, attr):
params = dict(attr)
if re.match("/torrents/\d+.*", params['href']):
self.current_item = {}
self.current_item['desc_link'] = self.url+params['href'].strip()
elif 'torrentreactor.net/download.php' in params['href']:
self.td_counter = 0
self.current_item['link'] = params['href'].strip()
self.current_item['name'] = parse.unquote_plus(params['href'].split('&')[1].split('name=')[1])
def start_a(self, attr):
    """Handle an ``<a>`` start tag.

    A link whose href matches ``self.torrents_matcher`` starts a new result
    item and records its description link.  A link containing
    ``torrentreactor.net/download.php`` supplies the download link and the
    torrent name (taken from the ``name=`` part of the second ``&``-separated
    chunk of the href) and resets the cell counter to 0.

    Anchors without an href, download links seen before any description
    link, and download links without a parsable name are ignored instead of
    raising inside the parser.
    """
    href = dict(attr).get('href')
    if not href:
        return
    if self.torrents_matcher.match(href):
        self.current_item = {'desc_link': self.url + href.strip()}
    elif 'torrentreactor.net/download.php' in href:
        if self.current_item is None:
            # no description link opened an item yet
            return
        try:
            raw_name = href.split('&')[1].split('name=')[1]
        except IndexError:
            # href does not have the expected "...&name=<title>" layout
            return
        self.td_counter = 0
        self.current_item['link'] = href.strip()
        self.current_item['name'] = parse.unquote_plus(raw_name)
def handle_data(self, data):
if self.td_counter == 1:
if 'size' not in self.current_item:
self.current_item['size'] = ''
self.current_item['size']+= data.strip()
elif self.td_counter == 2:
if 'seeds' not in self.current_item:
self.current_item['seeds'] = ''
self.current_item['seeds']+= data.strip()
elif self.td_counter == 3:
if 'leech' not in self.current_item:
self.current_item['leech'] = ''
self.current_item['leech']+= data.strip()
def handle_data(self, data):
    """Append stripped text to the field selected by the cell counter.

    Cell 1 feeds ``size``, cell 2 feeds ``seeds`` and cell 3 feeds ``leech``;
    text seen while the counter has any other value is ignored.
    """
    field = {1: 'size', 2: 'seeds', 3: 'leech'}.get(self.td_counter)
    if field is None:
        return
    item = self.current_item
    if field not in item:
        item[field] = ''
    item[field] += data.strip()
def start_td(self,attr):
if isinstance(self.td_counter,int):
self.td_counter += 1
if self.td_counter > 3:
self.td_counter = None
# add item to results
if self.current_item:
self.current_item['engine_url'] = self.url
if not self.current_item['seeds'].isdigit():
self.current_item['seeds'] = 0
if not self.current_item['leech'].isdigit():
self.current_item['leech'] = 0
prettyPrinter(self.current_item)
self.has_results = True
self.results.append('a')
def start_td(self,attr):
    """Handle a ``<td>`` start tag by advancing the cell counter.

    Does nothing while ``td_counter`` is not an int.  When the counter moves
    past 3 it is reset to None and, if an item was collected, the item is
    completed with ``engine_url``, its ``seeds``/``leech`` values are replaced
    by 0 when missing or non-numeric, and it is passed to ``prettyPrinter``.
    """
    if not isinstance(self.td_counter, int):
        return
    self.td_counter += 1
    if self.td_counter <= 3:
        return
    self.td_counter = None
    item = self.current_item
    if not item:
        return
    # add item to results
    item['engine_url'] = self.url
    for key in ('seeds', 'leech'):
        # a cell without text never created the key; treat that like "not a number"
        if not str(item.get(key, '')).isdigit():
            item[key] = 0
    prettyPrinter(item)
    self.has_results = True
    self.results.append('a')
def __init__(self):
self.results = []
self.parser = self.SimpleHTMLParser(self.results, self.url)
def search(self, what, cat='all'):
i = 0
dat = ''
def search(self, what, cat='all'):
i = 0
dat = ''
while True and i<11:
results = []
parser = self.SimpleHTMLParser(results, self.url)
try:
dat = retrieve_url(self.url+'/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat]))
except error.HTTPError:
break
parser.feed(dat)
parser.close()
if len(results) <= 0:
break
i += 1
while i < 11:
results = []
parser = self.SimpleHTMLParser(results, self.url)
dat = retrieve_url('%s/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(self.url, what, (i*35), self.supported_categories[cat]))
parser.feed(dat)
parser.close()
if len(results) <= 0:
break
i += 1

2
src/searchengine/nova3/engines/torrentz.py

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
#VERSION: 2.13
#VERSION: 2.14
#AUTHORS: Diego de las Heras (diegodelasheras@gmail.com)
# Redistribution and use in source and binary forms, with or without

11
src/searchengine/nova3/engines/versions.txt

@ -1,8 +1,9 @@ @@ -1,8 +1,9 @@
torrentreactor: 1.33
mininova: 1.51
piratebay: 2.11
extratorrent: 1.2
torrentreactor: 1.36
mininova: 2.00
piratebay: 2.11
extratorrent: 2.0
kickasstorrents: 1.26
btdigg: 1.23
legittorrents: 1.03
torrentz: 2.13
torrentz: 2.14
legittorrents: 1.04

240
src/searchengine/nova3/nova2.py

@ -26,7 +26,7 @@ @@ -26,7 +26,7 @@
# POSSIBILITY OF SUCH DAMAGE.
#VERSION: 1.24
#VERSION: 1.40
# Author:
# Fabien Devaux <fab AT gnux DOT info>
@ -37,14 +37,14 @@ @@ -37,14 +37,14 @@
#
# Licence: BSD
import sys
import threading
import os
import glob
import urllib.parse
from os import path, cpu_count
from glob import glob
from sys import argv
from multiprocessing import Pool
THREADED = True
CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books')
CATEGORIES = {'all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books'}
################################################################################
# Every engine should have a "search" method taking
@ -54,105 +54,129 @@ CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pic @@ -54,105 +54,129 @@ CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pic
# As a convention, try to list results by decreasing number of seeds or similar
################################################################################
supported_engines = []
engines = glob.glob(os.path.join(os.path.dirname(__file__), 'engines','*.py'))
for engine in engines:
e = engine.split(os.sep)[-1][:-3]
if len(e.strip()) == 0: continue
if e.startswith('_'): continue
try:
exec("from engines.%s import %s"%(e,e))
supported_engines.append(e)
except:
pass
def engineToXml(short_name):
xml = "<%s>\n"%short_name
exec("search_engine = %s()"%short_name, globals())
xml += "<name>%s</name>\n"%search_engine.name
xml += "<url>%s</url>\n"%search_engine.url
xml += "<categories>"
if hasattr(search_engine, 'supported_categories'):
supported_categories = list(search_engine.supported_categories.keys())
supported_categories.remove('all')
xml += " ".join(supported_categories)
xml += "</categories>\n"
xml += "</%s>\n"%short_name
return xml
def displayCapabilities():
"""
Display capabilities in XML format
<capabilities>
<engine_short_name>
<name>long name</name>
<url>http://example.com</url>
<categories>movies music games</categories>
</engine_short_name>
</capabilities>
"""
xml = "<capabilities>"
for short_name in supported_engines:
xml += engineToXml(short_name)
xml += "</capabilities>"
print(xml)
class EngineLauncher(threading.Thread):
def __init__(self, engine, what, cat='all'):
threading.Thread.__init__(self)
self.engine = engine
self.what = what
self.cat = cat
def run(self):
if hasattr(self.engine, 'supported_categories'):
if self.cat == 'all' or self.cat in list(self.engine.supported_categories.keys()):
self.engine.search(self.what, self.cat)
elif self.cat == 'all':
self.engine.search(self.what)
if __name__ == '__main__':
if len(sys.argv) < 2:
raise SystemExit('./nova2.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'%
(','.join(supported_engines)))
if len(sys.argv) == 2:
if sys.argv[1] == "--capabilities":
displayCapabilities()
sys.exit(0)
else:
raise SystemExit('./nova.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'%
(','.join(supported_engines)))
engines_list = [e.lower() for e in sys.argv[1].strip().split(',')]
if 'all' in engines_list:
engines_list = supported_engines
cat = sys.argv[2].lower()
if cat not in CATEGORIES:
raise SystemExit('Invalid category!')
what = urllib.parse.quote(' '.join(sys.argv[3:]))
threads = []
for engine in engines_list:
try:
if THREADED:
exec("l = EngineLauncher(%s(), what, cat)"%engine)
threads.append(l)
l.start()
else:
exec("e = %s()"%engine)
if hasattr(engine, 'supported_categories'):
if cat == 'all' or cat in list(e.supported_categories.keys()):
e.search(what, cat)
elif self.cat == 'all':
e.search(what)
engine().search(what, cat)
except:
pass
if THREADED:
for t in threads:
t.join()
def initialize_engines():
    """Import every engine module found in the ``engines`` directory.

    Each ``engines/<name>.py`` next to this file (names that are empty or
    start with an underscore are skipped) is imported, and the attribute
    ``<name>`` of that module is bound under the same name in this module's
    globals.  Modules that fail to import, or that lack such an attribute,
    are silently left out.

    Returns the list of engine names that were bound successfully.
    """
    engines_dir = path.join(path.dirname(__file__), 'engines')
    found = []

    for engine_path in glob(path.join(engines_dir, '*.py')):
        module_name = path.basename(engine_path).split('.')[0].strip()
        if not module_name or module_name.startswith('_'):
            continue
        try:
            # __import__ returns the top-level "engines" package ...
            package = __import__("engines." + module_name)
            # ... so step down to the sub-module itself
            module = getattr(package, module_name)
            # expose the engine class under its short name
            globals()[module_name] = getattr(module, module_name)
            found.append(module_name)
        except:
            # NOTE(review): bare except also hides SystemExit/KeyboardInterrupt
            # raised while importing an engine - confirm this is intended
            pass

    return found
def engines_to_xml(supported_engines):
    """Yield one XML fragment per supported engine.

    Each name in *supported_engines* is looked up in this module's globals
    and instantiated; the fragment contains the engine's ``name``, ``url``
    and its space-separated category keys, with the pseudo-category "all"
    left out.  Engines without ``supported_categories`` get an empty
    ``<categories>`` element.
    """
    tab = " " * 4

    for short_name in supported_engines:
        search_engine = globals()[short_name]()

        supported_categories = ""
        if hasattr(search_engine, "supported_categories"):
            # compare by value: identity ("is not") only worked by accident
            # of string interning and would let a non-interned "all" through
            supported_categories = " ".join(
                key for key in search_engine.supported_categories.keys()
                if key != "all")

        yield "".join((tab, "<", short_name, ">\n",
                       tab, tab, "<name>", search_engine.name, "</name>\n",
                       tab, tab, "<url>", search_engine.url, "</url>\n",
                       tab, tab, "<categories>", supported_categories, "</categories>\n",
                       tab, "</", short_name, ">\n"))
def displayCapabilities(supported_engines):
    """Print the capabilities of the given engines as XML.

    The per-engine fragments produced by ``engines_to_xml`` are wrapped in a
    single root element, giving output of this shape:

    <capabilities>
      <engine_short_name>
        <name>long name</name>
        <url>http://example.com</url>
        <categories>movies music games</categories>
      </engine_short_name>
    </capabilities>
    """
    pieces = ["<capabilities>\n"]
    pieces.extend(engines_to_xml(supported_engines))
    pieces.append("</capabilities>")
    print("".join(pieces))
def run_search(engine_list):
    """Instantiate one engine and run a single search with it.

    @param engine_list Sequence of (engine class, query, category)

    @retval False if any exception occurred
    @retval True otherwise
    """
    engine_class, what, cat = engine_list
    try:
        instance = engine_class()
        if not hasattr(instance, 'supported_categories'):
            # engine does not declare categories: search takes the query only
            instance.search(what)
            return True
        # fall back to "all" so an unsupported category cannot break the engine
        if cat not in instance.supported_categories:
            cat = "all"
        instance.search(what, cat)
        return True
    except:
        # NOTE(review): bare except also swallows KeyboardInterrupt/SystemExit
        # raised by an engine - confirm this is intended
        return False
def main(args):
    """Command-line entry point.

    @param args Arguments without the program name:
                ``[all|engine1[,engine2]*] <category> <keywords...>`` or the
                single switch ``--capabilities``

    Raises SystemExit with a usage message when too few arguments are given
    and with "Invalid category - <cat>" for an unknown category.  Returns
    silently when none of the requested engines is available.
    """
    supported_engines = initialize_engines()
    usage = ("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
             "available engines: %s" % (','.join(supported_engines)))

    if not args:
        raise SystemExit(usage)

    if args[0] == "--capabilities":
        displayCapabilities(supported_engines)
        return

    if len(args) < 3:
        raise SystemExit(usage)

    #get only unique engines with set
    engines_list = set(e.lower() for e in args[0].strip().split(','))

    if 'all' in engines_list:
        engines_list = supported_engines
    else:
        #discard un-supported engines
        engines_list = [engine for engine in engines_list
                        if engine in supported_engines]

    if not engines_list:
        #engine list is empty. Nothing to do here
        return

    cat = args[1].lower()

    if cat not in CATEGORIES:
        raise SystemExit(" - ".join(('Invalid category', cat)))

    what = urllib.parse.quote(' '.join(args[2:]))

    jobs = [[globals()[engine], what, cat] for engine in engines_list]

    if THREADED:
        #child process spawning is controlled min(number of searches, number of cpu)
        #cpu_count() may return None when the count cannot be determined
        workers = min(len(jobs), cpu_count() or 1)
        with Pool(workers) as pool:
            pool.map(run_search, jobs)
    else:
        #run every engine: all(map(...)) would stop at the first failed search
        for job in jobs:
            run_search(job)
if __name__ == "__main__":
main(argv[1:])

44
src/searchengine/nova3/nova2dl.py

@ -25,7 +25,7 @@ @@ -25,7 +25,7 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#VERSION: 1.10
#VERSION: 1.20
# Author:
# Christophe DUMEZ (chris@qbittorrent.org)
@ -39,26 +39,26 @@ supported_engines = dict() @@ -39,26 +39,26 @@ supported_engines = dict()
engines = glob.glob(os.path.join(os.path.dirname(__file__), 'engines','*.py'))
for engine in engines:
e = engine.split(os.sep)[-1][:-3]
if len(e.strip()) == 0: continue
if e.startswith('_'): continue
try:
exec("from engines.%s import %s"%(e,e))
exec("engine_url = %s.url"%e)
supported_engines[engine_url] = e
except:
pass
e = engine.split(os.sep)[-1][:-3]
if len(e.strip()) == 0: continue
if e.startswith('_'): continue
try:
exec("from engines.%s import %s"%(e,e))
exec("engine_url = %s.url"%e)
supported_engines[engine_url] = e
except:
pass
if __name__ == '__main__':
if len(sys.argv) < 3:
raise SystemExit('./nova2dl.py engine_url download_parameter')
engine_url = sys.argv[1].strip()
download_param = sys.argv[2].strip()
if engine_url not in list(supported_engines.keys()):
raise SystemExit('./nova2dl.py: this engine_url was not recognized')
exec("engine = %s()"%supported_engines[engine_url])
if hasattr(engine, 'download_torrent'):
engine.download_torrent(download_param)
else:
print(download_file(download_param))
sys.exit(0)
if len(sys.argv) < 3:
raise SystemExit('./nova2dl.py engine_url download_parameter')
engine_url = sys.argv[1].strip()
download_param = sys.argv[2].strip()
if engine_url not in list(supported_engines.keys()):
raise SystemExit('./nova2dl.py: this engine_url was not recognized')
exec("engine = %s()"%supported_engines[engine_url])
if hasattr(engine, 'download_torrent'):
engine.download_torrent(download_param)
else:
print(download_file(download_param))
sys.exit(0)

66
src/searchengine/nova3/novaprinter.py

@ -26,41 +26,39 @@ @@ -26,41 +26,39 @@
def prettyPrinter(dictionary):
outtext = ''
dictionary['size'] = anySizeToBytes(dictionary['size'])
if 'desc_link' in dictionary:
outtext = '%s|%s|%s|%s|%s|%s|%s'%(dictionary['link'],dictionary['name'].replace('|',' '),dictionary['size'],dictionary['seeds'],dictionary['leech'],dictionary['engine_url'],dictionary['desc_link'])
else:
outtext = '%s|%s|%s|%s|%s|%s'%(dictionary['link'],dictionary['name'].replace('|',' '),dictionary['size'],dictionary['seeds'],dictionary['leech'],dictionary['engine_url'])
dictionary['size'] = anySizeToBytes(dictionary['size'])
outtext = "|".join((dictionary["link"], dictionary["name"].replace("|", " "), str(dictionary["size"]), str(dictionary["seeds"]), str(dictionary["leech"]), dictionary["engine_url"]))
if 'desc_link' in dictionary:
outtext = "|".join((outtext, dictionary["desc_link"]))
# fd 1 is stdout
with open(1, 'w', encoding='utf-8', closefd=False) as utf8stdout:
print(outtext, file=utf8stdout)
# fd 1 is stdout
with open(1, 'w', encoding='utf-8', closefd=False) as utf8stdout:
print(outtext, file=utf8stdout)
def anySizeToBytes(size_string):
"""
Convert a string like '1 KB' to '1024' (bytes)
"""
# separate integer from unit
try:
size, unit = size_string.split()
except:
try:
size = size_string.strip()
unit = ''.join([c for c in size if c.isalpha()])
if len(unit) > 0:
size = size[:-len(unit)]
except:
return -1
if len(size) == 0:
return -1
size = float(size)
if len(unit) == 0:
return int(size)
short_unit = unit.upper()[0]
"""
Convert a string like '1 KB' to '1024' (bytes)
"""
# separate integer from unit
try:
size, unit = size_string.split()
except:
try:
size = size_string.strip()
unit = ''.join([c for c in size if c.isalpha()])
if len(unit) > 0:
size = size[:-len(unit)]
except:
return -1
if len(size) == 0:
return -1
size = float(size)
if len(unit) == 0:
return int(size)
short_unit = unit.upper()[0]
# convert
units_dict = { 'T': 40, 'G': 30, 'M': 20, 'K': 10 }
if short_unit in units_dict:
size = size * 2**units_dict[short_unit]
return int(size)
# convert
units_dict = {'T': 40, 'G': 30, 'M': 20, 'K': 10}
if short_unit in units_dict:
size = size * 2**units_dict[short_unit]
return int(size)

652
src/searchengine/nova3/socks.py

@ -42,350 +42,350 @@ _defaultproxy = None @@ -42,350 +42,350 @@ _defaultproxy = None
_orgsocket = socket.socket
class ProxyError(Exception):
def __init__(self, value):
self.value = value
def __str__(self):
return repr(self.value)
def __init__(self, value):
self.value = value
def __str__(self):
return repr(self.value)
class GeneralProxyError(ProxyError):
def __init__(self, value):
self.value = value
def __str__(self):
return repr(self.value)
def __init__(self, value):
self.value = value
def __str__(self):
return repr(self.value)
class Socks5AuthError(ProxyError):
def __init__(self, value):
self.value = value
def __str__(self):
return repr(self.value)
def __init__(self, value):
self.value = value
def __str__(self):
return repr(self.value)
class Socks5Error(ProxyError):
def __init__(self, value):
self.value = value
def __str__(self):
return repr(self.value)
def __init__(self, value):
self.value = value
def __str__(self):
return repr(self.value)
class Socks4Error(ProxyError):
def __init__(self, value):
self.value = value
def __str__(self):
return repr(self.value)
def __init__(self, value):
self.value = value
def __str__(self):
return repr(self.value)
class HTTPError(ProxyError):
def __init__(self, value):
self.value = value
def __str__(self):
return repr(self.value)
def __init__(self, value):
self.value = value
def __str__(self):
return repr(self.value)
_generalerrors = ("success",
"invalid data",
"not connected",
"not available",
"bad proxy type",
"bad input")
"invalid data",
"not connected",
"not available",
"bad proxy type",
"bad input")
_socks5errors = ("succeeded",
"general SOCKS server failure",
"connection not allowed by ruleset",
"Network unreachable",
"Host unreachable",
"Connection refused",
"TTL expired",
"Command not supported",
"Address type not supported",
"Unknown error")
"general SOCKS server failure",
"connection not allowed by ruleset",
"Network unreachable",
"Host unreachable",
"Connection refused",
"TTL expired",
"Command not supported",
"Address type not supported",
"Unknown error")
_socks5autherrors = ("succeeded",
"authentication is required",
"all offered authentication methods were rejected",
"unknown username or invalid password",
"unknown error")
"authentication is required",
"all offered authentication methods were rejected",
"unknown username or invalid password",
"unknown error")
_socks4errors = ("request granted",
"request rejected or failed",
"request rejected because SOCKS server cannot connect to identd on the client",
"request rejected because the client program and identd report different user-ids",
"unknown error")
"request rejected or failed",
"request rejected because SOCKS server cannot connect to identd on the client",
"request rejected because the client program and identd report different user-ids",
"unknown error")
def setdefaultproxy(proxytype=None,addr=None,port=None,rdns=True,username=None,password=None):
"""setdefaultproxy(proxytype, addr[, port[, rdns[, username[, password]]]])
Sets a default proxy which all further socksocket objects will use,
unless explicitly changed.
"""
global _defaultproxy
_defaultproxy = (proxytype,addr,port,rdns,username,password)
"""setdefaultproxy(proxytype, addr[, port[, rdns[, username[, password]]]])
Sets a default proxy which all further socksocket objects will use,
unless explicitly changed.
"""
global _defaultproxy
_defaultproxy = (proxytype,addr,port,rdns,username,password)
class socksocket(socket.socket):
"""socksocket([family[, type[, proto]]]) -> socket object
Open a SOCKS enabled socket. The parameters are the same as
those of the standard socket init. In order for SOCKS to work,
you must specify family=AF_INET, type=SOCK_STREAM and proto=0.
"""
def __init__(self, family=socket.AF_INET, type=socket.SOCK_STREAM, proto=0, _sock=None):
_orgsocket.__init__(self,family,type,proto,_sock)
if _defaultproxy != None:
self.__proxy = _defaultproxy
else:
self.__proxy = (None, None, None, None, None, None)
self.__proxysockname = None
self.__proxypeername = None
def __recvall(self, bytes):
"""__recvall(bytes) -> data
Receive EXACTLY the number of bytes requested from the socket.
Blocks until the required number of bytes have been received.
"""
data = ""
while len(data) < bytes:
d = self.recv(bytes-len(data))
if not d:
raise GeneralProxyError("connection closed unexpectedly")
data = data + d
return data
def setproxy(self,proxytype=None,addr=None,port=None,rdns=True,username=None,password=None):
"""setproxy(proxytype, addr[, port[, rdns[, username[, password]]]])
Sets the proxy to be used.
proxytype - The type of the proxy to be used. Three types
are supported: PROXY_TYPE_SOCKS4 (including socks4a),
PROXY_TYPE_SOCKS5 and PROXY_TYPE_HTTP
addr - The address of the server (IP or DNS).
port - The port of the server. Defaults to 1080 for SOCKS
servers and 8080 for HTTP proxy servers.
rdns - Should DNS queries be preformed on the remote side
(rather than the local side). The default is True.
Note: This has no effect with SOCKS4 servers.
username - Username to authenticate with to the server.
The default is no authentication.
password - Password to authenticate with to the server.
Only relevant when username is also provided.
"""
self.__proxy = (proxytype,addr,port,rdns,username,password)
def __negotiatesocks5(self,destaddr,destport):
"""__negotiatesocks5(self,destaddr,destport)
Negotiates a connection through a SOCKS5 server.
"""
# First we'll send the authentication packages we support.
if (self.__proxy[4]!=None) and (self.__proxy[5]!=None):
# The username/password details were supplied to the
# setproxy method so we support the USERNAME/PASSWORD
# authentication (in addition to the standard none).
self.sendall("\x05\x02\x00\x02")
else:
# No username/password were entered, therefore we
# only support connections with no authentication.
self.sendall("\x05\x01\x00")
# We'll receive the server's response to determine which
# method was selected
chosenauth = self.__recvall(2)
if chosenauth[0] != "\x05":
self.close()
raise GeneralProxyError((1,_generalerrors[1]))
# Check the chosen authentication method
if chosenauth[1] == "\x00":
# No authentication is required
pass
elif chosenauth[1] == "\x02":
# Okay, we need to perform a basic username/password
# authentication.
self.sendall("\x01" + chr(len(self.__proxy[4])) + self.__proxy[4] + chr(len(self.__proxy[5])) + self.__proxy[5])
authstat = self.__recvall(2)
if authstat[0] != "\x01":
# Bad response
self.close()
raise GeneralProxyError((1,_generalerrors[1]))
if authstat[1] != "\x00":
# Authentication failed
self.close()
raise Socks5AuthError((3,_socks5autherrors[3]))
# Authentication succeeded
else:
# Reaching here is always bad
self.close()
if chosenauth[1] == "\xFF":
raise Socks5AuthError((2,_socks5autherrors[2]))
else:
raise GeneralProxyError((1,_generalerrors[1]))
# Now we can request the actual connection
req = "\x05\x01\x00"
# If the given destination address is an IP address, we'll
# use the IPv4 address request even if remote resolving was specified.
try:
ipaddr = socket.inet_aton(destaddr)
req = req + "\x01" + ipaddr
except socket.error:
# Well it's not an IP number, so it's probably a DNS name.
if self.__proxy[3]==True:
# Resolve remotely
ipaddr = None
req = req + "\x03" + chr(len(destaddr)) + destaddr
else:
# Resolve locally
ipaddr = socket.inet_aton(socket.gethostbyname(destaddr))
req = req + "\x01" + ipaddr
req = req + struct.pack(">H",destport)
self.sendall(req)
# Get the response
resp = self.__recvall(4)
if resp[0] != "\x05":
self.close()
raise GeneralProxyError((1,_generalerrors[1]))
elif resp[1] != "\x00":
# Connection failed
self.close()
if ord(resp[1])<=8:
raise Socks5Error((ord(resp[1]),_generalerrors[ord(resp[1])]))
else:
raise Socks5Error((9,_generalerrors[9]))
# Get the bound address/port
elif resp[3] == "\x01":
boundaddr = self.__recvall(4)
elif resp[3] == "\x03":
resp = resp + self.recv(1)
boundaddr = self.__recvall(ord(resp[4]))
else:
self.close()
raise GeneralProxyError((1,_generalerrors[1]))
boundport = struct.unpack(">H",self.__recvall(2))[0]
self.__proxysockname = (boundaddr,boundport)
if ipaddr != None:
self.__proxypeername = (socket.inet_ntoa(ipaddr),destport)
else:
self.__proxypeername = (destaddr,destport)
def getproxysockname(self):
"""getsockname() -> address info
Returns the bound IP address and port number at the proxy.
"""
return self.__proxysockname
def getproxypeername(self):
"""getproxypeername() -> address info
Returns the IP and port number of the proxy.
"""
return _orgsocket.getpeername(self)
def getpeername(self):
"""getpeername() -> address info
Returns the IP address and port number of the destination
machine (note: getproxypeername returns the proxy)
"""
return self.__proxypeername
def __negotiatesocks4(self,destaddr,destport):
"""__negotiatesocks4(self,destaddr,destport)
Negotiates a connection through a SOCKS4 server.
"""
# Check if the destination address provided is an IP address
rmtrslv = False
try:
ipaddr = socket.inet_aton(destaddr)
except socket.error:
# It's a DNS name. Check where it should be resolved.
if self.__proxy[3]==True:
ipaddr = "\x00\x00\x00\x01"
rmtrslv = True
else:
ipaddr = socket.inet_aton(socket.gethostbyname(destaddr))
# Construct the request packet
req = "\x04\x01" + struct.pack(">H",destport) + ipaddr
# The username parameter is considered userid for SOCKS4
if self.__proxy[4] != None:
req = req + self.__proxy[4]
req = req + "\x00"
# DNS name if remote resolving is required
# NOTE: This is actually an extension to the SOCKS4 protocol
# called SOCKS4A and may not be supported in all cases.
if rmtrslv==True:
req = req + destaddr + "\x00"
self.sendall(req)
# Get the response from the server
resp = self.__recvall(8)
if resp[0] != "\x00":
# Bad data
self.close()
raise GeneralProxyError((1,_generalerrors[1]))
if resp[1] != "\x5A":
# Server returned an error
self.close()
if ord(resp[1]) in (91,92,93):
self.close()
raise Socks4Error((ord(resp[1]),_socks4errors[ord(resp[1])-90]))
else:
raise Socks4Error((94,_socks4errors[4]))
# Get the bound address/port
self.__proxysockname = (socket.inet_ntoa(resp[4:]),struct.unpack(">H",resp[2:4])[0])
if rmtrslv != None:
self.__proxypeername = (socket.inet_ntoa(ipaddr),destport)
else:
self.__proxypeername = (destaddr,destport)
def __negotiatehttp(self,destaddr,destport):
"""__negotiatehttp(self,destaddr,destport)
Negotiates a connection through an HTTP server.
"""
# If we need to resolve locally, we do this now
if self.__proxy[3] == False:
addr = socket.gethostbyname(destaddr)
else:
addr = destaddr
self.sendall("CONNECT " + addr + ":" + str(destport) + " HTTP/1.1\r\n" + "Host: " + destaddr + "\r\n\r\n")
# We read the response until we get the string "\r\n\r\n"
resp = self.recv(1)
while resp.find("\r\n\r\n")==-1:
resp = resp + self.recv(1)
# We just need the first line to check if the connection
# was successful
statusline = resp.splitlines()[0].split(" ",2)
if statusline[0] not in ("HTTP/1.0","HTTP/1.1"):
self.close()
raise GeneralProxyError((1,_generalerrors[1]))
try:
statuscode = int(statusline[1])
except ValueError:
self.close()
raise GeneralProxyError((1,_generalerrors[1]))
if statuscode != 200:
self.close()
raise HTTPError((statuscode,statusline[2]))
self.__proxysockname = ("0.0.0.0",0)
self.__proxypeername = (addr,destport)
def connect(self,destpair):
"""connect(self,despair)
Connects to the specified destination through a proxy.
destpar - A tuple of the IP/DNS address and the port number.
(identical to socket's connect).
To select the proxy server use setproxy().
"""
# Do a minimal input check first
if (type(destpair) in (list,tuple)==False) or (len(destpair)<2) or (type(destpair[0])!=str) or (type(destpair[1])!=int):
raise GeneralProxyError((5,_generalerrors[5]))
if self.__proxy[0] == PROXY_TYPE_SOCKS5:
if self.__proxy[2] != None:
portnum = self.__proxy[2]
else:
portnum = 1080
_orgsocket.connect(self,(self.__proxy[1],portnum))
self.__negotiatesocks5(destpair[0],destpair[1])
elif self.__proxy[0] == PROXY_TYPE_SOCKS4:
if self.__proxy[2] != None:
portnum = self.__proxy[2]
else:
portnum = 1080
_orgsocket.connect(self,(self.__proxy[1],portnum))
self.__negotiatesocks4(destpair[0],destpair[1])
elif self.__proxy[0] == PROXY_TYPE_HTTP:
if self.__proxy[2] != None:
portnum = self.__proxy[2]
else:
portnum = 8080
_orgsocket.connect(self,(self.__proxy[1],portnum))
self.__negotiatehttp(destpair[0],destpair[1])
elif self.__proxy[0] == None:
_orgsocket.connect(self,(destpair[0],destpair[1]))
else:
raise GeneralProxyError((4,_generalerrors[4]))
"""socksocket([family[, type[, proto]]]) -> socket object
Open a SOCKS enabled socket. The parameters are the same as
those of the standard socket init. In order for SOCKS to work,
you must specify family=AF_INET, type=SOCK_STREAM and proto=0.
"""
def __init__(self, family=socket.AF_INET, type=socket.SOCK_STREAM, proto=0, _sock=None):
    """Create the underlying socket and pick the initial proxy settings.

    All arguments are forwarded unchanged to _orgsocket.__init__.  The
    proxy settings start as the global _defaultproxy when that is set,
    otherwise as a tuple of six None values (no proxy).  The recorded
    proxy socket name and peer name both start as None.
    """
    _orgsocket.__init__(self, family, type, proto, _sock)
    # Same six-slot layout that setproxy() stores.
    noproxy = (None, None, None, None, None, None)
    self.__proxy = noproxy if _defaultproxy is None else _defaultproxy
    self.__proxysockname = self.__proxypeername = None
def __recvall(self, bytes):
    """__recvall(bytes) -> data
    Read from the socket until exactly `bytes` bytes have been collected
    and return them as one string.

    Keeps calling recv() for the outstanding amount, so it blocks until
    everything has arrived.  Raises GeneralProxyError if recv() returns
    an empty result before the requested amount was received.
    """
    received = ""
    outstanding = bytes
    while outstanding > 0:
        chunk = self.recv(outstanding)
        if not chunk:
            raise GeneralProxyError("connection closed unexpectedly")
        received = received + chunk
        outstanding = bytes - len(received)
    return received
def setproxy(self, proxytype=None, addr=None, port=None, rdns=True, username=None, password=None):
    """setproxy(proxytype, addr[, port[, rdns[, username[, password]]]])
    Stores the proxy settings that connect() will use.

    proxytype - The type of the proxy.  connect() handles
                PROXY_TYPE_SOCKS4, PROXY_TYPE_SOCKS5 and
                PROXY_TYPE_HTTP; None means a direct connection.
    addr      - The address of the proxy server (IP or DNS).
    port      - The port of the proxy server.  When None, connect()
                uses 1080 for SOCKS servers and 8080 for HTTP
                proxy servers.
    rdns      - Whether host names should be resolved on the remote
                side rather than locally.  The default is True.
    username  - Username to authenticate with to the server.  The
                SOCKS4 negotiation sends it as the userid field.
    password  - Password to authenticate with to the server.  The
                SOCKS5 negotiation only offers username/password
                authentication when both username and password
                are given.
    """
    settings = (proxytype, addr, port, rdns, username, password)
    self.__proxy = settings
def __negotiatesocks5(self, destaddr, destport):
    """__negotiatesocks5(self,destaddr,destport)
    Negotiates a connection through a SOCKS5 server.

    destaddr - Destination host as a dotted IPv4 string or a DNS name.
    destport - Destination port number.

    Steps: offer the supported authentication methods, perform
    username/password authentication if the server selects it, send
    the connect request and read the reply.  On success the bound
    address/port reported by the server is stored as the proxy socket
    name and the destination as the proxy peer name.

    Raises GeneralProxyError for malformed replies, Socks5AuthError
    when authentication is rejected or no offered method is accepted,
    and Socks5Error when the server refuses the connection.  The
    socket is closed before any of these is raised.
    """
    username = self.__proxy[4]
    password = self.__proxy[5]
    # First we'll send the authentication methods we support.
    if (username != None) and (password != None):
        # Credentials were given to setproxy(), so offer
        # USERNAME/PASSWORD in addition to "no authentication".
        self.sendall("\x05\x02\x00\x02")
    else:
        # Without credentials only "no authentication" is offered.
        self.sendall("\x05\x01\x00")
    # The server answers with the method it selected.
    chosenauth = self.__recvall(2)
    if chosenauth[0] != "\x05":
        self.close()
        raise GeneralProxyError((1, _generalerrors[1]))
    if chosenauth[1] == "\x00":
        # No authentication is required
        pass
    elif chosenauth[1] == "\x02":
        # Basic username/password sub-negotiation.
        self.sendall("\x01" + chr(len(username)) + username + chr(len(password)) + password)
        authstat = self.__recvall(2)
        if authstat[0] != "\x01":
            # Bad response
            self.close()
            raise GeneralProxyError((1, _generalerrors[1]))
        if authstat[1] != "\x00":
            # Authentication failed
            self.close()
            raise Socks5AuthError((3, _socks5autherrors[3]))
        # Authentication succeeded
    else:
        # Reaching here is always bad
        self.close()
        if chosenauth[1] == "\xFF":
            raise Socks5AuthError((2, _socks5autherrors[2]))
        else:
            raise GeneralProxyError((1, _generalerrors[1]))
    # Now we can request the actual connection
    req = "\x05\x01\x00"
    # If the given destination address is an IP address, we'll
    # use the IPv4 address request even if remote resolving was specified.
    try:
        ipaddr = socket.inet_aton(destaddr)
        req = req + "\x01" + ipaddr
    except socket.error:
        # Well it's not an IP number, so it's probably a DNS name.
        if self.__proxy[3] == True:
            # Resolve remotely: send the name itself, length-prefixed.
            ipaddr = None
            req = req + "\x03" + chr(len(destaddr)) + destaddr
        else:
            # Resolve locally
            ipaddr = socket.inet_aton(socket.gethostbyname(destaddr))
            req = req + "\x01" + ipaddr
    req = req + struct.pack(">H", destport)
    self.sendall(req)
    # Get the response
    resp = self.__recvall(4)
    if resp[0] != "\x05":
        self.close()
        raise GeneralProxyError((1, _generalerrors[1]))
    if resp[1] != "\x00":
        # Connection failed
        self.close()
        replycode = ord(resp[1])
        # NOTE(review): replycode is a SOCKS5 reply code but the text is
        # looked up in _generalerrors; a SOCKS5-specific message table
        # looks intended here -- confirm against the module's error tables.
        if replycode <= 8:
            raise Socks5Error((replycode, _generalerrors[replycode]))
        else:
            raise Socks5Error((9, _generalerrors[9]))
    # Get the bound address/port
    if resp[3] == "\x01":
        boundaddr = self.__recvall(4)
    elif resp[3] == "\x03":
        # Domain name: one length byte, then the name.  __recvall is
        # used for the length byte so that a closed connection raises
        # GeneralProxyError instead of an IndexError on resp[4].
        resp = resp + self.__recvall(1)
        boundaddr = self.__recvall(ord(resp[4]))
    else:
        self.close()
        raise GeneralProxyError((1, _generalerrors[1]))
    boundport = struct.unpack(">H", self.__recvall(2))[0]
    self.__proxysockname = (boundaddr, boundport)
    if ipaddr != None:
        self.__proxypeername = (socket.inet_ntoa(ipaddr), destport)
    else:
        self.__proxypeername = (destaddr, destport)
def getproxysockname(self):
    """getproxysockname() -> address info
    Returns the bound address and port number recorded for the proxy
    side of the connection by the last proxy negotiation (None if no
    negotiation has stored one yet).
    """
    return self.__proxysockname
def getproxypeername(self):
    """getproxypeername() -> address info
    Returns the peer name reported by the underlying socket, i.e. the
    address this socket is actually connected to (the proxy, when one
    is in use).
    """
    proxypeer = _orgsocket.getpeername(self)
    return proxypeer
def getpeername(self):
    """getpeername() -> address info
    Returns the address and port number of the destination machine as
    recorded by the last proxy negotiation.  Use getproxypeername() to
    get the address of the proxy itself.
    """
    return self.__proxypeername
def __negotiatesocks4(self, destaddr, destport):
    """__negotiatesocks4(self,destaddr,destport)
    Negotiates a connection through a SOCKS4 server.

    destaddr - Destination host as a dotted IPv4 string or a DNS name.
    destport - Destination port number.

    A DNS name is resolved locally unless remote resolving was
    requested via setproxy(); in that case the name is sent to the
    server using the SOCKS4A extension.  On success the bound
    address/port from the reply is stored as the proxy socket name and
    the destination as the proxy peer name.

    Raises GeneralProxyError for a malformed reply and Socks4Error
    when the server rejects the request.  The socket is closed before
    either is raised.
    """
    # Check if the destination address provided is an IP address
    rmtrslv = False
    try:
        ipaddr = socket.inet_aton(destaddr)
    except socket.error:
        # It's a DNS name. Check where it should be resolved.
        if self.__proxy[3] == True:
            # Placeholder address that signals SOCKS4A remote resolving.
            ipaddr = "\x00\x00\x00\x01"
            rmtrslv = True
        else:
            ipaddr = socket.inet_aton(socket.gethostbyname(destaddr))
    # Construct the request packet
    req = "\x04\x01" + struct.pack(">H", destport) + ipaddr
    # The username parameter is considered userid for SOCKS4
    if self.__proxy[4] != None:
        req = req + self.__proxy[4]
    req = req + "\x00"
    # DNS name if remote resolving is required
    # NOTE: This is actually an extension to the SOCKS4 protocol
    # called SOCKS4A and may not be supported in all cases.
    if rmtrslv:
        req = req + destaddr + "\x00"
    self.sendall(req)
    # Get the response from the server
    resp = self.__recvall(8)
    if resp[0] != "\x00":
        # Bad data
        self.close()
        raise GeneralProxyError((1, _generalerrors[1]))
    if resp[1] != "\x5A":
        # Server returned an error
        self.close()
        replycode = ord(resp[1])
        if replycode in (91, 92, 93):
            raise Socks4Error((replycode, _socks4errors[replycode - 90]))
        else:
            raise Socks4Error((94, _socks4errors[4]))
    # Get the bound address/port
    self.__proxysockname = (socket.inet_ntoa(resp[4:]), struct.unpack(">H", resp[2:4])[0])
    # rmtrslv is a bool, so the former ``rmtrslv != None`` test was always
    # true and a remotely resolved destination was reported as the
    # "0.0.0.1" placeholder instead of the host name that was requested.
    if rmtrslv:
        self.__proxypeername = (destaddr, destport)
    else:
        self.__proxypeername = (socket.inet_ntoa(ipaddr), destport)
def __negotiatehttp(self, destaddr, destport):
    """__negotiatehttp(self,destaddr,destport)
    Negotiates a connection through an HTTP server.

    destaddr - Destination host as an IP address or DNS name.
    destport - Destination port number.

    Sends a CONNECT request and reads the reply headers.  The host
    name is resolved locally first when remote resolving was disabled
    via setproxy().  On success the proxy socket name is recorded as
    ("0.0.0.0", 0) and the proxy peer name as the requested address
    and port.

    Raises GeneralProxyError when the connection is closed before the
    headers are complete or the status line cannot be parsed, and
    HTTPError when the status code is not 200.  The socket is closed
    before either is raised.
    """
    # If we need to resolve locally, we do this now
    if self.__proxy[3] == False:
        addr = socket.gethostbyname(destaddr)
    else:
        addr = destaddr
    self.sendall("CONNECT " + addr + ":" + str(destport) + " HTTP/1.1\r\n" + "Host: " + destaddr + "\r\n\r\n")
    # Read one byte at a time until the blank line that ends the
    # headers, so nothing beyond the proxy's reply is consumed.
    resp = ""
    while not resp.endswith("\r\n\r\n"):
        piece = self.recv(1)
        if not piece:
            # The proxy closed the connection mid-reply; without this
            # check the loop would spin forever on empty reads.
            self.close()
            raise GeneralProxyError((1, _generalerrors[1]))
        resp = resp + piece
    # We just need the first line to check if the connection
    # was successful
    statusline = resp.splitlines()[0].split(" ", 2)
    if statusline[0] not in ("HTTP/1.0", "HTTP/1.1"):
        self.close()
        raise GeneralProxyError((1, _generalerrors[1]))
    try:
        statuscode = int(statusline[1])
    except (ValueError, IndexError):
        # Non-numeric or missing status code.
        self.close()
        raise GeneralProxyError((1, _generalerrors[1]))
    if statuscode != 200:
        self.close()
        # The reason phrase may be absent from the status line.
        if len(statusline) > 2:
            reason = statusline[2]
        else:
            reason = ""
        raise HTTPError((statuscode, reason))
    self.__proxysockname = ("0.0.0.0", 0)
    self.__proxypeername = (addr, destport)
def connect(self, destpair):
    """connect(self, destpair)
    Connects to the specified destination through a proxy.

    destpair - A list or tuple whose first two items are the IP/DNS
    address (a str) and the port number (an int), as for socket's
    connect.

    The proxy used is the one stored by setproxy().  When the stored
    proxy type is None the connection is made directly.  When the
    stored proxy port is None, 1080 is used for SOCKS4/SOCKS5 proxies
    and 8080 for HTTP proxies.

    Raises GeneralProxyError with _generalerrors[5] when destpair is
    malformed and with _generalerrors[4] when the stored proxy type is
    not one of the handled values.
    """
    # Minimal input check.  This used to read
    # ``type(destpair) in (list,tuple)==False``; Python chains that into
    # ``type(destpair) in (list,tuple) and (list,tuple)==False``, which is
    # never true, so the container type was never actually verified.
    if (not isinstance(destpair, (list, tuple))
            or len(destpair) < 2
            or type(destpair[0]) != str
            or type(destpair[1]) != int):
        raise GeneralProxyError((5, _generalerrors[5]))
    desthost, destport = destpair[0], destpair[1]

    proxytype = self.__proxy[0]
    if proxytype == PROXY_TYPE_SOCKS5:
        defaultport, negotiate = 1080, self.__negotiatesocks5
    elif proxytype == PROXY_TYPE_SOCKS4:
        defaultport, negotiate = 1080, self.__negotiatesocks4
    elif proxytype == PROXY_TYPE_HTTP:
        defaultport, negotiate = 8080, self.__negotiatehttp
    elif proxytype is None:
        # No proxy configured: plain direct connection.
        _orgsocket.connect(self, (desthost, destport))
        return
    else:
        raise GeneralProxyError((4, _generalerrors[4]))

    # Connect to the proxy itself, then let the protocol-specific
    # handshake ask it to reach the real destination.
    portnum = self.__proxy[2]
    if portnum is None:
        portnum = defaultport
    _orgsocket.connect(self, (self.__proxy[1], portnum))
    negotiate(desthost, destport)

Loading…
Cancel
Save