mirror of
https://github.com/YGGverse/qBt_SE.git
synced 2025-02-08 04:44:18 +00:00
delete 'modules' folder
This commit is contained in:
parent
5fb21b73e9
commit
ecfd5c8473
@ -1,123 +0,0 @@
|
|||||||
#VERSION: 1.43
|
|
||||||
|
|
||||||
# Author:
|
|
||||||
# Christophe DUMEZ (chris@qbittorrent.org)
|
|
||||||
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
|
||||||
# modification, are permitted provided that the following conditions are met:
|
|
||||||
#
|
|
||||||
# * Redistributions of source code must retain the above copyright notice,
|
|
||||||
# this list of conditions and the following disclaimer.
|
|
||||||
# * Redistributions in binary form must reproduce the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer in the
|
|
||||||
# documentation and/or other materials provided with the distribution.
|
|
||||||
# * Neither the name of the author nor the names of its contributors may be
|
|
||||||
# used to endorse or promote products derived from this software without
|
|
||||||
# specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
# POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
import gzip
|
|
||||||
import html.entities
|
|
||||||
import io
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import socket
|
|
||||||
import socks
|
|
||||||
import tempfile
|
|
||||||
import urllib.error
|
|
||||||
import urllib.parse
|
|
||||||
import urllib.request
|
|
||||||
|
|
||||||
# Some sites block the default Python User-Agent, so send a browser-like one.
user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0'
headers = {'User-Agent': user_agent}

# SOCKS5 proxy support: when the "sock_proxy" environment variable holds
# "[username:password@]host:port", every socket is created through that proxy.
proxy_str = os.environ.get("sock_proxy", "").strip()
if proxy_str:
    # The port has to be numeric. With "\w+" a value such as "80a" matched and
    # int() then raised ValueError while this module was being imported.
    m = re.match(r"^(?:(?P<username>[^:]+):(?P<password>[^@]+)@)?(?P<host>[^:]+):(?P<port>\d+)$",
                 proxy_str)
    if m is not None:
        socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, m.group('host'),
                              int(m.group('port')), True, m.group('username'), m.group('password'))
        # Replace the socket class so that later connections use the proxy.
        socket.socket = socks.socksocket
|
|
||||||
|
|
||||||
|
|
||||||
def htmlentitydecode(s):
    """Return *s* with its HTML entities replaced by the characters they denote.

    Three passes run in this order: named entities (``&eacute;``), decimal
    references (``&#233;``) and hexadecimal references (``&#x00E9;``).
    A reference that is not a known name, or whose number is not a valid
    code point, is left in the text unchanged instead of raising.
    """
    # (Inspired from http://mail.python.org/pipermail/python-list/2007-June/443813.html)
    def entity2char(m):
        # The pattern only matches names present in the table, so the
        # lookup cannot fail.
        return chr(html.entities.name2codepoint[m.group(1)])

    def ref2char(base):
        """Build a re.sub callback decoding a numeric reference in *base*."""
        def convert(m):
            try:
                return chr(int(m.group(1), base))
            except (ValueError, OverflowError):
                # Number outside the Unicode range: keep the original text.
                return m.group(0)
        return convert

    # First convert alpha entities (such as &eacute;)
    t = re.sub('&(%s);' % '|'.join(html.entities.name2codepoint), entity2char, s)

    # Then convert numerical entities (such as &#233;)
    t = re.sub(r'&#(\d+);', ref2char(10), t)

    # Then convert hexa entities (such as &#x00E9;); only real hex digits
    # are accepted so that int(..., 16) cannot be handed garbage.
    return re.sub(r'&#x([0-9a-fA-F]+);', ref2char(16), t)
|
|
||||||
|
|
||||||
|
|
||||||
def retrieve_url(url):
    """Return the content of the page at *url* as a decoded string.

    The request is sent with the module-level ``headers``.  A gzip-compressed
    body is decompressed, the bytes are decoded with the charset announced in
    the Content-Type header (UTF-8 when none is given or it is unknown), and
    HTML entities are decoded.

    On a connection error a message is printed and "" is returned.
    """
    req = urllib.request.Request(url, headers=headers)
    try:
        response = urllib.request.urlopen(req)
    except urllib.error.URLError as errno:
        print(" ".join(("Connection error:", str(errno.reason))))
        return ""

    # Read everything that is needed, then release the connection.
    with response:
        dat = response.read()
        charset = response.info().get_content_charset()

    # Check if it is gzipped (gzip magic number)
    if dat[:2] == b'\x1f\x8b':
        # Data is gzip encoded, decode it
        with gzip.GzipFile(fileobj=io.BytesIO(dat)) as gzipper:
            dat = gzipper.read()

    try:
        dat = dat.decode(charset or 'utf-8', 'replace')
    except LookupError:
        # The server announced an encoding Python does not know about.
        dat = dat.decode('utf-8', 'replace')
    return htmlentitydecode(dat)
|
|
||||||
|
|
||||||
|
|
||||||
def download_file(url, referer=None):
    """Download *url* into a temporary file.

    url     -- address of the file to fetch
    referer -- optional value for the "referer" request header

    Returns the string "<path of the temporary file> <url>".  A
    gzip-compressed body is stored decompressed.  Errors raised by the
    request propagate to the caller.
    """
    # Download url
    req = urllib.request.Request(url, headers=headers)
    if referer is not None:
        req.add_header('referer', referer)
    # Fetch before creating the temporary file, so that a failed request does
    # not leave an empty file and an open descriptor behind.
    with urllib.request.urlopen(req) as response:
        dat = response.read()

    # Check if it is gzipped (gzip magic number)
    if dat[:2] == b'\x1f\x8b':
        # Data is gzip encoded, decode it
        with gzip.GzipFile(fileobj=io.BytesIO(dat)) as gzipper:
            dat = gzipper.read()

    # Write it to a file
    fd, path = tempfile.mkstemp()
    with os.fdopen(fd, "wb") as out:
        out.write(dat)

    # return file path
    return (path + " " + url)
|
|
190
modules/nova2.py
190
modules/nova2.py
@ -1,190 +0,0 @@
|
|||||||
#VERSION: 1.43
|
|
||||||
|
|
||||||
# Author:
|
|
||||||
# Fabien Devaux <fab AT gnux DOT info>
|
|
||||||
# Contributors:
|
|
||||||
# Christophe Dumez <chris@qbittorrent.org> (qbittorrent integration)
|
|
||||||
# Thanks to gab #gcu @ irc.freenode.net (multipage support on PirateBay)
|
|
||||||
# Thanks to Elias <gekko04@users.sourceforge.net> (torrentreactor and isohunt search engines)
|
|
||||||
#
|
|
||||||
# Licence: BSD
|
|
||||||
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
|
||||||
# modification, are permitted provided that the following conditions are met:
|
|
||||||
#
|
|
||||||
# * Redistributions of source code must retain the above copyright notice,
|
|
||||||
# this list of conditions and the following disclaimer.
|
|
||||||
# * Redistributions in binary form must reproduce the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer in the
|
|
||||||
# documentation and/or other materials provided with the distribution.
|
|
||||||
# * Neither the name of the author nor the names of its contributors may be
|
|
||||||
# used to endorse or promote products derived from this software without
|
|
||||||
# specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
# POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
import urllib.parse
|
|
||||||
from os import path
|
|
||||||
from glob import glob
|
|
||||||
from sys import argv
|
|
||||||
from multiprocessing import Pool, cpu_count
|
|
||||||
|
|
||||||
# When true, main() runs the searches in a multiprocessing Pool.
THREADED = True

# Upper bound for the number of worker processes; falls back to a single
# worker when cpu_count() raises NotImplementedError.
try:
    MAX_THREADS = cpu_count()
except NotImplementedError:
    MAX_THREADS = 1

# Category names main() accepts as its second argument.
CATEGORIES = {
    'all', 'movies', 'tv', 'music', 'games',
    'anime', 'software', 'pictures', 'books',
}
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
# Every engine should have a "search" method taking
|
|
||||||
# a space-free string as parameter (ex. "family+guy")
|
|
||||||
# it should call prettyPrinter() with a dict as parameter.
|
|
||||||
# The keys in the dict must be: link,name,size,seeds,leech,engine_url
|
|
||||||
# As a convention, try to list results by decreasing number of seeds or similar
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
|
|
||||||
def initialize_engines():
    """Import the engines found in the ``engines`` directory next to this file.

    For each ``engines/<name>.py`` whose name is not empty and does not start
    with an underscore, the attribute ``<name>`` of that module is stored in
    this module's globals under the same name.  Engines that raise while
    being imported are skipped.

    Return the list of engine names that were imported.
    """
    engines_dir = path.join(path.dirname(__file__), 'engines')
    loaded = []

    for engine_path in glob(path.join(engines_dir, '*.py')):
        name = path.basename(engine_path).split('.')[0].strip()
        if not name or name.startswith('_'):
            continue
        try:
            # __import__("engines.<name>") hands back the top-level package,
            # so step down to the sub-module and then to the class.
            package = __import__(".".join(("engines", name)))
            module = getattr(package, name)
            globals()[name] = getattr(module, name)
        except Exception:
            continue
        loaded.append(name)

    return loaded
|
|
||||||
|
|
||||||
|
|
||||||
def engines_to_xml(supported_engines):
    """Yield one XML fragment per engine named in *supported_engines*.

    Each name is looked up in this module's globals and instantiated.  The
    fragment holds the engine's ``name``, ``url`` and the keys of its
    ``supported_categories`` (except "all") joined by spaces; the categories
    element is empty when the engine has no such attribute.

    Text values are XML-escaped so that characters such as "&" in a url do
    not produce a malformed document.
    """
    # Imported here so the block does not rely on a new file-level import.
    from xml.sax.saxutils import escape

    tab = " " * 4

    for short_name in supported_engines:
        search_engine = globals()[short_name]()

        supported_categories = ""
        if hasattr(search_engine, "supported_categories"):
            supported_categories = " ".join(key
                                            for key in search_engine.supported_categories
                                            if key != "all")

        yield "".join((tab, "<", short_name, ">\n",
                       tab, tab, "<name>", escape(search_engine.name), "</name>\n",
                       tab, tab, "<url>", escape(search_engine.url), "</url>\n",
                       tab, tab, "<categories>", escape(supported_categories), "</categories>\n",
                       tab, "</", short_name, ">\n"))
|
|
||||||
|
|
||||||
|
|
||||||
def displayCapabilities(supported_engines):
    """
    Print the capabilities of *supported_engines* to stdout in XML format:

    <capabilities>
      <engine_short_name>
        <name>long name</name>
        <url>http://example.com</url>
        <categories>movies music games</categories>
      </engine_short_name>
    </capabilities>
    """
    fragments = ["<capabilities>\n"]
    fragments.extend(engines_to_xml(supported_engines))
    fragments.append("</capabilities>")
    print("".join(fragments))
|
|
||||||
|
|
||||||
|
|
||||||
def run_search(engine_list):
    """Run one search on one engine.

    @param engine_list  Sequence of three items: engine class, query, category

    An engine that declares ``supported_categories`` is searched only when
    the category is one of them; an engine without that attribute is
    searched with the query alone.

    @retval False if any exception occurred while creating or running the engine
    @retval True otherwise
    """
    engine_class, what, cat = engine_list
    try:
        instance = engine_class()
        if not hasattr(instance, 'supported_categories'):
            instance.search(what)
        elif cat in instance.supported_categories:
            # avoid exceptions due to invalid category
            instance.search(what, cat)
    except Exception:
        return False
    return True
|
|
||||||
|
|
||||||
|
|
||||||
def main(args):
    """Command-line entry point.

    args -- argument list without the program name.  Either
            ``--capabilities`` (print the engines' capabilities as XML) or
            ``<engines> <category> <keywords...>`` where <engines> is "all"
            or a comma-separated list of engine names.

    Raises SystemExit with a usage text when arguments are missing and with
    an "Invalid category" message when the category is not in CATEGORIES.
    Returns None without searching when no requested engine is supported.
    """
    supported_engines = initialize_engines()

    usage = ("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
             "available engines: %s" % (','.join(supported_engines)))

    if not args:
        raise SystemExit(usage)
    if args[0] == "--capabilities":
        displayCapabilities(supported_engines)
        return
    if len(args) < 3:
        raise SystemExit(usage)

    # get only unique engines with set
    engines_list = set(e.lower() for e in args[0].strip().split(','))

    if 'all' in engines_list:
        engines_list = supported_engines
    else:
        # discard un-supported engines
        engines_list = [engine for engine in engines_list
                        if engine in supported_engines]

    if not engines_list:
        # engine list is empty. Nothing to do here
        return

    cat = args[1].lower()

    if cat not in CATEGORIES:
        raise SystemExit(" - ".join(('Invalid category', cat)))

    what = urllib.parse.quote(' '.join(args[2:]))
    jobs = [[globals()[engine], what, cat] for engine in engines_list]
    if THREADED:
        # child process spawning is controlled min(number of searches, number of cpu)
        with Pool(min(len(jobs), MAX_THREADS)) as pool:
            pool.map(run_search, jobs)
    else:
        # A plain loop on purpose: all(map(...)) stops at the first engine
        # whose run_search() returns False and skips every engine after it.
        for job in jobs:
            run_search(job)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main(argv[1:])
|
|
@ -1,63 +0,0 @@
|
|||||||
#VERSION: 1.22
|
|
||||||
|
|
||||||
# Author:
|
|
||||||
# Christophe DUMEZ (chris@qbittorrent.org)
|
|
||||||
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
|
||||||
# modification, are permitted provided that the following conditions are met:
|
|
||||||
#
|
|
||||||
# * Redistributions of source code must retain the above copyright notice,
|
|
||||||
# this list of conditions and the following disclaimer.
|
|
||||||
# * Redistributions in binary form must reproduce the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer in the
|
|
||||||
# documentation and/or other materials provided with the distribution.
|
|
||||||
# * Neither the name of the author nor the names of its contributors may be
|
|
||||||
# used to endorse or promote products derived from this software without
|
|
||||||
# specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
# POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
import glob
|
|
||||||
from helpers import download_file
|
|
||||||
|
|
||||||
# Maps each engine's ``url`` attribute to the engine name.
supported_engines = dict()

engines = glob.glob(os.path.join(os.path.dirname(__file__), 'engines', '*.py'))
for engine in engines:
    e = engine.split(os.sep)[-1][:-3]
    if len(e.strip()) == 0:
        continue
    if e.startswith('_'):
        continue
    try:
        # Import engines.<e> and fetch the class named after the file,
        # without building source text for exec() out of a file name.
        engine_class = getattr(__import__("engines.%s" % e, fromlist=[e]), e)
        # Keep publishing the class under its own name, as the former
        # "from engines.<e> import <e>" statement did.
        globals()[e] = engine_class
        engine_url = engine_class.url
        supported_engines[engine_url] = e
    except Exception:
        # Best effort: an engine that cannot be loaded is simply unavailable.
        pass

if __name__ == '__main__':
    if len(sys.argv) < 3:
        raise SystemExit('./nova2dl.py engine_url download_parameter')
    engine_url = sys.argv[1].strip()
    download_param = sys.argv[2].strip()
    if engine_url not in supported_engines:
        raise SystemExit('./nova2dl.py: this engine_url was not recognized')
    engine = globals()[supported_engines[engine_url]]()
    # Engines may provide their own download routine; otherwise fall back to
    # the generic helper and print its result.
    if hasattr(engine, 'download_torrent'):
        engine.download_torrent(download_param)
    else:
        print(download_file(download_param))
    sys.exit(0)
|
|
@ -1,67 +0,0 @@
|
|||||||
#VERSION: 1.46
|
|
||||||
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
|
||||||
# modification, are permitted provided that the following conditions are met:
|
|
||||||
#
|
|
||||||
# * Redistributions of source code must retain the above copyright notice,
|
|
||||||
# this list of conditions and the following disclaimer.
|
|
||||||
# * Redistributions in binary form must reproduce the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer in the
|
|
||||||
# documentation and/or other materials provided with the distribution.
|
|
||||||
# * Neither the name of the author nor the names of its contributors may be
|
|
||||||
# used to endorse or promote products derived from this software without
|
|
||||||
# specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
# POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
|
|
||||||
def prettyPrinter(dictionary):
    """Print one search result to stdout as a single "|"-separated line.

    The fields are written in this order: link, name, size, seeds, leech,
    engine_url and, when present, desc_link.  ``dictionary['size']`` is
    replaced in place by its value converted with anySizeToBytes(), and any
    "|" inside the name is replaced by a space.  Output is encoded as UTF-8.
    """
    dictionary['size'] = anySizeToBytes(dictionary['size'])

    fields = [
        dictionary["link"],
        dictionary["name"].replace("|", " "),
        str(dictionary["size"]),
        str(dictionary["seeds"]),
        str(dictionary["leech"]),
        dictionary["engine_url"],
    ]
    if 'desc_link' in dictionary:
        fields.append(dictionary["desc_link"])

    # fd 1 is stdout
    with open(1, 'w', encoding='utf-8', closefd=False) as utf8stdout:
        print("|".join(fields), file=utf8stdout)
|
|
||||||
|
|
||||||
|
|
||||||
def anySizeToBytes(size_string):
    """
    Convert a string like '1 KB' to '1024' (bytes)

    The number and the unit may be separated by whitespace ('1.5 GB') or
    written together ('10MB').  Only the first letter of the unit is used
    (K, M, G or T, case-insensitive, as powers of 1024); any other unit
    leaves the number unscaled.

    Returns the size as an int, or -1 when *size_string* cannot be parsed.
    """
    # separate integer from unit
    try:
        size, unit = size_string.split()
    except (AttributeError, TypeError, ValueError):
        # Not exactly "<number> <unit>": treat the letters as the unit and
        # cut that many characters off the end of the text.
        try:
            size = size_string.strip()
        except (AttributeError, TypeError):
            return -1
        unit = ''.join(c for c in size if c.isalpha())
        if unit:
            size = size[:-len(unit)]
    if not size:
        return -1
    try:
        size = float(size)
    except ValueError:
        # Text such as "N/A" is not a size.
        return -1

    # convert
    units_dict = {'T': 40, 'G': 30, 'M': 20, 'K': 10}
    if unit:
        short_unit = unit.upper()[0]
        if short_unit in units_dict:
            size = size * 2**units_dict[short_unit]
    try:
        return int(size)
    except (ValueError, OverflowError):
        # float('nan') / float('inf') cannot be converted to int.
        return -1
|
|
@ -1,547 +0,0 @@
|
|||||||
"""A parser for SGML, using the derived class as a static DTD."""
|
|
||||||
|
|
||||||
# XXX This only supports those SGML features used by HTML.
|
|
||||||
|
|
||||||
# XXX There should be a way to distinguish between PCDATA (parsed
|
|
||||||
# character data -- the normal case), RCDATA (replaceable character
|
|
||||||
# data -- only char and entity references and end tags are special)
|
|
||||||
# and CDATA (character data -- only end tags are special). RCDATA is
|
|
||||||
# not supported at all.
|
|
||||||
|
|
||||||
import _markupbase
|
|
||||||
import re
|
|
||||||
|
|
||||||
__all__ = ["SGMLParser", "SGMLParseError"]
|
|
||||||
|
|
||||||
# Regular expressions used for parsing
|
|
||||||
|
|
||||||
# Next character that needs special handling: start of a reference or a tag.
interesting = re.compile('[&<]')

# A reference or tag that may still be incomplete in the buffer.
incomplete = re.compile(
    '&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|'
    '<([a-zA-Z][^<>]*|'
    '/([a-zA-Z][^<>]*)?|'
    '![^<>]*)?'
)

# Named and decimal references, each including the character that ends them.
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
charref = re.compile('&#([0-9]+)[^0-9]')

# Openings of a start tag and of the SGML short tag form <tag/data/.
starttagopen = re.compile('<[>a-zA-Z]')
shorttagopen = re.compile('<[a-zA-Z][-.a-zA-Z0-9]*/')
shorttag = re.compile('<([a-zA-Z][-.a-zA-Z0-9]*)/([^/]*)/')

# Terminators and the pieces found inside a tag.
piclose = re.compile('>')
endbracket = re.compile('[<>]')
tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*')
# Group 1: attribute name, group 2: "=value" part, group 3: the value
# (single-quoted, double-quoted or bare).
attrfind = re.compile(
    r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*'
    r'(\'[^\']*\'|"[^"]*"|[][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?')
|
|
||||||
|
|
||||||
|
|
||||||
class SGMLParseError(RuntimeError):
    """Raised by the parser for every kind of parse error."""
|
|
||||||
|
|
||||||
|
|
||||||
# SGML parser base class -- find tags and call handler functions.
|
|
||||||
# Usage: p = SGMLParser(); p.feed(data); ...; p.close().
|
|
||||||
# The dtd is defined by deriving a class which defines methods
|
|
||||||
# with special names to handle tags: start_foo and end_foo to handle
|
|
||||||
# <foo> and </foo>, respectively, or do_foo to handle <foo> by itself.
|
|
||||||
# (Tags are converted to lower case for this purpose.) The data
|
|
||||||
# between tags is passed to the parser by calling self.handle_data()
|
|
||||||
# with some data as argument (the data may be split up in arbitrary
|
|
||||||
# chunks). Entity references are passed by calling
|
|
||||||
# self.handle_entityref() with the entity reference as argument.
|
|
||||||
|
|
||||||
class SGMLParser(_markupbase.ParserBase):
|
|
||||||
# Definition of entities -- derived classes may override
|
|
||||||
# Group 1: entity name, group 2: decimal character number,
# group 3: the optional terminating ";".
entity_or_charref = re.compile(
    '&(?:([a-zA-Z][-.a-zA-Z0-9]*)|#([0-9]+))(;?)')
|
|
||||||
|
|
||||||
def __init__(self, verbose=0):
|
|
||||||
"""Initialize and reset this instance."""
|
|
||||||
self.verbose = verbose
|
|
||||||
self.reset()
|
|
||||||
|
|
||||||
def reset(self):
    """Put the parser back into its initial state.

    Any data that was fed but not yet processed is discarded.
    """
    self.rawdata = ''
    self.stack = []
    self.lasttag = '???'
    self.nomoretags = 0
    self.literal = 0
    self.__starttag_text = None
    # Let the base class reset its own bookkeeping as well.
    _markupbase.ParserBase.reset(self)
|
|
||||||
|
|
||||||
def setnomoretags(self):
    """Switch to literal mode (CDATA) until end of input.

    Sets both the ``nomoretags`` and the ``literal`` flag.
    Intended for derived classes only.
    """
    self.nomoretags = 1
    self.literal = 1
|
|
||||||
|
|
||||||
def setliteral(self, *args):
    """Switch to literal mode (CDATA) by setting the ``literal`` flag.

    Extra positional arguments are accepted and ignored.
    Intended for derived classes only.
    """
    self.literal = 1
|
|
||||||
|
|
||||||
def feed(self, data):
    """Append *data* to the buffer and process what can be processed.

    May be called any number of times with chunks of any size (the text may
    contain '\n').  The text is only stored here; the actual work is done
    by goahead().
    """
    self.rawdata += data
    self.goahead(0)
|
|
||||||
|
|
||||||
def close(self):
    """Process whatever is still buffered, treating it as the end of input."""
    self.goahead(1)
|
|
||||||
|
|
||||||
def error(self, message):
    """Report a parse error by raising SGMLParseError with *message*."""
    raise SGMLParseError(message)
|
|
||||||
|
|
||||||
# Internal -- handle data as far as reasonable. May leave state
|
|
||||||
# and data to be processed by a subsequent call. If 'end' is
|
|
||||||
# true, force handling all data as if followed by EOF marker.
|
|
||||||
def goahead(self, end):
    """Process as much of the buffered ``self.rawdata`` as possible.

    Plain text is passed to handle_data(); tags, comments, processing
    instructions, declarations and entity/character references are
    dispatched to the matching parse_*/handle_* method.  Whatever cannot
    be processed yet is kept in ``self.rawdata`` for a later call.

    end -- if true, the unprocessed remainder is passed to handle_data()
           as if it were followed by an EOF marker.
    """
    rawdata = self.rawdata
    i = 0
    n = len(rawdata)
    while i < n:
        if self.nomoretags:
            # Everything that is left is plain data.
            self.handle_data(rawdata[i:n])
            i = n
            break
        # Emit the text up to the next '&' or '<' (or up to the end).
        match = interesting.search(rawdata, i)
        if match: j = match.start()
        else: j = n
        if i < j:
            self.handle_data(rawdata[i:j])
        i = j
        if i == n: break
        if rawdata[i] == '<':
            if starttagopen.match(rawdata, i):
                if self.literal:
                    # In literal mode a start tag is ordinary text.
                    self.handle_data(rawdata[i])
                    i = i+1
                    continue
                k = self.parse_starttag(i)
                # A negative result means the tag is not complete yet.
                if k < 0: break
                i = k
                continue
            if rawdata.startswith("</", i):
                k = self.parse_endtag(i)
                if k < 0: break
                i = k
                # An end tag leaves literal mode.
                self.literal = 0
                continue
            if self.literal:
                if n > (i + 1):
                    self.handle_data("<")
                    i = i+1
                else:
                    # incomplete
                    break
                continue
            if rawdata.startswith("<!--", i):
                # Strictly speaking, a comment is --.*--
                # within a declaration tag <!...>.
                # This should be removed,
                # and comments handled only in parse_declaration.
                k = self.parse_comment(i)
                if k < 0: break
                i = k
                continue
            if rawdata.startswith("<?", i):
                k = self.parse_pi(i)
                if k < 0: break
                # parse_pi() returns a length, not an index.
                i = i+k
                continue
            if rawdata.startswith("<!", i):
                # This is some sort of declaration; in "HTML as
                # deployed," this should only be the document type
                # declaration ("<!DOCTYPE html...>").
                k = self.parse_declaration(i)
                if k < 0: break
                i = k
                continue
        elif rawdata[i] == '&':
            if self.literal:
                self.handle_data(rawdata[i])
                i = i+1
                continue
            match = charref.match(rawdata, i)
            if match:
                name = match.group(1)
                self.handle_charref(name)
                i = match.end(0)
                # The pattern consumed one character after the digits; give
                # it back unless it was the terminating ';'.
                if rawdata[i-1] != ';': i = i-1
                continue
            match = entityref.match(rawdata, i)
            if match:
                name = match.group(1)
                self.handle_entityref(name)
                i = match.end(0)
                # Same as above: only a ';' terminator is swallowed.
                if rawdata[i-1] != ';': i = i-1
                continue
        else:
            self.error('neither < nor & ??')
        # We get here only if incomplete matches but
        # nothing else
        match = incomplete.match(rawdata, i)
        if not match:
            self.handle_data(rawdata[i])
            i = i+1
            continue
        j = match.end(0)
        if j == n:
            break # Really incomplete
        self.handle_data(rawdata[i:j])
        i = j
    # end while
    if end and i < n:
        self.handle_data(rawdata[i:n])
        i = n
    # Keep the unprocessed tail for the next call.
    self.rawdata = rawdata[i:]
    # XXX if end: check for empty stack
|
|
||||||
|
|
||||||
# Extensions for the DOCTYPE scanner:
|
|
||||||
_decl_otherchars = '='
|
|
||||||
|
|
||||||
# Internal -- parse processing instr, return length or -1 if not terminated
|
|
||||||
def parse_pi(self, i):
    """Handle the processing instruction that starts at ``rawdata[i]``.

    The text between "<?" and the next ">" is passed to handle_pi().

    Returns the number of characters consumed (from *i* up to and including
    the ">"), or -1 when no ">" is in the buffer yet.
    """
    rawdata = self.rawdata
    if rawdata[i:i+2] != '<?':
        self.error('unexpected call to parse_pi()')
    closing = piclose.search(rawdata, i+2)
    if not closing:
        return -1
    self.handle_pi(rawdata[i+2:closing.start(0)])
    return closing.end(0) - i
|
|
||||||
|
|
||||||
def get_starttag_text(self):
    """Return the start-tag text stored by parse_starttag(), or None.

    The value is None after reset() and at the beginning of every
    parse_starttag() call.
    """
    return self.__starttag_text
|
|
||||||
|
|
||||||
# Internal -- handle starttag, return length or -1 if not terminated
|
|
||||||
def parse_starttag(self, i):
    """Parse the start tag that begins at ``rawdata[i]``.

    The tag name and its attributes (a list of ``(name, value)`` pairs with
    lower-cased names) are handed to finish_starttag(); the SGML short form
    ``<tag/data/`` is handed to finish_shorttag() instead.

    Returns the index in ``rawdata`` just past the parsed tag (an index, not
    a length), or -1 when the tag is not terminated in the buffer yet.
    """
    self.__starttag_text = None
    start_pos = i
    rawdata = self.rawdata
    if shorttagopen.match(rawdata, i):
        # SGML shorthand: <tag/data/ == <tag>data</tag>
        # XXX Can data contain &... (entity or char refs)?
        # XXX Can data contain < or > (tag characters)?
        # XXX Can there be whitespace before the first /?
        match = shorttag.match(rawdata, i)
        if not match:
            return -1
        tag, data = match.group(1, 2)
        self.__starttag_text = '<%s/' % tag
        tag = tag.lower()
        k = match.end(0)
        self.finish_shorttag(tag, data)
        # Store the text as it appears in the input, up to the first '/'.
        self.__starttag_text = rawdata[start_pos:match.end(1) + 1]
        return k
    # XXX The following should skip matching quotes (' or ")
    # As a shortcut way to exit, this isn't so bad, but shouldn't
    # be used to locate the actual end of the start tag since the
    # < or > characters may be embedded in an attribute value.
    match = endbracket.search(rawdata, i+1)
    if not match:
        return -1
    j = match.start(0)
    # Now parse the data between i+1 and j into a tag and attrs
    attrs = []
    if rawdata[i:i+2] == '<>':
        # SGML shorthand: <> == <last open tag seen>
        k = j
        tag = self.lasttag
    else:
        match = tagfind.match(rawdata, i+1)
        if not match:
            self.error('unexpected call to parse_starttag')
        k = match.end(0)
        tag = rawdata[i+1:k].lower()
        self.lasttag = tag
    # Collect the attributes found between the tag name and the bracket.
    while k < j:
        match = attrfind.match(rawdata, k)
        if not match: break
        attrname, rest, attrvalue = match.group(1, 2, 3)
        if not rest:
            # An attribute without "=value" gets its own name as value.
            attrvalue = attrname
        else:
            if (attrvalue[:1] == "'" == attrvalue[-1:] or
                attrvalue[:1] == '"' == attrvalue[-1:]):
                # strip quotes
                attrvalue = attrvalue[1:-1]
            # Replace entity and character references inside the value.
            attrvalue = self.entity_or_charref.sub(
                self._convert_ref, attrvalue)
        attrs.append((attrname.lower(), attrvalue))
        k = match.end(0)
    # Only a closing '>' belongs to this tag; a '<' starts the next one.
    if rawdata[j] == '>':
        j = j+1
    self.__starttag_text = rawdata[start_pos:j]
    self.finish_starttag(tag, attrs)
    return j
|
|
||||||
|
|
||||||
# Internal -- convert entity or character reference
|
|
||||||
def _convert_ref(self, match):
|
|
||||||
if match.group(2):
|
|
||||||
return self.convert_charref(match.group(2)) or \
|
|
||||||
'&#%s%s' % match.groups()[1:]
|
|
||||||
elif match.group(3):
|
|
||||||
return self.convert_entityref(match.group(1)) or \
|
|
||||||
'&%s;' % match.group(1)
|
|
||||||
else:
|
|
||||||
return '&%s' % match.group(1)
|
|
||||||
|
|
||||||
# Internal -- parse endtag
|
|
||||||
def parse_endtag(self, i):
    """Parse the end tag that starts at ``self.rawdata[i]``.

    Looks for the first ``endbracket`` match after position ``i``
    (presumably the next '<' or '>' -- confirm against the pattern),
    lower-cases the stripped text between ``i + 2`` and that position and
    passes it to ``finish_endtag``.

    Returns the index where parsing should resume (just past a closing
    '>', otherwise the position of the match), or -1 when no match exists.
    """
    text = self.rawdata
    closer = endbracket.search(text, i + 1)
    if closer is None:
        return -1
    end = closer.start(0)
    name = text[i + 2:end].strip().lower()
    if text[end] == '>':
        # Consume the bracket only when it really closes this tag.
        end += 1
    self.finish_endtag(name)
    return end
|
|
||||||
|
|
||||||
# Internal -- finish parsing of <tag/data/ (same as <tag>data</tag>)
|
|
||||||
def finish_shorttag(self, tag, data):
    """Process the shorthand ``<tag/data/`` exactly like ``<tag>data</tag>``.

    Emits a start tag without attributes, the data, then the end tag.
    """
    no_attrs = []
    self.finish_starttag(tag, no_attrs)
    self.handle_data(data)
    self.finish_endtag(tag)
|
|
||||||
|
|
||||||
# Internal -- finish processing of start tag
|
|
||||||
# Return -1 for unknown tag, 0 for open-only tag, 1 for balanced tag
|
|
||||||
def finish_starttag(self, tag, attrs):
    """Dispatch a start tag to ``start_<tag>``, ``do_<tag>`` or the fallback.

    Returns 1 when a ``start_<tag>`` handler exists (the tag is pushed on
    ``self.stack`` first), 0 when only a ``do_<tag>`` handler exists (the
    stack is left alone) and -1 after calling ``unknown_starttag`` when
    neither handler is defined.
    """
    missing = object()  # sentinel so a handler attribute of any value counts

    handler = getattr(self, 'start_' + tag, missing)
    if handler is not missing:
        self.stack.append(tag)
        self.handle_starttag(tag, handler, attrs)
        return 1

    handler = getattr(self, 'do_' + tag, missing)
    if handler is not missing:
        self.handle_starttag(tag, handler, attrs)
        return 0

    self.unknown_starttag(tag, attrs)
    return -1
|
|
||||||
|
|
||||||
# Internal -- finish processing of end tag
|
|
||||||
def finish_endtag(self, tag):
    """Close ``tag`` and every element opened after it.

    An empty ``tag`` closes only the innermost open element.  A tag that
    is not on ``self.stack`` is reported through ``report_unbalanced``
    when an ``end_<tag>`` handler exists, otherwise through
    ``unknown_endtag``; the stack is left untouched in both cases.
    """
    if tag:
        if tag not in self.stack:
            if hasattr(self, 'end_' + tag):
                self.report_unbalanced(tag)
            else:
                self.unknown_endtag(tag)
            return
        # Unwind down to the most recently opened element with this name.
        depth = len(self.stack)
        found = max(pos for pos in range(depth) if self.stack[pos] == tag)
    else:
        found = len(self.stack) - 1
        if found < 0:
            self.unknown_endtag(tag)
            return

    while len(self.stack) > found:
        tag = self.stack[-1]
        closer = getattr(self, 'end_' + tag, None)
        if closer:
            self.handle_endtag(tag, closer)
        else:
            self.unknown_endtag(tag)
        del self.stack[-1]
|
|
||||||
|
|
||||||
# Overridable -- handle start tag
|
|
||||||
def handle_starttag(self, tag, method, attrs):
    """Invoke the start-tag handler ``method`` with ``attrs``.

    ``tag`` is not used here; the handler's return value is discarded.
    """
    handler = method
    handler(attrs)
|
|
||||||
|
|
||||||
# Overridable -- handle end tag
|
|
||||||
def handle_endtag(self, tag, method):
    """Invoke the end-tag handler ``method`` without arguments.

    ``tag`` is not used here; the handler's return value is discarded.
    """
    handler = method
    handler()
|
|
||||||
|
|
||||||
# Example -- report an unbalanced </...> tag.
|
|
||||||
def report_unbalanced(self, tag):
    """Print the unbalanced closing tag and the current tag stack.

    Nothing is printed unless ``self.verbose`` is truthy.
    """
    if not self.verbose:
        return
    print('*** Unbalanced </' + tag + '>')
    print('*** Stack:', self.stack)
|
|
||||||
|
|
||||||
def convert_charref(self, name):
    """Convert character reference, may be overridden.

    ``name`` is the decimal text of the reference.  Returns the result of
    ``convert_codepoint`` for values 0..127 and None for anything else,
    including text that is not an integer.
    """
    try:
        codepoint = int(name)
    except ValueError:
        return None
    if 0 <= codepoint <= 127:
        return self.convert_codepoint(codepoint)
    return None
|
|
||||||
|
|
||||||
def convert_codepoint(self, codepoint):
    """Return the one-character string for the integer ``codepoint``."""
    character = chr(codepoint)
    return character
|
|
||||||
|
|
||||||
def handle_charref(self, name):
    """Handle character reference, no need to override.

    The converted character is forwarded to ``handle_data``; a reference
    that ``convert_charref`` rejects (returns None) goes to
    ``unknown_charref`` instead.
    """
    converted = self.convert_charref(name)
    if converted is not None:
        self.handle_data(converted)
    else:
        self.unknown_charref(name)
|
|
||||||
|
|
||||||
# Definition of entities -- derived classes may override
|
|
||||||
entitydefs = \
|
|
||||||
{'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}
|
|
||||||
|
|
||||||
def convert_entityref(self, name):
    """Convert entity references.

    As an alternative to overriding this method; one can tailor the
    results by setting up the self.entitydefs mapping appropriately.

    Returns the mapped replacement, or None when ``name`` is not a key
    of ``self.entitydefs``.
    """
    return self.entitydefs.get(name)
|
|
||||||
|
|
||||||
def handle_entityref(self, name):
    """Handle entity references, no need to override.

    The converted text is forwarded to ``handle_data``; an entity that
    ``convert_entityref`` does not know (returns None) goes to
    ``unknown_entityref`` instead.
    """
    converted = self.convert_entityref(name)
    if converted is not None:
        self.handle_data(converted)
    else:
        self.unknown_entityref(name)
|
|
||||||
|
|
||||||
# Example -- handle data, should be overridden
|
|
||||||
def handle_data(self, data):
    """Receive a run of character data; does nothing by default."""
    return None

# Example -- handle comment, could be overridden
def handle_comment(self, data):
    """Receive the text of a comment; does nothing by default."""
    return None

# Example -- handle declaration, could be overridden
def handle_decl(self, decl):
    """Receive the text of a declaration; does nothing by default."""
    return None

# Example -- handle processing instruction, could be overridden
def handle_pi(self, data):
    """Receive the text of a processing instruction; does nothing by default."""
    return None
|
|
||||||
|
|
||||||
# To be overridden -- handlers for unknown objects
|
|
||||||
def unknown_starttag(self, tag, attrs):
    """Hook for a start tag without a handler; does nothing by default."""
    return None

def unknown_endtag(self, tag):
    """Hook for an end tag without a handler; does nothing by default."""
    return None

def unknown_charref(self, ref):
    """Hook for an unconvertible character reference; does nothing by default."""
    return None

def unknown_entityref(self, ref):
    """Hook for an unknown entity reference; does nothing by default."""
    return None
|
|
||||||
|
|
||||||
|
|
||||||
class TestSGMLParser(SGMLParser):
    """SGMLParser subclass that prints every parse event to stdout.

    Character data is accumulated in ``self.testdata`` and printed in
    chunks; every other event first flushes the pending data so that the
    output keeps document order.
    """

    def __init__(self, verbose=0):
        self.testdata = ""
        super().__init__(verbose)

    def handle_data(self, data):
        """Buffer ``data`` and flush once its repr reaches 70 characters."""
        self.testdata += data
        if len(repr(self.testdata)) >= 70:
            self.flush()

    def flush(self):
        """Print and clear the buffered character data, if there is any."""
        pending = self.testdata
        if not pending:
            return
        self.testdata = ""
        print('data:', repr(pending))

    def handle_comment(self, data):
        """Print the comment, eliding the middle of a long repr."""
        self.flush()
        shown = repr(data)
        if len(shown) > 68:
            shown = shown[:32] + '...' + shown[-32:]
        print('comment:', shown)

    def unknown_starttag(self, tag, attrs):
        """Print the start tag together with its attributes."""
        self.flush()
        if not attrs:
            print('start tag: <' + tag + '>')
            return
        print('start tag: <' + tag, end=' ')
        for name, value in attrs:
            print(name + '=' + '"' + value + '"', end=' ')
        print('>')

    def unknown_endtag(self, tag):
        """Print the end tag."""
        self.flush()
        print('end tag: </' + tag + '>')

    def unknown_entityref(self, ref):
        """Print a note about an unknown entity reference."""
        self.flush()
        print('*** unknown entity ref: &' + ref + ';')

    def unknown_charref(self, ref):
        """Print a note about an unknown character reference."""
        self.flush()
        print('*** unknown char ref: &#' + ref + ';')

    def unknown_decl(self, data):
        """Print a note about an unknown declaration."""
        self.flush()
        print('*** unknown decl: [' + data + ']')

    def close(self):
        """Finish parsing, then print whatever data is still buffered."""
        super().close()
        self.flush()
|
|
||||||
|
|
||||||
|
|
||||||
def test(args=None):
    """Parse a file with the test parser and print what it finds.

    ``args`` defaults to ``sys.argv[1:]``.  A leading ``-s`` selects the
    silent base ``SGMLParser`` instead of ``TestSGMLParser``.  The next
    argument names the input file (default ``test.html``); ``-`` reads
    standard input.  When the file cannot be opened the error is printed
    and the process exits with status 1.
    """
    import sys

    if args is None:
        args = sys.argv[1:]

    if args and args[0] == '-s':
        args = args[1:]
        klass = SGMLParser
    else:
        klass = TestSGMLParser

    file = args[0] if args else 'test.html'

    if file == '-':
        data = sys.stdin.read()
    else:
        try:
            f = open(file, 'r')
        except IOError as msg:
            print(file, ":", msg)
            sys.exit(1)
        # Close the handle even when read() fails (e.g. on a decode error).
        with f:
            data = f.read()

    x = klass()
    # Feed one character at a time to exercise the parser's buffering.
    for c in data:
        x.feed(c)
    x.close()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
test()
|
|
391
modules/socks.py
391
modules/socks.py
@ -1,391 +0,0 @@
|
|||||||
"""SocksiPy - Python SOCKS module.
|
|
||||||
Version 1.01
|
|
||||||
|
|
||||||
Copyright 2006 Dan-Haim. All rights reserved.
|
|
||||||
Various fixes by Christophe DUMEZ <chris@qbittorrent.org> - 2010
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without modification,
|
|
||||||
are permitted provided that the following conditions are met:
|
|
||||||
1. Redistributions of source code must retain the above copyright notice, this
|
|
||||||
list of conditions and the following disclaimer.
|
|
||||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
||||||
this list of conditions and the following disclaimer in the documentation
|
|
||||||
and/or other materials provided with the distribution.
|
|
||||||
3. Neither the name of Dan Haim nor the names of his contributors may be used
|
|
||||||
to endorse or promote products derived from this software without specific
|
|
||||||
prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY DAN HAIM "AS IS" AND ANY EXPRESS OR IMPLIED
|
|
||||||
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
|
||||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
|
||||||
EVENT SHALL DAN HAIM OR HIS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA
|
|
||||||
OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
|
||||||
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
|
|
||||||
This module provides a standard socket-like interface for Python
|
|
||||||
for tunneling connections through SOCKS proxies.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
import socket
|
|
||||||
import struct
|
|
||||||
|
|
||||||
PROXY_TYPE_SOCKS4 = 1
|
|
||||||
PROXY_TYPE_SOCKS5 = 2
|
|
||||||
PROXY_TYPE_HTTP = 3
|
|
||||||
|
|
||||||
_defaultproxy = None
|
|
||||||
_orgsocket = socket.socket
|
|
||||||
|
|
||||||
class ProxyError(Exception):
    """Base class of all proxy errors; keeps the payload in ``value``.

    ``str()`` of the exception is the ``repr`` of that payload.
    """

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)


class GeneralProxyError(ProxyError):
    """Proxy error that is not specific to one proxy protocol."""


class Socks5AuthError(ProxyError):
    """SOCKS5 authentication error."""


class Socks5Error(ProxyError):
    """SOCKS5 error."""


class Socks4Error(ProxyError):
    """SOCKS4 error."""


class HTTPError(ProxyError):
    """HTTP proxy error."""
|
|
||||||
|
|
||||||
_generalerrors = ("success",
|
|
||||||
"invalid data",
|
|
||||||
"not connected",
|
|
||||||
"not available",
|
|
||||||
"bad proxy type",
|
|
||||||
"bad input")
|
|
||||||
|
|
||||||
_socks5errors = ("succeeded",
|
|
||||||
"general SOCKS server failure",
|
|
||||||
"connection not allowed by ruleset",
|
|
||||||
"Network unreachable",
|
|
||||||
"Host unreachable",
|
|
||||||
"Connection refused",
|
|
||||||
"TTL expired",
|
|
||||||
"Command not supported",
|
|
||||||
"Address type not supported",
|
|
||||||
"Unknown error")
|
|
||||||
|
|
||||||
_socks5autherrors = ("succeeded",
|
|
||||||
"authentication is required",
|
|
||||||
"all offered authentication methods were rejected",
|
|
||||||
"unknown username or invalid password",
|
|
||||||
"unknown error")
|
|
||||||
|
|
||||||
_socks4errors = ("request granted",
|
|
||||||
"request rejected or failed",
|
|
||||||
"request rejected because SOCKS server cannot connect to identd on the client",
|
|
||||||
"request rejected because the client program and identd report different user-ids",
|
|
||||||
"unknown error")
|
|
||||||
|
|
||||||
def setdefaultproxy(proxytype=None, addr=None, port=None, rdns=True, username=None, password=None):
    """setdefaultproxy(proxytype, addr[, port[, rdns[, username[, password]]]])

    Store the given settings as the module-wide default proxy.  Every
    socksocket created afterwards starts with these settings unless they
    are changed explicitly on the socket.
    """
    global _defaultproxy
    settings = (proxytype, addr, port, rdns, username, password)
    _defaultproxy = settings
|
|
||||||
|
|
||||||
class socksocket(socket.socket):
    """socksocket([family[, type[, proto]]]) -> socket object

    Open a SOCKS enabled socket. The parameters are the same as
    those of the standard socket init. In order for SOCKS to work,
    you must specify family=AF_INET, type=SOCK_STREAM and proto=0.

    All protocol data is built and compared as bytes, which is what
    Python 3 sockets send and receive.
    """

    def __init__(self, family=socket.AF_INET, type=socket.SOCK_STREAM, proto=0, _sock=None):
        _orgsocket.__init__(self, family, type, proto, _sock)
        if _defaultproxy is not None:
            self.__proxy = _defaultproxy
        else:
            self.__proxy = (None, None, None, None, None, None)
        self.__proxysockname = None
        self.__proxypeername = None

    @staticmethod
    def __encode(text, encoding="utf-8"):
        """Return ``text`` as bytes, encoding it first when it is a str."""
        if isinstance(text, (bytes, bytearray)):
            return bytes(text)
        return text.encode(encoding)

    def __recvall(self, bytes):
        """__recvall(bytes) -> data
        Receive EXACTLY the number of bytes requested from the socket.
        Blocks until the required number of bytes have been received.
        Raises GeneralProxyError when the peer closes the connection early.
        """
        chunks = []
        remaining = bytes
        while remaining > 0:
            chunk = self.recv(remaining)
            if not chunk:
                raise GeneralProxyError("connection closed unexpectedly")
            chunks.append(chunk)
            remaining -= len(chunk)
        return b"".join(chunks)

    def setproxy(self, proxytype=None, addr=None, port=None, rdns=True, username=None, password=None):
        """setproxy(proxytype, addr[, port[, rdns[, username[, password]]]])
        Sets the proxy to be used.
        proxytype - The type of the proxy to be used. Three types
                are supported: PROXY_TYPE_SOCKS4 (including socks4a),
                PROXY_TYPE_SOCKS5 and PROXY_TYPE_HTTP
        addr -      The address of the server (IP or DNS).
        port -      The port of the server. Defaults to 1080 for SOCKS
                servers and 8080 for HTTP proxy servers.
        rdns -      Should DNS queries be performed on the remote side
                (rather than the local side). The default is True.
                Note: This has no effect with SOCKS4 servers.
        username -  Username to authenticate with to the server.
                The default is no authentication.
        password -  Password to authenticate with to the server.
                Only relevant when username is also provided.
        """
        self.__proxy = (proxytype, addr, port, rdns, username, password)

    def __negotiatesocks5(self, destaddr, destport):
        """__negotiatesocks5(self,destaddr,destport)
        Negotiates a connection through a SOCKS5 server.
        Raises GeneralProxyError on malformed replies, Socks5AuthError when
        authentication fails and Socks5Error when the server refuses the
        request; the socket is closed before any of these is raised.
        """
        username, password = self.__proxy[4], self.__proxy[5]
        use_auth = username is not None and password is not None
        # First we'll send the authentication methods we support.
        if use_auth:
            # "no authentication" and "username/password"
            self.sendall(b"\x05\x02\x00\x02")
        else:
            # "no authentication" only
            self.sendall(b"\x05\x01\x00")
        # The server answers with the method it selected.
        chosenauth = self.__recvall(2)
        if chosenauth[0:1] != b"\x05":
            self.close()
            raise GeneralProxyError((1, _generalerrors[1]))
        method = chosenauth[1:2]
        if method == b"\x00":
            # No authentication is required
            pass
        elif method == b"\x02" and use_auth:
            # Basic username/password authentication.
            user = self.__encode(username)
            secret = self.__encode(password)
            self.sendall(b"\x01" + bytes([len(user)]) + user
                         + bytes([len(secret)]) + secret)
            authstat = self.__recvall(2)
            if authstat[0:1] != b"\x01":
                # Bad response
                self.close()
                raise GeneralProxyError((1, _generalerrors[1]))
            if authstat[1:2] != b"\x00":
                # Authentication failed
                self.close()
                raise Socks5AuthError((3, _socks5autherrors[3]))
        else:
            # Either every offered method was rejected (0xFF) or the server
            # picked something we never offered.
            self.close()
            if method == b"\xFF":
                raise Socks5AuthError((2, _socks5autherrors[2]))
            raise GeneralProxyError((1, _generalerrors[1]))

        # Now we can request the actual connection
        req = b"\x05\x01\x00"
        # If the given destination address is an IP address, we'll
        # use the IPv4 address request even if remote resolving was specified.
        try:
            ipaddr = socket.inet_aton(destaddr)
        except socket.error:
            # Not an IP number, so it's probably a DNS name.
            if self.__proxy[3]:
                # Resolve remotely
                ipaddr = None
                host = self.__encode(destaddr, "idna")
                req += b"\x03" + bytes([len(host)]) + host
            else:
                # Resolve locally
                ipaddr = socket.inet_aton(socket.gethostbyname(destaddr))
                req += b"\x01" + ipaddr
        else:
            req += b"\x01" + ipaddr
        req += struct.pack(">H", destport)
        self.sendall(req)

        # Get the response
        resp = self.__recvall(4)
        if resp[0:1] != b"\x05":
            self.close()
            raise GeneralProxyError((1, _generalerrors[1]))
        status = resp[1]
        if status != 0:
            # Connection failed; report it with the SOCKS5 message table.
            self.close()
            if status <= 8:
                raise Socks5Error((status, _socks5errors[status]))
            raise Socks5Error((9, _socks5errors[9]))
        # Get the bound address/port
        addrtype = resp[3]
        if addrtype == 1:
            boundaddr = self.__recvall(4)
        elif addrtype == 3:
            length = self.__recvall(1)[0]
            boundaddr = self.__recvall(length)
        elif addrtype == 4:
            # IPv6 address: 16 octets
            boundaddr = self.__recvall(16)
        else:
            self.close()
            raise GeneralProxyError((1, _generalerrors[1]))
        boundport = struct.unpack(">H", self.__recvall(2))[0]
        self.__proxysockname = (boundaddr, boundport)
        if ipaddr is not None:
            self.__proxypeername = (socket.inet_ntoa(ipaddr), destport)
        else:
            self.__proxypeername = (destaddr, destport)

    def getproxysockname(self):
        """getproxysockname() -> address info
        Returns the bound IP address and port number at the proxy.
        """
        return self.__proxysockname

    def getproxypeername(self):
        """getproxypeername() -> address info
        Returns the IP and port number of the proxy.
        """
        return _orgsocket.getpeername(self)

    def getpeername(self):
        """getpeername() -> address info
        Returns the IP address and port number of the destination
        machine (note: getproxypeername returns the proxy)
        """
        return self.__proxypeername

    def __negotiatesocks4(self, destaddr, destport):
        """__negotiatesocks4(self,destaddr,destport)
        Negotiates a connection through a SOCKS4 server.
        Raises GeneralProxyError on a malformed reply and Socks4Error when
        the server rejects the request; the socket is closed first.
        """
        # Check if the destination address provided is an IP address
        rmtrslv = False
        try:
            ipaddr = socket.inet_aton(destaddr)
        except socket.error:
            # It's a DNS name. Check where it should be resolved.
            if self.__proxy[3]:
                # SOCKS4A marker address: 0.0.0.x with x != 0
                ipaddr = b"\x00\x00\x00\x01"
                rmtrslv = True
            else:
                ipaddr = socket.inet_aton(socket.gethostbyname(destaddr))
        # Construct the request packet
        req = b"\x04\x01" + struct.pack(">H", destport) + ipaddr
        # The username parameter is considered userid for SOCKS4
        if self.__proxy[4] is not None:
            req += self.__encode(self.__proxy[4])
        req += b"\x00"
        # DNS name if remote resolving is required
        # NOTE: This is actually an extension to the SOCKS4 protocol
        # called SOCKS4A and may not be supported in all cases.
        if rmtrslv:
            req += self.__encode(destaddr, "idna") + b"\x00"
        self.sendall(req)
        # Get the response from the server
        resp = self.__recvall(8)
        if resp[0:1] != b"\x00":
            # Bad data
            self.close()
            raise GeneralProxyError((1, _generalerrors[1]))
        code = resp[1]
        if code != 0x5A:
            # Server returned an error
            self.close()
            if code in (91, 92, 93):
                raise Socks4Error((code, _socks4errors[code - 90]))
            raise Socks4Error((94, _socks4errors[4]))
        # Get the bound address/port
        self.__proxysockname = (socket.inet_ntoa(resp[4:]),
                                struct.unpack(">H", resp[2:4])[0])
        if rmtrslv:
            # The proxy resolved the name, so the real IP is unknown here.
            self.__proxypeername = (destaddr, destport)
        else:
            self.__proxypeername = (socket.inet_ntoa(ipaddr), destport)

    def __negotiatehttp(self, destaddr, destport):
        """__negotiatehttp(self,destaddr,destport)
        Negotiates a connection through an HTTP server.
        Raises GeneralProxyError on a malformed or truncated reply and
        HTTPError when the status code is not 200.
        """
        # If we need to resolve locally, we do this now.  Only an explicit
        # false value selects local resolution (None keeps the name as is).
        if self.__proxy[3] == False:  # noqa: E712
            addr = socket.gethostbyname(destaddr)
        else:
            addr = destaddr
        self.sendall(b"CONNECT " + self.__encode(addr, "idna") + b":"
                     + str(destport).encode("ascii") + b" HTTP/1.1\r\n"
                     + b"Host: " + self.__encode(destaddr, "idna")
                     + b"\r\n\r\n")
        # Read byte by byte until the blank line so that nothing belonging
        # to the tunnelled stream is consumed; __recvall raises when the
        # proxy closes the connection instead of looping forever.
        resp = b""
        while b"\r\n\r\n" not in resp:
            resp += self.__recvall(1)
        # We just need the first line to check if the connection
        # was successful
        statusline = resp.splitlines()[0].decode("iso-8859-1").split(" ", 2)
        if statusline[0] not in ("HTTP/1.0", "HTTP/1.1"):
            self.close()
            raise GeneralProxyError((1, _generalerrors[1]))
        try:
            statuscode = int(statusline[1])
        except (ValueError, IndexError):
            self.close()
            raise GeneralProxyError((1, _generalerrors[1]))
        if statuscode != 200:
            self.close()
            # The reason phrase is optional.
            reason = statusline[2] if len(statusline) > 2 else ""
            raise HTTPError((statuscode, reason))
        self.__proxysockname = ("0.0.0.0", 0)
        self.__proxypeername = (addr, destport)

    def connect(self, destpair):
        """connect(self,destpair)
        Connects to the specified destination through a proxy.
        destpair - A tuple of the IP/DNS address and the port number.
        (identical to socket's connect).
        To select the proxy server use setproxy().
        Raises GeneralProxyError for a malformed destpair or an unknown
        proxy type.
        """
        # Do a minimal input check first
        if (not isinstance(destpair, (list, tuple)) or len(destpair) < 2
                or not isinstance(destpair[0], str)
                or not isinstance(destpair[1], int)):
            raise GeneralProxyError((5, _generalerrors[5]))

        proxytype, proxyaddr, proxyport = self.__proxy[0], self.__proxy[1], self.__proxy[2]
        if proxytype == PROXY_TYPE_SOCKS5:
            negotiate, defaultport = self.__negotiatesocks5, 1080
        elif proxytype == PROXY_TYPE_SOCKS4:
            negotiate, defaultport = self.__negotiatesocks4, 1080
        elif proxytype == PROXY_TYPE_HTTP:
            negotiate, defaultport = self.__negotiatehttp, 8080
        elif proxytype is None:
            # No proxy configured: plain direct connection.
            _orgsocket.connect(self, (destpair[0], destpair[1]))
            return
        else:
            raise GeneralProxyError((4, _generalerrors[4]))

        portnum = proxyport if proxyport is not None else defaultport
        _orgsocket.connect(self, (proxyaddr, portnum))
        negotiate(destpair[0], destpair[1])
|
|
Loading…
x
Reference in New Issue
Block a user