qBt_SE/engines/rutor.py

283 lines
9.9 KiB
Python
Raw Normal View History

2022-03-22 19:31:05 +05:00
# VERSION: 1.5
2020-03-12 20:29:28 +05:00
# AUTHORS: imDMG [imdmgg@gmail.com]
2020-03-12 20:32:54 +05:00
# Rutor.org search engine plugin for qBittorrent
2020-03-12 20:29:28 +05:00
import base64
import json
import logging
import re
import sys
2020-03-12 20:29:28 +05:00
import time
from concurrent.futures.thread import ThreadPoolExecutor
from dataclasses import dataclass, field
2020-03-12 20:29:28 +05:00
from html import unescape
2021-03-30 04:38:16 +05:00
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Optional, Union
2020-03-12 20:29:28 +05:00
from urllib.error import URLError, HTTPError
from urllib.parse import unquote
from urllib.request import build_opener, ProxyHandler
try:
from novaprinter import prettyPrinter
except ImportError:
sys.path.insert(0, str(Path(__file__).parent.parent.absolute()))
from novaprinter import prettyPrinter
2020-03-12 20:29:28 +05:00
2021-03-30 04:38:16 +05:00
# Paths derived from this plugin file: its directory, its name without the
# last three characters (the ".py" suffix), and the sibling settings and
# cookie files that share that base name.
FILE = Path(__file__)
BASEDIR = FILE.parent.absolute()
FILENAME = FILE.name[:-3]
FILE_J = BASEDIR / (FILENAME + ".json")
FILE_C = BASEDIR / (FILENAME + ".cookie")
2020-03-12 20:29:28 +05:00
2021-03-30 04:38:16 +05:00
# Page size used to work out how many result pages a query spans.
PAGES = 100


def rng(t: int) -> range:
    """Return the indexes of the result pages that follow page 0.

    :param t: total number of torrents reported for the query
    :return: ``range(1, ceil(t / PAGES))``; empty when every result fits
        on the first page
    """
    # Integer ceiling division without going through floats.
    page_count = (t + PAGES - 1) // PAGES
    return range(1, page_count)
2020-03-12 20:29:28 +05:00
2020-09-20 23:07:37 +05:00
# One row of the search-result table. Captured groups, in order:
# date cell, description path ("torrent/<id>/..."), torrent id, title,
# size, seeders, leechers.
RE_TORRENTS = re.compile(
    r'(?:gai|tum)"><td>(.+?)</td.+?href="/(torrent/(\d+).+?)">(.+?)</a.+?right"'
    r'>([.\d]+&nbsp;\w+)</td.+?alt="S"\s/>(.+?)</s.+?red">(.+?)</s',
    flags=re.DOTALL,
)
# Total number of hits announced on a results page (up to four digits).
RE_RESULTS = re.compile(
    r"</b>\sРезультатов\sпоиска\s(\d{1,4})\s",
    flags=re.DOTALL,
)
# Search URL template: base url, page index, category id, search string.
PATTERNS = ("%ssearch/%i/%i/000/0/%s",)
2020-03-12 20:29:28 +05:00
# Base64-encoded icon image; Config.__post_init__ decodes it and writes it
# next to this file as "<FILENAME>.ico".
ICON = ("AAABAAEAEBAAAAEAGABoAwAAFgAAACgAAAAQAAAAIAAAAAEAGAAAAAAAAAAAAAAAAAAAAA"
"AAAAAAAAAAAAAAAc4AAMwHNdcQ4vsN3fYS2fUY3fUe3fMj4fkk4fco4PYo5fgk7f5gp8Zu"
"ZZtsa59FIXZEGm4kh74PyeoLGp8NHK4PHrwQHr8VIb8XJL4bJrUcKJ8optEdtPMBGcQAIc"
"XeZAPVYwdA3MQFf8EDAJoFAMEEAM0AANIAAM4AAM0EAL8CAI8bXaEV1/cBHMsGDNTVWAOo"
"dTIU5/ELuOAJM6sEALsIAMoEALkCBbgFALUGAKshgMcvpNUTzOoFQNIFANqxQgBpkmgKue"
"8IT8UUy+8HO7MHPb8Gt+IG3vQHm9YKi84X4foKI7kRl+AWiMwSDYyxjXZAy84HdNYEALcP"
"guYM+vsL6PgGl/wBWN4K1/EF//8LbdQEALgEVc41zMp0YC+t0N0XxPcCIbwGAMkGGOUGUv"
"QKPPUEANsIU9ENvvAJw/ULnekGAr8FJcIUzfRycEZwzuMFnuYEArQCAdYDANYHAMQFAMwG"
"PcwM2vsHU/QKPegLwvYEEckFBrsOt/Y+kYky5/YGgNAGAKkHAc4JMssSoN0GTb0L2/gHYP"
"kCAPkFKOMP0fIHGc0EAKwLgNAq3OMd/P0Al9ACBqQCAMALbOMG+/8E8v0KjugBAO4CAPAG"
"Q9MNyPYEB8QBAKQCe8cW9//T+/09+/8Aqd8GIbIFAMAKbuUG6f8Ht/IFFeEAAMYPqeYMhO"
"EGB6oCgtUY5fuG0tv//vzs+PlQ9fwAw+4CLLoIALgJR+EFU+wEFcweZNAkquMFMrkArOor"
"4fSrxsvWx8n5/fv5+fn3+/iC8fsLzPIAUscEALMDAL8QPtAsetUFWsUHue1r7/vc6evOzM"
"fFx8n5/fvy+fj89vb/9/e+9/o44/oNi9kBD54CFKQJg9Qu4vu09vr/+ff89fTIz8rFx8n5"
"/fvy+fj59vb49vf/+fbh+vtk6vw1rN03suFn6vnl/f3/+fn49vj18/TIz8rFx8n5/fvy+f"
"j59vb39vf39/f//P3w+fme6/ak8Prv+fj//f369/r39vj18/TIz8rFx8ngBwAA4AMAAMAD"
"AADAAwAAwAMAAMABAACAAQAAgAEAAAAAAAAAAAAAgAEAAMADAADgBwAA+B8AAPw/AAD"
"+fwAA")
# Configure logging at DEBUG level with a compact timestamped format, then
# create the logger used throughout this module.
logging.basicConfig(
    level=logging.DEBUG,
    datefmt="%m-%d %H:%M",
    format="%(asctime)s %(name)-12s %(levelname)-8s %(message)s",
)

logger = logging.getLogger(__name__)
@dataclass
class Config:
    """Plugin settings, mirrored in the JSON file ``FILE_J``.

    Field names are snake_case on the object and camelCase in the JSON
    file. On construction the file is loaded and validated; see
    ``__post_init__``.
    """

    # username: str = "USERNAME"
    # password: str = "PASSWORD"
    torrent_date: bool = True
    # magnet: bool = False
    proxy: bool = False
    # dynamic_proxy: bool = True
    proxies: dict = field(default_factory=lambda: {"http": "", "https": ""})
    ua: str = ("Mozilla/5.0 (X11; Linux i686; rv:38.0) Gecko/20100101 "
               "Firefox/38.0 ")

    def __post_init__(self):
        """Load settings from ``FILE_J``, repairing the file when needed.

        Any failure (missing file, malformed JSON, schema mismatch) is
        logged; the current settings are then written back to ``FILE_J``
        and the bundled icon is written next to it.
        """
        try:
            loaded = json.loads(FILE_J.read_text())
            if not self._validate_json(loaded):
                raise ValueError("Incorrect json scheme.")
        except Exception as e:
            logger.error(e)
            FILE_J.write_text(self.to_str())
            icon_path = BASEDIR / f"{FILENAME}.ico"
            icon_path.write_bytes(base64.b64decode(ICON))

    def to_str(self) -> str:
        """Serialize the settings as indented JSON text."""
        return json.dumps(self.to_dict(), indent=4, sort_keys=False)

    def to_dict(self) -> dict:
        """Return the settings as a dict keyed by camelCase names."""
        return {self._to_camel(name): value
                for name, value in vars(self).items()}

    def _validate_json(self, obj: dict) -> bool:
        """Copy well-typed values from ``obj`` onto this instance.

        A value is accepted only when its exact type equals the type of
        the current value. For dict settings, entries whose type differs
        from the current entry are replaced in place by the current entry.

        :param obj: parsed JSON object with camelCase keys
        :return: ``True`` only if every setting and every dict entry had
            the expected type
        """
        ok = True
        for name, current in vars(self).items():
            candidate = obj.get(self._to_camel(name))
            if type(candidate) is type(current):
                if type(candidate) is dict:
                    for key, fallback in current.items():
                        if type(candidate.get(key)) is not type(fallback):
                            candidate[key] = fallback
                            ok = False
                setattr(self, name, candidate)
            else:
                ok = False
        return ok

    @staticmethod
    def _to_camel(s: str) -> str:
        """Convert a snake_case name to camelCase."""
        head, *tail = s.split("_")
        return head + "".join(part.title() for part in tail)
# Module-wide settings object. Constructing it runs Config.__post_init__,
# which reads the JSON settings file and rewrites it when it is unusable.
config = Config()
2020-03-12 20:29:28 +05:00
2021-03-30 04:38:16 +05:00
class Rutor:
    """Search-engine plugin that scrapes rutor search pages.

    Results and errors are both emitted through ``prettyPrinter``; a
    failure is reported as a synthetic result row (see ``pretty_error``).
    """

    name = "Rutor"
    url = "http://rutor.info/"
    # Same host with a "d." prefix, used for .torrent downloads.
    url_dl = url.replace("//", "//d.") + "download/"
    supported_categories = {"all": 0,
                            "movies": 1,
                            "tv": 6,
                            "music": 2,
                            "games": 8,
                            "anime": 10,
                            "software": 9,
                            "pictures": 3,
                            "books": 11}

    # error message
    error: Optional[str] = None
    # establish connection
    session = build_opener()

    # Month abbreviations as they appear in the date column, January first.
    _MONTHS = ("Янв", "Фев", "Мар", "Апр", "Май", "Июн",
               "Июл", "Авг", "Сен", "Окт", "Ноя", "Дек")

    def __init__(self):
        """Apply the proxy and user-agent settings from ``config``."""
        # add proxy handler if needed
        if config.proxy:
            if any(config.proxies.values()):
                self.session.add_handler(ProxyHandler(config.proxies))
                logger.debug("Proxy is set!")
            else:
                self.error = "Proxy enabled, but not set!"
        # change user-agent
        self.session.addheaders = [("User-Agent", config.ua)]

    def search(self, what: str, cat: str = "all") -> None:
        """Run a search and print every torrent found.

        :param what: search string; spaces are replaced with ``+``
        :param cat: key of ``supported_categories``
        """
        if self.error:
            self.pretty_error(what)
            return None
        cat_id = self.supported_categories[cat]
        term = what.replace(" ", "+")
        # The first request also tells us how many torrents exist in total
        # (it may well be the only request needed).
        t0 = time.time()
        total = self.searching(PATTERNS[0] % (self.url, 0, cat_id, term), True)
        if self.error:
            self.pretty_error(what)
            return None
        # Fetch the remaining pages concurrently.
        if total > PAGES:
            # Build each page URL from the template instead of patching the
            # first URL, so the search term can never be mistaken for the
            # page placeholder or for a str.format field.
            qrs = [PATTERNS[0] % (self.url, page, cat_id, term)
                   for page in rng(total)]
            # Leaving the ``with`` block waits for every request. Results
            # are not consumed, so an exception raised inside a worker is
            # not re-raised here.
            with ThreadPoolExecutor(len(qrs)) as executor:
                executor.map(self.searching, qrs)
            # Report (and thereby clear) a failure from any of the pages
            # rather than leaving a stale error on the instance.
            if self.error:
                self.pretty_error(what)
        logger.debug(f"--- {time.time() - t0} seconds ---")
        logger.info(f"Found torrents: {total}")

    def download_torrent(self, url: str) -> None:
        """Download ``url`` to a temporary .torrent file and print its path.

        On success prints ``"<file path> <url>"``; on failure emits an
        error row instead.
        """
        # Download url
        response = self._request(url)
        if self.error:
            self.pretty_error(url)
            return None
        # Create a torrent file
        with NamedTemporaryFile(suffix=".torrent", delete=False) as fd:
            fd.write(response)
        # Announce the path only once the file has been closed.
        logger.debug(fd.name + " " + url)
        print(fd.name + " " + url)

    def searching(self, query: str, first: bool = False) -> Optional[int]:
        """Fetch one result page and print the torrents it lists.

        :param query: full URL of the result page
        :param first: when true, also read the total hit count from the page
        :return: ``None`` if the request failed or the first page lacks the
            hit counter, the total hit count for the first page, ``-1`` for
            any other page
        """
        response = self._request(query)
        # Look at this request's own outcome: ``self.error`` is shared and
        # may have been set by another page running in parallel.
        if response is None:
            return None
        page, torrents_found = response.decode(), -1
        if first:
            # firstly we check if there is a result
            result = RE_RESULTS.search(page)
            if not result:
                self.error = "Unexpected page content"
                return None
            torrents_found = int(result[1])
            if not torrents_found:
                return 0
        self.draw(page)
        return torrents_found

    def draw(self, html: str) -> None:
        """Print every torrent row found in ``html``."""
        for tor in RE_TORRENTS.findall(html):
            torrent_date = ""
            if config.torrent_date:
                torrent_date = self._format_date(tor[0])

            prettyPrinter({
                "engine_url": self.url,
                "desc_link": self.url + tor[1],
                "name": torrent_date + unescape(tor[3]),
                "link": self.url_dl + tor[2],
                "size": unescape(tor[4]),
                "seeds": unescape(tor[5]),
                "leech": unescape(tor[6])
            })

    @classmethod
    def _format_date(cls, raw: str) -> str:
        """Turn a date cell into a ``"[yy.mm.dd] "`` name prefix.

        :param raw: date cell text, e.g. ``"12&nbsp;Мар&nbsp;20"``
        :return: the prefix, or ``""`` when the cell contains no known month
            abbreviation or is not a valid day/month/year date, so one odd
            row cannot abort the whole page
        """
        for number, month in enumerate(cls._MONTHS, 1):
            if month not in raw:
                continue
            # Replace the month name with its two-digit number.
            numeric = unescape(raw.replace(month, f"{number:02d}"))
            try:
                parsed = time.strptime(numeric, "%d %m %y")
            except ValueError:
                return ""
            return f"[{time.strftime('%y.%m.%d', parsed)}] "
        return ""

    def _request(
            self, url: str, data: Optional[bytes] = None, repeated: bool = False
    ) -> Optional[bytes]:
        """Open ``url`` and return the response body.

        A timed-out request is retried once. Every other failure stores a
        message in ``self.error``.

        :param url: address to open
        :param data: optional request body passed to the opener
        :param repeated: true when this call is already the retry
        :return: response bytes, or ``None`` on failure
        """
        try:
            with self.session.open(url, data, 5) as r:
                # checking that tracker isn't blocked
                if r.geturl().startswith((self.url, self.url_dl)):
                    return r.read()
                self.error = f"{url} is blocked. Try another proxy."
        except OSError as err:
            # URLError and HTTPError derive from OSError. Catching OSError
            # also covers failures while reading the body (a bare timeout
            # or connection reset), which carry no ``reason`` attribute.
            reason = getattr(err, "reason", err)
            logger.error(reason)
            error = str(reason)
            if "timed out" in error and not repeated:
                logger.debug("Repeating request...")
                return self._request(url, data, True)
            if "no host given" in error:
                self.error = "Proxy is bad, try another!"
            elif hasattr(err, "code"):
                self.error = f"Request to {url} failed with status: {err.code}"
            else:
                self.error = f"{url} is not response! Maybe it is blocked."

        return None

    def pretty_error(self, what: str) -> None:
        """Emit the pending error as a fake result row and clear it."""
        prettyPrinter({"engine_url": self.url,
                       "desc_link": "https://github.com/imDMG/qBt_SE",
                       "name": f"[{unquote(what)}][Error]: {self.error}",
                       "link": self.url + "error",
                       "size": "1 TB",  # lol
                       "seeds": 100,
                       "leech": 100})

        self.error = None
2021-03-30 04:38:16 +05:00
# Lowercase alias for the PEP 8-named class; the __main__ block of this
# file instantiates the engine through it.
# NOTE(review): presumably the plugin host looks the engine class up by the
# lowercase module name -- confirm.
rutor = Rutor
2020-03-12 20:29:28 +05:00
# Manual run: open the optional settings GUI when its module sits in the
# parent directory, then perform a sample search.
if __name__ == "__main__":
    gui_module = BASEDIR.parent / "settings_gui.py"
    if gui_module.exists():
        from settings_gui import EngineSettingsGUI

        EngineSettingsGUI(FILENAME)

    engine = rutor()
    engine.search("doctor")