qBt_SE/engines/rutor.py

286 lines
10 KiB
Python
Raw Normal View History

2023-09-02 21:18:42 +05:00
# VERSION: 1.7
2020-03-12 20:29:28 +05:00
# AUTHORS: imDMG [imdmgg@gmail.com]
2020-03-12 20:32:54 +05:00
# Rutor.org search engine plugin for qBittorrent
2020-03-12 20:29:28 +05:00
import base64
import json
import logging
import re
import sys
2020-03-12 20:29:28 +05:00
import time
from concurrent.futures.thread import ThreadPoolExecutor
from dataclasses import dataclass, field
2020-03-12 20:29:28 +05:00
from html import unescape
2021-03-30 04:38:16 +05:00
from pathlib import Path
from tempfile import NamedTemporaryFile
2023-09-02 21:18:42 +05:00
from typing import Callable
2020-03-12 20:29:28 +05:00
from urllib.error import URLError, HTTPError
from urllib.parse import unquote
from urllib.request import build_opener, ProxyHandler
try:
from novaprinter import prettyPrinter
except ImportError:
sys.path.insert(0, str(Path(__file__).parent.parent.absolute()))
from novaprinter import prettyPrinter
2020-03-12 20:29:28 +05:00
2021-03-30 04:38:16 +05:00
# Filesystem locations derived from this plugin file. The JSON settings file
# and the cookie file live next to the plugin and share its base name.
FILE = Path(__file__)
BASEDIR = FILE.parent.absolute()
FILENAME = FILE.stem
FILE_J = BASEDIR.joinpath(FILENAME + ".json")
FILE_C = BASEDIR.joinpath(FILENAME + ".cookie")
2020-03-12 20:29:28 +05:00
2020-09-20 23:07:37 +05:00
# One row of the results table. Groups, in order: date cell, magnet link,
# relative description path, torrent id, title, size, seeders, leechers.
RE_TORRENTS = re.compile(
    r'(?:gai|tum)"><td>(.+?)</td.+?href="(magnet:.+?)".+?href="/'
    r'(torrent/(\d+).+?)">(.+?)</a.+?right">([.\d]+?&nbsp;\w+?)</td.+?alt="S"\s'
    r'/>(.+?)</s.+?red">(.+?)</s',
    flags=re.DOTALL,
)

# Total number of hits announced in the page header (1 to 4 digits).
RE_RESULTS = re.compile(
    r"</b>\sРезультатов\sпоиска\s(\d{1,4})\s",
    flags=re.DOTALL,
)

# Search URL template, filled with: base url, page index, category id, query.
PATTERNS = ("%ssearch/%i/%i/000/0/%s",)

# Page size used by the pager: more hits than this trigger extra requests.
PAGES = 100
2020-03-12 20:29:28 +05:00
# Base64-encoded icon; Config.__post_init__ decodes it and writes it next to
# the plugin as "<FILENAME>.ico".
ICON = ("AAABAAEAEBAAAAEAGABoAwAAFgAAACgAAAAQAAAAIAAAAAEAGAAAAAAAAAAAAAAAAAAAAA"
"AAAAAAAAAAAAAAAc4AAMwHNdcQ4vsN3fYS2fUY3fUe3fMj4fkk4fco4PYo5fgk7f5gp8Zu"
"ZZtsa59FIXZEGm4kh74PyeoLGp8NHK4PHrwQHr8VIb8XJL4bJrUcKJ8optEdtPMBGcQAIc"
"XeZAPVYwdA3MQFf8EDAJoFAMEEAM0AANIAAM4AAM0EAL8CAI8bXaEV1/cBHMsGDNTVWAOo"
"dTIU5/ELuOAJM6sEALsIAMoEALkCBbgFALUGAKshgMcvpNUTzOoFQNIFANqxQgBpkmgKue"
"8IT8UUy+8HO7MHPb8Gt+IG3vQHm9YKi84X4foKI7kRl+AWiMwSDYyxjXZAy84HdNYEALcP"
"guYM+vsL6PgGl/wBWN4K1/EF//8LbdQEALgEVc41zMp0YC+t0N0XxPcCIbwGAMkGGOUGUv"
"QKPPUEANsIU9ENvvAJw/ULnekGAr8FJcIUzfRycEZwzuMFnuYEArQCAdYDANYHAMQFAMwG"
"PcwM2vsHU/QKPegLwvYEEckFBrsOt/Y+kYky5/YGgNAGAKkHAc4JMssSoN0GTb0L2/gHYP"
"kCAPkFKOMP0fIHGc0EAKwLgNAq3OMd/P0Al9ACBqQCAMALbOMG+/8E8v0KjugBAO4CAPAG"
"Q9MNyPYEB8QBAKQCe8cW9//T+/09+/8Aqd8GIbIFAMAKbuUG6f8Ht/IFFeEAAMYPqeYMhO"
"EGB6oCgtUY5fuG0tv//vzs+PlQ9fwAw+4CLLoIALgJR+EFU+wEFcweZNAkquMFMrkArOor"
"4fSrxsvWx8n5/fv5+fn3+/iC8fsLzPIAUscEALMDAL8QPtAsetUFWsUHue1r7/vc6evOzM"
"fFx8n5/fvy+fj89vb/9/e+9/o44/oNi9kBD54CFKQJg9Qu4vu09vr/+ff89fTIz8rFx8n5"
"/fvy+fj59vb49vf/+fbh+vtk6vw1rN03suFn6vnl/f3/+fn49vj18/TIz8rFx8n5/fvy+f"
"j59vb39vf39/f//P3w+fme6/ak8Prv+fj//f369/r39vj18/TIz8rFx8ngBwAA4AMAAMAD"
"AADAAwAAwAMAAMABAACAAQAAgAEAAAAAAAAAAAAAgAEAAMADAADgBwAA+B8AAPw/AAD"
"+fwAA")
# Configure the root logger at import time (DEBUG and above, short timestamp),
# then create the logger used throughout this module.
logging.basicConfig(
    level=logging.DEBUG,
    datefmt="%m-%d %H:%M",
    format="%(asctime)s %(name)-12s %(levelname)-8s %(message)s",
)

logger = logging.getLogger(__name__)
2023-09-02 21:18:42 +05:00
def rng(t: int) -> range:
    """Return the page indexes that remain to be fetched for ``t`` results.

    The range starts at 1 (page 0 is requested separately by the caller) and
    stops before the ceiling of ``t / PAGES``.
    """
    full_pages, remainder = divmod(t, PAGES)
    return range(1, full_pages + (1 if remainder else 0))
class EngineError(Exception):
    """Plugin failure whose message is safe to show to the user.

    ``Rutor._catch_errors`` forwards the text of this exception to
    ``pretty_error`` verbatim, whereas any other exception is reported with a
    generic message and logged.
    """
@dataclass
class Config:
    """Plugin settings, kept in sync with the JSON file at ``FILE_J``.

    Field names are snake_case in Python and camelCase in the JSON file.
    Creating an instance loads the stored values; when the file is missing,
    unreadable or does not match the expected scheme it is rewritten from the
    values currently held by the instance.
    """

    # username: str = "USERNAME"
    # password: str = "PASSWORD"
    torrent_date: bool = True
    magnet: bool = False
    proxy: bool = False
    # dynamic_proxy: bool = True
    proxies: dict = field(default_factory=lambda: {"http": "", "https": ""})
    ua: str = ("Mozilla/5.0 (X11; Linux i686; rv:38.0) Gecko/20100101 "
               "Firefox/38.0 ")

    def __post_init__(self):
        """Load settings from ``FILE_J``; rewrite the file if that fails."""
        try:
            stored = json.loads(FILE_J.read_text())
            if not self._validate_json(stored):
                raise ValueError("Incorrect json scheme.")
        except Exception as e:
            logger.error(e)
            # Persist defaults merged with whatever entries were valid.
            FILE_J.write_text(self.to_str())
            # NOTE(review): indentation was lost in this copy of the file; the
            # icon write is assumed to belong to this recovery branch - confirm.
            icon_path = BASEDIR / f"{FILENAME}.ico"
            icon_path.write_bytes(base64.b64decode(ICON))

    def to_str(self) -> str:
        """Serialize the settings as indented JSON, keeping field order."""
        payload = self.to_dict()
        return json.dumps(payload, indent=4, sort_keys=False)

    def to_dict(self) -> dict:
        """Return the instance attributes keyed by their camelCase names."""
        result = {}
        for attr, value in vars(self).items():
            result[self._to_camel(attr)] = value
        return result

    def _validate_json(self, obj: dict) -> bool:
        """Copy well-typed entries of ``obj`` onto the instance.

        An entry is accepted only when its type is exactly the type of the
        current attribute value. For dict attributes, nested keys that are
        missing or wrongly typed are filled in from the current value.

        Returns:
            True when every attribute was present with the right type and no
            nested key had to be repaired, False otherwise.
        """
        valid = True
        for attr, current in vars(self).items():
            loaded = obj.get(self._to_camel(attr))
            if type(loaded) is not type(current):
                valid = False
                continue
            if type(loaded) is dict:
                for key, fallback in current.items():
                    if type(loaded.get(key)) is type(fallback):
                        continue
                    loaded[key] = fallback
                    valid = False
            setattr(self, attr, loaded)
        return valid

    @staticmethod
    def _to_camel(s: str) -> str:
        """Convert ``snake_case`` to ``camelCase`` (first chunk left as is)."""
        head, *tail = s.split("_")
        return head + "".join(chunk.title() for chunk in tail)
# Module-wide settings instance; constructing it runs Config.__post_init__,
# which reads the JSON settings file or rewrites it when it is unusable.
config = Config()
2020-03-12 20:29:28 +05:00
2021-03-30 04:38:16 +05:00
class Rutor:
    """Search-engine plugin for the Rutor tracker.

    ``search`` and ``download_torrent`` are the public entry points. Both run
    through ``_catch_errors`` so that failures are reported as a result row
    (via ``pretty_error``) instead of propagating to the caller.
    """

    name = "Rutor"
    url = "http://rutor.info/"
    url_dl = url.replace("//", "//d.") + "download/"
    supported_categories = {"all": 0,
                            "movies": 1,
                            "tv": 6,
                            "music": 2,
                            "games": 8,
                            "anime": 10,
                            "software": 9,
                            "pictures": 3,
                            "books": 11}

    # establish connection (one opener shared by every instance)
    session = build_opener()

    # Russian month abbreviations used in the listing's date column,
    # January first; the position (1-based) is the month number.
    _MONTHS = ("Янв", "Фев", "Мар", "Апр", "Май", "Июн",
               "Июл", "Авг", "Сен", "Окт", "Ноя", "Дек")

    def search(self, what: str, cat: str = "all") -> None:
        """Search for ``what`` in category ``cat`` and print the results."""
        self._catch_errors(self._search, what, cat)

    def download_torrent(self, url: str) -> None:
        """Download the torrent at ``url`` and print "<temp path> <url>"."""
        self._catch_errors(self._download_torrent, url)

    def searching(self, query: str, first: bool = False) -> int:
        """Fetch one result page and print its rows.

        Args:
            query: full URL of the result page.
            first: when True, also read the total hit count from the page.

        Returns:
            The total hit count for the first page (0 when nothing was
            found, in which case nothing is printed), otherwise -1.

        Raises:
            EngineError: the first page does not contain the hit counter, or
                the request itself failed.
        """
        page, torrents_found = self._request(query).decode(), -1
        if first:
            # firstly we check if there is a result
            counter = RE_RESULTS.search(page)
            if counter is None:
                raise EngineError("Unexpected page content")
            torrents_found = int(counter[1])
            if torrents_found <= 0:
                return 0
        self.draw(page)
        return torrents_found

    def draw(self, html: str) -> None:
        """Print every torrent row found in ``html`` through ``prettyPrinter``.

        The name is prefixed with the upload date when ``config.torrent_date``
        is set; the link is the magnet URI when ``config.magnet`` is set and
        the direct download URL otherwise.
        """
        for tor in RE_TORRENTS.findall(html):
            torrent_date = ""
            if config.torrent_date:
                torrent_date = self._format_date(tor[0])

            prettyPrinter({
                "engine_url": self.url,
                "desc_link": self.url + tor[2],
                "name": torrent_date + unescape(tor[4]),
                "link": tor[1] if config.magnet else self.url_dl + tor[3],
                "size": unescape(tor[5]),
                "seeds": unescape(tor[6]),
                "leech": unescape(tor[7])
            })

    def _format_date(self, raw: str) -> str:
        """Convert a listing date cell into a ``"[yy.mm.dd] "`` name prefix.

        The cell is expected to hold day, Russian month abbreviation and
        two-digit year separated by whitespace. An unrecognised cell yields
        an empty prefix, so one odd row cannot abort the whole page.
        """
        for number, month in enumerate(self._MONTHS, 1):
            if month in raw:
                # replace names month
                numeric = unescape(raw.replace(month, f"{number:02d}"))
                break
        else:
            logger.debug("Unknown month in torrent date: %r", raw)
            return ""
        try:
            parsed = time.strptime(numeric, "%d %m %y")
        except ValueError:
            logger.debug("Unparsable torrent date: %r", raw)
            return ""
        return f"[{time.strftime('%y.%m.%d', parsed)}] "

    def _catch_errors(self, handler: Callable, *args: str):
        """Prepare the session, run ``handler(*args)`` and report failures.

        ``EngineError`` messages are shown to the user as they are; any other
        exception is logged and replaced with a generic message. ``args[0]``
        (the search term or URL) labels the error row.
        """
        try:
            self._init()
            handler(*args)
        except EngineError as ex:
            self.pretty_error(args[0], str(ex))
        except Exception as ex:
            self.pretty_error(args[0], "Unexpected error, please check logs")
            logger.exception(ex)

    def _init(self) -> None:
        """Apply proxy and User-Agent settings from ``config`` to the session.

        Raises:
            EngineError: the proxy is enabled but no proxy address is set.
        """
        # add proxy handler if needed
        if config.proxy:
            if not any(config.proxies.values()):
                raise EngineError("Proxy enabled, but not set!")
            # NOTE(review): ``session`` is shared at class level and this
            # runs on every call, so handlers accumulate - confirm intended.
            self.session.add_handler(ProxyHandler(config.proxies))
            logger.debug("Proxy is set!")

        # change user-agent
        self.session.addheaders = [("User-Agent", config.ua)]

    def _search(self, what: str, cat: str = "all") -> None:
        """Fetch page 0, then fetch any remaining pages concurrently.

        Raises:
            KeyError: ``cat`` is not in ``supported_categories``.
            EngineError: a page request failed or the first page is malformed.
        """
        category = self.supported_categories[cat]
        term = what.replace(" ", "+")

        def page_url(page: int) -> str:
            # Build each URL from the template instead of patching the
            # first URL, so the search term is never re-interpreted.
            return PATTERNS[0] % (self.url, page, category, term)

        # make first request (maybe it enough)
        t0 = time.time()
        total = self.searching(page_url(0), True)

        # do async requests
        if total > PAGES:
            qrs = [page_url(x) for x in rng(total)]
            with ThreadPoolExecutor(len(qrs)) as executor:
                # The iterator has to be consumed: worker exceptions and
                # the timeout are only raised when results are retrieved.
                for _ in executor.map(self.searching, qrs, timeout=30):
                    pass

        logger.debug(f"--- {time.time() - t0} seconds ---")
        logger.info(f"Found torrents: {total}")

    def _download_torrent(self, url: str) -> None:
        """Save the torrent at ``url`` to a temp file and print its path."""
        # Download url
        response = self._request(url)

        # Create a torrent file
        with NamedTemporaryFile(suffix=".torrent", delete=False) as fd:
            fd.write(response)

        # return file path (reported once the file is closed and flushed)
        logger.debug(fd.name + " " + url)
        print(fd.name + " " + url)

    def _request(
            self, url: str, data: bytes = None, repeated: bool = False
    ) -> bytes:
        """Open ``url`` with the shared session and return the response body.

        A request that times out is retried once (``repeated`` marks the
        retry).

        Raises:
            EngineError: the final URL is outside the tracker's hosts, or the
                request failed.
        """
        try:
            with self.session.open(url, data, 5) as r:
                # checking that tracker isn't blocked
                if r.geturl().startswith((self.url, self.url_dl)):
                    return r.read()
                raise EngineError(f"{url} is blocked. Try another proxy.")
        except (URLError, HTTPError) as err:
            error = str(err.reason)
            reason = f"{url} is not response! Maybe it is blocked."
            if "timed out" in error and not repeated:
                logger.debug("Request timed out. Repeating...")
                return self._request(url, data, True)
            if "no host given" in error:
                reason = "Proxy is bad, try another!"
            elif hasattr(err, "code"):
                reason = f"Request to {url} failed with status: {err.code}"

            # Keep the original network error attached for the logs.
            raise EngineError(reason) from err

    def pretty_error(self, what: str, error: str) -> None:
        """Print ``error`` as a fake result row so it shows up in the list."""
        prettyPrinter({
            "engine_url": self.url,
            "desc_link": "https://github.com/imDMG/qBt_SE",
            "name": f"[{unquote(what)}][Error]: {error}",
            "link": self.url + "error",
            "size": "1 TB",  # lol
            "seeds": 100,
            "leech": 100
        })
2020-03-12 20:29:28 +05:00
2021-03-30 04:38:16 +05:00
# The class itself is named in PascalCase (PEP 8); this lowercase alias is
# what the __main__ block below instantiates.
# NOTE(review): presumably also the name qBittorrent looks up - confirm.
rutor = Rutor
2020-03-12 20:29:28 +05:00
if __name__ == "__main__":
    # Manual run: open the settings GUI when its script is present in the
    # parent directory, then perform a sample search.
    gui_script = BASEDIR.parent / "settings_gui.py"
    if gui_script.exists():
        from settings_gui import EngineSettingsGUI

        EngineSettingsGUI(FILENAME)

    engine = rutor()
    engine.search("doctor")