mirror of https://github.com/YGGverse/qBt_SE.git
v2: regex, threading - speeding up
This commit is contained in:
parent f81e761095
commit 66270f4ee4
kinozal.py
@@ -87,7 +87,7 @@ class kinozal(object):
         for tor in torrents:
             torrent = {"engine_url": self.url,
-                       "desc_link": tor[0],
+                       "desc_link": self.url + tor[0],
                        "name": tor[1],
                        "link": 'http://dl.kinozal.tv/download.php?id=' + tor[0].split('=')[1],
                        "size": self.units_convert(tor[2]),
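The only change in this hunk makes desc_link absolute by prepending the tracker base URL. A minimal sketch of the same idea, assuming a relative path such as the hypothetical 'details.php?id=12345'; urllib.parse.urljoin gives the same result and also tolerates links that are already absolute:

from urllib.parse import urljoin

base_url = 'http://kinozal.tv/'          # assumed tracker base, stands in for self.url
rel_link = 'details.php?id=12345'        # hypothetical relative link captured by the parser
desc_link = urljoin(base_url, rel_link)  # -> 'http://kinozal.tv/details.php?id=12345'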
nnmclub.py
@@ -1,19 +1,21 @@
-# VERSION: 1.2
+# VERSION: 2.0
 # AUTHORS: imDMG [imdmgg@gmail.com]
 
 # NoNaMe-Club search engine plugin for qBittorrent
 
-import tempfile
-import os
-import logging
 import json
+import logging
+import math
+import os
+import re
+import tempfile
+import threading
+import time
 
 from urllib.request import build_opener, HTTPCookieProcessor, ProxyHandler
 from urllib.parse import urlencode # , parse_qs
 from urllib.error import URLError, HTTPError
 from http.cookiejar import Cookie, CookieJar
 from html.parser import HTMLParser
 from novaprinter import prettyPrinter
 
 # setup logging into qBittorrent/logs
@@ -43,7 +45,7 @@ class nnmclub(object):
         try:
             # try to load user data from file
             with open(os.path.abspath(os.path.join(os.path.dirname(__file__), 'nnmclub.json'))) as f:
-                config = json.load(f)
+                config: dict = json.load(f)
         except OSError as e:
             # file not found
             logging.error(e)
@@ -73,14 +75,11 @@ class nnmclub(object):
 
         response = self._catch_error_request(self.url + 'login.php')
         if not self.blocked:
-            parser = self.WorstParser(self.url, True)
-            parser.feed(response.read().decode('cp1251'))
-            parser.close()
-
+            code = re.search(r'code"\svalue="(.+?)"', response.read().decode('cp1251'))[1]
             form_data = {"username": self.config['username'],
                          "password": self.config['password'],
                          "autologin": "on",
-                         "code": parser.login_code,
+                         "code": code,
                          "login": "Вход"}
             # so we first encode keys to cp1251 then do default decode whole string
             data_encoded = urlencode({k: v.encode('cp1251') for k, v in form_data.items()}).encode()
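Note: the login flow no longer instantiates WorstParser just to grab the hidden "code" input; a single regex pulls it out of the page, and the form values are byte-encoded to cp1251 before urlencode because the tracker expects that charset. A standalone sketch of the same two steps, with a hypothetical HTML fragment standing in for the real login page:

import re
from urllib.parse import urlencode

# hypothetical fragment of the login form
html = '<input type="hidden" name="code" value="abc123">'
code = re.search(r'code"\svalue="(.+?)"', html)[1]   # -> 'abc123'

form_data = {"username": "user", "password": "pass", "code": code, "login": "Вход"}
# values are byte-encoded to cp1251 before urlencode, since the site expects that charset
data_encoded = urlencode({k: v.encode('cp1251') for k, v in form_data.items()}).encode()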
@@ -92,122 +91,22 @@ class nnmclub(object):
         else:
             logging.info('We successfully authorized')
 
-    class WorstParser(HTMLParser):
-        def __init__(self, url='', login=False):
-            HTMLParser.__init__(self)
-            self.url = url
-            self.login = login
-            self.torrent = {'link': '',
-                            'name': '',
-                            'size': '',
-                            'seeds': '',
-                            'leech': '',
-                            'desc_link': '', }
+    def draw(self, html: str):
+        torrents = re.findall(r'd\stopic.+?href="(.+?)".+?<b>(.+?)</b>.+?href="(d.+?)"'
+                              r'.+?/u>\s(.+?)<.+?b>(\d+)</.+?b>(\d+)<', html, re.S)
 
-            self.login_code = None
+        for tor in torrents:
+            torrent = {"engine_url": self.url,
+                       "desc_link": self.url + tor[0],
+                       "name": tor[1],
+                       "link": self.url + tor[2],
+                       "size": tor[3].replace(',', '.'),
+                       "seeds": tor[4],
+                       "leech": tor[5]}
 
-            # we need a page markup to know when stop and collect data,
-            # because available methods, in this class, do not communicate each other
-            # as a result, we make markup to transfer information
-            # from one method to another, along a chain
-            #
-            # markup on result table
-            self.result_table = False  # table with results is found
-            self.torrent_row = False  # found torrent row for collect data
-            self.index_td = 0  # td counter in torrent row
-            self.write = None  # trigger to detecting when to collect data
-
-            # markup pagination
-            self.paginator = False  # found more pages in result
-            self.pages = 0  # page counter
-
-            self.search_id = 0
-            self.found_torrents = 0
-
-        def handle_starttag(self, tag, attrs):
-            # login
-            if self.login and tag == 'input':
-                tmp = dict(attrs)
-                if tmp.get('name') == 'code':
-                    self.login_code = tmp['value']
-                return
-
-            # search result table by class tablesorter
-            if tag == 'table':
-                for name, value in attrs:
-                    if name == 'class' and 'tablesorter' in value:
-                        self.result_table = True
-
-            # search for torrent row by class prow
-            if self.result_table and tag == 'tr':
-                for name, value in attrs:
-                    if name == 'class' and 'prow' in value:
-                        self.torrent_row = True
-
-            # count td for find right td
-            if self.torrent_row and tag == 'td':
-                if self.index_td == 5:
-                    self.write = "size"
-                elif self.index_td == 7:
-                    self.write = "seeds"
-                elif self.index_td == 8:
-                    self.write = "leech"
-
-                self.index_td += 1
-
-            # search for torrent link by classes r0 or r1
-            if self.torrent_row and tag == 'a':
-                if self.index_td == 3:
-                    self.torrent['desc_link'] = self.url + attrs[1][1]
-                    self.write = "name"
-
-                if self.index_td == 5:
-                    self.torrent['link'] = self.url + attrs[0][1]
-
-            # search for right div with class paginator
-            if self.found_torrents == 50 and tag == 'span':
-                for name, value in attrs:
-                    if name == 'class' and value == 'nav':
-                        self.paginator = True
-
-            # search for block with page numbers
-            if self.paginator and tag == 'a':
-                # if not self.pages:
-                #     parsing for search_id
-                #     self.search_id = parse_qs(attrs[0][1].split('?')[1])['search_id']
-                self.pages += 1
-
-        def handle_endtag(self, tag):
-            # detecting that torrent row is closed and print all collected data
-            if self.torrent_row and tag == 'tr':
-                self.torrent["engine_url"] = self.url
-                logging.debug('torrent row: ' + str(self.torrent))
-                prettyPrinter(self.torrent)
-                self.torrent = {key: '' for key in self.torrent}
-                self.index_td = 0
-                self.torrent_row = False
-                self.found_torrents += 1
-
-            # detecting that table with result is close
-            if self.result_table and tag == 'table':
-                self.result_table = False
-
-            # detecting that we found all pagination
-            if self.paginator and tag == 'span':
-                self.paginator = False
-
-        def handle_data(self, data: str):
-            # detecting that we need write data at this moment
-            if self.write and self.result_table:
-                if data.startswith('<b>'):
-                    data = data[3:-5]
-                if self.index_td == 5:
-                    data = data.split('</u>')[1].strip()
-                self.torrent[self.write] = data.strip()
-                self.write = None
-
-        def error(self, message):
-            pass
+            prettyPrinter(torrent)
+        del torrents
+        # return len(torrents)
 
     def download_torrent(self, url):
         if self.blocked:
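The net effect of this hunk: the hundred-plus line WorstParser state machine is replaced by a single draw() method, where one re.findall with re.S over the whole page yields a tuple per result row, consumed positionally as tor[0]..tor[5]. A rough sketch of that group-to-dict mapping, using a made-up tuple and a placeholder base URL rather than real tracker markup:

base_url = 'https://tracker.example/forum/'  # placeholder, stands in for self.url

# hypothetical tuple in the shape re.findall(...) returns for one result row:
# (desc_link, name, download_link, size, seeds, leeches)
tor = ('viewtopic.php?t=1', 'Some release', 'download.php?id=1', '1,46 GB', '10', '2')

torrent = {"engine_url": base_url,
           "desc_link": base_url + tor[0],
           "name": tor[1],
           "link": base_url + tor[2],
           "size": tor[3].replace(',', '.'),  # the tracker prints sizes with a decimal comma
           "seeds": tor[4],
           "leech": tor[5]}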
@@ -227,27 +126,37 @@ class nnmclub(object):
         logging.debug(path + " " + url)
         print(path + " " + url)
 
+    def searching(self, query, first=False):
+        response = self._catch_error_request(query)
+        page = response.read().decode('cp1251')
+        self.draw(page)
+        total = int(re.search(r'\(max:\s(\d{1,3})\)', page)[1]) if first else -1
+
+        return total
+
     def search(self, what, cat='all'):
         if self.blocked:
             return
         c = self.supported_categories[cat]
         query = '{}tracker.php?nm={}&{}'.format(self.url, what.replace(" ", "+"), "f=-1" if c == '-1' else "c=" + c)
-        response = self._catch_error_request(query)
-        parser = self.WorstParser(self.url)
-        parser.feed(response.read().decode('cp1251'))
-        parser.close()
-
-        # if first request return that we have pages, we do cycle
-        if parser.pages:
-            for x in range(1, parser.pages):
-                response = self._catch_error_request('{}&start={}'.format(query, # &search_id=
-                                                                          # parser.search_id,
-                                                                          parser.found_torrents))
-                parser.feed(response.read().decode('cp1251'))
-                parser.close()
+        # make first request (maybe it enough)
+        total = self.searching(query, True)
+        # do async requests
+        if total > 50:
+            tasks = []
+            for x in range(1, math.ceil(total / 50)):
+                task = threading.Thread(target=self.searching, args=(query + "&start={}".format(x * 50),))
+                tasks.append(task)
+                task.start()
+
+            # wait slower request in stack
+            for task in tasks:
+                task.join()
+            del tasks
 
         logging.debug("--- {} seconds ---".format(time.time() - start_time))
-        logging.info("Found torrents: {}".format(parser.found_torrents))
+        logging.info("Found torrents: {}".format(total))
 
     def _catch_error_request(self, url='', data=None):
         url = url if url else self.url
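Note on the threading change: search() now makes one synchronous request to learn the total result count, then fans out the remaining 50-result pages to worker threads and joins them, instead of walking the pages sequentially through the parser. A self-contained sketch of that fan-out/join pattern, where fetch_page is a stand-in for self.searching and PAGE_SIZE = 50 is assumed from the tracker's paging:

import math
import threading

PAGE_SIZE = 50  # results per page, as assumed by the plugin

def fetch_page(offset):
    # placeholder for self.searching(query + "&start=" + str(offset))
    print("fetching results starting at", offset)

def fetch_all(total):
    fetch_page(0)  # the first page was already needed to learn `total`
    tasks = []
    for x in range(1, math.ceil(total / PAGE_SIZE)):
        task = threading.Thread(target=fetch_page, args=(x * PAGE_SIZE,))
        tasks.append(task)
        task.start()
    for task in tasks:  # wait for the slowest request
        task.join()

fetch_all(120)  # would fetch offsets 0, 50 and 100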