
v2: regex, threading - speeding up

master
imDMG committed 6 years ago
commit 66270f4ee4

2 files changed:
  1. kinozal.py (2 lines changed)
  2. nnmclub.py (191 lines changed)

kinozal.py (2 lines changed)

@@ -87,7 +87,7 @@ class kinozal(object):
         for tor in torrents:
             torrent = {"engine_url": self.url,
-                       "desc_link": tor[0],
+                       "desc_link": self.url + tor[0],
                        "name": tor[1],
                        "link": 'http://dl.kinozal.tv/download.php?id=' + tor[0].split('=')[1],
                        "size": self.units_convert(tor[2]),

nnmclub.py (191 lines changed)

@@ -1,19 +1,21 @@
-# VERSION: 1.2
+# VERSION: 2.0
 # AUTHORS: imDMG [imdmgg@gmail.com]

 # NoNaMe-Club search engine plugin for qBittorrent

-import tempfile
-import os
-import logging
 import json
+import logging
+import math
+import os
+import re
+import tempfile
+import threading
 import time

 from urllib.request import build_opener, HTTPCookieProcessor, ProxyHandler
 from urllib.parse import urlencode  # , parse_qs
 from urllib.error import URLError, HTTPError
 from http.cookiejar import Cookie, CookieJar
-from html.parser import HTMLParser
 from novaprinter import prettyPrinter

 # setup logging into qBittorrent/logs
# setup logging into qBittorrent/logs # setup logging into qBittorrent/logs
@@ -43,7 +45,7 @@ class nnmclub(object):
         try:
             # try to load user data from file
             with open(os.path.abspath(os.path.join(os.path.dirname(__file__), 'nnmclub.json'))) as f:
-                config = json.load(f)
+                config: dict = json.load(f)
         except OSError as e:
             # file not found
             logging.error(e)
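
For reference, the plugin reads its credentials from an nnmclub.json file sitting next to the script. Only the username and password keys are visible in this diff, so the real file may carry more; a hedged sketch of creating a minimal one:

    import json
    import os

    # assumed minimal shape; only these two keys appear in this diff
    config = {"username": "user", "password": "pass"}
    path = os.path.join(os.path.dirname(__file__), 'nnmclub.json')
    with open(path, 'w') as f:
        json.dump(config, f)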
@@ -73,14 +75,11 @@ class nnmclub(object):
         response = self._catch_error_request(self.url + 'login.php')
         if not self.blocked:
-            parser = self.WorstParser(self.url, True)
-            parser.feed(response.read().decode('cp1251'))
-            parser.close()
+            code = re.search(r'code"\svalue="(.+?)"', response.read().decode('cp1251'))[1]
             form_data = {"username": self.config['username'],
                          "password": self.config['password'],
                          "autologin": "on",
-                         "code": parser.login_code,
+                         "code": code,
                          "login": "Вход"}

             # so we first encode the keys to cp1251, then default-decode the whole string
             data_encoded = urlencode({k: v.encode('cp1251') for k, v in form_data.items()}).encode()
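
The three-line WorstParser pass that used to fetch the hidden login form code is replaced by a single re.search. A self-contained sketch of that extraction, using an invented HTML fragment in place of the real login page:

    import re

    # hypothetical stand-in for the hidden input on nnmclub's login page
    page = '<input type="hidden" name="code" value="a1b2c3"/>'
    # same pattern as the diff: grab the value following name="code"
    code = re.search(r'code"\svalue="(.+?)"', page)[1]
    print(code)  # a1b2c3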
@@ -92,122 +91,22 @@ class nnmclub(object):
             else:
                 logging.info('We successfully authorized')

-    class WorstParser(HTMLParser):
-        def __init__(self, url='', login=False):
-            HTMLParser.__init__(self)
-            self.url = url
-            self.login = login
-            self.torrent = {'link': '',
-                            'name': '',
-                            'size': '',
-                            'seeds': '',
-                            'leech': '',
-                            'desc_link': '', }
-
-            self.login_code = None
-
-            # we need page markup to know when to stop and collect data,
-            # because the available methods in this class do not communicate with each other;
-            # as a result, we make markup to transfer information
-            # from one method to another, along a chain
-            #
-            # markup on result table
-            self.result_table = False  # table with results is found
-            self.torrent_row = False  # found torrent row to collect data from
-            self.index_td = 0  # td counter in torrent row
-            self.write = None  # trigger for detecting when to collect data
-            # markup pagination
-            self.paginator = False  # found more pages in result
-            self.pages = 0  # page counter
-            self.search_id = 0
-
-            self.found_torrents = 0
-
-        def handle_starttag(self, tag, attrs):
-            # login
-            if self.login and tag == 'input':
-                tmp = dict(attrs)
-                if tmp.get('name') == 'code':
-                    self.login_code = tmp['value']
-                return
-
-            # search for result table by class tablesorter
-            if tag == 'table':
-                for name, value in attrs:
-                    if name == 'class' and 'tablesorter' in value:
-                        self.result_table = True
-
-            # search for torrent row by class prow
-            if self.result_table and tag == 'tr':
-                for name, value in attrs:
-                    if name == 'class' and 'prow' in value:
-                        self.torrent_row = True
-
-            # count td's to find the right td
-            if self.torrent_row and tag == 'td':
-                if self.index_td == 5:
-                    self.write = "size"
-                elif self.index_td == 7:
-                    self.write = "seeds"
-                elif self.index_td == 8:
-                    self.write = "leech"
-
-                self.index_td += 1
-
-            # search for torrent link by classes r0 or r1
-            if self.torrent_row and tag == 'a':
-                if self.index_td == 3:
-                    self.torrent['desc_link'] = self.url + attrs[1][1]
-                    self.write = "name"
-                if self.index_td == 5:
-                    self.torrent['link'] = self.url + attrs[0][1]
-
-            # search for the pagination block (span with class nav)
-            if self.found_torrents == 50 and tag == 'span':
-                for name, value in attrs:
-                    if name == 'class' and value == 'nav':
-                        self.paginator = True
-
-            # search for block with page numbers
-            if self.paginator and tag == 'a':
-                # if not self.pages:
-                #     # parsing for search_id
-                #     self.search_id = parse_qs(attrs[0][1].split('?')[1])['search_id']
-                self.pages += 1
-
-        def handle_endtag(self, tag):
-            # detecting that the torrent row is closed, then print all collected data
-            if self.torrent_row and tag == 'tr':
-                self.torrent["engine_url"] = self.url
-                logging.debug('torrent row: ' + str(self.torrent))
-                prettyPrinter(self.torrent)
-                self.torrent = {key: '' for key in self.torrent}
-                self.index_td = 0
-                self.torrent_row = False
-                self.found_torrents += 1
-
-            # detecting that the results table is closed
-            if self.result_table and tag == 'table':
-                self.result_table = False
-
-            # detecting that we found all pagination
-            if self.paginator and tag == 'span':
-                self.paginator = False
-
-        def handle_data(self, data: str):
-            # detecting that we need to write data at this moment
-            if self.write and self.result_table:
-                if data.startswith('<b>'):
-                    data = data[3:-5]
-                if self.index_td == 5:
-                    data = data.split('</u>')[1].strip()
-                self.torrent[self.write] = data.strip()
-                self.write = None
-
-        def error(self, message):
-            pass
+    def draw(self, html: str):
+        torrents = re.findall(r'd\stopic.+?href="(.+?)".+?<b>(.+?)</b>.+?href="(d.+?)"'
+                              r'.+?/u>\s(.+?)<.+?b>(\d+)</.+?b>(\d+)<', html, re.S)
+
+        for tor in torrents:
+            torrent = {"engine_url": self.url,
+                       "desc_link": self.url + tor[0],
+                       "name": tor[1],
+                       "link": self.url + tor[2],
+                       "size": tor[3].replace(',', '.'),
+                       "seeds": tor[4],
+                       "leech": tor[5]}
+
+            prettyPrinter(torrent)
+        del torrents
+        # return len(torrents)

     def download_torrent(self, url):
         if self.blocked:
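
The new draw() replaces the entire stateful HTMLParser subclass with one re.findall over the page: with re.S the pattern walks each result row and captures desc link, name, download link, size, seeds, and leech in a single pass. A simplified, self-contained sketch of the technique (both the markup and the pattern below are invented; the tracker's real rows are what make the committed pattern so dense):

    import re

    html = '''
    <tr><a href="/t1">Name A</a> <i>1,4 GB</i> <b>10</b> <u>2</u></tr>
    <tr><a href="/t2">Name B</a> <i>700 MB</i> <b>5</b> <u>0</u></tr>
    '''
    rows = re.findall(r'href="(.+?)">(.+?)<.+?<i>(.+?)</i>.+?'
                      r'<b>(\d+)</b>.+?<u>(\d+)</u>', html, re.S)
    for desc, name, size, seeds, leech in rows:
        # mirror the diff's size normalization: decimal comma to dot
        print(desc, name, size.replace(',', '.'), seeds, leech)

A single regex pass like this is faster than tag-by-tag parsing, but more brittle: any markup change on the tracker silently breaks the capture groups.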
@@ -227,27 +126,37 @@ class nnmclub(object):
         logging.debug(path + " " + url)
         print(path + " " + url)

+    def searching(self, query, first=False):
+        response = self._catch_error_request(query)
+        page = response.read().decode('cp1251')
+        self.draw(page)
+        total = int(re.search(r'\(max:\s(\d{1,3})\)', page)[1]) if first else -1
+
+        return total
+
     def search(self, what, cat='all'):
         if self.blocked:
             return
         c = self.supported_categories[cat]
         query = '{}tracker.php?nm={}&{}'.format(self.url, what.replace(" ", "+"), "f=-1" if c == '-1' else "c=" + c)
-        response = self._catch_error_request(query)
-        parser = self.WorstParser(self.url)
-        parser.feed(response.read().decode('cp1251'))
-        parser.close()
-
-        # if the first request says we have more pages, we loop over them
-        if parser.pages:
-            for x in range(1, parser.pages):
-                response = self._catch_error_request('{}&start={}'.format(query,  # &search_id=
-                                                                          # parser.search_id,
-                                                                          parser.found_torrents))
-                parser.feed(response.read().decode('cp1251'))
-                parser.close()
+
+        # make the first request (maybe it is enough)
+        total = self.searching(query, True)
+        # do async requests
+        if total > 50:
+            tasks = []
+            for x in range(1, math.ceil(total / 50)):
+                task = threading.Thread(target=self.searching,
+                                        args=(query + "&start={}".format(x * 50),))
+                tasks.append(task)
+                task.start()
+
+            # wait for the slowest request in the stack
+            for task in tasks:
+                task.join()
+            del tasks

         logging.debug("--- {} seconds ---".format(time.time() - start_time))
-        logging.info("Found torrents: {}".format(parser.found_torrents))
+        logging.info("Found torrents: {}".format(total))

     def _catch_error_request(self, url='', data=None):
         url = url if url else self.url
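
The new flow fetches page one synchronously to learn the total hit count from the "(max: N)" marker, then fans the remaining 50-result pages out to threads and joins them all. A sketch of the same pattern with the HTTP call stubbed out:

    import math
    import threading

    def searching(query, first=False):
        # stand-in for the real request + draw(); the total is invented
        print('fetching', query)
        return 120 if first else -1

    query = 'tracker.php?nm=test&f=-1'
    total = searching(query, True)  # first page also reports the total

    tasks = []
    if total > 50:
        for x in range(1, math.ceil(total / 50)):
            task = threading.Thread(target=searching,
                                    args=(query + '&start={}'.format(x * 50),))
            tasks.append(task)
            task.start()
    for task in tasks:
        task.join()  # block until the slowest page has been drawn

Worth noting: with this design prettyPrinter now runs from several threads at once, so result ordering across pages is no longer deterministic, which is an acceptable trade for the speedup the commit message advertises.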
