Browse Source

v2: regex, threading - speeding up

master
imDMG 6 years ago
parent
commit
66270f4ee4
  1. 2
      kinozal.py
  2. 189
      nnmclub.py

2
kinozal.py

@ -87,7 +87,7 @@ class kinozal(object): @@ -87,7 +87,7 @@ class kinozal(object):
for tor in torrents:
torrent = {"engine_url": self.url,
"desc_link": tor[0],
"desc_link": self.url + tor[0],
"name": tor[1],
"link": 'http://dl.kinozal.tv/download.php?id=' + tor[0].split('=')[1],
"size": self.units_convert(tor[2]),

189
nnmclub.py

@ -1,19 +1,21 @@ @@ -1,19 +1,21 @@
# VERSION: 1.2
# VERSION: 2.0
# AUTHORS: imDMG [imdmgg@gmail.com]
# NoNaMe-Club search engine plugin for qBittorrent
import tempfile
import os
import logging
import json
import logging
import math
import os
import re
import tempfile
import threading
import time
from urllib.request import build_opener, HTTPCookieProcessor, ProxyHandler
from urllib.parse import urlencode # , parse_qs
from urllib.error import URLError, HTTPError
from http.cookiejar import Cookie, CookieJar
from html.parser import HTMLParser
from novaprinter import prettyPrinter
# setup logging into qBittorrent/logs
@ -43,7 +45,7 @@ class nnmclub(object): @@ -43,7 +45,7 @@ class nnmclub(object):
try:
# try to load user data from file
with open(os.path.abspath(os.path.join(os.path.dirname(__file__), 'nnmclub.json'))) as f:
config = json.load(f)
config: dict = json.load(f)
except OSError as e:
# file not found
logging.error(e)
@ -73,14 +75,11 @@ class nnmclub(object): @@ -73,14 +75,11 @@ class nnmclub(object):
response = self._catch_error_request(self.url + 'login.php')
if not self.blocked:
parser = self.WorstParser(self.url, True)
parser.feed(response.read().decode('cp1251'))
parser.close()
code = re.search(r'code"\svalue="(.+?)"', response.read().decode('cp1251'))[1]
form_data = {"username": self.config['username'],
"password": self.config['password'],
"autologin": "on",
"code": parser.login_code,
"code": code,
"login": "Вход"}
# encode each form value to cp1251 first, then urlencode the dict and encode the resulting string with the default codec
data_encoded = urlencode({k: v.encode('cp1251') for k, v in form_data.items()}).encode()
@ -92,122 +91,22 @@ class nnmclub(object): @@ -92,122 +91,22 @@ class nnmclub(object):
else:
logging.info('We successfully authorized')
class WorstParser(HTMLParser):
def __init__(self, url='', login=False):
HTMLParser.__init__(self)
self.url = url
self.login = login
self.torrent = {'link': '',
'name': '',
'size': '',
'seeds': '',
'leech': '',
'desc_link': '', }
self.login_code = None
# we need a page markup to know when stop and collect data,
# because available methods, in this class, do not communicate each other
# as a result, we make markup to transfer information
# from one method to another, along a chain
#
# markup on result table
self.result_table = False # table with results is found
self.torrent_row = False # found torrent row for collect data
self.index_td = 0 # td counter in torrent row
self.write = None # trigger to detecting when to collect data
# markup pagination
self.paginator = False # found more pages in result
self.pages = 0 # page counter
self.search_id = 0
self.found_torrents = 0
def handle_starttag(self, tag, attrs):
# login
if self.login and tag == 'input':
tmp = dict(attrs)
if tmp.get('name') == 'code':
self.login_code = tmp['value']
return
def draw(self, html: str):
torrents = re.findall(r'd\stopic.+?href="(.+?)".+?<b>(.+?)</b>.+?href="(d.+?)"'
r'.+?/u>\s(.+?)<.+?b>(\d+)</.+?b>(\d+)<', html, re.S)
for tor in torrents:
torrent = {"engine_url": self.url,
"desc_link": self.url + tor[0],
"name": tor[1],
"link": self.url + tor[2],
"size": tor[3].replace(',', '.'),
"seeds": tor[4],
"leech": tor[5]}
# search result table by class tablesorter
if tag == 'table':
for name, value in attrs:
if name == 'class' and 'tablesorter' in value:
self.result_table = True
# search for torrent row by class prow
if self.result_table and tag == 'tr':
for name, value in attrs:
if name == 'class' and 'prow' in value:
self.torrent_row = True
# count td for find right td
if self.torrent_row and tag == 'td':
if self.index_td == 5:
self.write = "size"
elif self.index_td == 7:
self.write = "seeds"
elif self.index_td == 8:
self.write = "leech"
self.index_td += 1
# search for torrent link by classes r0 or r1
if self.torrent_row and tag == 'a':
if self.index_td == 3:
self.torrent['desc_link'] = self.url + attrs[1][1]
self.write = "name"
if self.index_td == 5:
self.torrent['link'] = self.url + attrs[0][1]
# search for right div with class paginator
if self.found_torrents == 50 and tag == 'span':
for name, value in attrs:
if name == 'class' and value == 'nav':
self.paginator = True
# search for block with page numbers
if self.paginator and tag == 'a':
# if not self.pages:
# parsing for search_id
# self.search_id = parse_qs(attrs[0][1].split('?')[1])['search_id']
self.pages += 1
def handle_endtag(self, tag):
# detecting that torrent row is closed and print all collected data
if self.torrent_row and tag == 'tr':
self.torrent["engine_url"] = self.url
logging.debug('torrent row: ' + str(self.torrent))
prettyPrinter(self.torrent)
self.torrent = {key: '' for key in self.torrent}
self.index_td = 0
self.torrent_row = False
self.found_torrents += 1
# detecting that table with result is close
if self.result_table and tag == 'table':
self.result_table = False
# detecting that we found all pagination
if self.paginator and tag == 'span':
self.paginator = False
def handle_data(self, data: str):
# detecting that we need write data at this moment
if self.write and self.result_table:
if data.startswith('<b>'):
data = data[3:-5]
if self.index_td == 5:
data = data.split('</u>')[1].strip()
self.torrent[self.write] = data.strip()
self.write = None
def error(self, message):
pass
prettyPrinter(torrent)
del torrents
# return len(torrents)
def download_torrent(self, url):
if self.blocked:
@ -227,27 +126,37 @@ class nnmclub(object): @@ -227,27 +126,37 @@ class nnmclub(object):
logging.debug(path + " " + url)
print(path + " " + url)
def searching(self, query, first=False):
    """Fetch one result page, print its torrents and optionally report the total.

    The page at ``query`` is requested through ``self._catch_error_request``,
    decoded as cp1251 and handed to ``self.draw``.

    :param query: full URL of the result page to request.
    :param first: when true, also read the "(max: N)" counter from the page.
    :return: the counter value as an int when ``first`` is true and the
        counter is present; ``-1`` otherwise (total not requested or unknown).
    """
    response = self._catch_error_request(query)
    page = response.read().decode('cp1251')
    self.draw(page)
    if not first:
        return -1
    # re.search returns None when the counter is missing (for example on an
    # unexpected page layout); subscripting None would raise TypeError after
    # the rows were already printed, so report "unknown" instead.
    found = re.search(r'\(max:\s(\d{1,3})\)', page)
    return int(found[1]) if found else -1
# Search entry point: builds a tracker.php query from `what` and the category
# code for `cat`, requests the first page and then any further pages.
# Does nothing when self.blocked is set.
# NOTE(review): this span reads like a rendered diff with markers and indentation
# stripped. The WorstParser statements and the searching()/thread statements look
# like the old and new sides of the same hunk; confirm against the committed file.
def search(self, what, cat='all'):
if self.blocked:
return
# category code; '-1' selects the "f=-1" query form instead of "c=<code>"
c = self.supported_categories[cat]
query = '{}tracker.php?nm={}&{}'.format(self.url, what.replace(" ", "+"), "f=-1" if c == '-1' else "c=" + c)
# parser-based path: feed the first page to WorstParser
response = self._catch_error_request(query)
parser = self.WorstParser(self.url)
parser.feed(response.read().decode('cp1251'))
parser.close()
# if the first page reported pagination, request the following pages one by one,
# using the number of torrents found so far as the `start` offset
if parser.pages:
for x in range(1, parser.pages):
response = self._catch_error_request('{}&start={}'.format(query, # &search_id=
# parser.search_id,
parser.found_torrents))
parser.feed(response.read().decode('cp1251'))
parser.close()
# searching()-based path: the first request prints its rows and returns the
# page's "(max: N)" total (this may already be all there is)
total = self.searching(query, True)
# more than one page: fetch the remaining pages concurrently, one thread per page
# (assumes 50 results per page, so the offsets are 50, 100, ...)
if total > 50:
tasks = []
for x in range(1, math.ceil(total / 50)):
task = threading.Thread(target=self.searching, args=(query + "&start={}".format(x * 50),))
tasks.append(task)
task.start()
# block until every page thread has finished, i.e. until the slowest request is done
for task in tasks:
task.join()
del tasks
# NOTE(review): start_time is not assigned in the visible lines; presumably it is
# set outside this hunk. Verify.
logging.debug("--- {} seconds ---".format(time.time() - start_time))
logging.info("Found torrents: {}".format(parser.found_torrents))
logging.info("Found torrents: {}".format(total))
def _catch_error_request(self, url='', data=None):
url = url if url else self.url

Loading…
Cancel
Save