|
|
|
@ -1,20 +1,21 @@
@@ -1,20 +1,21 @@
|
|
|
|
|
# VERSION: 1.3 |
|
|
|
|
# VERSION: 2.0 |
|
|
|
|
# AUTHORS: imDMG [imdmgg@gmail.com] |
|
|
|
|
|
|
|
|
|
# Kinozal.tv search engine plugin for qBittorrent |
|
|
|
|
|
|
|
|
|
import tempfile |
|
|
|
|
import os |
|
|
|
|
import logging |
|
|
|
|
import json |
|
|
|
|
# import re |
|
|
|
|
import logging |
|
|
|
|
import math |
|
|
|
|
import os |
|
|
|
|
import re |
|
|
|
|
import tempfile |
|
|
|
|
import threading |
|
|
|
|
import time |
|
|
|
|
|
|
|
|
|
from urllib.request import build_opener, HTTPCookieProcessor, ProxyHandler |
|
|
|
|
from urllib.parse import urlencode |
|
|
|
|
from urllib.error import URLError, HTTPError |
|
|
|
|
from http.cookiejar import CookieJar |
|
|
|
|
from html.parser import HTMLParser |
|
|
|
|
from novaprinter import prettyPrinter |
|
|
|
|
|
|
|
|
|
# setup logging into qBittorrent/logs |
|
|
|
@ -24,7 +25,6 @@ logging.basicConfig(level=logging.INFO,
@@ -24,7 +25,6 @@ logging.basicConfig(level=logging.INFO,
|
|
|
|
|
filename=os.path.abspath(os.path.join(os.path.dirname(__file__), '../../logs', 'kinozal.log')), |
|
|
|
|
filemode='w') |
|
|
|
|
|
|
|
|
|
# benchmark |
|
|
|
|
start_time = time.time() |
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -51,6 +51,8 @@ class kinozal(object):
@@ -51,6 +51,8 @@ class kinozal(object):
|
|
|
|
|
raise e |
|
|
|
|
|
|
|
|
|
def __init__(self): |
|
|
|
|
logging.info('Initialisation') |
|
|
|
|
self.result = [] |
|
|
|
|
# establish connection |
|
|
|
|
# |
|
|
|
|
# make cookie |
|
|
|
@ -79,113 +81,30 @@ class kinozal(object):
@@ -79,113 +81,30 @@ class kinozal(object):
|
|
|
|
|
else: |
|
|
|
|
logging.info('We successfully authorized') |
|
|
|
|
|
|
|
|
|
class WorstParser(HTMLParser): |
|
|
|
|
def __init__(self, url=''): |
|
|
|
|
HTMLParser.__init__(self) |
|
|
|
|
self.url = url |
|
|
|
|
self.torrent = {'link': '', |
|
|
|
|
'name': '', |
|
|
|
|
'size': '', |
|
|
|
|
'seeds': '', |
|
|
|
|
'leech': '', |
|
|
|
|
'desc_link': '', } |
|
|
|
|
|
|
|
|
|
# we need a page markup to know when stop and collect data, |
|
|
|
|
# because available methods, in this class, do not communicate each other |
|
|
|
|
# as a result, we make markup to transfer information |
|
|
|
|
# from one method to another, along a chain |
|
|
|
|
# |
|
|
|
|
# markup on result table |
|
|
|
|
self.result_table = False # table with results is found |
|
|
|
|
self.torrent_row = False # found torrent row for collect data |
|
|
|
|
self.index_td = 0 # td counter in torrent row |
|
|
|
|
self.write = None # trigger to detecting when to collect data |
|
|
|
|
|
|
|
|
|
# markup pagination |
|
|
|
|
self.paginator = False # found more pages in result |
|
|
|
|
self.pages = 0 # page counter |
|
|
|
|
|
|
|
|
|
self.found_torrents = 0 |
|
|
|
|
|
|
|
|
|
def handle_starttag(self, tag, attrs): |
|
|
|
|
# search result table by class t_peer |
|
|
|
|
if tag == 'table': |
|
|
|
|
for name, value in attrs: |
|
|
|
|
if name == 'class' and 't_peer' in value: |
|
|
|
|
self.result_table = True |
|
|
|
|
|
|
|
|
|
# search for torrent row by class bg |
|
|
|
|
if self.result_table and tag == 'tr': |
|
|
|
|
for name, value in attrs: |
|
|
|
|
if name == 'class' and 'bg' in value: |
|
|
|
|
self.torrent_row = True |
|
|
|
|
|
|
|
|
|
# count td for find right td |
|
|
|
|
if self.torrent_row and tag == 'td': |
|
|
|
|
if self.index_td == 3: |
|
|
|
|
self.write = "size" |
|
|
|
|
elif self.index_td == 4: |
|
|
|
|
self.write = "seeds" |
|
|
|
|
elif self.index_td == 5: |
|
|
|
|
self.write = "leech" |
|
|
|
|
|
|
|
|
|
self.index_td += 1 |
|
|
|
|
|
|
|
|
|
# search for torrent link by classes r0 or r1 |
|
|
|
|
if self.torrent_row and tag == 'a': |
|
|
|
|
for name, value in attrs: |
|
|
|
|
if name == 'class' and 'r' in value: |
|
|
|
|
self.torrent['link'] = 'http://dl.kinozal.tv/download.php?id=' + attrs[0][1].split('=')[1] |
|
|
|
|
self.torrent['desc_link'] = self.url + attrs[0][1] |
|
|
|
|
self.write = "name" |
|
|
|
|
|
|
|
|
|
# search for right div with class paginator |
|
|
|
|
if self.found_torrents == 50 and tag == 'div': |
|
|
|
|
for name, value in attrs: |
|
|
|
|
if name == 'class' and value == 'paginator': |
|
|
|
|
self.paginator = True |
|
|
|
|
|
|
|
|
|
# search for block with page numbers |
|
|
|
|
if self.paginator and tag == 'li': |
|
|
|
|
self.pages += 1 |
|
|
|
|
|
|
|
|
|
def handle_endtag(self, tag): |
|
|
|
|
# detecting that torrent row is closed and print all collected data |
|
|
|
|
if self.torrent_row and tag == 'tr': |
|
|
|
|
self.torrent["engine_url"] = self.url |
|
|
|
|
logging.debug('self.torrent: ' + str(self.torrent)) |
|
|
|
|
prettyPrinter(self.torrent) |
|
|
|
|
self.torrent = {key: '' for key in self.torrent} |
|
|
|
|
self.index_td = 0 |
|
|
|
|
self.torrent_row = False |
|
|
|
|
self.found_torrents += 1 |
|
|
|
|
|
|
|
|
|
# detecting that table with result is close |
|
|
|
|
if self.result_table and tag == 'table': |
|
|
|
|
self.result_table = False |
|
|
|
|
|
|
|
|
|
# detecting that we found all pagination |
|
|
|
|
if self.paginator and tag == 'ul': |
|
|
|
|
self.paginator = False |
|
|
|
|
|
|
|
|
|
def handle_data(self, data: str): |
|
|
|
|
# detecting that we need write data at this moment |
|
|
|
|
if self.write and self.result_table: |
|
|
|
|
if self.write == 'size': |
|
|
|
|
data = self.units_convert(data) |
|
|
|
|
self.torrent[self.write] = data.strip() |
|
|
|
|
self.write = None |
|
|
|
|
|
|
|
|
|
@staticmethod |
|
|
|
|
def units_convert(unit): |
|
|
|
|
# replace size units |
|
|
|
|
find = unit.split()[1] |
|
|
|
|
replace = {'ТБ': 'TB', 'ГБ': 'GB', 'МБ': 'MB', 'КБ': 'KB'}[find] |
|
|
|
|
|
|
|
|
|
return unit.replace(find, replace) |
|
|
|
|
|
|
|
|
|
def error(self, message): |
|
|
|
|
pass |
|
|
|
|
def draw(self, html: str): |
|
|
|
|
torrents = re.findall(r'nam"><a\s+?href="(.+?)"\s+?class="r\d">(.*?)</a>' |
|
|
|
|
r'.+?s\'>.+?s\'>(.*?)<.+?sl_s\'>(\d+)<.+?sl_p\'>(\d+)<', html, re.S) |
|
|
|
|
|
|
|
|
|
for tor in torrents: |
|
|
|
|
torrent = {"engine_url": self.url, |
|
|
|
|
"desc_link": tor[0], |
|
|
|
|
"name": tor[1], |
|
|
|
|
"link": 'http://dl.kinozal.tv/download.php?id=' + tor[0].split('=')[1], |
|
|
|
|
"size": self.units_convert(tor[2]), |
|
|
|
|
"seeds": tor[3], |
|
|
|
|
"leech": tor[4]} |
|
|
|
|
|
|
|
|
|
prettyPrinter(torrent) |
|
|
|
|
del torrents |
|
|
|
|
# return len(torrents) |
|
|
|
|
|
|
|
|
|
@staticmethod |
|
|
|
|
def units_convert(unit): |
|
|
|
|
# replace size units |
|
|
|
|
find = unit.split()[1] |
|
|
|
|
replace = {'ТБ': 'TB', 'ГБ': 'GB', 'МБ': 'MB', 'КБ': 'KB'}[find] |
|
|
|
|
|
|
|
|
|
return unit.replace(find, replace) |
|
|
|
|
|
|
|
|
|
def download_torrent(self, url: str): |
|
|
|
|
if self.blocked: |
|
|
|
@ -214,24 +133,54 @@ class kinozal(object):
@@ -214,24 +133,54 @@ class kinozal(object):
|
|
|
|
|
logging.debug(path + " " + url) |
|
|
|
|
print(path + " " + url) |
|
|
|
|
|
|
|
|
|
def search(self, what, cat='all'): |
|
|
|
|
def searching(self, query, first=False): |
|
|
|
|
response = self._catch_error_request(query) |
|
|
|
|
page = response.read().decode('cp1251') |
|
|
|
|
self.draw(page) |
|
|
|
|
total = int(re.search(r'</span>Найдено\s+?(\d+)\s+?раздач', page)[1]) if first else -1 |
|
|
|
|
|
|
|
|
|
return total |
|
|
|
|
|
|
|
|
|
def search_old(self, what, cat='all'): |
|
|
|
|
if self.blocked: |
|
|
|
|
return |
|
|
|
|
query = '{}/browse.php?s={}&c={}'.format(self.url, what.replace(" ", "+"), self.supported_categories[cat]) |
|
|
|
|
response = self._catch_error_request(query) |
|
|
|
|
parser = self.WorstParser(self.url) |
|
|
|
|
parser.feed(response.read().decode('cp1251')) |
|
|
|
|
parser.close() |
|
|
|
|
total, current = -1, 0 |
|
|
|
|
while total != current: |
|
|
|
|
query = '{}/browse.php?s={}&c={}&page={}'.format(self.url, what.replace(" ", "+"), |
|
|
|
|
self.supported_categories[cat], |
|
|
|
|
math.ceil(current / 50)) |
|
|
|
|
response = self._catch_error_request(query) |
|
|
|
|
page = response.read().decode('cp1251') |
|
|
|
|
if total == -1: |
|
|
|
|
total = int(re.search(r'</span>Найдено\s+?(\d+)\s+?раздач</td>', page)[1]) |
|
|
|
|
current += self.draw(page) |
|
|
|
|
|
|
|
|
|
# if first request return that we have pages, we do cycle |
|
|
|
|
if parser.pages: |
|
|
|
|
for x in range(1, parser.pages): |
|
|
|
|
response = self._catch_error_request('{}&page={}'.format(query, x)) |
|
|
|
|
parser.feed(response.read().decode('cp1251')) |
|
|
|
|
parser.close() |
|
|
|
|
logging.debug("--- {} seconds ---".format(time.time() - start_time)) |
|
|
|
|
logging.info("Found torrents: {}".format(total)) |
|
|
|
|
|
|
|
|
|
def search(self, what, cat='all'): |
|
|
|
|
if self.blocked: |
|
|
|
|
return |
|
|
|
|
query = '{}/browse.php?s={}&c={}'.format(self.url, what.replace(" ", "+"), |
|
|
|
|
self.supported_categories[cat]) |
|
|
|
|
|
|
|
|
|
# make first request (maybe it enough) |
|
|
|
|
total = self.searching(query, True) |
|
|
|
|
# do async requests |
|
|
|
|
if total > 50: |
|
|
|
|
tasks = [] |
|
|
|
|
for x in range(1, math.ceil(total / 50)): |
|
|
|
|
task = threading.Thread(target=self.searching, args=(query + "&page={}".format(x),)) |
|
|
|
|
tasks.append(task) |
|
|
|
|
task.start() |
|
|
|
|
|
|
|
|
|
# wait slower request in stack |
|
|
|
|
for task in tasks: |
|
|
|
|
task.join() |
|
|
|
|
del tasks |
|
|
|
|
|
|
|
|
|
logging.debug("--- {} seconds ---".format(time.time() - start_time)) |
|
|
|
|
logging.info("Found torrents: {}".format(parser.found_torrents)) |
|
|
|
|
logging.info("Found torrents: {}".format(total)) |
|
|
|
|
|
|
|
|
|
def _catch_error_request(self, url='', data=None): |
|
|
|
|
url = url if url else self.url |
|
|
|
@ -258,6 +207,8 @@ class kinozal(object):
@@ -258,6 +207,8 @@ class kinozal(object):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__": |
|
|
|
|
# f = open("result.html", "r") |
|
|
|
|
kinozal_se = kinozal() |
|
|
|
|
# kinozal_se.download_torrent("http://kinozal.tv/details.php?id=1263407") |
|
|
|
|
# kinozal_se.search('supernatural') |
|
|
|
|
# kinozal_se.draw(f.read()) |
|
|
|
|
kinozal_se.search('doctor') |
|
|
|
|
print("--- %s seconds ---" % (time.time() - start_time)) |
|
|
|
|