diff --git a/src/searchengine/nova/engines/legittorrents.py b/src/searchengine/nova/engines/legittorrents.py
index 6ae66e070..59c9f2d15 100644
--- a/src/searchengine/nova/engines/legittorrents.py
+++ b/src/searchengine/nova/engines/legittorrents.py
@@ -1,5 +1,6 @@
-#VERSION: 1.05
+#VERSION: 2.00
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
+# Douman (custparasite@gmx.se)
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
@@ -28,78 +29,73 @@
from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file
-import sgmllib
-import re
+from HTMLParser import HTMLParser
+from re import compile as re_compile
class legittorrents(object):
- url = 'http://www.legittorrents.info'
- name = 'Legit Torrents'
- supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'}
+ url = 'http://www.legittorrents.info'
+ name = 'Legit Torrents'
+ supported_categories = {'all': '0', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'}
- def download_torrent(self, info):
- print download_file(info)
+ def download_torrent(self, info):
+ print(download_file(info))
- class SimpleSGMLParser(sgmllib.SGMLParser):
- def __init__(self, results, url, *args):
- sgmllib.SGMLParser.__init__(self)
- self.url = url
- self.td_counter = None
- self.current_item = None
- self.start_name = False
- self.results = results
+ class MyHtmlParseWithBlackJack(HTMLParser):
+ """ Parser class """
+ def __init__(self, url):
+ HTMLParser.__init__(self)
+ self.url = url
+ self.current_item = None
+ self.save_item_key = None
- def start_a(self, attr):
- params = dict(attr)
- if params.has_key('href') and params['href'].startswith('download.php?'):
- self.current_item['link'] = self.url + '/' + params['href'].strip()
- elif params.has_key('href') and params['href'].startswith('index.php?page=torrent-details'):
- self.current_item = {}
- self.td_counter = 0
- self.current_item['desc_link'] = self.url + '/' + params['href'].strip()
+ def handle_starttag(self, tag, attrs):
+ """ Parser's start tag handler """
+ if self.current_item:
+ params = dict(attrs)
+ if tag == "a":
+ link = params["href"]
+ if link.startswith("index") and "title" in params:
+ #description link
+ self.current_item["name"] = params["title"][14:]
+ self.current_item["desc_link"] = "/".join((self.url, link))
+ elif link.startswith("download"):
+ self.current_item["link"] = "/".join((self.url, link))
+ elif tag == "td":
+ if "class" in params and params["class"].startswith("#FF"):
+ self.save_item_key = "leech" if "seeds" in self.current_item else "seeds"
- def handle_data(self, data):
- if self.td_counter == 0:
- if not self.current_item.has_key('name'):
- self.current_item['name'] = data.strip()
- elif self.td_counter == 3:
- if not self.current_item.has_key('seeds'):
- self.current_item['seeds'] = ''
- self.current_item['seeds']+= data.strip()
- elif self.td_counter == 4:
- if not self.current_item.has_key('leech'):
- self.current_item['leech'] = ''
- self.current_item['leech']+= data.strip()
+ elif tag == "tr":
+ self.current_item = {}
+ self.current_item["size"] = ""
+ self.current_item["engine_url"] = self.url
- def start_td(self,attr):
- if isinstance(self.td_counter,int):
- self.td_counter += 1
- if self.td_counter > 5:
- self.td_counter = None
- # Display item
- if self.current_item:
- self.current_item['engine_url'] = self.url
- if not self.current_item['seeds'].isdigit():
- self.current_item['seeds'] = 0
- if not self.current_item['leech'].isdigit():
- self.current_item['leech'] = 0
- self.current_item['size'] = ''
- prettyPrinter(self.current_item)
- self.results.append('a')
+ def handle_endtag(self, tag):
+ """ Parser's end tag handler """
+ if self.current_item and tag == "tr":
+ if len(self.current_item) > 4:
+ prettyPrinter(self.current_item)
+ self.current_item = None
+
+ def handle_data(self, data):
+ """ Parser's data handler """
+ if self.save_item_key:
+ self.current_item[self.save_item_key] = data.strip()
+ self.save_item_key = None
+
+ def search(self, what, cat='all'):
+ """ Performs search """
+ query = "".join((self.url, "/index.php?page=torrents&search=", what, "&category=", self.supported_categories.get(cat, '0'), "&active=1"))
+
+ get_table = re_compile('(?s)<table\s+class="lista".*?</table>')
+ data = get_table.search(retrieve_url(query)).group(0)
+ #extract first ten pages of next results
+ next_pages = re_compile('(?m)<option value="(.*)">[0-9]+</option>')
+ next_pages = ["".join((self.url, page)) for page in next_pages.findall(data)[:10]]
- def search(self, what, cat='all'):
- ret = []
- i = 1
- while True and i<11:
- results = []
- parser = self.SimpleSGMLParser(results, self.url)
- dat = retrieve_url(self.url+'/index.php?page=torrents&search=%s&category=%s&active=1&order=3&by=2&pages=%d'%(what, self.supported_categories[cat], i))
- results_re = re.compile('(?s)<table class="lista" width="100%">.*</table>')
- for match in results_re.finditer(dat):
- res_tab = match.group(0)
- parser.feed(res_tab)
+ parser = self.MyHtmlParseWithBlackJack(self.url)
+ parser.feed(data)
parser.close()
- break
- if len(results) <= 0:
- break
- i += 1
+ for page in next_pages:
+ parser.feed(get_table.search(retrieve_url(page)).group(0))
+ parser.close()
diff --git a/src/searchengine/nova/engines/versions.txt b/src/searchengine/nova/engines/versions.txt
index a1170497f..25b2da2a0 100644
--- a/src/searchengine/nova/engines/versions.txt
+++ b/src/searchengine/nova/engines/versions.txt
@@ -2,7 +2,7 @@ btdigg: 1.25
demonoid: 1.1
extratorrent: 2.0
kickasstorrents: 1.27
-legittorrents: 1.05
+legittorrents: 2.00
mininova: 2.00
piratebay: 2.11
torrentreactor: 1.36
diff --git a/src/searchengine/nova3/engines/legittorrents.py b/src/searchengine/nova3/engines/legittorrents.py
index 40c40decc..c3e9bf20a 100644
--- a/src/searchengine/nova3/engines/legittorrents.py
+++ b/src/searchengine/nova3/engines/legittorrents.py
@@ -1,5 +1,6 @@
-#VERSION: 1.05
+#VERSION: 2.00
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
+# Douman (custparasite@gmx.se)
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
@@ -28,78 +29,73 @@
from novaprinter import prettyPrinter
from helpers import retrieve_url, download_file
-import sgmllib3 as sgmllib
-import re
+from html.parser import HTMLParser
+from re import compile as re_compile
class legittorrents(object):
- url = 'http://www.legittorrents.info'
- name = 'Legit Torrents'
- supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'}
+ url = 'http://www.legittorrents.info'
+ name = 'Legit Torrents'
+ supported_categories = {'all': '0', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'}
- def download_torrent(self, info):
- print(download_file(info))
+ def download_torrent(self, info):
+ print(download_file(info))
- class SimpleSGMLParser(sgmllib.SGMLParser):
- def __init__(self, results, url, *args):
- sgmllib.SGMLParser.__init__(self)
- self.url = url
- self.td_counter = None
- self.current_item = None
- self.start_name = False
- self.results = results
+ class MyHtmlParseWithBlackJack(HTMLParser):
+ """ Parser class """
+ def __init__(self, url):
+ HTMLParser.__init__(self)
+ self.url = url
+ self.current_item = None
+ self.save_item_key = None
- def start_a(self, attr):
- params = dict(attr)
- if 'href' in params and params['href'].startswith('download.php?'):
- self.current_item['link'] = self.url + '/' + params['href'].strip()
- elif 'href' in params and params['href'].startswith('index.php?page=torrent-details'):
- self.current_item = {}
- self.td_counter = 0
- self.current_item['desc_link'] = self.url + '/' + params['href'].strip()
+ def handle_starttag(self, tag, attrs):
+ """ Parser's start tag handler """
+ if self.current_item:
+ params = dict(attrs)
+ if tag == "a":
+ link = params["href"]
+ if link.startswith("index") and "title" in params:
+ #description link
+ self.current_item["name"] = params["title"][14:]
+ self.current_item["desc_link"] = "/".join((self.url, link))
+ elif link.startswith("download"):
+ self.current_item["link"] = "/".join((self.url, link))
+ elif tag == "td":
+ if "class" in params and params["class"].startswith("#FF"):
+ self.save_item_key = "leech" if "seeds" in self.current_item else "seeds"
- def handle_data(self, data):
- if self.td_counter == 0:
- if 'name' not in self.current_item:
- self.current_item['name'] = data.strip()
- elif self.td_counter == 3:
- if 'seeds' not in self.current_item:
- self.current_item['seeds'] = ''
- self.current_item['seeds']+= data.strip()
- elif self.td_counter == 4:
- if 'leech' not in self.current_item:
- self.current_item['leech'] = ''
- self.current_item['leech']+= data.strip()
+ elif tag == "tr":
+ self.current_item = {}
+ self.current_item["size"] = ""
+ self.current_item["engine_url"] = self.url
- def start_td(self,attr):
- if isinstance(self.td_counter,int):
- self.td_counter += 1
- if self.td_counter > 5:
- self.td_counter = None
- # Display item
- if self.current_item:
- self.current_item['engine_url'] = self.url
- if not self.current_item['seeds'].isdigit():
- self.current_item['seeds'] = 0
- if not self.current_item['leech'].isdigit():
- self.current_item['leech'] = 0
- self.current_item['size'] = ''
- prettyPrinter(self.current_item)
- self.results.append('a')
+ def handle_endtag(self, tag):
+ """ Parser's end tag handler """
+ if self.current_item and tag == "tr":
+ if len(self.current_item) > 4:
+ prettyPrinter(self.current_item)
+ self.current_item = None
+
+ def handle_data(self, data):
+ """ Parser's data handler """
+ if self.save_item_key:
+ self.current_item[self.save_item_key] = data.strip()
+ self.save_item_key = None
+
+ def search(self, what, cat='all'):
+ """ Performs search """
+ query = "".join((self.url, "/index.php?page=torrents&search=", what, "&category=", self.supported_categories.get(cat, '0'), "&active=1"))
+
+ get_table = re_compile('(?s)<table\s+class="lista".*?</table>')
+ data = get_table.search(retrieve_url(query)).group(0)
+ #extract first ten pages of next results
+ next_pages = re_compile('(?m)<option value="(.*)">[0-9]+</option>')
+ next_pages = ["".join((self.url, page)) for page in next_pages.findall(data)[:10]]
- def search(self, what, cat='all'):
- ret = []
- i = 1
- while True and i<11:
- results = []
- parser = self.SimpleSGMLParser(results, self.url)
- dat = retrieve_url(self.url+'/index.php?page=torrents&search=%s&category=%s&active=1&order=3&by=2&pages=%d'%(what, self.supported_categories[cat], i))
- results_re = re.compile('(?s)<table class="lista" width="100%">.*</table>')
- for match in results_re.finditer(dat):
- res_tab = match.group(0)
- parser.feed(res_tab)
+ parser = self.MyHtmlParseWithBlackJack(self.url)
+ parser.feed(data)
parser.close()
- break
- if len(results) <= 0:
- break
- i += 1
+ for page in next_pages:
+ parser.feed(get_table.search(retrieve_url(page)).group(0))
+ parser.close()
diff --git a/src/searchengine/nova3/engines/versions.txt b/src/searchengine/nova3/engines/versions.txt
index a1170497f..25b2da2a0 100644
--- a/src/searchengine/nova3/engines/versions.txt
+++ b/src/searchengine/nova3/engines/versions.txt
@@ -2,7 +2,7 @@ btdigg: 1.25
demonoid: 1.1
extratorrent: 2.0
kickasstorrents: 1.27
-legittorrents: 1.05
+legittorrents: 2.00
mininova: 2.00
piratebay: 2.11
torrentreactor: 1.36
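
A quick way to sanity-check the new parser locally is to feed it a hand-written results row. This is only a sketch: the HTML below is invented to mirror what the handlers expect (a 14-character "View details: " title prefix on the details link and "#FF..."-classed seed/leech cells), the live markup on legittorrents.info may differ, and the snippet assumes it is run from inside src/searchengine/nova3/engines so that novaprinter and helpers resolve.

    # Sketch only: drive MyHtmlParseWithBlackJack with a fabricated table row.
    # Run from src/searchengine/nova3/engines (Python 3 / nova3 variant).
    from legittorrents import legittorrents

    SAMPLE_TABLE = """
    <table class="lista">
    <tr>
    <td><a href="index.php?page=torrent-details&amp;id=1" title="View details: Example Torrent">Example Torrent</a></td>
    <td><a href="download.php?id=1">download</a></td>
    <td class="#FF0000">12</td>
    <td class="#FF0000">3</td>
    </tr>
    </table>
    """

    engine = legittorrents()
    parser = engine.MyHtmlParseWithBlackJack(engine.url)
    parser.feed(SAMPLE_TABLE)  # handle_endtag("tr") calls prettyPrinter() for the completed row
    parser.close()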