From a2e9210665babdde44c9c0d7b1bc46ad9652e59d Mon Sep 17 00:00:00 2001 From: Christophe Dumez Date: Thu, 26 Mar 2009 16:49:44 +0000 Subject: [PATCH] - Fixed unicode in mininova and btjunkie search engines --- src/search.qrc | 3 +- src/searchEngine.cpp | 8 ++++ src/search_engine/engines/btjunkie.py | 16 +++---- src/search_engine/engines/mininova.py | 11 +++-- src/search_engine/engines/versions.txt | 4 +- src/search_engine/helpers.py | 59 ++++++++++++++++++++++++++ src/search_engine/nova2.py | 8 ++-- src/search_engine/novaprinter.py | 8 ++-- 8 files changed, 95 insertions(+), 22 deletions(-) create mode 100644 src/search_engine/helpers.py diff --git a/src/search.qrc b/src/search.qrc index 074110bca..735122372 100644 --- a/src/search.qrc +++ b/src/search.qrc @@ -2,6 +2,7 @@ search_engine/nova2.py search_engine/novaprinter.py + search_engine/helpers.py search_engine/engines/btjunkie.png search_engine/engines/btjunkie.py search_engine/engines/isohunt.png @@ -13,4 +14,4 @@ search_engine/engines/torrentreactor.png search_engine/engines/torrentreactor.py - \ No newline at end of file + diff --git a/src/searchEngine.cpp b/src/searchEngine.cpp index ca0bf7e57..fb6d8c3a4 100644 --- a/src/searchEngine.cpp +++ b/src/searchEngine.cpp @@ -303,6 +303,14 @@ void SearchEngine::updateNova() { } QFile::copy(":/search_engine/novaprinter.py", filePath); } + QFile(misc::qBittorrentPath()+"search_engine"+QDir::separator()+"helpers.py").setPermissions(perm); + filePath = misc::qBittorrentPath()+"search_engine"+QDir::separator()+"helpers.py"; + if(misc::getPluginVersion(":/search_engine/helpers.py") > misc::getPluginVersion(filePath)) { + if(QFile::exists(filePath)){ + QFile::remove(filePath); + } + QFile::copy(":/search_engine/helpers.py", filePath); + } QString destDir = misc::qBittorrentPath()+"search_engine"+QDir::separator()+"engines"+QDir::separator(); QDir shipped_subDir(":/search_engine/engines/"); QStringList files = shipped_subDir.entryList(); diff --git a/src/search_engine/engines/btjunkie.py b/src/search_engine/engines/btjunkie.py index 4febeddcb..57d663509 100644 --- a/src/search_engine/engines/btjunkie.py +++ b/src/search_engine/engines/btjunkie.py @@ -1,4 +1,4 @@ -#VERSION: 2.0 +#VERSION: 2.1 #AUTHORS: Christophe Dumez (chris@qbittorrent.org) # Redistribution and use in source and binary forms, with or without @@ -27,8 +27,8 @@ from novaprinter import prettyPrinter +from helpers import retrieve_url import sgmllib -import urllib import re class btjunkie(object): @@ -72,11 +72,6 @@ class btjunkie(object): if not self.current_item.has_key('leech'): self.current_item['leech'] = '' self.current_item['leech']+= data.strip() - - def start_font(self, attr): - if isinstance(self.th_counter,int): - if self.th_counter == 0: - self.current_item['name'] += ' ' def start_th(self,attr): if isinstance(self.th_counter,int): @@ -99,7 +94,12 @@ class btjunkie(object): while True and i<11: results = [] parser = self.SimpleSGMLParser(results, self.url) - dat = urllib.urlopen(self.url+'/search?q=%s&o=52&p=%d'%(what,i)).read() + dat = retrieve_url(self.url+'/search?q=%s&o=52&p=%d'%(what,i)) + # Remove tags from page + p = re.compile( '<[/]?font.*?>') + dat = p.sub('', dat) + #print dat + #return results_re = re.compile('(?s)class="tab_results">.*') for match in results_re.finditer(dat): res_tab = match.group(0) diff --git a/src/search_engine/engines/mininova.py b/src/search_engine/engines/mininova.py index cbd07ed0f..400b155cd 100644 --- a/src/search_engine/engines/mininova.py +++ b/src/search_engine/engines/mininova.py @@ -1,4 +1,4 @@ -#VERSION: 1.13 +#VERSION: 1.2 #AUTHORS: Fabien Devaux (fab@gnux.info) # Redistribution and use in source and binary forms, with or without @@ -26,7 +26,7 @@ # POSSIBILITY OF SUCH DAMAGE. from novaprinter import prettyPrinter -import urllib +from helpers import retrieve_url from xml.dom import minidom import re @@ -64,12 +64,15 @@ class mininova(object): return ''.join([ get_text(n) for n in txt.childNodes]) page = 1 while True and page<11: + file = open('/home/chris/mytest.txt', 'w') + file.write(self.url+'/search/%s/seeds/%d'%(what, page)) + file.close() res = 0 - dat = urllib.urlopen(self.url+'/search/%s/seeds/%d'%(what, page)).read().decode('utf-8', 'replace') + dat = retrieve_url(self.url+'/search/%s/seeds/%d'%(what, page)) dat = re.sub("