Browse Source

[search engine] Nova2 multiprocessing

adaptive-webui-19844
DoumanAsh 10 years ago
parent
commit
bef8106d0f
  1. 242
      src/searchengine/nova/nova2.py
  2. 235
      src/searchengine/nova3/nova2.py

242
src/searchengine/nova/nova2.py

@ -26,7 +26,7 @@
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
#VERSION: 1.32 #VERSION: 1.40
# Author: # Author:
# Fabien Devaux <fab AT gnux DOT info> # Fabien Devaux <fab AT gnux DOT info>
@ -37,16 +37,15 @@
# #
# Licence: BSD # Licence: BSD
import sys
import threading
import os
import glob
import urllib import urllib
from os import path
import fix_encoding from glob import glob
from sys import argv
from multiprocessing import Pool, cpu_count
from fix_encoding import fix_encoding
THREADED = True THREADED = True
CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books') CATEGORIES = {'all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books'}
################################################################################ ################################################################################
# Every engine should have a "search" method taking # Every engine should have a "search" method taking
@ -56,108 +55,125 @@ CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pic
# As a convention, try to list results by decreasing number of seeds or similar # As a convention, try to list results by decreasing number of seeds or similar
################################################################################ ################################################################################
supported_engines = [] def initialize_engines():
""" Import available engines
engines = glob.glob(os.path.join(os.path.dirname(__file__), 'engines','*.py'))
for engine in engines: Return list of available engines
e = engine.split(os.sep)[-1][:-3] """
if len(e.strip()) == 0: continue supported_engines = []
if e.startswith('_'): continue
try: engines = glob(path.join(path.dirname(__file__), 'engines', '*.py'))
exec "from engines.%s import %s"%(e,e) for engine in engines:
supported_engines.append(e) engi = path.basename(engine).split('.')[0].strip()
except: if len(engi) == 0 or engi.startswith('_'):
pass continue
try:
def engineToXml(short_name): #import engines.[engine]
xml = "<%s>\n"%short_name engine_module = __import__(".".join(("engines", engi)))
exec "engine = %s()"%short_name #get low-level module
xml += "<name>%s</name>\n"%engine.name engine_module = getattr(engine_module, engi)
xml += "<url>%s</url>\n"%engine.url #bind class name
xml += "<categories>" globals()[engi] = getattr(engine_module, engi)
if hasattr(engine, 'supported_categories'): supported_engines.append(engi)
supported_categories = engine.supported_categories.keys() except:
supported_categories.remove('all') pass
xml += " ".join(supported_categories)
xml += "</categories>\n" return supported_engines
xml += "</%s>\n"%short_name
return xml def engines_to_xml(supported_engines):
""" Generates xml for supported engines """
def displayCapabilities(): tab = " " * 4
"""
Display capabilities in XML format for short_name in supported_engines:
<capabilities> search_engine = globals()[short_name]()
<engine_short_name>
<name>long name</name> supported_categories = ""
<url>http://example.com</url> if hasattr(search_engine, "supported_categories"):
<categories>movies music games</categories> supported_categories = " ".join((key for key in search_engine.supported_categories.keys()
</engine_short_name> if key is not "all"))
</capabilities>
""" yield "".join((tab, "<", short_name, ">\n",
xml = "<capabilities>" tab, tab, "<name>", search_engine.name, "</name>\n",
for short_name in supported_engines: tab, tab, "<url>", search_engine.url, "</url>\n",
xml += engineToXml(short_name) tab, tab, "<categories>", supported_categories, "</categories>\n",
xml += "</capabilities>" tab, "</", short_name, ">\n"))
print xml
def displayCapabilities(supported_engines):
class EngineLauncher(threading.Thread): """
def __init__(self, engine, what, cat='all'): Display capabilities in XML format
threading.Thread.__init__(self) <capabilities>
self.engine = engine <engine_short_name>
self.what = what <name>long name</name>
self.cat = cat <url>http://example.com</url>
def run(self): <categories>movies music games</categories>
if hasattr(self.engine, 'supported_categories'): </engine_short_name>
if self.cat == 'all' or self.cat in self.engine.supported_categories.keys(): </capabilities>
self.engine.search(self.what, self.cat) """
elif self.cat == 'all': xml = "".join(("<capabilities>\n",
self.engine.search(self.what) "".join(engines_to_xml(supported_engines)),
"</capabilities>"))
if __name__ == '__main__': print(xml)
# Make sure we enforce utf-8 encoding
fix_encoding.fix_encoding() def run_search(engine_list):
""" Run search in engine
if len(sys.argv) < 2:
raise SystemExit('./nova2.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'% @retval False if any exceptions occured
(','.join(supported_engines))) @retval True otherwise
"""
if len(sys.argv) == 2: engine, what, cat = engine_list
if sys.argv[1] == "--capabilities": try:
displayCapabilities() engine = engine()
sys.exit(0) #avoid exceptions due to invalid category
else: if hasattr(engine, 'supported_categories'):
raise SystemExit('./nova.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'% cat = cat if cat in engine.supported_categories else "all"
(','.join(supported_engines))) engine.search(what, cat)
else:
engines_list = [e.lower() for e in sys.argv[1].strip().split(',')] engine.search(what)
return True
if 'all' in engines_list: except:
engines_list = supported_engines return False
cat = sys.argv[2].lower() def main(args):
fix_encoding()
if cat not in CATEGORIES: supported_engines = initialize_engines()
raise SystemExit('Invalid category!')
if not args:
what = urllib.quote(' '.join(sys.argv[3:])) raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
"available engines: %s" % (','.join(supported_engines)))
threads = []
for engine in engines_list: elif args[0] == "--capabilities":
try: displayCapabilities(supported_engines)
if THREADED: return
exec "l = EngineLauncher(%s(), what, cat)"%engine
threads.append(l) elif len(args) < 3:
l.start() raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
else: "available engines: %s" % (','.join(supported_engines)))
exec "e = %s()"%engine
if hasattr(engine, 'supported_categories'): engines_list = set(e.lower() for e in args[0].strip().split(','))
if cat == 'all' or cat in e.supported_categories.keys():
e.search(what, cat) if 'all' in engines_list:
elif self.cat == 'all': engines_list = supported_engines
e.search(what) else:
engine().search(what, cat) #discard un-supported engines
except: engines_list = [engine for engine in engines_list
pass if engine in supported_engines]
if THREADED:
for t in threads: if not engines_list:
t.join() #engine list is empty. Nothing to do here
return
cat = args[1].lower()
if cat not in CATEGORIES:
raise SystemExit(" - ".join(('Invalid category', cat)))
what = urllib.quote(' '.join(args[2:]))
if THREADED:
pool = Pool(min(len(engines_list), cpu_count()))
pool.map(run_search, ([globals()[engine], what, cat] for engine in engines_list))
else:
_ = [run_search([globals()[engine], what, cat]) for engine in engines_list]
if __name__ == "__main__":
main(argv[1:])

235
src/searchengine/nova3/nova2.py

@ -26,7 +26,7 @@
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
#VERSION: 1.24 #VERSION: 1.40
# Author: # Author:
# Fabien Devaux <fab AT gnux DOT info> # Fabien Devaux <fab AT gnux DOT info>
@ -37,14 +37,14 @@
# #
# Licence: BSD # Licence: BSD
import sys
import threading
import os
import glob
import urllib.parse import urllib.parse
from os import path, cpu_count
from glob import glob
from sys import argv
from multiprocessing import Pool
THREADED = True THREADED = True
CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books') CATEGORIES = {'all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books'}
################################################################################ ################################################################################
# Every engine should have a "search" method taking # Every engine should have a "search" method taking
@ -54,105 +54,124 @@ CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pic
# As a convention, try to list results by decreasing number of seeds or similar # As a convention, try to list results by decreasing number of seeds or similar
################################################################################ ################################################################################
supported_engines = [] def initialize_engines():
""" Import available engines
engines = glob.glob(os.path.join(os.path.dirname(__file__), 'engines','*.py'))
for engine in engines: Return list of available engines
e = engine.split(os.sep)[-1][:-3] """
if len(e.strip()) == 0: continue supported_engines = []
if e.startswith('_'): continue
try: engines = glob(path.join(path.dirname(__file__), 'engines', '*.py'))
exec("from engines.%s import %s"%(e,e)) for engine in engines:
supported_engines.append(e) engi = path.basename(engine).split('.')[0].strip()
except: if len(engi) == 0 or engi.startswith('_'):
pass continue
try:
def engineToXml(short_name): #import engines.[engine]
xml = "<%s>\n"%short_name engine_module = __import__(".".join(("engines", engi)))
exec("search_engine = %s()"%short_name, globals()) #get low-level module
xml += "<name>%s</name>\n"%search_engine.name engine_module = getattr(engine_module, engi)
xml += "<url>%s</url>\n"%search_engine.url #bind class name
xml += "<categories>" globals()[engi] = getattr(engine_module, engi)
if hasattr(search_engine, 'supported_categories'): supported_engines.append(engi)
supported_categories = list(search_engine.supported_categories.keys()) except:
supported_categories.remove('all') pass
xml += " ".join(supported_categories)
xml += "</categories>\n" return supported_engines
xml += "</%s>\n"%short_name
return xml def engines_to_xml(supported_engines):
""" Generates xml for supported engines """
def displayCapabilities(): tab = " " * 4
"""
Display capabilities in XML format for short_name in supported_engines:
<capabilities> search_engine = globals()[short_name]()
<engine_short_name>
<name>long name</name> supported_categories = ""
<url>http://example.com</url> if hasattr(search_engine, "supported_categories"):
<categories>movies music games</categories> supported_categories = " ".join((key for key in search_engine.supported_categories.keys()
</engine_short_name> if key is not "all"))
</capabilities>
""" yield "".join((tab, "<", short_name, ">\n",
xml = "<capabilities>" tab, tab, "<name>", search_engine.name, "</name>\n",
for short_name in supported_engines: tab, tab, "<url>", search_engine.url, "</url>\n",
xml += engineToXml(short_name) tab, tab, "<categories>", supported_categories, "</categories>\n",
xml += "</capabilities>" tab, "</", short_name, ">\n"))
print(xml)
def displayCapabilities(supported_engines):
class EngineLauncher(threading.Thread): """
def __init__(self, engine, what, cat='all'): Display capabilities in XML format
threading.Thread.__init__(self) <capabilities>
self.engine = engine <engine_short_name>
self.what = what <name>long name</name>
self.cat = cat <url>http://example.com</url>
def run(self): <categories>movies music games</categories>
if hasattr(self.engine, 'supported_categories'): </engine_short_name>
if self.cat == 'all' or self.cat in list(self.engine.supported_categories.keys()): </capabilities>
self.engine.search(self.what, self.cat) """
elif self.cat == 'all': xml = "".join(("<capabilities>\n",
self.engine.search(self.what) "".join(engines_to_xml(supported_engines)),
"</capabilities>"))
if __name__ == '__main__': print(xml)
if len(sys.argv) < 2:
raise SystemExit('./nova2.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'% def run_search(engine_list):
(','.join(supported_engines))) """ Run search in engine
if len(sys.argv) == 2: @retval False if any exceptions occured
if sys.argv[1] == "--capabilities": @retval True otherwise
displayCapabilities() """
sys.exit(0) engine, what, cat = engine_list
else: try:
raise SystemExit('./nova.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'% engine = engine()
(','.join(supported_engines))) #avoid exceptions due to invalid category
if hasattr(engine, 'supported_categories'):
engines_list = [e.lower() for e in sys.argv[1].strip().split(',')] cat = cat if cat in engine.supported_categories else "all"
engine.search(what, cat)
if 'all' in engines_list: else:
engines_list = supported_engines engine.search(what)
return True
cat = sys.argv[2].lower() except:
return False
if cat not in CATEGORIES:
raise SystemExit('Invalid category!') def main(args):
supported_engines = initialize_engines()
what = urllib.parse.quote(' '.join(sys.argv[3:]))
if not args:
threads = [] raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
for engine in engines_list: "available engines: %s" % (','.join(supported_engines)))
try:
if THREADED: elif args[0] == "--capabilities":
exec("l = EngineLauncher(%s(), what, cat)"%engine) displayCapabilities(supported_engines)
threads.append(l) return
l.start()
else: elif len(args) < 3:
exec("e = %s()"%engine) raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
if hasattr(engine, 'supported_categories'): "available engines: %s" % (','.join(supported_engines)))
if cat == 'all' or cat in list(e.supported_categories.keys()):
e.search(what, cat) engines_list = set(e.lower() for e in args[0].strip().split(','))
elif self.cat == 'all':
e.search(what) if 'all' in engines_list:
engine().search(what, cat) engines_list = supported_engines
except: else:
pass #discard un-supported engines
if THREADED: engines_list = [engine for engine in engines_list
for t in threads: if engine in supported_engines]
t.join()
if not engines_list:
#engine list is empty. Nothing to do here
return
cat = args[1].lower()
if cat not in CATEGORIES:
raise SystemExit(" - ".join(('Invalid category', cat)))
what = urllib.parse.quote(' '.join(args[2:]))
if THREADED:
with Pool(min(len(engines_list), cpu_count())) as pool:
pool.map(run_search, ([globals()[engine], what, cat] for engine in engines_list))
else:
_ = [run_search([globals()[engine], what, cat]) for engine in engines_list]
if __name__ == "__main__":
main(argv[1:])

Loading…
Cancel
Save