Browse Source

[search engine] Nova2 multiprocessing

adaptive-webui-19844
DoumanAsh 10 years ago
parent
commit
bef8106d0f
  1. 184
      src/searchengine/nova/nova2.py
  2. 173
      src/searchengine/nova3/nova2.py

184
src/searchengine/nova/nova2.py

@ -26,7 +26,7 @@
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
#VERSION: 1.32 #VERSION: 1.40
# Author: # Author:
# Fabien Devaux <fab AT gnux DOT info> # Fabien Devaux <fab AT gnux DOT info>
@ -37,16 +37,15 @@
# #
# Licence: BSD # Licence: BSD
import sys
import threading
import os
import glob
import urllib import urllib
from os import path
import fix_encoding from glob import glob
from sys import argv
from multiprocessing import Pool, cpu_count
from fix_encoding import fix_encoding
THREADED = True THREADED = True
CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books') CATEGORIES = {'all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books'}
################################################################################ ################################################################################
# Every engine should have a "search" method taking # Every engine should have a "search" method taking
@ -56,34 +55,50 @@ CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pic
# As a convention, try to list results by decreasing number of seeds or similar # As a convention, try to list results by decreasing number of seeds or similar
################################################################################ ################################################################################
def initialize_engines():
""" Import available engines
Return list of available engines
"""
supported_engines = [] supported_engines = []
engines = glob.glob(os.path.join(os.path.dirname(__file__), 'engines','*.py')) engines = glob(path.join(path.dirname(__file__), 'engines', '*.py'))
for engine in engines: for engine in engines:
e = engine.split(os.sep)[-1][:-3] engi = path.basename(engine).split('.')[0].strip()
if len(e.strip()) == 0: continue if len(engi) == 0 or engi.startswith('_'):
if e.startswith('_'): continue continue
try: try:
exec "from engines.%s import %s"%(e,e) #import engines.[engine]
supported_engines.append(e) engine_module = __import__(".".join(("engines", engi)))
#get low-level module
engine_module = getattr(engine_module, engi)
#bind class name
globals()[engi] = getattr(engine_module, engi)
supported_engines.append(engi)
except: except:
pass pass
def engineToXml(short_name): return supported_engines
xml = "<%s>\n"%short_name
exec "engine = %s()"%short_name def engines_to_xml(supported_engines):
xml += "<name>%s</name>\n"%engine.name """ Generates xml for supported engines """
xml += "<url>%s</url>\n"%engine.url tab = " " * 4
xml += "<categories>"
if hasattr(engine, 'supported_categories'): for short_name in supported_engines:
supported_categories = engine.supported_categories.keys() search_engine = globals()[short_name]()
supported_categories.remove('all')
xml += " ".join(supported_categories) supported_categories = ""
xml += "</categories>\n" if hasattr(search_engine, "supported_categories"):
xml += "</%s>\n"%short_name supported_categories = " ".join((key for key in search_engine.supported_categories.keys()
return xml if key is not "all"))
def displayCapabilities(): yield "".join((tab, "<", short_name, ">\n",
tab, tab, "<name>", search_engine.name, "</name>\n",
tab, tab, "<url>", search_engine.url, "</url>\n",
tab, tab, "<categories>", supported_categories, "</categories>\n",
tab, "</", short_name, ">\n"))
def displayCapabilities(supported_engines):
""" """
Display capabilities in XML format Display capabilities in XML format
<capabilities> <capabilities>
@ -94,70 +109,71 @@ def displayCapabilities():
</engine_short_name> </engine_short_name>
</capabilities> </capabilities>
""" """
xml = "<capabilities>" xml = "".join(("<capabilities>\n",
for short_name in supported_engines: "".join(engines_to_xml(supported_engines)),
xml += engineToXml(short_name) "</capabilities>"))
xml += "</capabilities>" print(xml)
print xml
def run_search(engine_list):
class EngineLauncher(threading.Thread): """ Run search in engine
def __init__(self, engine, what, cat='all'):
threading.Thread.__init__(self) @retval False if any exceptions occurred
self.engine = engine @retval True otherwise
self.what = what """
self.cat = cat engine, what, cat = engine_list
def run(self): try:
if hasattr(self.engine, 'supported_categories'): engine = engine()
if self.cat == 'all' or self.cat in self.engine.supported_categories.keys(): #avoid exceptions due to invalid category
self.engine.search(self.what, self.cat) if hasattr(engine, 'supported_categories'):
elif self.cat == 'all': cat = cat if cat in engine.supported_categories else "all"
self.engine.search(self.what) engine.search(what, cat)
if __name__ == '__main__':
# Make sure we enforce utf-8 encoding
fix_encoding.fix_encoding()
if len(sys.argv) < 2:
raise SystemExit('./nova2.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'%
(','.join(supported_engines)))
if len(sys.argv) == 2:
if sys.argv[1] == "--capabilities":
displayCapabilities()
sys.exit(0)
else: else:
raise SystemExit('./nova.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'% engine.search(what)
(','.join(supported_engines))) return True
except:
return False
def main(args):
fix_encoding()
supported_engines = initialize_engines()
if not args:
raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
"available engines: %s" % (','.join(supported_engines)))
engines_list = [e.lower() for e in sys.argv[1].strip().split(',')] elif args[0] == "--capabilities":
displayCapabilities(supported_engines)
return
elif len(args) < 3:
raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
"available engines: %s" % (','.join(supported_engines)))
engines_list = set(e.lower() for e in args[0].strip().split(','))
if 'all' in engines_list: if 'all' in engines_list:
engines_list = supported_engines engines_list = supported_engines
else:
#discard un-supported engines
engines_list = [engine for engine in engines_list
if engine in supported_engines]
cat = sys.argv[2].lower() if not engines_list:
#engine list is empty. Nothing to do here
return
cat = args[1].lower()
if cat not in CATEGORIES: if cat not in CATEGORIES:
raise SystemExit('Invalid category!') raise SystemExit(" - ".join(('Invalid category', cat)))
what = urllib.quote(' '.join(sys.argv[3:])) what = urllib.quote(' '.join(args[2:]))
threads = []
for engine in engines_list:
try:
if THREADED: if THREADED:
exec "l = EngineLauncher(%s(), what, cat)"%engine pool = Pool(min(len(engines_list), cpu_count()))
threads.append(l) pool.map(run_search, ([globals()[engine], what, cat] for engine in engines_list))
l.start()
else: else:
exec "e = %s()"%engine _ = [run_search([globals()[engine], what, cat]) for engine in engines_list]
if hasattr(engine, 'supported_categories'):
if cat == 'all' or cat in e.supported_categories.keys(): if __name__ == "__main__":
e.search(what, cat) main(argv[1:])
elif self.cat == 'all':
e.search(what)
engine().search(what, cat)
except:
pass
if THREADED:
for t in threads:
t.join()

173
src/searchengine/nova3/nova2.py

@ -26,7 +26,7 @@
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
#VERSION: 1.24 #VERSION: 1.40
# Author: # Author:
# Fabien Devaux <fab AT gnux DOT info> # Fabien Devaux <fab AT gnux DOT info>
@ -37,14 +37,14 @@
# #
# Licence: BSD # Licence: BSD
import sys
import threading
import os
import glob
import urllib.parse import urllib.parse
from os import path, cpu_count
from glob import glob
from sys import argv
from multiprocessing import Pool
THREADED = True THREADED = True
CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books') CATEGORIES = {'all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books'}
################################################################################ ################################################################################
# Every engine should have a "search" method taking # Every engine should have a "search" method taking
@ -54,34 +54,50 @@ CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pic
# As a convention, try to list results by decreasing number of seeds or similar # As a convention, try to list results by decreasing number of seeds or similar
################################################################################ ################################################################################
def initialize_engines():
""" Import available engines
Return list of available engines
"""
supported_engines = [] supported_engines = []
engines = glob.glob(os.path.join(os.path.dirname(__file__), 'engines','*.py')) engines = glob(path.join(path.dirname(__file__), 'engines', '*.py'))
for engine in engines: for engine in engines:
e = engine.split(os.sep)[-1][:-3] engi = path.basename(engine).split('.')[0].strip()
if len(e.strip()) == 0: continue if len(engi) == 0 or engi.startswith('_'):
if e.startswith('_'): continue continue
try: try:
exec("from engines.%s import %s"%(e,e)) #import engines.[engine]
supported_engines.append(e) engine_module = __import__(".".join(("engines", engi)))
#get low-level module
engine_module = getattr(engine_module, engi)
#bind class name
globals()[engi] = getattr(engine_module, engi)
supported_engines.append(engi)
except: except:
pass pass
def engineToXml(short_name): return supported_engines
xml = "<%s>\n"%short_name
exec("search_engine = %s()"%short_name, globals()) def engines_to_xml(supported_engines):
xml += "<name>%s</name>\n"%search_engine.name """ Generates xml for supported engines """
xml += "<url>%s</url>\n"%search_engine.url tab = " " * 4
xml += "<categories>"
if hasattr(search_engine, 'supported_categories'): for short_name in supported_engines:
supported_categories = list(search_engine.supported_categories.keys()) search_engine = globals()[short_name]()
supported_categories.remove('all')
xml += " ".join(supported_categories) supported_categories = ""
xml += "</categories>\n" if hasattr(search_engine, "supported_categories"):
xml += "</%s>\n"%short_name supported_categories = " ".join((key for key in search_engine.supported_categories.keys()
return xml if key is not "all"))
def displayCapabilities(): yield "".join((tab, "<", short_name, ">\n",
tab, tab, "<name>", search_engine.name, "</name>\n",
tab, tab, "<url>", search_engine.url, "</url>\n",
tab, tab, "<categories>", supported_categories, "</categories>\n",
tab, "</", short_name, ">\n"))
def displayCapabilities(supported_engines):
""" """
Display capabilities in XML format Display capabilities in XML format
<capabilities> <capabilities>
@ -92,67 +108,70 @@ def displayCapabilities():
</engine_short_name> </engine_short_name>
</capabilities> </capabilities>
""" """
xml = "<capabilities>" xml = "".join(("<capabilities>\n",
for short_name in supported_engines: "".join(engines_to_xml(supported_engines)),
xml += engineToXml(short_name) "</capabilities>"))
xml += "</capabilities>"
print(xml) print(xml)
class EngineLauncher(threading.Thread): def run_search(engine_list):
def __init__(self, engine, what, cat='all'): """ Run search in engine
threading.Thread.__init__(self)
self.engine = engine @retval False if any exceptions occurred
self.what = what @retval True otherwise
self.cat = cat """
def run(self): engine, what, cat = engine_list
if hasattr(self.engine, 'supported_categories'): try:
if self.cat == 'all' or self.cat in list(self.engine.supported_categories.keys()): engine = engine()
self.engine.search(self.what, self.cat) #avoid exceptions due to invalid category
elif self.cat == 'all': if hasattr(engine, 'supported_categories'):
self.engine.search(self.what) cat = cat if cat in engine.supported_categories else "all"
engine.search(what, cat)
if __name__ == '__main__':
if len(sys.argv) < 2:
raise SystemExit('./nova2.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'%
(','.join(supported_engines)))
if len(sys.argv) == 2:
if sys.argv[1] == "--capabilities":
displayCapabilities()
sys.exit(0)
else: else:
raise SystemExit('./nova.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'% engine.search(what)
(','.join(supported_engines))) return True
except:
return False
def main(args):
supported_engines = initialize_engines()
if not args:
raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
"available engines: %s" % (','.join(supported_engines)))
elif args[0] == "--capabilities":
displayCapabilities(supported_engines)
return
engines_list = [e.lower() for e in sys.argv[1].strip().split(',')] elif len(args) < 3:
raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
"available engines: %s" % (','.join(supported_engines)))
engines_list = set(e.lower() for e in args[0].strip().split(','))
if 'all' in engines_list: if 'all' in engines_list:
engines_list = supported_engines engines_list = supported_engines
else:
#discard un-supported engines
engines_list = [engine for engine in engines_list
if engine in supported_engines]
if not engines_list:
#engine list is empty. Nothing to do here
return
cat = sys.argv[2].lower() cat = args[1].lower()
if cat not in CATEGORIES: if cat not in CATEGORIES:
raise SystemExit('Invalid category!') raise SystemExit(" - ".join(('Invalid category', cat)))
what = urllib.parse.quote(' '.join(sys.argv[3:])) what = urllib.parse.quote(' '.join(args[2:]))
threads = []
for engine in engines_list:
try:
if THREADED: if THREADED:
exec("l = EngineLauncher(%s(), what, cat)"%engine) with Pool(min(len(engines_list), cpu_count())) as pool:
threads.append(l) pool.map(run_search, ([globals()[engine], what, cat] for engine in engines_list))
l.start()
else: else:
exec("e = %s()"%engine) _ = [run_search([globals()[engine], what, cat]) for engine in engines_list]
if hasattr(engine, 'supported_categories'):
if cat == 'all' or cat in list(e.supported_categories.keys()): if __name__ == "__main__":
e.search(what, cat) main(argv[1:])
elif self.cat == 'all':
e.search(what)
engine().search(what, cat)
except:
pass
if THREADED:
for t in threads:
t.join()

Loading…
Cancel
Save