#!/usr/bin/python
"""Hosts fetcher for py-i2phosts.

For every active ExternalSource this script downloads the hosts file through
the configured HTTP proxy, stores it as ``hosts.txt.<source name>`` (relative
to the current working directory) and runs ``py-i2phosts-injector`` on it.
Stored Last-Modified / ETag values are sent as conditional request headers so
that unchanged files are not downloaded again.
"""

import re
import os
import os.path
import sys
import errno
import datetime
import urllib2
import subprocess
import argparse

import configobj

# Date format used both for the If-Modified-Since request header and for
# parsing the Last-Modified response header.
HTTP_DATE_FORMAT = '%a, %d %b %Y %H:%M:%S GMT'

# parse command line options
parser = argparse.ArgumentParser(
    description='Hosts fetcher for py-i2phosts.',
    epilog='Report bugs to http://zzz.i2p/topics/733')
parser.add_argument('-d', '--debug', action='store_true',
                    help='write debug messages to stdout instead of log file')
parser.add_argument('-c', '--config', default='/etc/py-i2phosts/fetcher.conf',
                    dest='config_file', help='config file to use')
args = parser.parse_args()

# read config; file_error=True makes a missing config file an error
config = configobj.ConfigObj(args.config_file, file_error=True)
if 'include' in config:
    config_included = configobj.ConfigObj(config['include'])
    config.merge(config_included)

# Absolute directory of this script.  Using abspath avoids building paths
# such as '/web' when the script is started without a directory component
# in sys.argv[0].
SCRIPT_DIR = os.path.dirname(os.path.abspath(sys.argv[0]))

# django setup
DJANGO_SETTINGS_MODULE = 'settings'
if 'DJANGO_PROJECT_PATH' in config:
    DJANGO_PROJECT_PATH = config['DJANGO_PROJECT_PATH']
else:
    DJANGO_PROJECT_PATH = os.path.join(SCRIPT_DIR, 'web')
sys.path.insert(1, DJANGO_PROJECT_PATH)
os.environ['DJANGO_SETTINGS_MODULE'] = DJANGO_SETTINGS_MODULE

# These imports must come after sys.path and DJANGO_SETTINGS_MODULE are set.
from web.lib.utils import get_logger
from web.lib.utils import check_logger_options
from web.extsources.models import ExternalSource

# configure logger
if args.debug:
    log_level = 'debug'
    log_file = None
else:
    log_file, log_level = check_logger_options(config)
log = get_logger(filename=log_file, log_level=log_level)

# we want to open urls through a proxy
if 'proxyurl' in config:
    proxy_handler = urllib2.ProxyHandler({'http': config['proxyurl']})
else:
    log.critical('"proxyurl" is missing in config')
    sys.exit(1)
opener = urllib2.build_opener(proxy_handler)

all_sources = ExternalSource.objects.filter(active=True)
for source in all_sources:
    # use separate file for each host
    filename = 'hosts.txt.' + source.name

    # Build the conditional headers per request.  Setting them on the shared
    # opener would leak one source's validators into the next source's
    # request, and assigning the list twice would drop If-Modified-Since
    # whenever an ETag is also stored.
    request_headers = {}
    if source.last_modified:
        # prevent redownloading of hosts-file by passing If-Modified-Since
        request_headers['If-Modified-Since'] = \
            source.last_modified.strftime(HTTP_DATE_FORMAT)
    if source.etag:
        request_headers['If-None-Match'] = source.etag
    request = urllib2.Request(source.url, headers=request_headers)

    try:
        log.debug('fetching hosts from: %s', source.name)
        resp = opener.open(request, timeout=60)
    except urllib2.HTTPError as e:
        # HTTPError is a subclass of URLError, so it has to be handled first;
        # otherwise a 304 reply could be reported as an unreachable server.
        if e.code == 304:
            log.info('%s: not modified', source.name)
            source.last_success = datetime.datetime.now()
            source.save()
        else:
            log.warning('server %s can\'t finish the request, error code: %s',
                        source.name, e.code)
        continue
    except urllib2.URLError as e:
        log.warning('failed to reach server %s, reason: %s',
                    source.name, e.reason)
        continue

    # read data from remote and write it to local file
    response_headers = resp.headers
    try:
        content = resp.read()
    except Exception:
        log.warning('failed to read data from %s', source.name)
        continue
    finally:
        # always release the connection, whether or not the read succeeded
        resp.close()
    with open(filename, 'w') as f:
        f.write(content)

    # get last-modified info from header
    lm = response_headers.get('Last-Modified')
    if lm:
        log.debug('%s Last-Modified: %s', source.name, lm)
        try:
            source.last_modified = datetime.datetime.strptime(
                lm, HTTP_DATE_FORMAT)
        except ValueError:
            # A malformed header must not abort processing of all sources;
            # keep the previously stored value instead.
            log.warning('%s: can\'t parse Last-Modified header: %s',
                        source.name, lm)

    # get ETag
    etag = response_headers.get('ETag')
    if etag:
        log.debug('%s ETag: %s', source.name, etag)
        source.etag = etag

    # update last_success
    source.last_success = datetime.datetime.now()
    source.save()

    # form command line for invoking the injector
    log.info('adding hosts from: %s', source.name)
    sp_args = [os.path.join(SCRIPT_DIR, 'py-i2phosts-injector'),
               '-s', '-a', '-f', filename,
               '-d', 'Auto-added from ' + source.name]
    p = subprocess.Popen(sp_args, shell=False, stdin=None,
                         stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    out = p.communicate()[0]
    log.info('injector output: \n%s', out)