#!/usr/bin/python
"""Hosts fetcher for py-i2phosts.

For every active ExternalSource this script downloads the source URL through
the configured HTTP proxy (as a conditional GET when a Last-Modified and/or
ETag value is stored for the source), writes the response body to a temporary
file, runs ``py-i2phosts-injector`` on that file and finally stores the new
validators and the ``last_success`` timestamp on the source.
"""

import re
import os
import os.path
import sys
import errno
import datetime
import urllib2
import subprocess
import argparse
import configobj
import tempfile
import socket

# Date layout used both for the If-Modified-Since request header and for
# parsing the Last-Modified response header.
HTTP_DATE_FORMAT = '%a, %d %b %Y %H:%M:%S GMT'

# parse command line options
parser = argparse.ArgumentParser(
    description='Hosts fetcher for py-i2phosts.',
    epilog='Report bugs to http://zzz.i2p/topics/733')
parser.add_argument('-d', '--debug', action='store_true',
                    help='write debug messages to stdout instead of log file')
parser.add_argument('-c', '--config', default='/etc/py-i2phosts/fetcher.conf',
                    dest='config_file', help='config file to use')
args = parser.parse_args()

# read config
spec = '''
    proxyurl = string(default='http://localhost:4444/')
    log_file = string(default='/var/log/py-i2phosts/fetcher.log')
    log_level = option('debug', 'info', 'warning', 'error', 'critical', default='info')
    '''
spec = spec.split('\n')
config = configobj.ConfigObj(args.config_file, configspec=spec, file_error=True)
if 'include' in config:
    # settings from the included file are merged over the main config
    config_included = configobj.ConfigObj(config['include'])
    config.merge(config_included)

# django setup
DJANGO_SETTINGS_MODULE = 'settings'
if 'DJANGO_PROJECT_PATH' in config:
    DJANGO_PROJECT_PATH = config['DJANGO_PROJECT_PATH']
else:
    # os.path.join keeps the path relative when the script is started without
    # a directory component (plain concatenation would yield '/web').
    DJANGO_PROJECT_PATH = os.path.join(os.path.dirname(sys.argv[0]), 'web')
sys.path.insert(1, DJANGO_PROJECT_PATH)
os.environ['DJANGO_SETTINGS_MODULE'] = DJANGO_SETTINGS_MODULE

# These imports must stay below the sys.path / environment setup above.
from pyi2phosts.lib.utils import get_logger
from pyi2phosts.lib.utils import validate_config
from pyi2phosts.extsources.models import ExternalSource

# validate config
validate_config(config)

# configure logger
if args.debug:
    log_level = 'debug'
    log_file = None
else:
    log_level = config['log_level']
    log_file = config['log_file']
log = get_logger(filename=log_file, log_level=log_level)

# we want open urls through proxy
proxy_handler = urllib2.ProxyHandler({'http': config['proxyurl']})
opener = urllib2.build_opener(proxy_handler)

all_sources = ExternalSource.objects.filter(active=True)
for source in all_sources:
    log.debug('%s: starting work', source.name)

    # Conditional-GET headers are attached to this request only, so that
    # (a) If-Modified-Since and If-None-Match can both be sent, and
    # (b) validators of one source never leak into the request for the next.
    # The opener's own default headers are left untouched.
    request = urllib2.Request(source.url)
    if source.last_modified:
        last_modified = source.last_modified.strftime(HTTP_DATE_FORMAT)
        # prevent redownloading of hosts-file by passing If-Modified-Since http header
        request.add_header('If-Modified-Since', last_modified)
        log.debug('%s: appending If-Modified-Since: %s', source.name, last_modified)
    if source.etag:
        request.add_header('If-None-Match', source.etag)
        log.debug('%s: appending If-None-Match: %s', source.name, source.etag)

    try:
        log.debug('%s: sending GET...', source.name)
        resp = opener.open(request, timeout=60)
    except socket.timeout:
        log.warning('%s: socket timeout', source.name)
        continue
    except urllib2.HTTPError as e:
        if e.code == 304:
            # unchanged on the remote side still counts as a successful fetch
            log.info('%s: not modified', source.name)
            source.last_success = datetime.datetime.utcnow()
            source.save()
        else:
            log.warning('%s: can\'t finish the request, error code: %s, reason: %s',
                        source.name, e.code, e.reason)
        continue
    except urllib2.URLError as e:
        log.warning('%s: failed to reach server, reason: %s', source.name, e.reason)
        continue

    # pick up the validators before the response object is closed
    lm = resp.headers.get('Last-Modified')
    etag = resp.headers.get('ETag')

    # read data from remote
    try:
        log.debug('%s: reading response data', source.name)
        content = resp.read()
    except Exception as e:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate
        log.warning('%s: failed to read data: %s', source.name, e)
        continue
    finally:
        resp.close()

    # get last-modified info from header
    if lm:
        log.debug('%s: Last-Modified: %s', source.name, lm)
        try:
            source.last_modified = datetime.datetime.strptime(lm, HTTP_DATE_FORMAT)
        except ValueError:
            # a malformed header must not abort processing of all sources
            log.warning('%s: ignoring unparsable Last-Modified header: %s',
                        source.name, lm)
    # get ETag
    if etag:
        log.debug('%s: ETag: %s', source.name, etag)
        source.etag = etag

    # save fetched content into temporary file
    fd, tmpfile = tempfile.mkstemp(text=True)
    try:
        with os.fdopen(fd, 'w') as f:
            f.write(content)

        # form command-line for invoke injector
        log.info('%s: adding hosts...', source.name)
        sp_args = ['py-i2phosts-injector', '-s', '-f', tmpfile, '-d',
                   'Auto-added from ' + source.name]
        try:
            p = subprocess.Popen(sp_args, shell=False, stdin=None,
                                 stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        except OSError as e:
            log.error('failed to exec py-i2phosts-injector: %s', e)
            if e.errno == errno.ENOENT:
                log.error('check your PATH environment variable')
            sys.exit(1)
        out = p.communicate()[0]
    finally:
        # runs on success, on errors and on sys.exit(), so the temporary
        # file is never left behind
        os.remove(tmpfile)
    log.info('%s: injector output: \n%s', source.name, out)

    # update last_success
    source.last_success = datetime.datetime.utcnow()
    log.debug('%s: updating last_success timestamp: %s', source.name, source.last_success)
    source.save()