2010-10-20 17:12:29 +00:00
|
|
|
#!/usr/bin/python
|
|
|
|
|
|
|
|
import re
|
|
|
|
import os
|
2010-10-20 17:12:29 +00:00
|
|
|
import os.path
|
2010-10-20 17:12:29 +00:00
|
|
|
import sys
|
2010-10-31 12:30:29 +00:00
|
|
|
import datetime
|
2010-10-20 17:12:29 +00:00
|
|
|
import urllib2
|
|
|
|
import subprocess
|
2010-10-30 17:48:04 +00:00
|
|
|
import argparse
|
2010-10-30 17:52:52 +00:00
|
|
|
import configobj
|
2010-11-02 16:07:08 +00:00
|
|
|
import tempfile
|
2010-10-20 17:12:29 +00:00
|
|
|
|
2010-10-30 17:48:04 +00:00
|
|
|
# Parse command-line options.
#   -d/--debug   boolean flag (see its help text); read later as args.debug
#   -c/--config  path of the config file, stored as args.config_file
parser = argparse.ArgumentParser(
    description='Hosts fetcher for py-i2phosts.',
    epilog='Report bugs to http://zzz.i2p/topics/733')
# The original statement ended with a stray trailing comma, which turned it
# into a throw-away one-element tuple expression; the comma is removed.
parser.add_argument('-d', '--debug', action='store_true',
                    help='write debug messages to stdout instead of log file')
parser.add_argument('-c', '--config', default='/etc/py-i2phosts/fetcher.conf',
                    dest='config_file',
                    help='config file to use')
args = parser.parse_args()
|
2010-10-20 17:12:29 +00:00
|
|
|
|
2010-10-30 17:52:52 +00:00
|
|
|
# Load the configuration file named on the command line.
# The configspec declares the keys this script reads (proxyurl, log_file,
# log_level) together with their types and default values.
spec = '''
proxyurl = string(default='http://localhost:4444/')
log_file = string(default='/var/log/py-i2phosts/fetcher.log')
log_level = option('debug', 'info', 'warning', 'error', 'critical', default='info')
'''.split('\n')
config = configobj.ConfigObj(args.config_file, file_error=True, configspec=spec)

# An optional "include" key names a second config file whose contents are
# merged into the main configuration.
if 'include' in config:
    config.merge(configobj.ConfigObj(config['include']))
|
|
|
|
|
2010-10-31 12:30:29 +00:00
|
|
|
# Django setup: make the project importable and tell Django which settings
# module to load. This has to run before the project imports that follow.
DJANGO_SETTINGS_MODULE = 'settings'
if 'DJANGO_PROJECT_PATH' in config:
    DJANGO_PROJECT_PATH = config['DJANGO_PROJECT_PATH']
else:
    # Default to the "web" directory next to this script. os.path.join is
    # used instead of string concatenation: when the script is started without
    # a directory component, dirname() returns '' and '' + '/web' would point
    # at the absolute path "/web" instead of "web" relative to the script.
    DJANGO_PROJECT_PATH = os.path.join(os.path.dirname(sys.argv[0]), 'web')
sys.path.insert(1, DJANGO_PROJECT_PATH)
os.environ['DJANGO_SETTINGS_MODULE'] = DJANGO_SETTINGS_MODULE
|
2010-11-03 20:28:08 +00:00
|
|
|
from pyi2phosts.lib.utils import get_logger
|
|
|
|
from pyi2phosts.lib.utils import validate_config
|
|
|
|
from pyi2phosts.extsources.models import ExternalSource
|
2010-10-30 17:53:39 +00:00
|
|
|
|
2010-11-02 16:19:18 +00:00
|
|
|
# Check the loaded configuration with the project's validate_config helper.
validate_config(config)

# Set up logging. With --debug the level is forced to 'debug' and no log file
# name is passed to get_logger (presumably this makes it log to stdout, as the
# --debug help text says -- confirm in pyi2phosts.lib.utils); otherwise both
# values come from the configuration.
if args.debug:
    log_level, log_file = 'debug', None
else:
    log_level, log_file = config['log_level'], config['log_file']
log = get_logger(filename=log_file, log_level=log_level)
|
2010-10-30 17:52:52 +00:00
|
|
|
|
2010-10-20 17:12:29 +00:00
|
|
|
# All HTTP URLs are opened through the proxy given by the "proxyurl" setting.
proxy_handler = urllib2.ProxyHandler(dict(http=config['proxyurl']))
opener = urllib2.build_opener(proxy_handler)
|
|
|
|
|
2010-10-31 12:30:29 +00:00
|
|
|
# Fetch the hosts file of every active external source and pass it to
# py-i2phosts-injector. For each source:
#   1. send a conditional GET (If-Modified-Since / If-None-Match),
#   2. on 304 only refresh last_success; on any other failure log and skip,
#   3. remember the Last-Modified and ETag response headers on the source,
#   4. write the body to a temporary file and run the injector on it,
#   5. record last_success and save the source.

# Date layout used for the If-Modified-Since request header and for parsing
# the Last-Modified response header.
HTTP_DATE_FORMAT = '%a, %d %b %Y %H:%M:%S GMT'

all_sources = ExternalSource.objects.filter(active=True)

for source in all_sources:
    # Validators go on a per-source Request object, not on opener.addheaders.
    # The opener is shared by the whole loop, so headers stored on it stayed in
    # effect for later sources, and the second assignment replaced
    # If-Modified-Since whenever an ETag was known as well.
    # NOTE(review): opener.addheaders is no longer overwritten, so urllib2's
    # default User-agent header is now sent on every request.
    request = urllib2.Request(source.url)
    if source.last_modified:
        # prevent re-downloading of the hosts file
        request.add_header('If-Modified-Since',
                           source.last_modified.strftime(HTTP_DATE_FORMAT))
    if source.etag:
        request.add_header('If-None-Match', source.etag)

    try:
        log.debug('fetching hosts from: %s', source.name)
        resp = opener.open(request, timeout=60)
    except urllib2.HTTPError as e:
        # HTTPError is a subclass of URLError and may also expose "reason",
        # so it has to be handled first or the 304 case is never reached.
        if e.code == 304:
            log.info('%s: not modified', source.name)
            source.last_success = datetime.datetime.now()
            source.save()
        else:
            log.warning('server %s can\'t finish the request, error code: %s',
                        source.name, e.code)
        continue
    except urllib2.URLError as e:
        log.warning('failed to reach server %s, reason: %s', source.name, e.reason)
        continue

    # read data from remote
    try:
        content = resp.read()
    except Exception:
        # "except Exception" rather than a bare except, so that Ctrl-C and
        # SystemExit still stop the script.
        log.warning('failed to read data from %s', source.name)
        continue

    # get last-modified info from header
    lm = resp.headers.get('Last-Modified')
    if lm:
        log.debug('%s Last-Modified: %s', source.name, lm)
        try:
            source.last_modified = datetime.datetime.strptime(lm, HTTP_DATE_FORMAT)
        except ValueError:
            # A malformed header must not abort processing of all sources;
            # keep the previously stored value.
            log.warning('%s: cannot parse Last-Modified header: %s', source.name, lm)

    # get ETag
    etag = resp.headers.get('ETag')
    if etag:
        log.debug('%s ETag: %s', source.name, etag)
        source.etag = etag

    # save fetched content into a temporary file and feed it to the injector;
    # the finally clause removes the file even if writing or spawning fails
    fd, tmpfile = tempfile.mkstemp(text=True)
    try:
        with os.fdopen(fd, 'w') as f:
            f.write(content)

        # form command line for invoking the injector
        log.info('adding hosts from: %s', source.name)
        sp_args = ['py-i2phosts-injector', '-s', '-a', '-f', tmpfile, '-d',
                   'Auto-added from ' + source.name]
        p = subprocess.Popen(sp_args, shell=False, stdin=None,
                             stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out = p.communicate()[0]
    finally:
        os.remove(tmpfile)
    log.info('injector output: \n%s', out)

    # update last_success
    # NOTE(review): the injector's exit status (p.returncode) is not checked,
    # so the source is marked successful even if the injector failed --
    # confirm whether that is intended.
    source.last_success = datetime.datetime.now()
    source.save()
|