1
0
mirror of https://github.com/r4sas/py-i2phosts synced 2025-01-18 18:50:11 +00:00
py-i2phosts/py-i2phosts-fetcher

126 lines
4.2 KiB
Plaintext
Raw Normal View History

#!/usr/bin/python
import re
import os
import os.path
import sys
import datetime
import urllib2
import subprocess
import argparse
import configobj
import tempfile
# Parse command-line options.
#   -d/--debug   sets args.debug (used later to pick the logger settings)
#   -c/--config  path of the config file, stored as args.config_file
parser = argparse.ArgumentParser(
    description='Hosts fetcher for py-i2phosts.',
    epilog='Report bugs to http://zzz.i2p/topics/733')
parser.add_argument(
    '-d', '--debug', action='store_true',
    help='write debug messages to stdout instead of log file')
parser.add_argument(
    '-c', '--config', default='/etc/py-i2phosts/fetcher.conf',
    dest='config_file',
    help='config file to use')
args = parser.parse_args()
# Load the fetcher configuration.
# `spec` is the configspec (one entry per list item) describing the options
# this script reads itself, together with their defaults.
spec = '''
proxyurl = string(default='http://localhost:4444/')
log_file = string(default='/var/log/py-i2phosts/fetcher.log')
log_level = option('debug', 'info', 'warning', 'error', 'critical', default='info')
'''.split('\n')
config = configobj.ConfigObj(
    args.config_file,
    configspec=spec,
    file_error=True)
# An optional `include` entry names a second config file; its contents are
# merged into the main configuration.
if 'include' in config:
    config.merge(configobj.ConfigObj(config['include']))
# Django setup: put the project directory on sys.path and select the settings
# module through the environment before the project modules are imported.
DJANGO_SETTINGS_MODULE = 'settings'
if 'DJANGO_PROJECT_PATH' in config:
    DJANGO_PROJECT_PATH = config['DJANGO_PROJECT_PATH']
else:
    # Default to the "web" directory next to this script.  os.path.join keeps
    # the result relative when the script was started without a directory
    # part (dirname == ''); plain '+' concatenation yielded the absolute
    # path '/web' in that case.
    DJANGO_PROJECT_PATH = os.path.join(os.path.dirname(sys.argv[0]), 'web')
sys.path.insert(1, DJANGO_PROJECT_PATH)
os.environ['DJANGO_SETTINGS_MODULE'] = DJANGO_SETTINGS_MODULE

# Project imports are placed after the path/environment setup above
# (presumably pyi2phosts is resolved through DJANGO_PROJECT_PATH -- keep
# this ordering).
from pyi2phosts.lib.utils import get_logger
from pyi2phosts.lib.utils import validate_config
from pyi2phosts.extsources.models import ExternalSource
# Validate the loaded configuration with the project helper.
validate_config(config)

# Configure the logger.  With --debug the level is forced to 'debug' and no
# log file name is passed (per the option's help text, messages then go to
# stdout); otherwise level and file come from the config.
if args.debug:
    log_level = 'debug'
    log_file = None
else:
    log_level = config['log_level']
    log_file = config['log_file']
log = get_logger(filename=log_file, log_level=log_level)
# All URLs are opened through the HTTP proxy configured as `proxyurl`.
proxy_handler = urllib2.ProxyHandler({'http': config['proxyurl']})
opener = urllib2.build_opener(proxy_handler)

# Date layout used both for the If-Modified-Since request header and for
# parsing the Last-Modified response header.
HTTP_DATE_FORMAT = '%a, %d %b %Y %H:%M:%S GMT'

# Main loop: for every active external source, download its hosts file
# (conditionally, when validators from a previous run are stored), hand the
# content to py-i2phosts-injector and record the outcome on the source.
all_sources = ExternalSource.objects.filter(active=True)
for source in all_sources:
    log.debug('%s: starting work', source.name)

    # Conditional-request headers are built per source and attached to this
    # request only.  Assigning them to opener.addheaders made the second
    # header replace the first and let headers of one source leak into the
    # requests for the following ones.
    cond_headers = {}
    if source.last_modified:
        last_modified = source.last_modified.strftime(HTTP_DATE_FORMAT)
        # prevent redownloading of hosts-file by passing If-Modified-Since
        cond_headers['If-Modified-Since'] = last_modified
        log.debug('%s: appending If-Modified-Since: %s', source.name, last_modified)
    if source.etag:
        cond_headers['If-None-Match'] = source.etag
        log.debug('%s: appending If-None-Match: %s', source.name, source.etag)

    try:
        log.debug('%s: sending GET...', source.name)
        request = urllib2.Request(source.url, headers=cond_headers)
        resp = opener.open(request, timeout=60)
    except urllib2.HTTPError as e:
        # HTTPError is a URLError subclass, so it has to be handled first;
        # otherwise the generic branch hides the 304 case.
        if e.code == 304:
            log.info('%s: not modified', source.name)
            source.last_success = datetime.datetime.utcnow()
            source.save()
        else:
            # The format string has three placeholders; the source URL is
            # supplied for the middle one, which previously had no argument.
            log.warning('%s: %s can\'t finish the request, error code: %s',
                        source.name, source.url, e.code)
        continue
    except urllib2.URLError as e:
        log.warning('%s: failed to reach server, reason: %s', source.name, e.reason)
        continue

    # Keep a reference to the headers, then read the body and always release
    # the connection afterwards.
    resp_headers = resp.headers
    try:
        log.debug('%s: reading response data', source.name)
        content = resp.read()
    except Exception:
        # Best effort: skip this source, but let KeyboardInterrupt and
        # SystemExit propagate.
        log.warning('%s: failed to read data', source.name)
        continue
    finally:
        resp.close()

    # get last-modified info from header
    lm = resp_headers.get('Last-Modified')
    if lm:
        log.debug('%s: Last-Modified: %s', source.name, lm)
        try:
            source.last_modified = datetime.datetime.strptime(lm, HTTP_DATE_FORMAT)
        except ValueError:
            # A malformed header from one server must not abort the run for
            # the remaining sources; the stored value is left untouched.
            log.warning('%s: ignoring unparsable Last-Modified header: %s',
                        source.name, lm)
    # get ETag
    etag = resp_headers.get('ETag')
    if etag:
        log.debug('%s: ETag: %s', source.name, etag)
        source.etag = etag

    # Save the fetched content into a temporary file and pass it to the
    # injector; the file is removed even if writing or spawning fails.
    fd, tmpfile = tempfile.mkstemp(text=True)
    try:
        with os.fdopen(fd, 'w') as f:
            f.write(content)
        # form command line for invoking the injector
        log.info('%s: adding hosts...', source.name)
        sp_args = ['py-i2phosts-injector', '-s', '-f', tmpfile, '-d',
                   'Auto-added from ' + source.name]
        p = subprocess.Popen(sp_args, shell=False, stdin=None,
                             stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        # stderr is merged into stdout, so `out` holds everything it printed
        out = p.communicate()[0]
    finally:
        os.remove(tmpfile)
    # NOTE(review): the injector's exit status is not checked; the source is
    # marked successful regardless -- confirm this is intended.
    log.info('%s: injector output: \n%s', source.name, out)

    # update last_success
    source.last_success = datetime.datetime.utcnow()
    log.debug('%s: updating last_success timestamp: %s', source.name, source.last_success)
    source.save()