Browse Source

- Improve gzip file detection in the HTTP downloader: do not trust the HTTP Content-Encoding header; check the gzip magic number at the start of the data instead

adaptive-webui-19844
Christophe Dumez 15 years ago
parent
commit
949b4ce4e9
  1. 7
      src/search_engine/helpers.py

7
src/search_engine/helpers.py

@@ -84,15 +84,14 @@ def download_file(url, referer=None):
req.add_header('referer', referer) req.add_header('referer', referer)
response = urllib2.urlopen(req) response = urllib2.urlopen(req)
dat = response.read() dat = response.read()
# Check if data is gzip encoded # Check if it is gzipped
response_info = response.info() if dat[:2] == '\037\213':
content_encoding = response_info.get('Content-Encoding')
if content_encoding is not None and 'gzip' in content_encoding:
# Data is gzip encoded, decode it # Data is gzip encoded, decode it
compressedstream = StringIO.StringIO(dat) compressedstream = StringIO.StringIO(dat)
gzipper = gzip.GzipFile(fileobj=compressedstream) gzipper = gzip.GzipFile(fileobj=compressedstream)
extracted_data = gzipper.read() extracted_data = gzipper.read()
dat = extracted_data dat = extracted_data
# Write it to a file # Write it to a file
file.write(dat) file.write(dat)
file.close() file.close()

Loading…
Cancel
Save