Browse Source

- Improve gzip file detection in HTTP downloader (do not trust HTTP header, check magic number)

adaptive-webui-19844
Christophe Dumez 15 years ago
parent
commit
949b4ce4e9
  1. 7
      src/search_engine/helpers.py

7
src/search_engine/helpers.py

@@ -84,15 +84,14 @@ def download_file(url, referer=None):
req.add_header('referer', referer)
response = urllib2.urlopen(req)
dat = response.read()
# Check if data is gzip encoded
-response_info = response.info()
-content_encoding = response_info.get('Content-Encoding')
-if content_encoding is not None and 'gzip' in content_encoding:
+# Check if it is gzipped
+if dat[:2] == '\037\213':
# Data is gzip encoded, decode it
compressedstream = StringIO.StringIO(dat)
gzipper = gzip.GzipFile(fileobj=compressedstream)
extracted_data = gzipper.read()
dat = extracted_data
# Write it to a file
file.write(dat)
file.close()

Loading…
Cancel
Save