Browse Source

Merge pull request #2670 from DoumanAsh/safe_url_retrieve

[search engine] Exception free url_retrieve
adaptive-webui-19844
sledgehammer999 10 years ago
parent
commit
e502ce38ec
  1. 16
      src/searchengine/nova/helpers.py
  2. 16
      src/searchengine/nova3/helpers.py

16
src/searchengine/nova/helpers.py

@@ -22,7 +22,7 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#VERSION: 1.34
#VERSION: 1.40
# Author:
# Christophe DUMEZ (chris@qbittorrent.org)
@@ -55,17 +55,21 @@ def htmlentitydecode(s):
return unichr(htmlentitydefs.name2codepoint[entity])
return u" " # Unknown entity: We replace with a space.
t = re.sub(u'&(%s);' % u'|'.join(htmlentitydefs.name2codepoint), entity2char, s)
# Then convert numerical entities (such as &#233;)
t = re.sub(u'&#(\d+);', lambda x: unichr(int(x.group(1))), t)
# Then convert hexa entities (such as &#x00E9;)
return re.sub(u'&#x(\w+);', lambda x: unichr(int(x.group(1),16)), t)
def retrieve_url(url):
""" Return the content of the url page as a string """
req = urllib2.Request(url, headers = headers)
response = urllib2.urlopen(req)
try:
response = urllib2.urlopen(req)
except urllib2.URLError as errno:
print(" ".join(("Connection error:", str(errno.reason))))
return ""
dat = response.read()
# Check if it is gzipped
if dat[:2] == '\037\213':
@@ -101,7 +105,7 @@ def download_file(url, referer=None):
gzipper = gzip.GzipFile(fileobj=compressedstream)
extracted_data = gzipper.read()
dat = extracted_data
# Write it to a file
file.write(dat)
file.close()

16
src/searchengine/nova3/helpers.py

@@ -22,7 +22,7 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#VERSION: 1.35
#VERSION: 1.40
# Author:
# Christophe DUMEZ (chris@qbittorrent.org)
@@ -55,17 +55,21 @@ def htmlentitydecode(s):
return chr(html.entities.name2codepoint[entity])
return " " # Unknown entity: We replace with a space.
t = re.sub('&(%s);' % '|'.join(html.entities.name2codepoint), entity2char, s)
# Then convert numerical entities (such as &#233;)
t = re.sub('&#(\d+);', lambda x: chr(int(x.group(1))), t)
# Then convert hexa entities (such as &#x00E9;)
return re.sub('&#x(\w+);', lambda x: chr(int(x.group(1),16)), t)
def retrieve_url(url):
""" Return the content of the url page as a string """
req = urllib.request.Request(url, headers = headers)
response = urllib.request.urlopen(req)
try:
response = urllib.request.urlopen(req)
except urllib.error.URLError as errno:
print(" ".join(("Connection error:", str(errno.reason))))
return ""
dat = response.read()
# Check if it is gzipped
if dat[:2] == b'\x1f\x8b':
@@ -102,7 +106,7 @@ def download_file(url, referer=None):
gzipper = gzip.GzipFile(fileobj=compressedstream)
extracted_data = gzipper.read()
dat = extracted_data
# Write it to a file
file.write(dat)
file.close()

Loading…
Cancel
Save