Browse Source

Exception-free retrieve_url()

Set a 2-second timeout and handle any possible connection error.
Return an empty string to the engine in case of a connection exception.
adaptive-webui-19844
DoumanAsh 10 years ago
parent
commit
86a0eaf317
  1. 16
      src/searchengine/nova/helpers.py
  2. 16
      src/searchengine/nova3/helpers.py

16
src/searchengine/nova/helpers.py

@ -22,7 +22,7 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
#VERSION: 1.34 #VERSION: 1.40
# Author: # Author:
# Christophe DUMEZ (chris@qbittorrent.org) # Christophe DUMEZ (chris@qbittorrent.org)
@ -55,17 +55,21 @@ def htmlentitydecode(s):
return unichr(htmlentitydefs.name2codepoint[entity]) return unichr(htmlentitydefs.name2codepoint[entity])
return u" " # Unknown entity: We replace with a space. return u" " # Unknown entity: We replace with a space.
t = re.sub(u'&(%s);' % u'|'.join(htmlentitydefs.name2codepoint), entity2char, s) t = re.sub(u'&(%s);' % u'|'.join(htmlentitydefs.name2codepoint), entity2char, s)
# Then convert numerical entities (such as &#233;) # Then convert numerical entities (such as &#233;)
t = re.sub(u'&#(\d+);', lambda x: unichr(int(x.group(1))), t) t = re.sub(u'&#(\d+);', lambda x: unichr(int(x.group(1))), t)
# Then convert hexa entities (such as &#xE9;) # Then convert hexa entities (such as &#xE9;)
return re.sub(u'&#x(\w+);', lambda x: unichr(int(x.group(1),16)), t) return re.sub(u'&#x(\w+);', lambda x: unichr(int(x.group(1),16)), t)
def retrieve_url(url): def retrieve_url(url):
""" Return the content of the url page as a string """ """ Return the content of the url page as a string """
req = urllib2.Request(url, headers = headers) req = urllib2.Request(url, headers = headers)
response = urllib2.urlopen(req) try:
response = urllib2.urlopen(req)
except urllib2.URLError as errno:
print(" ".join(("Connection error:", str(errno.reason))))
return ""
dat = response.read() dat = response.read()
# Check if it is gzipped # Check if it is gzipped
if dat[:2] == '\037\213': if dat[:2] == '\037\213':
@ -101,7 +105,7 @@ def download_file(url, referer=None):
gzipper = gzip.GzipFile(fileobj=compressedstream) gzipper = gzip.GzipFile(fileobj=compressedstream)
extracted_data = gzipper.read() extracted_data = gzipper.read()
dat = extracted_data dat = extracted_data
# Write it to a file # Write it to a file
file.write(dat) file.write(dat)
file.close() file.close()

16
src/searchengine/nova3/helpers.py

@ -22,7 +22,7 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
#VERSION: 1.35 #VERSION: 1.40
# Author: # Author:
# Christophe DUMEZ (chris@qbittorrent.org) # Christophe DUMEZ (chris@qbittorrent.org)
@ -55,17 +55,21 @@ def htmlentitydecode(s):
return chr(html.entities.name2codepoint[entity]) return chr(html.entities.name2codepoint[entity])
return " " # Unknown entity: We replace with a space. return " " # Unknown entity: We replace with a space.
t = re.sub('&(%s);' % '|'.join(html.entities.name2codepoint), entity2char, s) t = re.sub('&(%s);' % '|'.join(html.entities.name2codepoint), entity2char, s)
# Then convert numerical entities (such as &#233;) # Then convert numerical entities (such as &#233;)
t = re.sub('&#(\d+);', lambda x: chr(int(x.group(1))), t) t = re.sub('&#(\d+);', lambda x: chr(int(x.group(1))), t)
# Then convert hexa entities (such as &#xE9;) # Then convert hexa entities (such as &#xE9;)
return re.sub('&#x(\w+);', lambda x: chr(int(x.group(1),16)), t) return re.sub('&#x(\w+);', lambda x: chr(int(x.group(1),16)), t)
def retrieve_url(url): def retrieve_url(url):
""" Return the content of the url page as a string """ """ Return the content of the url page as a string """
req = urllib.request.Request(url, headers = headers) req = urllib.request.Request(url, headers = headers)
response = urllib.request.urlopen(req) try:
response = urllib.request.urlopen(req)
except urllib.error.URLError as errno:
print(" ".join(("Connection error:", str(errno.reason))))
return ""
dat = response.read() dat = response.read()
# Check if it is gzipped # Check if it is gzipped
if dat[:2] == b'\x1f\x8b': if dat[:2] == b'\x1f\x8b':
@ -102,7 +106,7 @@ def download_file(url, referer=None):
gzipper = gzip.GzipFile(fileobj=compressedstream) gzipper = gzip.GzipFile(fileobj=compressedstream)
extracted_data = gzipper.read() extracted_data = gzipper.read()
dat = extracted_data dat = extracted_data
# Write it to a file # Write it to a file
file.write(dat) file.write(dat)
file.close() file.close()

Loading…
Cancel
Save