
fix crawler for usage with python3

Branch: master
Author: R4SAS, 5 years ago
Commit: ae1fd2ef97
Changed file: api/crawler.py (25 lines: 12 additions, 13 deletions)

```diff
@@ -7,12 +7,11 @@
 import psycopg2
 import json
 import socket
-import sys
 import time
 import ipaddress
 import traceback
 from threading import Lock, Thread
-from Queue import Queue
+from queue import Queue
 
 # Configuration to use TCP connection or unix domain socket for admin connection to yggdrasil
 useAdminSock = True
```
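Python 3 renamed the `Queue` module to `queue`, and the unused `import sys` is dropped while the diff touches the imports anyway. The commit targets Python 3 only; if the script still had to run on both interpreters, a common shim would look like this (a hypothetical alternative, not what the commit does):

```python
try:
    from queue import Queue   # Python 3 name of the module
except ImportError:
    from Queue import Queue   # fall back to the Python 2 name
```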
```diff
@@ -72,18 +71,18 @@ def recv_until_done(soc):
             soc.close()
             break
         all_data.append(incoming_data)
-    return ''.join(all_data)
+    return b''.join(all_data)
 
 def getDHTPingRequest(key, coords, target=None):
     if target:
-        return '{{"request":"dhtPing", "box_pub_key":"{}", "coords":"{}", "target":"{}"}}'.format(key, coords, target)
+        return '{{"request":"dhtPing", "box_pub_key":"{}", "coords":"{}", "target":"{}"}}'.format(key, coords, target).encode('utf-8')
     else:
-        return '{{"request":"dhtPing", "box_pub_key":"{}", "coords":"{}"}}'.format(key, coords)
+        return '{{"request":"dhtPing", "box_pub_key":"{}", "coords":"{}"}}'.format(key, coords).encode('utf-8')
 
 def getNodeInfoRequest(key, coords):
-    return '{{"request":"getNodeInfo", "box_pub_key":"{}", "coords":"{}"}}'.format(key, coords)
+    return '{{"request":"getNodeInfo", "box_pub_key":"{}", "coords":"{}"}}'.format(key, coords).encode('utf-8')
 
 def getNodeInfoTask(address, info):
```
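These changes all follow from Python 3's strict separation of `bytes` and `str`: `socket.recv()` returns `bytes`, so the accumulated fragments must be joined with `b''`, and the JSON request strings must be `.encode()`d before they hit the socket. The `doRequest()` helper itself is outside this hunk, so the following is only a sketch of the round trip under those assumptions (the Unix-socket path and the helper name are illustrative, not taken from the repository):

```python
import json
import socket

def do_request(request_bytes, sock_path='/var/run/yggdrasil.sock'):
    # Hypothetical sketch of the admin-API round trip; the real helper
    # (doRequest) lives outside this hunk. Python 3 sockets carry bytes,
    # so we send encoded bytes and join received chunks with b''.
    soc = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    soc.connect(sock_path)
    soc.sendall(request_bytes)
    chunks = []
    while True:
        data = soc.recv(8192)
        if not data:            # peer closed the socket: response complete
            soc.close()
            break
        chunks.append(data)
    return json.loads(b''.join(chunks).decode('utf-8'))

# Usage mirroring the patched request builders:
# do_request('{"request":"getSelf"}'.encode('utf-8'))
```

Building the payloads with `json.dumps(...).encode('utf-8')` would avoid the hand-escaped `{{ }}` templates entirely, but the commit takes the smaller step of appending `.encode('utf-8')` to the existing `.format()` calls.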
```diff
@@ -158,7 +157,7 @@ def insert_new_entry(ipv6, coords):
         cur.close()
         dbconn.close()
     except Exception as e:
-        print "database error inserting"
+        print("database error inserting")
         traceback.print_exc()
 
 def handleNodeInfo(address, data):
```
```diff
@@ -187,7 +186,7 @@ def handleResponse(address, info, data):
         return
     if 'nodes' not in data['response']:
         return
-    for addr,rumor in data['response']['nodes'].iteritems():
+    for addr,rumor in data['response']['nodes'].items():
         if addr in visited: continue
         rumored[addr] = rumor
     if address not in visited:
```
```diff
@@ -198,16 +197,16 @@ def handleResponse(address, info, data):
     nodeinfopool.add(getNodeInfoTask, address, info)
 
 # Get self info
-selfInfo = doRequest('{"request":"getSelf"}')
+selfInfo = doRequest('{"request":"getSelf"}'.encode('utf-8'))
 
 # Initialize dicts of visited/rumored nodes
-for k,v in selfInfo['response']['self'].iteritems():
+for k,v in selfInfo['response']['self'].items():
     rumored[k] = v
 
 # Loop over rumored nodes and ping them, adding to visited if they respond
 while len(rumored) > 0:
-    for k,v in rumored.iteritems():
-        #print "Processing", v['coords']
+    for k,v in rumored.items():
+        #print("Processing", v['coords'])
         handleResponse(k, v, doRequest(getDHTPingRequest(v['box_pub_key'], v['coords'])))
         break
     del rumored[k]
```
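Beyond the `iteritems()` to `items()` rename, note why the loop's odd break-then-delete shape survives the port: `dict.items()` in Python 3 is a live view, and removing a key while iterating it raises `RuntimeError: dictionary changed size during iteration`. Taking one entry, leaving the loop, and only then deleting is what keeps this safe. A sketch of an equivalent, arguably clearer form using the same names as the crawler:

```python
# Pop one rumored node at a time; popitem() both fetches and removes an
# entry, so no deletion ever happens while a dict view is being iterated.
while rumored:
    k, v = rumored.popitem()
    handleResponse(k, v, doRequest(getDHTPingRequest(v['box_pub_key'], v['coords'])))
```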
```diff
@@ -215,6 +214,6 @@ while len(rumored) > 0:
 nodeinfopool.wait()
 
-for x, y in visited.iteritems():
+for x, y in visited.items():
     if valid_ipv6_check(x) and check_coords(y):
         insert_new_entry(x, y)
 
```
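The final pass filters crawl results before they reach PostgreSQL. The bodies of `valid_ipv6_check()` and `check_coords()` are outside this diff; given that the file imports `ipaddress`, a plausible, purely hypothetical shape for the address check, using Yggdrasil's `0200::/7` range, would be:

```python
import ipaddress

def valid_ipv6_check(addr):
    # Hypothetical reconstruction: the real function is not shown in this
    # diff. Accept only valid IPv6 addresses inside Yggdrasil's 0200::/7
    # address space.
    try:
        ip = ipaddress.ip_address(addr)
    except ValueError:
        return False
    return ip.version == 6 and ip in ipaddress.ip_network('0200::/7')
```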
