|
|
@ -183,18 +183,6 @@ foreach ($db->getCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET |
|
|
|
$hostRobots = $host->robots; |
|
|
|
$hostRobots = $host->robots; |
|
|
|
$hostRobotsPostfix = $host->robotsPostfix; |
|
|
|
$hostRobotsPostfix = $host->robotsPostfix; |
|
|
|
|
|
|
|
|
|
|
|
// Increase page rank when link does not match the current host |
|
|
|
|
|
|
|
if ($hostURL->scheme . '://' . |
|
|
|
|
|
|
|
$hostURL->name . |
|
|
|
|
|
|
|
($hostURL->port ? ':' . $hostURL->port : '') |
|
|
|
|
|
|
|
!= |
|
|
|
|
|
|
|
$queueHostPage->scheme . '://' . |
|
|
|
|
|
|
|
$queueHostPage->name . |
|
|
|
|
|
|
|
($queueHostPage->port ? ':' . $queueHostPage->port : '')) { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
$db->updateHostPageRank($hostId, crc32($hostPageURI->string), 1); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Register new host |
|
|
|
// Register new host |
|
|
|
} else { |
|
|
|
} else { |
|
|
|
|
|
|
|
|
|
|
@ -248,6 +236,18 @@ foreach ($db->getCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Increase page rank when link does not match the current host |
|
|
|
|
|
|
|
if ($hostURL->scheme . '://' . |
|
|
|
|
|
|
|
$hostURL->name . |
|
|
|
|
|
|
|
($hostURL->port ? ':' . $hostURL->port : '') |
|
|
|
|
|
|
|
!= |
|
|
|
|
|
|
|
$queueHostPage->scheme . '://' . |
|
|
|
|
|
|
|
$queueHostPage->name . |
|
|
|
|
|
|
|
($queueHostPage->port ? ':' . $queueHostPage->port : '')) { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
$db->updateHostPageRank($hostId, crc32($hostPageURI->string), 1); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
$db->commit(); |
|
|
|
$db->commit(); |
|
|
|
|
|
|
|
|
|
|
|
} catch(Exception $e){ |
|
|
|
} catch(Exception $e){ |
|
|
|