diff --git a/crontab/crawler.php b/crontab/crawler.php index ed4969c..0258be8 100644 --- a/crontab/crawler.php +++ b/crontab/crawler.php @@ -183,18 +183,6 @@ foreach ($db->getCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET $hostRobots = $host->robots; $hostRobotsPostfix = $host->robotsPostfix; - // Increase page rank when link does not match the current host - if ($hostURL->scheme . '://' . - $hostURL->name . - ($hostURL->port ? ':' . $hostURL->port : '') - != - $queueHostPage->scheme . '://' . - $queueHostPage->name . - ($queueHostPage->port ? ':' . $queueHostPage->port : '')) { - - $db->updateHostPageRank($hostId, crc32($hostPageURI->string), 1); - } - // Register new host } else { @@ -248,6 +236,18 @@ foreach ($db->getCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET } } + // Increase page rank when link does not match the current host + if ($hostURL->scheme . '://' . + $hostURL->name . + ($hostURL->port ? ':' . $hostURL->port : '') + != + $queueHostPage->scheme . '://' . + $queueHostPage->name . + ($queueHostPage->port ? ':' . $queueHostPage->port : '')) { + + $db->updateHostPageRank($hostId, crc32($hostPageURI->string), 1); + } + $db->commit(); } catch(Exception $e){