Browse Source

fix PR update condition

main
ghost 2 years ago
parent
commit
5875dd58c9
  1. 24
      crontab/crawler.php

24
crontab/crawler.php

@@ -183,18 +183,6 @@ foreach ($db->getCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET
$hostRobots = $host->robots; $hostRobots = $host->robots;
$hostRobotsPostfix = $host->robotsPostfix; $hostRobotsPostfix = $host->robotsPostfix;
// Increase page rank when link does not match the current host
if ($hostURL->scheme . '://' .
$hostURL->name .
($hostURL->port ? ':' . $hostURL->port : '')
!=
$queueHostPage->scheme . '://' .
$queueHostPage->name .
($queueHostPage->port ? ':' . $queueHostPage->port : '')) {
$db->updateHostPageRank($hostId, crc32($hostPageURI->string), 1);
}
// Register new host // Register new host
} else { } else {
@@ -248,6 +236,18 @@ foreach ($db->getCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET
} }
} }
// Increase page rank when link does not match the current host
if ($hostURL->scheme . '://' .
$hostURL->name .
($hostURL->port ? ':' . $hostURL->port : '')
!=
$queueHostPage->scheme . '://' .
$queueHostPage->name .
($queueHostPage->port ? ':' . $queueHostPage->port : '')) {
$db->updateHostPageRank($hostId, crc32($hostPageURI->string), 1);
}
$db->commit(); $db->commit();
} catch(Exception $e){ } catch(Exception $e){

Loading…
Cancel
Save