Browse Source

Make the page rank update optional in the crawl queue

main
ghost 1 year ago
parent
commit
5df59661d8
  1. 10
      config/app.php.txt
  2. 29
      crontab/crawler.php

10
config/app.php.txt

@ -321,6 +321,16 @@ define('CRAWL_HOST_DEFAULT_NSFW', false);
*/ */
define('CRAWL_SITEMAPS', true); define('CRAWL_SITEMAPS', true);
/*
* Re-calculate page rank on page update
*
* When enabled, may increase execution time
*
* true|false
*
*/
define('CRAWL_PAGE_RANK_UPDATE', true);
/* /*
* Renew robots.txt index by timing offset provided * Renew robots.txt index by timing offset provided
* *

29
crontab/crawler.php

@ -357,26 +357,29 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
$httpRequestsTimeTotal += $curl->getTotalTime(); $httpRequestsTimeTotal += $curl->getTotalTime();
// Update page rank // Update page rank
// @TODO add common method if (CRAWL_PAGE_RANK_UPDATE) {
$hostPageRank = 0; // @TODO add common method
// Get referrers $hostPageRank = 0;
foreach ($db->getHostPagesToHostPageByHostPageIdTarget($queueHostPage->hostPageId) as $hostPageToHostPageByHostPageIdTarget) {
// Get source page details // Get referrers
if ($hostPageSource = $db->getHostPage($hostPageToHostPageByHostPageIdTarget->hostPageIdSource)) { foreach ($db->getHostPagesToHostPageByHostPageIdTarget($queueHostPage->hostPageId) as $hostPageToHostPageByHostPageIdTarget) {
// Increase PR on external referrer only // Get source page details
if ($hostPageSource->hostId != $queueHostPage->hostId) { if ($hostPageSource = $db->getHostPage($hostPageToHostPageByHostPageIdTarget->hostPageIdSource)) {
$hostPageRank++; // Increase PR on external referrer only
} if ($hostPageSource->hostId != $queueHostPage->hostId) {
$hostPageRank++;
}
// Delegate page rank value from redirected pages // Delegate page rank value from redirected pages
if (false !== strpos($hostPageSource->httpCode, '30')) { if (false !== strpos($hostPageSource->httpCode, '30')) {
$hostPageRank += $hostPageSource->rank; $hostPageRank += $hostPageSource->rank;
}
} }
} }
} }

Loading…
Cancel
Save