Browse Source

make the page rank update optional in the crawl queue

main
ghost 1 year ago
parent
commit
5df59661d8
  1. 10
      config/app.php.txt
  2. 3
      crontab/crawler.php

10
config/app.php.txt

@ -321,6 +321,16 @@ define('CRAWL_HOST_DEFAULT_NSFW', false); @@ -321,6 +321,16 @@ define('CRAWL_HOST_DEFAULT_NSFW', false);
*/
define('CRAWL_SITEMAPS', true);
/*
* Re-calculate page rank on page update
*
* When enabled, may increase execution time
*
* true|false
*
*/
define('CRAWL_PAGE_RANK_UPDATE', true);
/*
* Renew robots.txt index by timing offset provided
*

3
crontab/crawler.php

@ -357,6 +357,8 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND @@ -357,6 +357,8 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
$httpRequestsTimeTotal += $curl->getTotalTime();
// Update page rank
if (CRAWL_PAGE_RANK_UPDATE) {
// @TODO add common method
$hostPageRank = 0;
@ -380,6 +382,7 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND @@ -380,6 +382,7 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
}
}
}
}
// Update registry
$db->updateHostPageRank($queueHostPage->hostPageId, $hostPageRank);

Loading…
Cancel
Save