Browse Source

Reduce the rate of HTTP requests by pausing after each page in the queue, controlled by the CRAWL_HOST_PAGE_SECONDS_DELAY setting

main
ghost 1 year ago
parent
commit
f9cf414901
  1. 8
      src/config/app.php.example
  2. 6
      src/crontab/crawler.php

8
src/config/app.php.example

@@ -264,6 +264,14 @@ define('CRAWL_HOST_PAGE_QUEUE_LIMIT', 10);
*/
define('CRAWL_HOST_PAGE_QUEUE_SECONDS_OFFSET', 60*60*24*30*12);
/*
* Pause, in seconds, after processing each page from the crawl queue, to reduce the rate of HTTP requests
*
* int|false (0 or false disables the pause)
*
*/
define('CRAWL_HOST_PAGE_SECONDS_DELAY', 1);
/*
* Re-calculate page rank on page update
*

6
src/crontab/crawler.php

@@ -1106,6 +1106,12 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_HOST_PAGE_QUEUE_LIMIT, time() - CRAWL_
// Apply changes
$db->commit();
// Throttle the crawler: pause before the next queued page to reduce the rate of HTTP requests
if (CRAWL_HOST_PAGE_SECONDS_DELAY) {
sleep((int) CRAWL_HOST_PAGE_SECONDS_DELAY);
}
// Process update errors
} catch (Exception $e) {

Loading…
Cancel
Save