diff --git a/src/config/app.php.example b/src/config/app.php.example
index 36245bf..6b815b2 100644
--- a/src/config/app.php.example
+++ b/src/config/app.php.example
@@ -264,6 +264,14 @@ define('CRAWL_HOST_PAGE_QUEUE_LIMIT', 10);
  */
 define('CRAWL_HOST_PAGE_QUEUE_SECONDS_OFFSET', 60*60*24*30*12);
 
+/*
+ * Pause in seconds after each page processed from the crawl queue (throttles HTTP requests)
+ *
+ * int|false (0 or false disables the pause)
+ *
+ */
+define('CRAWL_HOST_PAGE_SECONDS_DELAY', 1);
+
 /*
  * Re-calculate page rank on page update
  *
diff --git a/src/crontab/crawler.php b/src/crontab/crawler.php
index d7ec768..95c38ca 100644
--- a/src/crontab/crawler.php
+++ b/src/crontab/crawler.php
@@ -1106,6 +1106,12 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_HOST_PAGE_QUEUE_LIMIT, time() - CRAWL_
     // Apply changes
     $db->commit();
 
+    // Throttle requests: pause after each processed page; skipped when the option is undefined, false or <= 0
+    if (defined('CRAWL_HOST_PAGE_SECONDS_DELAY') && (int) CRAWL_HOST_PAGE_SECONDS_DELAY > 0) {
+
+      sleep((int) CRAWL_HOST_PAGE_SECONDS_DELAY);
+    }
+
   // Process update errors
   } catch (Exception $e) {
 