From f9cf414901bc1503215feb3bd31974dfb899aefd Mon Sep 17 00:00:00 2001 From: ghost Date: Thu, 17 Aug 2023 18:56:29 +0300 Subject: [PATCH] reduce quantity of http requests for each page in queue by CRAWL_HOST_PAGE_SECONDS_DELAY setting --- src/config/app.php.example | 8 ++++++++ src/crontab/crawler.php | 6 ++++++ 2 files changed, 14 insertions(+) diff --git a/src/config/app.php.example b/src/config/app.php.example index 36245bf..6b815b2 100644 --- a/src/config/app.php.example +++ b/src/config/app.php.example @@ -264,6 +264,14 @@ define('CRAWL_HOST_PAGE_QUEUE_LIMIT', 10); */ define('CRAWL_HOST_PAGE_QUEUE_SECONDS_OFFSET', 60*60*24*30*12); +/* + * Delay between pages in queue, to reduce the rate of HTTP requests + * + * int|false (seconds; false disables the delay) + * + */ +define('CRAWL_HOST_PAGE_SECONDS_DELAY', 1); + /* * Re-calculate page rank on page update * diff --git a/src/crontab/crawler.php b/src/crontab/crawler.php index d7ec768..95c38ca 100644 --- a/src/crontab/crawler.php +++ b/src/crontab/crawler.php @@ -1106,6 +1106,12 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_HOST_PAGE_QUEUE_LIMIT, time() - CRAWL_ // Apply changes $db->commit(); + // Pause between pages in queue to reduce the rate of HTTP requests + if (CRAWL_HOST_PAGE_SECONDS_DELAY) { + + sleep((int) CRAWL_HOST_PAGE_SECONDS_DELAY); + } + // Process update errors } catch (Exception $e) {