From a1e27218491b3c4dcdc99b0f6f6d6c33df3ced7d Mon Sep 17 00:00:00 2001 From: ghost Date: Wed, 6 Sep 2023 00:34:59 +0300 Subject: [PATCH] skip collecting links with rel=nofollow attribute --- src/crontab/crawler.php | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/crontab/crawler.php b/src/crontab/crawler.php index 95c38ca..44218fd 100644 --- a/src/crontab/crawler.php +++ b/src/crontab/crawler.php @@ -936,6 +936,15 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_HOST_PAGE_QUEUE_LIMIT, time() - CRAWL_ continue; } + // Skip links that have the rel=nofollow attribute + if ($rel = @$a->getAttribute('rel')) { + + if (false !== stripos($rel, 'nofollow')) { + + continue; + } + } + // Get title attribute if available if (!$title = @$a->getAttribute('title')) { $title = null;