From b433fa6b3c42f523a40b8c8a364295a51ef48cb0 Mon Sep 17 00:00:00 2001
From: ghost
Date: Sun, 30 Jul 2023 00:17:28 +0300
Subject: [PATCH] add link tag support

---
 crontab/crawler.php | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/crontab/crawler.php b/crontab/crawler.php
index e30ef4d..2486778 100644
--- a/crontab/crawler.php
+++ b/crontab/crawler.php
@@ -954,6 +954,25 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
         ];
       }
 
+      foreach (@$dom->getElementsByTagName('link') as $link) {
+
+        // Skip <link> nodes whose href is missing, empty or "0" (the check below is a truthiness test)
+        if (!$href = @$link->getAttribute('href')) {
+
+          continue;
+        }
+
+        // Append the href to $links as 'ref'; every other metadata field is left null
+        $links[] = [
+          'title' => null,
+          'description' => null,
+          'keywords' => null,
+          'data' => null,
+          'mime' => null,
+          'ref' => $href,
+        ];
+      }
+
       // Collect internal links from page content
       foreach(@$dom->getElementsByTagName('a') as $a) {
 