diff --git a/crontab/crawler.php b/crontab/crawler.php
index e30ef4d..2486778 100644
--- a/crontab/crawler.php
+++ b/crontab/crawler.php
@@ -954,6 +954,29 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
     ];
   }
 
+  // Collect references from <link> elements
+  foreach (@$dom->getElementsByTagName('link') as $link) {
+
+    // getAttribute() returns '' for a missing attribute; compare strictly
+    // so that a falsy but valid reference such as "0" is still queued
+    $href = $link->getAttribute('href');
+
+    if ($href === '') {
+
+      continue;
+    }
+
+    // Add link to queue; no metadata is known here, so those fields stay null
+    $links[] = [
+      'title' => null,
+      'description' => null,
+      'keywords' => null,
+      'data' => null,
+      'mime' => null,
+      'ref' => $href,
+    ];
+  }
+
   // Collect internal links from page content
   foreach(@$dom->getElementsByTagName('a') as $a) {
 