Browse Source

add link tag support

main
ghost 1 year ago
parent
commit
b433fa6b3c
  1. 19
      crontab/crawler.php

19
crontab/crawler.php

@@ -954,6 +954,25 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
];
}
// Collect references from <link> elements of the parsed document.
// Each usable href is appended to $links with every metadata field left
// null and the raw href value stored under 'ref'.
foreach (@$dom->getElementsByTagName('link') as $link) {

  // DOMElement::getAttribute() yields an empty string when the attribute is
  // absent, so compare strictly against '' instead of relying on truthiness:
  // a loose check would also discard the valid relative reference href="0".
  $href = @$link->getAttribute('href');

  // Skip nodes without href attribute
  if (!is_string($href) || $href === '') {

    continue;
  }

  // Add link to queue
  $links[] = [
    'title'       => null,
    'description' => null,
    'keywords'    => null,
    'data'        => null,
    'mime'        => null,
    'ref'         => $href,
  ];
}
// Collect internal links from page content
foreach(@$dom->getElementsByTagName('a') as $a) {

Loading…
Cancel
Save