From 1f33205236d28ca4d5d57945228e8838181a29d4 Mon Sep 17 00:00:00 2001 From: ghost Date: Sun, 30 Jul 2023 00:52:55 +0300 Subject: [PATCH] add script tag support --- crontab/crawler.php | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/crontab/crawler.php b/crontab/crawler.php index 2486778..98a2e8b 100644 --- a/crontab/crawler.php +++ b/crontab/crawler.php @@ -954,6 +954,25 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND ]; } + foreach (@$dom->getElementsByTagName('script') as $script) { + + // Skip nodes without src attribute + if (!$src = @$script->getAttribute('src')) { + + continue; + } + + // Add link to queue + $links[] = [ + 'title' => null, + 'description' => null, + 'keywords' => null, + 'data' => null, + 'mime' => null, + 'ref' => $src, + ]; + } + foreach (@$dom->getElementsByTagName('link') as $link) { // Skip nodes without href attribute