From 1d5d5ead5de77ee4a0bd5d83a0c2efb6bf775fb5 Mon Sep 17 00:00:00 2001
From: ghost <noreply@localhost>
Date: Wed, 14 Jun 2023 02:20:00 +0300
Subject: [PATCH] fix DomDocument initiation without encoding provided

Prepend an explicit encoding declaration to the crawled content before
handing it to DOMDocument::loadHTML(), using the encoding reported by
mb_detect_encoding(). When detection fails (mb_detect_encoding() returns
false) fall back to UTF-8 so the declaration is never emitted empty.
---
 crontab/crawler.php | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/crontab/crawler.php b/crontab/crawler.php
index bcc4fa2..de232e0 100644
--- a/crontab/crawler.php
+++ b/crontab/crawler.php
@@ -496,7 +496,9 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
 
   // Parse content
   $dom = new DomDocument();
-  @$dom->loadHTML($content);
+  // Declare the content encoding up front so the parser does not have to guess it;
+  // mb_detect_encoding() returns false when nothing matches, hence the UTF-8 fallback
+  @$dom->loadHTML(sprintf('<?xml encoding="%s" ?>', mb_detect_encoding($content) ?: 'UTF-8') . $content);
 
   // Skip index page links without titles
   $title = @$dom->getElementsByTagName('title');