mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-02-07 20:34:28 +00:00
ban host page when its encoding cannot be detected
This commit is contained in:
parent
d2469e9adc
commit
d96abb8ea8
@@ -494,7 +494,18 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
|
|||||||
// Parse content
|
// Parse content
|
||||||
$dom = new DomDocument();
|
$dom = new DomDocument();
|
||||||
|
|
||||||
@$dom->loadHTML(sprintf('<?xml encoding="%s" ?>', mb_detect_encoding($content)) . $content);
|
if ($encoding = mb_detect_encoding($content)) {
|
||||||
|
|
||||||
|
@$dom->loadHTML(sprintf('<?xml encoding="%s" ?>', $encoding) . $content);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
$hostPagesBanned += $db->updateHostPageTimeBanned($queueHostPage->hostPageId, time());
|
||||||
|
|
||||||
|
$db->commit();
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// Skip index page links without titles
|
// Skip index page links without titles
|
||||||
$title = @$dom->getElementsByTagName('title');
|
$title = @$dom->getElementsByTagName('title');
|
||||||
|
Loading…
x
Reference in New Issue
Block a user