mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-09-02 17:22:56 +00:00
fix host page mime detection
This commit is contained in:
parent
c07d6af52f
commit
93c6067fd9
@ -449,24 +449,20 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
|
||||
continue;
|
||||
}
|
||||
|
||||
// Parse index MIME
|
||||
$hostPageIsDom = false;
|
||||
// Parse MIME
|
||||
$hostPageIsHtml = false;
|
||||
$hostPageInMime = false;
|
||||
|
||||
foreach ((array) explode(',', CRAWL_PAGE_MIME_INDEX) as $mime) {
|
||||
|
||||
$mime = Filter::mime($mime);
|
||||
|
||||
// Check for DOM
|
||||
if (false !== stripos('text/html', $mime)) {
|
||||
|
||||
$hostPageIsDom = true;
|
||||
$hostPageInMime = true;
|
||||
break;
|
||||
}
|
||||
|
||||
// Ban page on MIME type not allowed in settings
|
||||
if (false !== stripos(Filter::mime($contentType), $mime)) {
|
||||
if (false !== stripos(Filter::mime($contentType), Filter::mime($mime))) {
|
||||
|
||||
// Check for HTML page
|
||||
if (false !== stripos(Filter::mime($contentType), 'text/html')) {
|
||||
|
||||
$hostPageIsHtml = true;
|
||||
}
|
||||
|
||||
$hostPageInMime = true;
|
||||
break;
|
||||
@ -503,7 +499,7 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
|
||||
$yggoManifest = null;
|
||||
|
||||
// Is DOM content
|
||||
if ($hostPageIsDom) {
|
||||
if ($hostPageIsHtml) {
|
||||
|
||||
// Parse content
|
||||
$dom = new DomDocument();
|
||||
@ -598,10 +594,8 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
|
||||
|
||||
foreach ((array) explode(',', CRAWL_PAGE_MIME_SNAP_LOCAL) as $mime) {
|
||||
|
||||
$mime = Filter::mime($mime);
|
||||
|
||||
// MIME type allowed in settings
|
||||
if (false !== stripos(Filter::mime($contentType), $mime)) {
|
||||
if (false !== stripos(Filter::mime($contentType), Filter::mime($mime))) {
|
||||
|
||||
$snapLocal = true;
|
||||
break;
|
||||
@ -614,10 +608,8 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
|
||||
|
||||
foreach ((array) explode(',', CRAWL_PAGE_MIME_SNAP_MEGA) as $mime) {
|
||||
|
||||
$mime = Filter::mime($mime);
|
||||
|
||||
// MIME type allowed in settings
|
||||
if (false !== stripos(Filter::mime($contentType), $mime)) {
|
||||
if (false !== stripos(Filter::mime($contentType), Filter::mime($mime))) {
|
||||
|
||||
$snapMega = true;
|
||||
break;
|
||||
|
Loading…
x
Reference in New Issue
Block a user