mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-03-13 05:41:02 +00:00
skip xmpp links
This commit is contained in:
parent
06c136f05c
commit
1655ec63b2
@ -362,13 +362,13 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
|
||||
// Update page index anyway, with the current time and http code
|
||||
$hostPagesProcessed += $db->updateHostPageCrawlQueue($queueHostPage->hostPageId, time(), $curl->getCode(), $curl->getSizeDownload());
|
||||
|
||||
// This page has no 200 code
|
||||
// This page is not available
|
||||
if (200 != $curl->getCode()) {
|
||||
|
||||
// Ban this page
|
||||
$hostPagesBanned += $db->updateHostPageTimeBanned($queueHostPage->hostPageId, time());
|
||||
|
||||
// Try to get the target page location if a page redirect is available
|
||||
// Try to get the target page location by following the redirect location, if one is available
|
||||
$curl = new Curl($queueHostPage->hostPageURL, CRAWL_CURLOPT_USERAGENT, 10, true, true);
|
||||
|
||||
// Update curl stats
|
||||
@ -1028,6 +1028,12 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip xmpp links
|
||||
if (false !== stripos($href, 'xmpp:')) {
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip x-raw-image links
|
||||
/*
|
||||
if (false !== stripos($href, 'x-raw-image:')) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user