mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-03-13 05:41:02 +00:00
skip robots:noindex page without extra actions
This commit is contained in:
parent
00140e30a8
commit
a5f5541395
@ -106,17 +106,7 @@ foreach ($db->getCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET
|
|||||||
// Append page with meta robots:noindex value to the robotsPostfix disallow list
|
// Append page with meta robots:noindex value to the robotsPostfix disallow list
|
||||||
if (false !== stripos($metaRobots, 'noindex')) {
|
if (false !== stripos($metaRobots, 'noindex')) {
|
||||||
|
|
||||||
$robots = new Robots($queueHostPage->robots);
|
continue;
|
||||||
$robotsPostfix = new Robots($queueHostPage->robotsPostfix);
|
|
||||||
|
|
||||||
// Ignore URI if does not match existing rules yet
|
|
||||||
if ($robotsPostfix->uriAllowed($queueHostPage->uri) &&
|
|
||||||
$robots->uriAllowed($queueHostPage->uri)) {
|
|
||||||
|
|
||||||
$robotsPostfix->append('Disallow:', $queueHostPage->uri);
|
|
||||||
|
|
||||||
$db->updateHostRobotsPostfix($queueHostPage->hostId, $robotsPostfix->getData(), time());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip page links following by robots:nofollow attribute detected
|
// Skip page links following by robots:nofollow attribute detected
|
||||||
|
@ -74,15 +74,6 @@ class MySQL {
|
|||||||
return $query->rowCount();
|
return $query->rowCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function updateHostRobotsPostfix(int $hostId, mixed $robotsPostfix, int $timeUpdated) {
|
|
||||||
|
|
||||||
$query = $this->_db->prepare('UPDATE `host` SET `robotsPostfix` = ?, `timeUpdated` = ? WHERE `hostId` = ? LIMIT 1');
|
|
||||||
|
|
||||||
$query->execute([$robotsPostfix, $timeUpdated, $hostId]);
|
|
||||||
|
|
||||||
return $query->rowCount();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Pages
|
// Pages
|
||||||
public function getTotalHostPages(int $hostId) {
|
public function getTotalHostPages(int $hostId) {
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user