mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-02-03 10:25:52 +00:00
skip robots:noindex page without extra actions
This commit is contained in:
parent
00140e30a8
commit
a5f5541395
@ -106,17 +106,7 @@ foreach ($db->getCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET
|
||||
// When the page's meta robots value contains "noindex" (case-insensitive match),
// record its URI as a "Disallow:" rule in the host's robotsPostfix data, then
// skip the rest of this loop iteration.
|
||||
if (false !== stripos($metaRobots, 'noindex')) {
|
||||
|
||||
$robots = new Robots($queueHostPage->robots);
|
||||
$robotsPostfix = new Robots($queueHostPage->robotsPostfix);
|
||||
|
||||
// Add the rule only while both rule sets still allow this URI, so a URI that
// is already rejected by either of them is not appended and saved again
|
||||
if ($robotsPostfix->uriAllowed($queueHostPage->uri) &&
|
||||
$robots->uriAllowed($queueHostPage->uri)) {
|
||||
|
||||
$robotsPostfix->append('Disallow:', $queueHostPage->uri);
|
||||
|
||||
$db->updateHostRobotsPostfix($queueHostPage->hostId, $robotsPostfix->getData(), time());
|
||||
}
|
||||
// Runs whether or not a rule was appended: a noindex page gets no further processing here
continue;
|
||||
}
|
||||
|
||||
// Skip following page links when a robots:nofollow attribute is detected
|
||||
|
@ -74,15 +74,6 @@ class MySQL {
|
||||
return $query->rowCount();
|
||||
}
|
||||
|
||||
/**
 * Write a new robotsPostfix value and timeUpdated stamp to the `host` row
 * selected by hostId (the statement is restricted with LIMIT 1).
 *
 * @param int   $hostId        Value matched against `host`.`hostId`
 * @param mixed $robotsPostfix Value stored in `host`.`robotsPostfix`
 * @param int   $timeUpdated   Value stored in `host`.`timeUpdated`
 *
 * @return mixed Whatever `$query->rowCount()` reports; presumably the affected-row
 *               count of a PDO statement — `$this->_db` is not visible here, confirm
 */
public function updateHostRobotsPostfix(int $hostId, mixed $robotsPostfix, int $timeUpdated) {
|
||||
|
||||
$query = $this->_db->prepare('UPDATE `host` SET `robotsPostfix` = ?, `timeUpdated` = ? WHERE `hostId` = ? LIMIT 1');
|
||||
|
||||
// Values are bound positionally, so the array order follows the `?` order in the SQL, not the parameter order
$query->execute([$robotsPostfix, $timeUpdated, $hostId]);
|
||||
|
||||
return $query->rowCount();
|
||||
}
|
||||
|
||||
// Pages
|
||||
public function getTotalHostPages(int $hostId) {
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user