mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-08-26 05:42:05 +00:00
fix image crawling errors
This commit is contained in:
parent
baa8b0d2f0
commit
d4f66c83e7
@ -266,12 +266,10 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
|
|||||||
$robots = new Robots(($hostRobots ? (string) $hostRobots : (string) CRAWL_ROBOTS_DEFAULT_RULES) . PHP_EOL . ($hostRobotsPostfix ? (string) $hostRobotsPostfix : (string) CRAWL_ROBOTS_POSTFIX_RULES));
|
$robots = new Robots(($hostRobots ? (string) $hostRobots : (string) CRAWL_ROBOTS_DEFAULT_RULES) . PHP_EOL . ($hostRobotsPostfix ? (string) $hostRobotsPostfix : (string) CRAWL_ROBOTS_POSTFIX_RULES));
|
||||||
|
|
||||||
// Save image info
|
// Save image info
|
||||||
$hostImageId = $db->getHostImage($hostId, crc32($hostImageURI->string));
|
|
||||||
|
|
||||||
if ($hostStatus && // host enabled
|
if ($hostStatus && // host enabled
|
||||||
$robots->uriAllowed($hostImageURI->string) && // src allowed by robots.txt rules
|
$robots->uriAllowed($hostImageURI->string) && // src allowed by robots.txt rules
|
||||||
$hostImageLimit > $db->getTotalHostImages($hostId) && // images quantity not reached host limit
|
$hostImageLimit > $db->getTotalHostImages($hostId) && // images quantity not reached host limit
|
||||||
!$hostImageId) { // image not exists
|
!$hostImageId = $db->getHostImageId($hostId, crc32($hostImageURI->string))) { // image not exists
|
||||||
|
|
||||||
// Add host image
|
// Add host image
|
||||||
if ($hostImageId = $db->addHostImage($hostId, crc32($hostImageURI->string), $hostImageURI->string, time(), null, 200)) {
|
if ($hostImageId = $db->addHostImage($hostId, crc32($hostImageURI->string), $hostImageURI->string, time(), null, 200)) {
|
||||||
@ -284,19 +282,17 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add host image description
|
// Add/update host image description
|
||||||
$hostImageDescriptionCRC32id = crc32(md5((string) $imageAlt . (string) $imageTitle));
|
$db->setHostImageDescription($hostImageId,
|
||||||
|
crc32(md5((string) $imageAlt . (string) $imageTitle)),
|
||||||
|
Filter::imageAlt($imageAlt),
|
||||||
|
Filter::imageTitle($imageTitle),
|
||||||
|
time(),
|
||||||
|
time());
|
||||||
|
|
||||||
if (!$db->getHostImageDescription($hostImageId, $hostImageDescriptionCRC32id)) {
|
|
||||||
$db->addHostImageDescription($hostImageId, $hostImageDescriptionCRC32id, Filter::imageAlt($imageAlt), Filter::imageTitle($imageTitle), time());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Relate host image with host page was found
|
// Relate host image with host page was found
|
||||||
if (!$db->getHostImageToHostPage($hostImageId, $queueHostPage->hostPageId)) {
|
$db->setHostImageToHostPage($hostImageId, $queueHostPage->hostPageId, time(), time(), 1);
|
||||||
$db->addHostImageToHostPage($hostImageId, $queueHostPage->hostPageId, time(), null, 1);
|
|
||||||
} else {
|
|
||||||
$db->updateHostImageToHostPage($hostImageId, $queueHostPage->hostPageId, time(), 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Increase page rank when link does not match the current host
|
// Increase page rank when link does not match the current host
|
||||||
if ($hostImageURL->scheme . '://' .
|
if ($hostImageURL->scheme . '://' .
|
||||||
@ -434,7 +430,7 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
|
|||||||
if ($hostStatus && // host enabled
|
if ($hostStatus && // host enabled
|
||||||
$robots->uriAllowed($hostPageURI->string) && // page allowed by robots.txt rules
|
$robots->uriAllowed($hostPageURI->string) && // page allowed by robots.txt rules
|
||||||
$hostPageLimit > $db->getTotalHostPages($hostId) && // pages quantity not reached host limit
|
$hostPageLimit > $db->getTotalHostPages($hostId) && // pages quantity not reached host limit
|
||||||
!$db->getHostPage($hostId, crc32($hostPageURI->string))) { // page not exists
|
!$db->getHostPage($hostId, crc32($hostPageURI->string))) { // page not exists
|
||||||
|
|
||||||
if ($db->addHostPage($hostId, crc32($hostPageURI->string), $hostPageURI->string, time())) {
|
if ($db->addHostPage($hostId, crc32($hostPageURI->string), $hostPageURI->string, time())) {
|
||||||
|
|
||||||
|
Binary file not shown.
@ -103,13 +103,13 @@ class MySQL {
|
|||||||
return $query->fetch()->total;
|
return $query->fetch()->total;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getHostImage(int $hostId, int $crc32uri) {
|
public function getHostImageId(int $hostId, int $crc32uri) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('SELECT * FROM `hostImage` WHERE `hostId` = ? AND `crc32uri` = ? LIMIT 1');
|
$query = $this->_db->prepare('SELECT `hostImageId` FROM `hostImage` WHERE `hostId` = ? AND `crc32uri` = ? LIMIT 1');
|
||||||
|
|
||||||
$query->execute([$hostId, $crc32uri]);
|
$query->execute([$hostId, $crc32uri]);
|
||||||
|
|
||||||
return $query->fetch();
|
return $query->rowCount() ? $query->fetch()->hostImageId : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getHostImages(int $hostId) {
|
public function getHostImages(int $hostId) {
|
||||||
@ -208,24 +208,19 @@ class MySQL {
|
|||||||
return $query->rowCount();
|
return $query->rowCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getHostImageDescription(int $hostImageId, int $crc32id) {
|
public function setHostImageDescription(int $hostImageId, int $crc32id, string $alt, string $title, int $timeAdded, int $timeUpdated) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('SELECT * FROM `hostImageDescription` WHERE `hostImageId` = ? AND `crc32id` = ? LIMIT 1');
|
|
||||||
|
|
||||||
$query->execute([$hostImageId, $crc32id]);
|
|
||||||
|
|
||||||
return $query->fetch();
|
|
||||||
}
|
|
||||||
|
|
||||||
public function addHostImageDescription(int $hostImageId, int $crc32id, string $alt, string $title, int $timeAdded) {
|
|
||||||
|
|
||||||
$query = $this->_db->prepare('INSERT INTO `hostImageDescription` (`hostImageId`,
|
$query = $this->_db->prepare('INSERT INTO `hostImageDescription` (`hostImageId`,
|
||||||
`crc32id`,
|
`crc32id`,
|
||||||
`alt`,
|
`alt`,
|
||||||
`title`,
|
`title`,
|
||||||
`timeAdded`) VALUES (?, ?, ?, ?, ?)');
|
`timeAdded`) VALUES (?, ?, ?, ?, ?)
|
||||||
|
|
||||||
$query->execute([$hostImageId, $crc32id, $alt, $title, $timeAdded]);
|
ON DUPLICATE KEY UPDATE `alt` = ?,
|
||||||
|
`title` = ?,
|
||||||
|
`timeUpdated` = ?');
|
||||||
|
|
||||||
|
$query->execute([$hostImageId, $crc32id, $alt, $title, $timeAdded, $alt, $title, $timeUpdated]);
|
||||||
|
|
||||||
return $this->_db->lastInsertId();
|
return $this->_db->lastInsertId();
|
||||||
}
|
}
|
||||||
@ -239,15 +234,6 @@ class MySQL {
|
|||||||
return $query->rowCount();
|
return $query->rowCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getHostImageToHostPage(int $hostImageId, int $hostPageId) {
|
|
||||||
|
|
||||||
$query = $this->_db->prepare('SELECT * FROM `hostImageToHostPage` WHERE `hostImageId` = ? AND `hostPageId` = ? LIMIT 1');
|
|
||||||
|
|
||||||
$query->execute([$hostImageId, $hostPageId]);
|
|
||||||
|
|
||||||
return $query->fetch();
|
|
||||||
}
|
|
||||||
|
|
||||||
public function getHostImageHostPages(int $hostImageId) {
|
public function getHostImageHostPages(int $hostImageId) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('SELECT * FROM `hostImageToHostPage` WHERE `hostImageId` = ?');
|
$query = $this->_db->prepare('SELECT * FROM `hostImageToHostPage` WHERE `hostImageId` = ?');
|
||||||
@ -257,33 +243,22 @@ class MySQL {
|
|||||||
return $query->fetchAll();
|
return $query->fetchAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function addHostImageToHostPage(int $hostImageId, int $hostPageId, int $timeAdded, mixed $timeUpdated, int $quantity) {
|
public function setHostImageToHostPage(int $hostImageId, int $hostPageId, int $timeAdded, mixed $timeUpdated, int $quantity) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('INSERT INTO `hostImageToHostPage` (`hostImageId`,
|
$query = $this->_db->prepare('INSERT INTO `hostImageToHostPage` (`hostImageId`,
|
||||||
`hostPageId`,
|
`hostPageId`,
|
||||||
`timeAdded`,
|
`timeAdded`,
|
||||||
`timeUpdated`,
|
`timeUpdated`,
|
||||||
`quantity`) VALUES (?, ?, ?, ?, ?)');
|
`quantity`) VALUES (?, ?, ?, ?, ?)
|
||||||
|
|
||||||
$query->execute([$hostImageId, $hostPageId, $timeAdded, $timeUpdated, $quantity]);
|
ON DUPLICATE KEY UPDATE `timeUpdated` = ?,
|
||||||
|
`quantity` = `quantity` + ' . (int) $quantity);
|
||||||
|
|
||||||
|
$query->execute([$hostImageId, $hostPageId, $timeAdded, $timeUpdated, $quantity, $timeUpdated]);
|
||||||
|
|
||||||
return $query->rowCount(); // no primary key
|
return $query->rowCount(); // no primary key
|
||||||
}
|
}
|
||||||
|
|
||||||
public function updateHostImageToHostPage(int $hostImageId, int $hostPageId, int $timeAdded, int $quantity) {
|
|
||||||
|
|
||||||
$query = $this->_db->prepare('UPDATE `hostImageToHostPage` SET `quantity` = `quantity` + ' . (int) $quantity . ', `timeUpdated` = ?
|
|
||||||
|
|
||||||
WHERE `hostImageId` = ?
|
|
||||||
AND `hostPageId` = ?
|
|
||||||
|
|
||||||
LIMIT 1');
|
|
||||||
|
|
||||||
$query->execute([$timeAdded, $hostImageId, $hostPageId]);
|
|
||||||
|
|
||||||
return $query->rowCount();
|
|
||||||
}
|
|
||||||
|
|
||||||
public function deleteHostImageToHostPage(int $hostImageId) {
|
public function deleteHostImageToHostPage(int $hostImageId) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('DELETE FROM `hostImageToHostPage` WHERE `hostImageId` = ?');
|
$query = $this->_db->prepare('DELETE FROM `hostImageToHostPage` WHERE `hostImageId` = ?');
|
||||||
|
Loading…
x
Reference in New Issue
Block a user