mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-02-03 10:25:52 +00:00
fix image crawling errors
This commit is contained in:
parent
baa8b0d2f0
commit
d4f66c83e7
@ -266,12 +266,10 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
|
||||
$robots = new Robots(($hostRobots ? (string) $hostRobots : (string) CRAWL_ROBOTS_DEFAULT_RULES) . PHP_EOL . ($hostRobotsPostfix ? (string) $hostRobotsPostfix : (string) CRAWL_ROBOTS_POSTFIX_RULES));
|
||||
|
||||
// Save image info
|
||||
$hostImageId = $db->getHostImage($hostId, crc32($hostImageURI->string));
|
||||
|
||||
if ($hostStatus && // host enabled
|
||||
$robots->uriAllowed($hostImageURI->string) && // src allowed by robots.txt rules
|
||||
$hostImageLimit > $db->getTotalHostImages($hostId) && // images quantity not reached host limit
|
||||
!$hostImageId) { // image not exists
|
||||
!$hostImageId = $db->getHostImageId($hostId, crc32($hostImageURI->string))) { // image not exists
|
||||
|
||||
// Add host image
|
||||
if ($hostImageId = $db->addHostImage($hostId, crc32($hostImageURI->string), $hostImageURI->string, time(), null, 200)) {
|
||||
@ -284,19 +282,17 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
|
||||
}
|
||||
}
|
||||
|
||||
// Add host image description
|
||||
$hostImageDescriptionCRC32id = crc32(md5((string) $imageAlt . (string) $imageTitle));
|
||||
// Add/update host image description
|
||||
$db->setHostImageDescription($hostImageId,
|
||||
crc32(md5((string) $imageAlt . (string) $imageTitle)),
|
||||
Filter::imageAlt($imageAlt),
|
||||
Filter::imageTitle($imageTitle),
|
||||
time(),
|
||||
time());
|
||||
|
||||
if (!$db->getHostImageDescription($hostImageId, $hostImageDescriptionCRC32id)) {
|
||||
$db->addHostImageDescription($hostImageId, $hostImageDescriptionCRC32id, Filter::imageAlt($imageAlt), Filter::imageTitle($imageTitle), time());
|
||||
}
|
||||
|
||||
// Relate host image with host page was found
|
||||
if (!$db->getHostImageToHostPage($hostImageId, $queueHostPage->hostPageId)) {
|
||||
$db->addHostImageToHostPage($hostImageId, $queueHostPage->hostPageId, time(), null, 1);
|
||||
} else {
|
||||
$db->updateHostImageToHostPage($hostImageId, $queueHostPage->hostPageId, time(), 1);
|
||||
}
|
||||
$db->setHostImageToHostPage($hostImageId, $queueHostPage->hostPageId, time(), time(), 1);
|
||||
|
||||
// Increase page rank when link does not match the current host
|
||||
if ($hostImageURL->scheme . '://' .
|
||||
@ -434,7 +430,7 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
|
||||
if ($hostStatus && // host enabled
|
||||
$robots->uriAllowed($hostPageURI->string) && // page allowed by robots.txt rules
|
||||
$hostPageLimit > $db->getTotalHostPages($hostId) && // pages quantity not reached host limit
|
||||
!$db->getHostPage($hostId, crc32($hostPageURI->string))) { // page not exists
|
||||
!$db->getHostPage($hostId, crc32($hostPageURI->string))) { // page not exists
|
||||
|
||||
if ($db->addHostPage($hostId, crc32($hostPageURI->string), $hostPageURI->string, time())) {
|
||||
|
||||
|
Binary file not shown.
@ -103,13 +103,13 @@ class MySQL {
|
||||
return $query->fetch()->total;
|
||||
}
|
||||
|
||||
// Looks up a single `hostImage` row by `hostId` and `crc32uri` (LIMIT 1).
// Two variants of the method are interleaved below by the diff view:
//   - getHostImage()   selects `*` and returns the result of $query->fetch();
//   - getHostImageId() selects only `hostImageId` and returns that column,
//                      or 0 when $query->rowCount() reports no rows.
// NOTE(review): PDOStatement::rowCount() is not guaranteed to be meaningful for
// SELECT statements on every driver — confirm it is reliable for the driver in use.
public function getHostImage(int $hostId, int $crc32uri) {
|
||||
public function getHostImageId(int $hostId, int $crc32uri) {
|
||||
|
||||
$query = $this->_db->prepare('SELECT * FROM `hostImage` WHERE `hostId` = ? AND `crc32uri` = ? LIMIT 1');
|
||||
$query = $this->_db->prepare('SELECT `hostImageId` FROM `hostImage` WHERE `hostId` = ? AND `crc32uri` = ? LIMIT 1');
|
||||
|
||||
$query->execute([$hostId, $crc32uri]);
|
||||
|
||||
return $query->fetch();
|
||||
return $query->rowCount() ? $query->fetch()->hostImageId : 0;
|
||||
}
|
||||
|
||||
public function getHostImages(int $hostId) {
|
||||
@ -208,24 +208,19 @@ class MySQL {
|
||||
return $query->rowCount();
|
||||
}
|
||||
|
||||
// Fetches at most one `hostImageDescription` row whose `hostImageId` and
// `crc32id` match the given arguments (both bound as positional parameters).
// Returns whatever $query->fetch() yields for that single-row query.
public function getHostImageDescription(int $hostImageId, int $crc32id) {
|
||||
|
||||
$query = $this->_db->prepare('SELECT * FROM `hostImageDescription` WHERE `hostImageId` = ? AND `crc32id` = ? LIMIT 1');
|
||||
|
||||
$query->execute([$hostImageId, $crc32id]);
|
||||
|
||||
return $query->fetch();
|
||||
}
|
||||
|
||||
// Writes a `hostImageDescription` row (`hostImageId`, `crc32id`, `alt`, `title`, `timeAdded`).
// Two variants of the method are interleaved below by the diff view:
//   - addHostImageDescription() runs a plain five-value INSERT;
//   - setHostImageDescription() appends ON DUPLICATE KEY UPDATE, so on a key
//     collision `alt`, `title` and `timeUpdated` are rewritten instead. Its
//     execute() call therefore binds $alt and $title twice (once for VALUES,
//     once for the UPDATE clause) followed by $timeUpdated.
// Both variants return $this->_db->lastInsertId().
// NOTE(review): when the ON DUPLICATE KEY UPDATE branch is taken, lastInsertId()
// may not identify the updated row — verify before relying on the return value.
public function addHostImageDescription(int $hostImageId, int $crc32id, string $alt, string $title, int $timeAdded) {
|
||||
public function setHostImageDescription(int $hostImageId, int $crc32id, string $alt, string $title, int $timeAdded, int $timeUpdated) {
|
||||
|
||||
$query = $this->_db->prepare('INSERT INTO `hostImageDescription` (`hostImageId`,
|
||||
`crc32id`,
|
||||
`alt`,
|
||||
`title`,
|
||||
`timeAdded`) VALUES (?, ?, ?, ?, ?)');
|
||||
`timeAdded`) VALUES (?, ?, ?, ?, ?)
|
||||
|
||||
$query->execute([$hostImageId, $crc32id, $alt, $title, $timeAdded]);
|
||||
ON DUPLICATE KEY UPDATE `alt` = ?,
|
||||
`title` = ?,
|
||||
`timeUpdated` = ?');
|
||||
|
||||
$query->execute([$hostImageId, $crc32id, $alt, $title, $timeAdded, $alt, $title, $timeUpdated]);
|
||||
|
||||
return $this->_db->lastInsertId();
|
||||
}
|
||||
@ -239,15 +234,6 @@ class MySQL {
|
||||
return $query->rowCount();
|
||||
}
|
||||
|
||||
// Fetches at most one `hostImageToHostPage` row linking the given
// `hostImageId` to the given `hostPageId` (both bound as positional parameters).
// Returns whatever $query->fetch() yields for that single-row query.
public function getHostImageToHostPage(int $hostImageId, int $hostPageId) {
|
||||
|
||||
$query = $this->_db->prepare('SELECT * FROM `hostImageToHostPage` WHERE `hostImageId` = ? AND `hostPageId` = ? LIMIT 1');
|
||||
|
||||
$query->execute([$hostImageId, $hostPageId]);
|
||||
|
||||
return $query->fetch();
|
||||
}
|
||||
|
||||
public function getHostImageHostPages(int $hostImageId) {
|
||||
|
||||
$query = $this->_db->prepare('SELECT * FROM `hostImageToHostPage` WHERE `hostImageId` = ?');
|
||||
@ -257,33 +243,22 @@ class MySQL {
|
||||
return $query->fetchAll();
|
||||
}
|
||||
|
||||
// Writes a `hostImageToHostPage` relation row
// (`hostImageId`, `hostPageId`, `timeAdded`, `timeUpdated`, `quantity`).
// Two variants of the method are interleaved below by the diff view:
//   - addHostImageToHostPage() runs a plain five-value INSERT;
//   - setHostImageToHostPage() appends ON DUPLICATE KEY UPDATE, so on a key
//     collision `timeUpdated` is rewritten and `quantity` is increased by
//     $quantity. $timeUpdated is bound a second time for the UPDATE clause.
// $quantity is cast to int before being concatenated into the SQL text, so only
// an integer literal can reach the statement through that concatenation.
// Both variants return $query->rowCount().
public function addHostImageToHostPage(int $hostImageId, int $hostPageId, int $timeAdded, mixed $timeUpdated, int $quantity) {
|
||||
public function setHostImageToHostPage(int $hostImageId, int $hostPageId, int $timeAdded, mixed $timeUpdated, int $quantity) {
|
||||
|
||||
$query = $this->_db->prepare('INSERT INTO `hostImageToHostPage` (`hostImageId`,
|
||||
`hostPageId`,
|
||||
`timeAdded`,
|
||||
`timeUpdated`,
|
||||
`quantity`) VALUES (?, ?, ?, ?, ?)');
|
||||
`quantity`) VALUES (?, ?, ?, ?, ?)
|
||||
|
||||
$query->execute([$hostImageId, $hostPageId, $timeAdded, $timeUpdated, $quantity]);
|
||||
ON DUPLICATE KEY UPDATE `timeUpdated` = ?,
|
||||
`quantity` = `quantity` + ' . (int) $quantity);
|
||||
|
||||
$query->execute([$hostImageId, $hostPageId, $timeAdded, $timeUpdated, $quantity, $timeUpdated]);
|
||||
|
||||
return $query->rowCount(); // no primary key
|
||||
}
|
||||
|
||||
// Updates the `hostImageToHostPage` row matching $hostImageId and $hostPageId
// (LIMIT 1): increases `quantity` by $quantity and sets `timeUpdated`.
// Note that the argument named $timeAdded is the value bound to `timeUpdated`.
// $quantity is cast to int before being concatenated into the SQL text.
// Returns $query->rowCount().
public function updateHostImageToHostPage(int $hostImageId, int $hostPageId, int $timeAdded, int $quantity) {
|
||||
|
||||
$query = $this->_db->prepare('UPDATE `hostImageToHostPage` SET `quantity` = `quantity` + ' . (int) $quantity . ', `timeUpdated` = ?
|
||||
|
||||
WHERE `hostImageId` = ?
|
||||
AND `hostPageId` = ?
|
||||
|
||||
LIMIT 1');
|
||||
|
||||
$query->execute([$timeAdded, $hostImageId, $hostPageId]);
|
||||
|
||||
return $query->rowCount();
|
||||
}
|
||||
|
||||
public function deleteHostImageToHostPage(int $hostImageId) {
|
||||
|
||||
$query = $this->_db->prepare('DELETE FROM `hostImageToHostPage` WHERE `hostImageId` = ?');
|
||||
|
Loading…
x
Reference in New Issue
Block a user