Browse Source

fix image crawling errors

main
ghost 2 years ago
parent
commit
d4f66c83e7
  1. 22
      crontab/crawler.php
  2. BIN
      database/yggo.mwb
  3. 57
      library/mysql.php

22
crontab/crawler.php

@@ -266,12 +266,10 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
$robots = new Robots(($hostRobots ? (string) $hostRobots : (string) CRAWL_ROBOTS_DEFAULT_RULES) . PHP_EOL . ($hostRobotsPostfix ? (string) $hostRobotsPostfix : (string) CRAWL_ROBOTS_POSTFIX_RULES));
// Save image info
$hostImageId = $db->getHostImage($hostId, crc32($hostImageURI->string));
if ($hostStatus && // host enabled
$robots->uriAllowed($hostImageURI->string) && // src allowed by robots.txt rules
$hostImageLimit > $db->getTotalHostImages($hostId) && // images quantity not reached host limit
!$hostImageId) { // image not exists
!$hostImageId = $db->getHostImageId($hostId, crc32($hostImageURI->string))) { // image not exists
// Add host image
if ($hostImageId = $db->addHostImage($hostId, crc32($hostImageURI->string), $hostImageURI->string, time(), null, 200)) {
@@ -284,19 +282,17 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
}
}
// Add host image description
$hostImageDescriptionCRC32id = crc32(md5((string) $imageAlt . (string) $imageTitle));
// Add/update host image description
$db->setHostImageDescription($hostImageId,
crc32(md5((string) $imageAlt . (string) $imageTitle)),
Filter::imageAlt($imageAlt),
Filter::imageTitle($imageTitle),
time(),
time());
if (!$db->getHostImageDescription($hostImageId, $hostImageDescriptionCRC32id)) {
$db->addHostImageDescription($hostImageId, $hostImageDescriptionCRC32id, Filter::imageAlt($imageAlt), Filter::imageTitle($imageTitle), time());
}
// Relate host image with host page was found
if (!$db->getHostImageToHostPage($hostImageId, $queueHostPage->hostPageId)) {
$db->addHostImageToHostPage($hostImageId, $queueHostPage->hostPageId, time(), null, 1);
} else {
$db->updateHostImageToHostPage($hostImageId, $queueHostPage->hostPageId, time(), 1);
}
$db->setHostImageToHostPage($hostImageId, $queueHostPage->hostPageId, time(), time(), 1);
// Increase page rank when link does not match the current host
if ($hostImageURL->scheme . '://' .

BIN
database/yggo.mwb

Binary file not shown.

57
library/mysql.php

@@ -103,13 +103,13 @@ class MySQL {
return $query->fetch()->total;
}
public function getHostImage(int $hostId, int $crc32uri) {
public function getHostImageId(int $hostId, int $crc32uri) {
$query = $this->_db->prepare('SELECT * FROM `hostImage` WHERE `hostId` = ? AND `crc32uri` = ? LIMIT 1');
$query = $this->_db->prepare('SELECT `hostImageId` FROM `hostImage` WHERE `hostId` = ? AND `crc32uri` = ? LIMIT 1');
$query->execute([$hostId, $crc32uri]);
return $query->fetch();
return $query->rowCount() ? $query->fetch()->hostImageId : 0;
}
public function getHostImages(int $hostId) {
@@ -208,24 +208,19 @@ class MySQL {
return $query->rowCount();
}
/**
 * Look up a single `hostImageDescription` row by image id and description checksum.
 *
 * @param int $hostImageId Value matched against the `hostImageId` column.
 * @param int $crc32id     Value matched against the `crc32id` column.
 *
 * @return mixed Whatever `$query->fetch()` yields for the first matching row.
 *               NOTE(review): presumably a PDO statement, so a falsy value when
 *               nothing matches — confirm against the connection setup.
 */
public function getHostImageDescription(int $hostImageId, int $crc32id) {
$query = $this->_db->prepare('SELECT * FROM `hostImageDescription` WHERE `hostImageId` = ? AND `crc32id` = ? LIMIT 1');
// Values are bound positionally, in the same order as the placeholders above.
$query->execute([$hostImageId, $crc32id]);
return $query->fetch();
}
public function addHostImageDescription(int $hostImageId, int $crc32id, string $alt, string $title, int $timeAdded) {
public function setHostImageDescription(int $hostImageId, int $crc32id, string $alt, string $title, int $timeAdded, int $timeUpdated) {
$query = $this->_db->prepare('INSERT INTO `hostImageDescription` (`hostImageId`,
`crc32id`,
`alt`,
`title`,
`timeAdded`) VALUES (?, ?, ?, ?, ?)');
`timeAdded`) VALUES (?, ?, ?, ?, ?)
ON DUPLICATE KEY UPDATE `alt` = ?,
`title` = ?,
`timeUpdated` = ?');
$query->execute([$hostImageId, $crc32id, $alt, $title, $timeAdded]);
$query->execute([$hostImageId, $crc32id, $alt, $title, $timeAdded, $alt, $title, $timeUpdated]);
return $this->_db->lastInsertId();
}
@@ -239,15 +234,6 @@ class MySQL {
return $query->rowCount();
}
/**
 * Look up a single `hostImageToHostPage` row linking one image to one page.
 *
 * @param int $hostImageId Value matched against the `hostImageId` column.
 * @param int $hostPageId  Value matched against the `hostPageId` column.
 *
 * @return mixed Whatever `$query->fetch()` yields for the first matching row.
 *               NOTE(review): presumably a PDO statement, so a falsy value when
 *               the relation does not exist — confirm against the connection setup.
 */
public function getHostImageToHostPage(int $hostImageId, int $hostPageId) {
$query = $this->_db->prepare('SELECT * FROM `hostImageToHostPage` WHERE `hostImageId` = ? AND `hostPageId` = ? LIMIT 1');
// Values are bound positionally, in the same order as the placeholders above.
$query->execute([$hostImageId, $hostPageId]);
return $query->fetch();
}
public function getHostImageHostPages(int $hostImageId) {
$query = $this->_db->prepare('SELECT * FROM `hostImageToHostPage` WHERE `hostImageId` = ?');
@@ -257,31 +243,20 @@ class MySQL {
return $query->fetchAll();
}
public function addHostImageToHostPage(int $hostImageId, int $hostPageId, int $timeAdded, mixed $timeUpdated, int $quantity) {
public function setHostImageToHostPage(int $hostImageId, int $hostPageId, int $timeAdded, mixed $timeUpdated, int $quantity) {
$query = $this->_db->prepare('INSERT INTO `hostImageToHostPage` (`hostImageId`,
`hostPageId`,
`timeAdded`,
`timeUpdated`,
`quantity`) VALUES (?, ?, ?, ?, ?)');
$query->execute([$hostImageId, $hostPageId, $timeAdded, $timeUpdated, $quantity]);
return $query->rowCount(); // no primary key
}
`quantity`) VALUES (?, ?, ?, ?, ?)
public function updateHostImageToHostPage(int $hostImageId, int $hostPageId, int $timeAdded, int $quantity) {
ON DUPLICATE KEY UPDATE `timeUpdated` = ?,
`quantity` = `quantity` + ' . (int) $quantity);
$query = $this->_db->prepare('UPDATE `hostImageToHostPage` SET `quantity` = `quantity` + ' . (int) $quantity . ', `timeUpdated` = ?
$query->execute([$hostImageId, $hostPageId, $timeAdded, $timeUpdated, $quantity, $timeUpdated]);
WHERE `hostImageId` = ?
AND `hostPageId` = ?
LIMIT 1');
$query->execute([$timeAdded, $hostImageId, $hostPageId]);
return $query->rowCount();
return $query->rowCount(); // no primary key
}
public function deleteHostImageToHostPage(int $hostImageId) {

Loading…
Cancel
Save