diff --git a/crontab/crawler.php b/crontab/crawler.php index e4f6408..1a44037 100644 --- a/crontab/crawler.php +++ b/crontab/crawler.php @@ -266,12 +266,10 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND $robots = new Robots(($hostRobots ? (string) $hostRobots : (string) CRAWL_ROBOTS_DEFAULT_RULES) . PHP_EOL . ($hostRobotsPostfix ? (string) $hostRobotsPostfix : (string) CRAWL_ROBOTS_POSTFIX_RULES)); // Save image info - $hostImageId = $db->getHostImage($hostId, crc32($hostImageURI->string)); - if ($hostStatus && // host enabled $robots->uriAllowed($hostImageURI->string) && // src allowed by robots.txt rules $hostImageLimit > $db->getTotalHostImages($hostId) && // images quantity not reached host limit - !$hostImageId) { // image not exists + !$hostImageId = $db->getHostImageId($hostId, crc32($hostImageURI->string))) { // image not exists // Add host image if ($hostImageId = $db->addHostImage($hostId, crc32($hostImageURI->string), $hostImageURI->string, time(), null, 200)) { @@ -284,19 +282,17 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND } } - // Add host image description - $hostImageDescriptionCRC32id = crc32(md5((string) $imageAlt . (string) $imageTitle)); + // Add/update host image description + $db->setHostImageDescription($hostImageId, + crc32(md5((string) $imageAlt . (string) $imageTitle)), + Filter::imageAlt($imageAlt), + Filter::imageTitle($imageTitle), + time(), + time()); - if (!$db->getHostImageDescription($hostImageId, $hostImageDescriptionCRC32id)) { - $db->addHostImageDescription($hostImageId, $hostImageDescriptionCRC32id, Filter::imageAlt($imageAlt), Filter::imageTitle($imageTitle), time()); - } // Relate host image with host page was found - if (!$db->getHostImageToHostPage($hostImageId, $queueHostPage->hostPageId)) { - $db->addHostImageToHostPage($hostImageId, $queueHostPage->hostPageId, time(), null, 1); - } else { - $db->updateHostImageToHostPage($hostImageId, $queueHostPage->hostPageId, time(), 1); - } + $db->setHostImageToHostPage($hostImageId, $queueHostPage->hostPageId, time(), time(), 1); // Increase page rank when link does not match the current host if ($hostImageURL->scheme . '://' . @@ -434,7 +430,7 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND if ($hostStatus && // host enabled $robots->uriAllowed($hostPageURI->string) && // page allowed by robots.txt rules $hostPageLimit > $db->getTotalHostPages($hostId) && // pages quantity not reached host limit - !$db->getHostPage($hostId, crc32($hostPageURI->string))) { // page not exists + !$db->getHostPage($hostId, crc32($hostPageURI->string))) { // page not exists if ($db->addHostPage($hostId, crc32($hostPageURI->string), $hostPageURI->string, time())) { diff --git a/database/yggo.mwb b/database/yggo.mwb index d5c8f29..15ed8f4 100644 Binary files a/database/yggo.mwb and b/database/yggo.mwb differ diff --git a/library/mysql.php b/library/mysql.php index e2ee1ea..80f674e 100644 --- a/library/mysql.php +++ b/library/mysql.php @@ -103,13 +103,13 @@ class MySQL { return $query->fetch()->total; } - public function getHostImage(int $hostId, int $crc32uri) { + public function getHostImageId(int $hostId, int $crc32uri) { - $query = $this->_db->prepare('SELECT * FROM `hostImage` WHERE `hostId` = ? AND `crc32uri` = ? LIMIT 1'); + $query = $this->_db->prepare('SELECT `hostImageId` FROM `hostImage` WHERE `hostId` = ? AND `crc32uri` = ? LIMIT 1'); $query->execute([$hostId, $crc32uri]); - return $query->fetch(); + return $query->rowCount() ? $query->fetch()->hostImageId : 0; } public function getHostImages(int $hostId) { @@ -208,24 +208,19 @@ class MySQL { return $query->rowCount(); } - public function getHostImageDescription(int $hostImageId, int $crc32id) { - - $query = $this->_db->prepare('SELECT * FROM `hostImageDescription` WHERE `hostImageId` = ? AND `crc32id` = ? LIMIT 1'); - - $query->execute([$hostImageId, $crc32id]); - - return $query->fetch(); - } - - public function addHostImageDescription(int $hostImageId, int $crc32id, string $alt, string $title, int $timeAdded) { + public function setHostImageDescription(int $hostImageId, int $crc32id, string $alt, string $title, int $timeAdded, int $timeUpdated) { $query = $this->_db->prepare('INSERT INTO `hostImageDescription` (`hostImageId`, `crc32id`, `alt`, `title`, - `timeAdded`) VALUES (?, ?, ?, ?, ?)'); + `timeAdded`) VALUES (?, ?, ?, ?, ?) + + ON DUPLICATE KEY UPDATE `alt` = ?, + `title` = ?, + `timeUpdated` = ?'); - $query->execute([$hostImageId, $crc32id, $alt, $title, $timeAdded]); + $query->execute([$hostImageId, $crc32id, $alt, $title, $timeAdded, $alt, $title, $timeUpdated]); return $this->_db->lastInsertId(); } @@ -239,15 +234,6 @@ class MySQL { return $query->rowCount(); } - public function getHostImageToHostPage(int $hostImageId, int $hostPageId) { - - $query = $this->_db->prepare('SELECT * FROM `hostImageToHostPage` WHERE `hostImageId` = ? AND `hostPageId` = ? LIMIT 1'); - - $query->execute([$hostImageId, $hostPageId]); - - return $query->fetch(); - } - public function getHostImageHostPages(int $hostImageId) { $query = $this->_db->prepare('SELECT * FROM `hostImageToHostPage` WHERE `hostImageId` = ?'); @@ -257,31 +243,20 @@ class MySQL { return $query->fetchAll(); } - public function addHostImageToHostPage(int $hostImageId, int $hostPageId, int $timeAdded, mixed $timeUpdated, int $quantity) { + public function setHostImageToHostPage(int $hostImageId, int $hostPageId, int $timeAdded, mixed $timeUpdated, int $quantity) { $query = $this->_db->prepare('INSERT INTO `hostImageToHostPage` (`hostImageId`, `hostPageId`, `timeAdded`, `timeUpdated`, - `quantity`) VALUES (?, ?, ?, ?, ?)'); - - $query->execute([$hostImageId, $hostPageId, $timeAdded, $timeUpdated, $quantity]); - - return $query->rowCount(); // no primary key - } + `quantity`) VALUES (?, ?, ?, ?, ?) - public function updateHostImageToHostPage(int $hostImageId, int $hostPageId, int $timeAdded, int $quantity) { + ON DUPLICATE KEY UPDATE `timeUpdated` = ?, + `quantity` = `quantity` + ' . (int) $quantity); - $query = $this->_db->prepare('UPDATE `hostImageToHostPage` SET `quantity` = `quantity` + ' . (int) $quantity . ', `timeUpdated` = ? + $query->execute([$hostImageId, $hostPageId, $timeAdded, $timeUpdated, $quantity, $timeUpdated]); - WHERE `hostImageId` = ? - AND `hostPageId` = ? - - LIMIT 1'); - - $query->execute([$timeAdded, $hostImageId, $hostPageId]); - - return $query->rowCount(); + return $query->rowCount(); // no primary key } public function deleteHostImageToHostPage(int $hostImageId) {