diff --git a/crontab/crawler.php b/crontab/crawler.php index 2dd696d..d7af110 100644 --- a/crontab/crawler.php +++ b/crontab/crawler.php @@ -1035,21 +1035,37 @@ $executionTimeTotal = microtime(true) - $timeStart; $httpRequestsTimeTotal = $httpRequestsTimeTotal / 1000000; // Debug output -echo 'Hosts processed: ' . $hostsProcessed . PHP_EOL; -echo 'Hosts added: ' . $hostsAdded . PHP_EOL . PHP_EOL; +echo '[hosts]' . PHP_EOL; +echo 'processed: ' . $hostsProcessed . PHP_EOL; +echo 'added: ' . $hostsAdded . PHP_EOL . PHP_EOL; -echo 'Pages processed: ' . $hostPagesProcessed . PHP_EOL; -echo 'Pages added: ' . $hostPagesAdded . PHP_EOL; -echo 'Pages snaps added: ' . $hostPagesSnapAdded . PHP_EOL; -echo 'Pages banned: ' . $hostPagesBanned . PHP_EOL . PHP_EOL; +echo '[hosts pages]' . PHP_EOL; +echo 'processed: ' . $hostPagesProcessed . PHP_EOL; +echo 'added: ' . $hostPagesAdded . PHP_EOL; +echo 'banned: ' . $hostPagesBanned . PHP_EOL . PHP_EOL; -echo 'Sitemaps processed: ' . $sitemapsProcessed . PHP_EOL . PHP_EOL; +echo '[host page snaps]' . PHP_EOL; +echo 'added: ' . $hostPagesSnapAdded . PHP_EOL . PHP_EOL; -echo 'Manifests processed: ' . $manifestsProcessed . PHP_EOL . PHP_EOL; +echo '[sitemaps]' . PHP_EOL; +echo 'processed: ' . $sitemapsProcessed . PHP_EOL . PHP_EOL; -echo 'HTTP Requests total: ' . $httpRequestsTotal . PHP_EOL; -echo 'HTTP Requests total size: ' . $httpRequestsSizeTotal . PHP_EOL; -echo 'HTTP Download total size: ' . $httpDownloadSizeTotal . PHP_EOL; -echo 'HTTP Requests total time: ' . $httpRequestsTimeTotal . PHP_EOL . PHP_EOL; +echo '[manifests]' . PHP_EOL; +echo 'processed: ' . $manifestsProcessed . PHP_EOL . PHP_EOL; -echo 'Total time: ' . $executionTimeTotal . PHP_EOL . PHP_EOL; \ No newline at end of file +echo '[HTTP]' . PHP_EOL; +echo 'Requests total: ' . $httpRequestsTotal . PHP_EOL; +echo 'Requests size: ' . $httpRequestsSizeTotal . PHP_EOL; +echo 'Download size: ' . $httpDownloadSizeTotal . PHP_EOL; +echo 'Requests time: ' .
$httpRequestsTimeTotal . PHP_EOL . PHP_EOL; + +echo '[MySQL]' . PHP_EOL; +echo 'queries count:' . PHP_EOL; +echo ' select: ' . $db->getDebug()->query->select->total . PHP_EOL; +echo ' insert: ' . $db->getDebug()->query->insert->total . PHP_EOL; +echo ' update: ' . $db->getDebug()->query->update->total . PHP_EOL; +echo ' delete: ' . $db->getDebug()->query->delete->total . PHP_EOL . PHP_EOL; + +echo 'execution time: ' . $executionTimeTotal . PHP_EOL . PHP_EOL; + +echo '-----------------------' . PHP_EOL . PHP_EOL; \ No newline at end of file diff --git a/library/mysql.php b/library/mysql.php index 64b268a..cc6d748 100644 --- a/library/mysql.php +++ b/library/mysql.php @@ -4,12 +4,37 @@ class MySQL { private PDO $_db; + private object $_debug; + public function __construct(string $host, int $port, string $database, string $username, string $password) { $this->_db = new PDO('mysql:dbname=' . $database . ';host=' . $host . ';port=' . $port . ';charset=utf8', $username, $password, [PDO::MYSQL_ATTR_INIT_COMMAND => 'SET NAMES utf8']); $this->_db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION); $this->_db->setAttribute(PDO::ATTR_DEFAULT_FETCH_MODE, PDO::FETCH_OBJ); $this->_db->setAttribute(PDO::ATTR_TIMEOUT, 600); + + $this->_debug = (object) + [ + 'query' => (object) + [ + 'select' => (object) + [ + 'total' => 0 + ], + 'insert' => (object) + [ + 'total' => 0 + ], + 'update' => (object) + [ + 'total' => 0 + ], + 'delete' => (object) + [ + 'total' => 0 + ], + ] + ]; } // System @@ -28,9 +53,16 @@ class MySQL { $this->_db->rollBack(); } + public function getDebug() { + + return $this->_debug; + } + // Host public function getAPIHosts(string $apiHostFields) { + $this->_debug->query->select->total++; + $query = $this->_db->prepare('SELECT ' . $apiHostFields .
' FROM `host`'); $query->execute(); @@ -40,6 +72,8 @@ class MySQL { public function getHosts() { + $this->_debug->query->select->total++; + $query = $this->_db->query('SELECT * FROM `host`'); return $query->fetchAll(); @@ -47,6 +81,8 @@ class MySQL { public function getHost(int $hostId) { + $this->_debug->query->select->total++; + $query = $this->_db->prepare("SELECT *, IF (`port` IS NOT NULL, CONCAT(`scheme`, '://', `name`, ':', `port`), @@ -62,6 +98,8 @@ class MySQL { public function findHostByCRC32URL(int $crc32url) { + $this->_debug->query->select->total++; + $query = $this->_db->prepare('SELECT * FROM `host` WHERE `crc32url` = ? LIMIT 1'); $query->execute([$crc32url]); @@ -71,6 +109,8 @@ class MySQL { public function getTotalHosts() { + $this->_debug->query->select->total++; + $query = $this->_db->prepare('SELECT COUNT(*) AS `total` FROM `host`'); $query->execute(); @@ -80,6 +120,8 @@ class MySQL { public function addHost(string $scheme, string $name, mixed $port, int $crc32url, int $timeAdded) { + $this->_debug->query->insert->total++; + $query = $this->_db->prepare('INSERT INTO `host` (`scheme`, `name`, `port`, @@ -94,6 +136,8 @@ class MySQL { // Host settings public function findHostSettingValue(int $hostId, string $key) { + $this->_debug->query->select->total++; + $query = $this->_db->prepare('SELECT `value` FROM `hostSetting` WHERE `hostId` = ? AND `key` = ? LIMIT 1'); $query->execute([$hostId, $key]); @@ -103,6 +147,8 @@ class MySQL { public function findHostSetting(int $hostId, string $key) { + $this->_debug->query->select->total++; + $query = $this->_db->prepare('SELECT * FROM `hostSetting` WHERE `hostId` = ? AND `key` = ?
LIMIT 1'); $query->execute([$hostId, $key]); @@ -112,6 +158,8 @@ class MySQL { public function addHostSetting(int $hostId, string $key, mixed $value, int $timeAdded) { + $this->_debug->query->insert->total++; + $query = $this->_db->prepare('INSERT INTO `hostSetting` (`hostId`, `key`, `value`, `timeAdded`) VALUES (?, ?, ?, ?)'); $value = json_encode($value); @@ -130,6 +178,8 @@ class MySQL { public function updateHostSetting(int $hostSettingId, mixed $value, int $timeUpdated) { + $this->_debug->query->update->total++; + - $query = $this->_db->query('UPDATE `hostSetting` SET `value` = ?, + $query = $this->_db->prepare('UPDATE `hostSetting` SET `value` = ?, `timeUpdated` = ? @@ -152,6 +202,8 @@ class MySQL { public function deleteHostSetting(int $hostSettingId) { + $this->_debug->query->delete->total++; + - $query = $this->_db->query('DELETE FROM `hostSetting` WHERE `hostSettingId` = ?'); + $query = $this->_db->prepare('DELETE FROM `hostSetting` WHERE `hostSettingId` = ?'); $query->execute([$hostSettingId]); @@ -162,6 +214,8 @@ class MySQL { // Host pages public function getTotalHostPages(int $hostId) { + $this->_debug->query->select->total++; + $query = $this->_db->prepare('SELECT COUNT(*) AS `total` FROM `hostPage` WHERE `hostId` = ?'); $query->execute([$hostId]); @@ -171,6 +225,8 @@ class MySQL { public function getHostPage(int $hostPageId) { + $this->_debug->query->select->total++; + $query = $this->_db->prepare('SELECT * FROM `hostPage` WHERE `hostPageId` = ? LIMIT 1'); $query->execute([$hostPageId]); @@ -180,6 +236,8 @@ class MySQL { public function findHostPageByCRC32URI(int $hostId, int $crc32uri) { + $this->_debug->query->select->total++; + $query = $this->_db->prepare('SELECT * FROM `hostPage` WHERE `hostId` = ? AND `crc32uri` = ?
LIMIT 1'); $query->execute([$hostId, $crc32uri]); @@ -189,6 +247,8 @@ class MySQL { public function getHostPages(int $hostId) { + $this->_debug->query->select->total++; + $query = $this->_db->prepare('SELECT * FROM `hostPage` WHERE `hostId` = ?'); $query->execute([$hostId]); @@ -198,6 +258,8 @@ class MySQL { public function getTopHostPages(int $limit = 100) { + $this->_debug->query->select->total++; + // Get ID (to prevent memory over usage) $query = $this->_db->query("SELECT `hostPageId` FROM `hostPage` @@ -213,6 +275,8 @@ class MySQL { // Get required page details foreach ($query->fetchAll() as $top) { + $this->_debug->query->select->total++; + $query = $this->_db->prepare("SELECT `hostPage`.`hostId`, `hostPage`.`hostPageId`, `hostPage`.`uri`, @@ -252,6 +316,8 @@ class MySQL { public function getHostPagesByIndexed() { + $this->_debug->query->select->total++; + $query = $this->_db->query('SELECT * FROM `hostPage` WHERE `timeUpdated` IS NOT NULL AND `timeBanned` IS NULL'); return $query->fetchAll(); @@ -259,6 +325,8 @@ class MySQL { public function getHostPagesByLimit(int $hostId, int $limit) { + $this->_debug->query->select->total++; + $query = $this->_db->prepare('SELECT * FROM `hostPage` WHERE `hostId` = ? ORDER BY `hostPageId` DESC LIMIT ' . (int) $limit); $query->execute([$hostId]); @@ -268,6 +336,8 @@ class MySQL { public function getLastPageDescription(int $hostPageId) { + $this->_debug->query->select->total++; + $query = $this->_db->prepare('SELECT * FROM `hostPageDescription` WHERE `hostPageId` = ?
ORDER BY `timeAdded` DESC LIMIT 1'); $query->execute([$hostPageId]); @@ -284,6 +354,8 @@ class MySQL { mixed $httpCode = null, mixed $mime = null) { + $this->_debug->query->insert->total++; + $query = $this->_db->prepare('INSERT INTO `hostPage` (`hostId`, `crc32uri`, `uri`, @@ -300,6 +372,8 @@ class MySQL { public function updateHostPageTimeBanned(int $hostPageId, int $timeBanned) { + $this->_debug->query->update->total++; + $query = $this->_db->prepare('UPDATE `hostPage` SET `timeBanned` = ? WHERE `hostPageId` = ? LIMIT 1'); $query->execute([$timeBanned, $hostPageId]); @@ -309,6 +383,8 @@ class MySQL { public function updateHostPageMime(int $hostPageId, string $mime) { + $this->_debug->query->update->total++; + $query = $this->_db->prepare('UPDATE `hostPage` SET `mime` = ? WHERE `hostPageId` = ? LIMIT 1'); $query->execute([$mime, $hostPageId]); @@ -318,6 +394,8 @@ class MySQL { public function updateHostPageRank(int $hostPageId, int $rank) { + $this->_debug->query->update->total++; + $query = $this->_db->prepare('UPDATE `hostPage` SET `rank` = ? WHERE `hostPageId` = ? LIMIT 1'); $query->execute([$rank, $hostPageId]); @@ -327,6 +405,8 @@ class MySQL { public function deleteHostPage(int $hostPageId) { + $this->_debug->query->delete->total++; + $query = $this->_db->prepare('DELETE FROM `hostPage` WHERE `hostPageId` = ?
LIMIT 1'); $query->execute([$hostPageId]); @@ -336,6 +416,8 @@ class MySQL { public function deleteHostPageDescriptions(int $hostPageId) { + $this->_debug->query->delete->total++; + $query = $this->_db->prepare('DELETE FROM `hostPageDescription` WHERE `hostPageId` = ?'); $query->execute([$hostPageId]); @@ -350,6 +432,8 @@ class MySQL { mixed $data, int $timeAdded) { + $this->_debug->query->insert->total++; + $query = $this->_db->prepare('INSERT INTO `hostPageDescription` ( `hostPageId`, `title`, `description`, @@ -372,6 +456,8 @@ class MySQL { public function setHostPageToHostPage(int $hostPageIdSource, int $hostPageIdTarget) { + $this->_debug->query->insert->total++; + $query = $this->_db->prepare('INSERT IGNORE `hostPageToHostPage` (`hostPageIdSource`, `hostPageIdTarget`) VALUES (?, ?)'); $query->execute([$hostPageIdSource, $hostPageIdTarget]); @@ -379,6 +465,8 @@ class MySQL { public function deleteHostPageToHostPage(int $hostPageId) { + $this->_debug->query->delete->total++; + $query = $this->_db->prepare('DELETE FROM `hostPageToHostPage` WHERE `hostPageIdSource` = ? OR `hostPageIdTarget` = ?'); $query->execute([$hostPageId, $hostPageId]); @@ -388,6 +476,8 @@ class MySQL { public function getTotalHostPagesToHostPageByHostPageIdTarget(int $hostPageIdTarget) { + $this->_debug->query->select->total++; + $query = $this->_db->prepare('SELECT COUNT(*) AS `total` FROM `hostPageToHostPage` WHERE `hostPageIdTarget` = ?'); $query->execute([$hostPageIdTarget]); @@ -397,6 +487,8 @@ class MySQL { public function getHostPagesToHostPageByHostPageIdTarget(int $hostPageIdTarget, int $limit = 1000) { + $this->_debug->query->select->total++; + $query = $this->_db->prepare('SELECT * FROM `hostPageToHostPage` WHERE `hostPageIdTarget` = ? LIMIT ' .
(int) $limit); $query->execute([$hostPageIdTarget]); @@ -406,6 +498,8 @@ class MySQL { public function getHostPageToHostPage(int $hostPageIdSource, int $hostPageIdTarget) { + $this->_debug->query->select->total++; + $query = $this->_db->prepare('SELECT * FROM `hostPageToHostPage` WHERE `hostPageIdSource` = ? AND `hostPageIdTarget` = ? LIMIT 1'); $query->execute([$hostPageIdSource, $hostPageIdTarget]); @@ -415,6 +509,8 @@ class MySQL { public function addHostPageSnap(int $hostPageId, int $timeAdded) { + $this->_debug->query->insert->total++; + $query = $this->_db->prepare('INSERT INTO `hostPageSnap` (`hostPageId`, `timeAdded`) VALUES (?, ?)'); $query->execute([$hostPageId, $timeAdded]); @@ -424,6 +520,8 @@ class MySQL { public function deleteHostPageSnap(int $hostPageSnapId) { + $this->_debug->query->delete->total++; + $query = $this->_db->prepare('DELETE FROM `hostPageSnap` WHERE `hostPageSnapId` = ? LIMIT 1'); $query->execute([$hostPageSnapId]); @@ -433,6 +531,8 @@ class MySQL { public function getTotalHostPageSnaps(int $hostPageId) { + $this->_debug->query->select->total++; + $query = $this->_db->prepare('SELECT COUNT(*) AS `total` FROM `hostPageSnap` WHERE `hostPageId` = ?'); $query->execute([$hostPageId]); @@ -442,6 +542,8 @@ class MySQL { public function getHostPageSnaps(int $hostPageId) { + $this->_debug->query->select->total++; + $query = $this->_db->prepare('SELECT * FROM `hostPageSnap` WHERE `hostPageId` = ? ORDER BY `timeAdded` DESC'); $query->execute([$hostPageId]); @@ -451,6 +553,8 @@ class MySQL { public function getHostPageSnap(int $hostPageSnapId) { + $this->_debug->query->select->total++; + $query = $this->_db->prepare('SELECT * FROM `hostPageSnap` WHERE `hostPageSnapId` = ?
LIMIT 1'); $query->execute([$hostPageSnapId]); @@ -460,6 +564,8 @@ class MySQL { public function addHostPageSnapDownload(int $hostPageSnapStorageId, string $crc32ip, int $timeAdded) { + $this->_debug->query->insert->total++; + $query = $this->_db->prepare('INSERT INTO `hostPageSnapDownload` (`hostPageSnapStorageId`, `crc32ip`, `timeAdded`) VALUES (?, ?, ?)'); @@ -471,6 +577,8 @@ class MySQL { public function addHostPageSnapStorage(int $hostPageSnapId, int $crc32name, int $timeAdded) { + $this->_debug->query->insert->total++; + $query = $this->_db->prepare('INSERT INTO `hostPageSnapStorage` (`hostPageSnapId`, `crc32name`, `timeAdded`) VALUES (?, ?, ?)'); @@ -482,6 +590,8 @@ class MySQL { public function findHostPageSnapStorageByCRC32Name(int $hostPageSnapId, int $crc32name) { + $this->_debug->query->select->total++; + $query = $this->_db->prepare('SELECT * FROM `hostPageSnapStorage` WHERE `hostPageSnapId` = ? AND `crc32name` = ?'); $query->execute([$hostPageSnapId, $crc32name]); @@ -491,6 +601,8 @@ class MySQL { public function getHostPageSnapStorages(int $hostPageSnapId) { + $this->_debug->query->select->total++; + $query = $this->_db->prepare('SELECT * FROM `hostPageSnapStorage` WHERE `hostPageSnapId` = ?'); $query->execute([$hostPageSnapId]); @@ -500,6 +612,8 @@ class MySQL { public function deleteHostPageSnapStorages(int $hostPageSnapId) { + $this->_debug->query->delete->total++; + $query = $this->_db->prepare('DELETE FROM `hostPageSnapStorage` WHERE `hostPageSnapId` = ?'); $query->execute([$hostPageSnapId]); @@ -509,6 +623,8 @@ class MySQL { public function deleteHostPageSnapDownloads(int $hostPageSnapStorageId) { + $this->_debug->query->delete->total++; + $query = $this->_db->prepare('DELETE FROM `hostPageSnapDownload` WHERE `hostPageSnapStorageId` = ?'); $query->execute([$hostPageSnapStorageId]); @@ -518,6 +634,8 @@ class MySQL { public function addHostPageDom(int $hostPageId, int $timeAdded, string $selector, string $value) { +
$this->_debug->query->insert->total++; + $query = $this->_db->prepare('INSERT INTO `hostPageDom` SET `hostPageId` = ?, `timeAdded` = ?, `selector` = ?, `value` = ?'); $query->execute([$hostPageId, $timeAdded, $selector, $value]); @@ -525,6 +643,8 @@ class MySQL { public function deleteHostPageDoms(int $hostPageId) { + $this->_debug->query->delete->total++; + $query = $this->_db->prepare('DELETE FROM `hostPageDom` WHERE `hostPageId` = ?'); $query->execute([$hostPageId]); @@ -534,6 +654,8 @@ class MySQL { public function deleteHostPageDomsByTimeAdded(int $timeOffset) { + $this->_debug->query->delete->total++; + $query = $this->_db->prepare('DELETE FROM `hostPageDom` WHERE `timeAdded` < ' . (int) $timeOffset); $query->execute(); @@ -549,6 +671,8 @@ class MySQL { // Cleaner tools public function resetBannedHostPages(int $timeOffset) { + $this->_debug->query->update->total++; + $query = $this->_db->prepare('UPDATE `hostPage` SET `timeBanned` = NULL WHERE `timeBanned` IS NOT NULL AND `timeBanned` < ?'); $query->execute([$timeOffset]); @@ -558,6 +682,8 @@ class MySQL { public function resetBannedHosts(int $timeOffset) { + $this->_debug->query->update->total++; + $query = $this->_db->prepare('UPDATE `host` SET `timeBanned` = NULL WHERE `timeBanned` IS NOT NULL AND `timeBanned` < ?'); $query->execute([$timeOffset]); @@ -568,6 +694,8 @@ class MySQL { // Crawler tools public function getHostPageCrawlQueueTotal(int $timeFrom) { + $this->_debug->query->select->total++; + $query = $this->_db->prepare("SELECT COUNT(*) AS `total` FROM `hostPage` WHERE (`timeUpdated` IS NULL OR `timeUpdated` < ?)
AND `hostPage`.`timeBanned` IS NULL"); @@ -579,6 +707,8 @@ class MySQL { public function getHostPageCrawlQueue(int $limit, int $timeFrom) { + $this->_debug->query->select->total++; + $result = []; // Get ID (to prevent memory over usage) @@ -595,6 +725,8 @@ class MySQL { // Get required page details foreach ($query->fetchAll() as $queue) { + $this->_debug->query->select->total++; + $query = $this->_db->prepare("SELECT `hostPage`.`hostId`, `hostPage`.`hostPageId`, `hostPage`.`uri`, @@ -631,6 +763,8 @@ class MySQL { public function updateHostPageCrawlQueue(int $hostPageId, int $timeUpdated, int $httpCode, int $size) { + $this->_debug->query->update->total++; + $query = $this->_db->prepare('UPDATE `hostPage` SET `timeUpdated` = ?, `httpCode` = ?, `size` = ? WHERE `hostPageId` = ? LIMIT 1'); $query->execute([$timeUpdated, $httpCode, $size, $hostPageId]); @@ -640,6 +774,8 @@ class MySQL { public function getHostCrawlQueue(int $limit, int $timeFrom) { + $this->_debug->query->select->total++; + $result = []; // Get ID (to prevent memory over usage) @@ -666,6 +802,8 @@ class MySQL { public function updateHostCrawlQueue(int $hostId, int $timeUpdated) { + $this->_debug->query->update->total++; + $query = $this->_db->prepare('UPDATE `host` SET `timeUpdated` = ? WHERE `hostId` = ? LIMIT 1'); $query->execute([$timeUpdated, $hostId]);