Browse Source

add crawler/cleaner logs

main
ghost 2 years ago
parent
commit
25b6bce2ec
  1. 18
      README.md
  2. 24
      config/app.php.txt
  3. 86
      crontab/cleaner.php
  4. 113
      crontab/crawler.php
  5. BIN
      database/yggo.mwb
  6. 137
      library/mysql.php
  7. BIN
      media/db-prototype.png

18
README.md

@ -169,19 +169,35 @@ GET m=SphinxQL
##### Crawler ##### Crawler
* [x] Auto crawl links by regular expression rules * [x] Auto crawl links by regular expression rules
+ [x] Pages
+ [x] Images
+ [x] Manifests
* [x] Robots.txt / robots meta tags support (#2) * [x] Robots.txt / robots meta tags support (#2)
* [x] Specific rules configuration for every host * [x] Specific rules configuration for every host
* [x] Deprecated index auto cleaner
* [x] Auto stop crawling on disk quota reached * [x] Auto stop crawling on disk quota reached
* [x] Transactions support to prevent data loss on queue failures * [x] Transactions support to prevent data loss on queue failures
* [x] Distributed index crawling between YGGo nodes through manifest API * [x] Distributed index crawling between YGGo nodes through manifest API
* [x] MIME Content-type crawler settings * [x] MIME Content-type crawler settings
* [x] Ban non-condition links to prevent extra requests * [x] Ban non-condition links to prevent extra requests
* [x] Debug log
* [ ] Indexing new sites homepage in higher priority * [ ] Indexing new sites homepage in higher priority
* [ ] Redirect codes extended processing * [ ] Redirect codes extended processing
* [ ] Palette image index / filter * [ ] Palette image index / filter
* [ ] Crawl queue balancer that depends on available CPU * [ ] Crawl queue balancer that depends on available CPU
##### Cleaner
* [x] Deprecated DB items auto deletion / host settings update
+ [x] Pages
+ [x] Images
+ [x] Manifests
+ [x] Logs
+ [x] Crawler
+ [x] Cleaner
* [x] Banned resources reset by timeout
+ [x] Pages
+ [x] Images
* [x] Debug log
##### Other ##### Other
* [ ] Administrative panel for useful index moderation * [ ] Administrative panel for useful index moderation

24
config/app.php.txt

@ -98,6 +98,18 @@ define('PROXY_CURLOPT_USERAGENT', 'YGGo Search Proxy ( https://github.com/YGGver
// Crawl settings // Crawl settings
/*
* Save crawler debug to `logCrawler` table
*
*/
define('CRAWL_LOG_ENABLED', true);
/*
* Auto clean `logCrawler` items older than this offset, in seconds
*
*/
define('CRAWL_LOG_SECONDS_OFFSET', 60*60*24*30);
/* /*
* Crawler / Bot User Agent name * Crawler / Bot User Agent name
* *
@ -311,6 +323,18 @@ define('CRAWL_MANIFEST_DEFAULT_STATUS', true);
// Cleaner settings // Cleaner settings
/*
* Save cleaner debug to `logCleaner` table
*
*/
define('CLEAN_LOG_ENABLED', true);
/*
* Auto clean `logCleaner` items older than this offset, in seconds
*
*/
define('CLEAN_LOG_SECONDS_OFFSET', 60*60*24*30);
/* /*
* Hosts limit per crontab execution step (https://github.com/YGGverse/YGGo#crontab) * Hosts limit per crontab execution step (https://github.com/YGGverse/YGGo#crontab)
* *

86
crontab/cleaner.php

@ -21,20 +21,23 @@ $db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD);
// Debug // Debug
$timeStart = microtime(true); $timeStart = microtime(true);
$requestsTotal = 0; $httpRequestsTotal = 0;
$requestSizeTotal = 0; $httpRequestsSizeTotal = 0;
$downloadSizeTotal = 0; $httpDownloadSizeTotal = 0;
$requestsTotalTime = 0; $httpRequestsTimeTotal = 0;
$hostsTotal = $db->getTotalHosts(); $hostsTotal = $db->getTotalHosts();
$manifestsTotal = $db->getTotalManifests(); $manifestsTotal = $db->getTotalManifests();
$hostsUpdated = 0; $hostsUpdated = 0;
$hostsPagesDeleted = 0; $hostPagesDeleted = 0;
$hostsImagesDeleted = 0; $hostImagesDeleted = 0;
$manifestsDeleted = 0; $manifestsDeleted = 0;
$hostPagesBansRemoved = 0; $hostPagesBansRemoved = 0;
$hostImagesBansRemoved = 0; $hostImagesBansRemoved = 0;
$logsCleanerDeleted = 0;
$logsCrawlerDeleted = 0;
// Begin update // Begin update
$db->beginTransaction(); $db->beginTransaction();
@ -50,10 +53,10 @@ try {
$curl = new Curl($hostURL . '/robots.txt', CRAWL_CURLOPT_USERAGENT); $curl = new Curl($hostURL . '/robots.txt', CRAWL_CURLOPT_USERAGENT);
// Update curl stats // Update curl stats
$requestsTotal++; $httpRequestsTotal++;
$requestSizeTotal += $curl->getSizeRequest(); $httpRequestsSizeTotal += $curl->getSizeRequest();
$downloadSizeTotal += $curl->getSizeDownload(); $httpDownloadSizeTotal += $curl->getSizeDownload();
$requestsTotalTime += $curl->getTotalTime(); $httpRequestsTimeTotal += $curl->getTotalTime();
if (200 == $curl->getCode() && false !== stripos($curl->getContent(), 'user-agent:')) { if (200 == $curl->getCode() && false !== stripos($curl->getContent(), 'user-agent:')) {
$hostRobots = $curl->getContent(); $hostRobots = $curl->getContent();
@ -76,7 +79,7 @@ try {
$db->deleteHostImageToHostPage($hostImage->hostImageId); $db->deleteHostImageToHostPage($hostImage->hostImageId);
// Delete host image // Delete host image
$hostsImagesDeleted += $db->deleteHostImage($hostImage->hostImageId); $hostImagesDeleted += $db->deleteHostImage($hostImage->hostImageId);
} }
} }
@ -91,7 +94,7 @@ try {
$db->deleteHostPageToHostImage($hostPage->hostPageId); $db->deleteHostPageToHostImage($hostPage->hostPageId);
// Delete host page // Delete host page
$hostsPagesDeleted += $db->deleteHostPage($hostPage->hostPageId); $hostPagesDeleted += $db->deleteHostPage($hostPage->hostPageId);
} }
} }
@ -107,7 +110,7 @@ try {
$db->deleteHostImageToHostPage($hostImage->hostImageId); $db->deleteHostImageToHostPage($hostImage->hostImageId);
// Delete host image // Delete host image
$hostsImagesDeleted += $db->deleteHostImage($hostImage->hostImageId); $hostImagesDeleted += $db->deleteHostImage($hostImage->hostImageId);
} }
} }
@ -119,7 +122,7 @@ try {
$db->deleteHostPageToHostImage($hostPage->hostPageId); $db->deleteHostPageToHostImage($hostPage->hostPageId);
// Delete host page // Delete host page
$hostsPagesDeleted += $db->deleteHostPage($hostPage->hostPageId); $hostPagesDeleted += $db->deleteHostPage($hostPage->hostPageId);
} }
} }
@ -131,7 +134,7 @@ try {
$db->deleteHostImageToHostPage($hostImage->hostImageId); $db->deleteHostImageToHostPage($hostImage->hostImageId);
// Delete host image // Delete host image
$hostsImagesDeleted += $db->deleteHostImage($hostImage->hostImageId); $hostImagesDeleted += $db->deleteHostImage($hostImage->hostImageId);
} }
} }
@ -143,10 +146,10 @@ try {
$curl = new Curl($manifest->url); $curl = new Curl($manifest->url);
// Update curl stats // Update curl stats
$requestsTotal++; $httpRequestsTotal++;
$requestSizeTotal += $curl->getSizeRequest(); $httpRequestsSizeTotal += $curl->getSizeRequest();
$downloadSizeTotal += $curl->getSizeDownload(); $httpDownloadSizeTotal += $curl->getSizeDownload();
$requestsTotalTime += $curl->getTotalTime(); $httpRequestsTimeTotal += $curl->getTotalTime();
// Skip processing non 200 code // Skip processing non 200 code
if (200 != $curl->getCode()) { if (200 != $curl->getCode()) {
@ -198,6 +201,10 @@ try {
// Reset banned images // Reset banned images
$hostImagesBansRemoved += $db->resetBannedHostImages(time() - CLEAN_IMAGE_BAN_SECONDS_OFFSET); $hostImagesBansRemoved += $db->resetBannedHostImages(time() - CLEAN_IMAGE_BAN_SECONDS_OFFSET);
// Delete deprecated logs
$logsCleanerDeleted += $db->deleteLogCleaner(time() - CLEAN_LOG_SECONDS_OFFSET);
$logsCrawlerDeleted += $db->deleteLogCrawler(time() - CRAWL_LOG_SECONDS_OFFSET);
$db->commit(); $db->commit();
} catch(Exception $e){ } catch(Exception $e){
@ -208,10 +215,34 @@ try {
} }
// Debug // Debug
// Wall-clock duration of this run in seconds (microtime(true) delta from $timeStart).
// Computed once so the log row and the console output report the same value.
$executionTimeTotal = microtime(true) - $timeStart;
// Scale the accumulated Curl::getTotalTime() sum before it is stored and printed.
// NOTE(review): presumably microseconds -> seconds; confirm against Curl::getTotalTime()
$httpRequestsTimeTotal = $httpRequestsTimeTotal / 1000000;
// Persist the run counters to the `logCleaner` table only when enabled in the config
if (CLEAN_LOG_ENABLED) {
// Arguments are positional: keep this order in sync with MySQL::addCleanerLog()
$db->addCleanerLog( time(),
$hostsTotal,
$hostsUpdated,
$hostPagesDeleted,
$hostPagesBansRemoved,
$hostImagesDeleted,
$hostImagesBansRemoved,
$manifestsTotal,
$manifestsDeleted,
$logsCleanerDeleted,
$logsCrawlerDeleted,
$httpRequestsTotal,
$httpRequestsSizeTotal,
$httpDownloadSizeTotal,
$httpRequestsTimeTotal,
$executionTimeTotal);
}
echo 'Hosts total: ' . $hostsTotal . PHP_EOL; echo 'Hosts total: ' . $hostsTotal . PHP_EOL;
echo 'Hosts updated: ' . $hostsUpdated . PHP_EOL; echo 'Hosts updated: ' . $hostsUpdated . PHP_EOL;
echo 'Hosts pages deleted: ' . $hostsPagesDeleted . PHP_EOL; echo 'Hosts pages deleted: ' . $hostPagesDeleted . PHP_EOL;
echo 'Hosts images deleted: ' . $hostsImagesDeleted . PHP_EOL; echo 'Hosts images deleted: ' . $hostImagesDeleted . PHP_EOL;
echo 'Manifests total: ' . $manifestsTotal . PHP_EOL; echo 'Manifests total: ' . $manifestsTotal . PHP_EOL;
echo 'Manifests deleted: ' . $manifestsDeleted . PHP_EOL; echo 'Manifests deleted: ' . $manifestsDeleted . PHP_EOL;
@ -219,9 +250,12 @@ echo 'Manifests deleted: ' . $manifestsDeleted . PHP_EOL;
echo 'Host page bans removed: ' . $hostPagesBansRemoved . PHP_EOL; echo 'Host page bans removed: ' . $hostPagesBansRemoved . PHP_EOL;
echo 'Host images bans removed: ' . $hostImagesBansRemoved . PHP_EOL; echo 'Host images bans removed: ' . $hostImagesBansRemoved . PHP_EOL;
echo 'HTTP Requests total: ' . $requestsTotal . PHP_EOL; echo 'Cleaner logs deleted: ' . $logsCleanerDeleted . PHP_EOL;
echo 'HTTP Requests total size: ' . $requestSizeTotal . PHP_EOL; echo 'Crawler logs deleted: ' . $logsCrawlerDeleted . PHP_EOL;
echo 'HTTP Download total size: ' . $downloadSizeTotal . PHP_EOL;
echo 'HTTP Requests total time: ' . $requestsTotalTime / 1000000 . PHP_EOL; echo 'HTTP Requests total: ' . $httpRequestsTotal . PHP_EOL;
echo 'HTTP Requests total size: ' . $httpRequestsSizeTotal . PHP_EOL;
echo 'HTTP Download total size: ' . $httpDownloadSizeTotal . PHP_EOL;
echo 'HTTP Requests total time: ' . $httpRequestsTimeTotal . PHP_EOL;
echo 'Total time: ' . microtime(true) - $timeStart . PHP_EOL . PHP_EOL; echo 'Total time: ' . $executionTimeTotal . PHP_EOL . PHP_EOL;

113
crontab/crawler.php

@ -27,17 +27,17 @@ if (CRAWL_STOP_DISK_QUOTA_MB_LEFT > disk_free_space('/') / 1000000) {
// Debug // Debug
$timeStart = microtime(true); $timeStart = microtime(true);
$requestsTotal = 0; $httpRequestsTotal = 0;
$requestSizeTotal = 0; $httpRequestsSizeTotal = 0;
$downloadSizeTotal = 0; $httpDownloadSizeTotal = 0;
$requestsTotalTime = 0; $httpRequestsTimeTotal = 0;
$hostPagesProcessed = 0; $hostPagesProcessed = 0;
$hostImagesProcessed = 0; $hostImagesProcessed = 0;
$manifestsProcessed = 0; $manifestsProcessed = 0;
$hostPagesIndexed = 0; $hostPagesIndexed = 0;
$hostImagesIndexed = 0; $hostImagesIndexed = 0;
$manifestsIndexed = 0; $manifestsAdded = 0;
$hostPagesAdded = 0; $hostPagesAdded = 0;
$hostImagesAdded = 0; $hostImagesAdded = 0;
$hostsAdded = 0; $hostsAdded = 0;
@ -57,10 +57,10 @@ try {
$curl = new Curl($queueManifest->url); $curl = new Curl($queueManifest->url);
// Update curl stats // Update curl stats
$requestsTotal++; $httpRequestsTotal++;
$requestSizeTotal += $curl->getSizeRequest(); $httpRequestsSizeTotal += $curl->getSizeRequest();
$downloadSizeTotal += $curl->getSizeDownload(); $httpDownloadSizeTotal += $curl->getSizeDownload();
$requestsTotalTime += $curl->getTotalTime(); $httpRequestsTimeTotal += $curl->getTotalTime();
// Update manifest index anyway, with the current time and http code // Update manifest index anyway, with the current time and http code
$manifestsProcessed += $db->updateManifestCrawlQueue($queueManifest->manifestId, time(), $curl->getCode()); $manifestsProcessed += $db->updateManifestCrawlQueue($queueManifest->manifestId, time(), $curl->getCode());
@ -120,10 +120,10 @@ try {
$curl = new Curl($remoteManifest->result->api->hosts); $curl = new Curl($remoteManifest->result->api->hosts);
// Update curl stats // Update curl stats
$requestsTotal++; $httpRequestsTotal++;
$requestSizeTotal += $curl->getSizeRequest(); $httpRequestsSizeTotal += $curl->getSizeRequest();
$downloadSizeTotal += $curl->getSizeDownload(); $httpDownloadSizeTotal += $curl->getSizeDownload();
$requestsTotalTime += $curl->getTotalTime(); $httpRequestsTimeTotal += $curl->getTotalTime();
// Skip processing non 200 code // Skip processing non 200 code
if (200 != $curl->getCode()) { if (200 != $curl->getCode()) {
@ -184,10 +184,10 @@ try {
$curl = new Curl($hostURL . '/robots.txt', CRAWL_CURLOPT_USERAGENT); $curl = new Curl($hostURL . '/robots.txt', CRAWL_CURLOPT_USERAGENT);
// Update curl stats // Update curl stats
$requestsTotal++; $httpRequestsTotal++;
$requestSizeTotal += $curl->getSizeRequest(); $httpRequestsSizeTotal += $curl->getSizeRequest();
$downloadSizeTotal += $curl->getSizeDownload(); $httpDownloadSizeTotal += $curl->getSizeDownload();
$requestsTotalTime += $curl->getTotalTime(); $httpRequestsTimeTotal += $curl->getTotalTime();
if (200 == $curl->getCode() && false !== stripos($curl->getContent(), 'user-agent:')) { if (200 == $curl->getCode() && false !== stripos($curl->getContent(), 'user-agent:')) {
$hostRobots = $curl->getContent(); $hostRobots = $curl->getContent();
@ -254,10 +254,10 @@ try {
$curl = new Curl($queueHostImageURL, CRAWL_CURLOPT_USERAGENT); $curl = new Curl($queueHostImageURL, CRAWL_CURLOPT_USERAGENT);
// Update curl stats // Update curl stats
$requestsTotal++; $httpRequestsTotal++;
$requestSizeTotal += $curl->getSizeRequest(); $httpRequestsSizeTotal += $curl->getSizeRequest();
$downloadSizeTotal += $curl->getSizeDownload(); $httpDownloadSizeTotal += $curl->getSizeDownload();
$requestsTotalTime += $curl->getTotalTime(); $httpRequestsTimeTotal += $curl->getTotalTime();
// Update image index anyway, with the current time and http code // Update image index anyway, with the current time and http code
$hostImagesProcessed += $db->updateHostImageCrawlQueue($queueHostImage->hostImageId, time(), $curl->getCode()); $hostImagesProcessed += $db->updateHostImageCrawlQueue($queueHostImage->hostImageId, time(), $curl->getCode());
@ -334,10 +334,10 @@ try {
$curl = new Curl($queueHostPageURL, CRAWL_CURLOPT_USERAGENT); $curl = new Curl($queueHostPageURL, CRAWL_CURLOPT_USERAGENT);
// Update curl stats // Update curl stats
$requestsTotal++; $httpRequestsTotal++;
$requestSizeTotal += $curl->getSizeRequest(); $httpRequestsSizeTotal += $curl->getSizeRequest();
$downloadSizeTotal += $curl->getSizeDownload(); $httpDownloadSizeTotal += $curl->getSizeDownload();
$requestsTotalTime += $curl->getTotalTime(); $httpRequestsTimeTotal += $curl->getTotalTime();
// Update page index anyway, with the current time and http code // Update page index anyway, with the current time and http code
$hostPagesProcessed += $db->updateHostPageCrawlQueue($queueHostPage->hostPageId, time(), $curl->getCode()); $hostPagesProcessed += $db->updateHostPageCrawlQueue($queueHostPage->hostPageId, time(), $curl->getCode());
@ -447,6 +447,8 @@ try {
$metaYggoManifest, $metaYggoManifest,
(string) CRAWL_MANIFEST_DEFAULT_STATUS, (string) CRAWL_MANIFEST_DEFAULT_STATUS,
time()); time());
$manifestsAdded++;
} }
} }
@ -504,10 +506,10 @@ try {
$curl = new Curl($hostImageURL->string . '/robots.txt', CRAWL_CURLOPT_USERAGENT); $curl = new Curl($hostImageURL->string . '/robots.txt', CRAWL_CURLOPT_USERAGENT);
// Update curl stats // Update curl stats
$requestsTotal++; $httpRequestsTotal++;
$requestSizeTotal += $curl->getSizeRequest(); $httpRequestsSizeTotal += $curl->getSizeRequest();
$downloadSizeTotal += $curl->getSizeDownload(); $httpDownloadSizeTotal += $curl->getSizeDownload();
$requestsTotalTime += $curl->getTotalTime(); $httpRequestsTimeTotal += $curl->getTotalTime();
if (200 == $curl->getCode() && false !== stripos($curl->getContent(), 'user-agent:')) { if (200 == $curl->getCode() && false !== stripos($curl->getContent(), 'user-agent:')) {
$hostRobots = $curl->getContent(); $hostRobots = $curl->getContent();
@ -666,10 +668,10 @@ try {
$curl = new Curl($hostURL->string . '/robots.txt', CRAWL_CURLOPT_USERAGENT); $curl = new Curl($hostURL->string . '/robots.txt', CRAWL_CURLOPT_USERAGENT);
// Update curl stats // Update curl stats
$requestsTotal++; $httpRequestsTotal++;
$requestSizeTotal += $curl->getSizeRequest(); $httpRequestsSizeTotal += $curl->getSizeRequest();
$downloadSizeTotal += $curl->getSizeDownload(); $httpDownloadSizeTotal += $curl->getSizeDownload();
$requestsTotalTime += $curl->getTotalTime(); $httpRequestsTimeTotal += $curl->getTotalTime();
if (200 == $curl->getCode() && false !== stripos($curl->getContent(), 'user-agent:')) { if (200 == $curl->getCode() && false !== stripos($curl->getContent(), 'user-agent:')) {
$hostRobots = $curl->getContent(); $hostRobots = $curl->getContent();
@ -745,24 +747,49 @@ try {
} }
// Debug // Debug
// Wall-clock duration of this run in seconds (microtime(true) delta from $timeStart).
// Computed once so the log row and the console output report the same value.
$executionTimeTotal = microtime(true) - $timeStart;
// Scale the accumulated Curl::getTotalTime() sum before it is stored and printed.
// NOTE(review): presumably microseconds -> seconds; confirm against Curl::getTotalTime()
$httpRequestsTimeTotal = $httpRequestsTimeTotal / 1000000;
// Persist the run counters to the `logCrawler` table only when enabled in the config
if (CRAWL_LOG_ENABLED) {
// Arguments are positional: keep this order in sync with MySQL::addCrawlerLog().
// Note the image counters go Indexed, Processed - the reverse of the page counters.
$db->addCrawlerLog(time(),
$hostsAdded,
$hostPagesProcessed,
$hostPagesIndexed,
$hostPagesAdded,
$hostPagesBanned,
$hostImagesIndexed,
$hostImagesProcessed,
$hostImagesAdded,
$hostImagesBanned,
$manifestsProcessed,
$manifestsAdded,
$httpRequestsTotal,
$httpRequestsSizeTotal,
$httpDownloadSizeTotal,
$httpRequestsTimeTotal,
$executionTimeTotal);
}
// Debug output
echo 'Hosts added: ' . $hostsAdded . PHP_EOL;
echo 'Pages processed: ' . $hostPagesProcessed . PHP_EOL; echo 'Pages processed: ' . $hostPagesProcessed . PHP_EOL;
echo 'Pages indexed: ' . $hostPagesIndexed . PHP_EOL; echo 'Pages indexed: ' . $hostPagesIndexed . PHP_EOL;
echo 'Pages added: ' . $hostPagesAdded . PHP_EOL; echo 'Pages added: ' . $hostPagesAdded . PHP_EOL;
echo 'Pages banned: ' . $hostPagesBanned . PHP_EOL;
echo 'Images processed: ' . $hostImagesProcessed . PHP_EOL; echo 'Images processed: ' . $hostImagesProcessed . PHP_EOL;
echo 'Images indexed: ' . $hostImagesIndexed . PHP_EOL; echo 'Images indexed: ' . $hostImagesIndexed . PHP_EOL;
echo 'Images added: ' . $hostImagesAdded . PHP_EOL; echo 'Images added: ' . $hostImagesAdded . PHP_EOL;
echo 'Images banned: ' . $hostImagesBanned . PHP_EOL;
echo 'Manifests processed: ' . $manifestsProcessed . PHP_EOL; echo 'Manifests processed: ' . $manifestsProcessed . PHP_EOL;
echo 'Manifests indexed: ' . $manifestsIndexed . PHP_EOL; echo 'Manifests added: ' . $manifestsAdded . PHP_EOL;
echo 'Hosts added: ' . $hostsAdded . PHP_EOL;
echo 'Hosts pages banned: ' . $hostPagesBanned . PHP_EOL;
echo 'Hosts images banned: ' . $hostImagesBanned . PHP_EOL;
echo 'HTTP Requests total: ' . $requestsTotal . PHP_EOL; echo 'HTTP Requests total: ' . $httpRequestsTotal . PHP_EOL;
echo 'HTTP Requests total size: ' . $requestSizeTotal . PHP_EOL; echo 'HTTP Requests total size: ' . $httpRequestsSizeTotal . PHP_EOL;
echo 'HTTP Download total size: ' . $downloadSizeTotal . PHP_EOL; echo 'HTTP Download total size: ' . $httpDownloadSizeTotal . PHP_EOL;
echo 'HTTP Requests total time: ' . $requestsTotalTime / 1000000 . PHP_EOL; echo 'HTTP Requests total time: ' . $httpRequestsTimeTotal . PHP_EOL;
echo 'Total time: ' . microtime(true) - $timeStart . PHP_EOL . PHP_EOL; echo 'Total time: ' . $executionTimeTotal . PHP_EOL . PHP_EOL;

BIN
database/yggo.mwb

Binary file not shown.

137
library/mysql.php

@ -564,7 +564,7 @@ class MySQL {
public function resetBannedHostPages(int $timeOffset) { public function resetBannedHostPages(int $timeOffset) {
$query = $this->_db->prepare('UPDATE `hostPage` SET `timeBanned` = NULL WHERE `timeBanned` IS NOT NULL AND `timeBanned` > ' . (int) $timeOffset); $query = $this->_db->prepare('UPDATE `hostPage` SET `timeBanned` = NULL WHERE `timeBanned` IS NOT NULL AND `timeBanned` < ' . (int) $timeOffset);
$query->execute(); $query->execute();
@ -573,7 +573,72 @@ class MySQL {
public function resetBannedHostImages(int $timeOffset) { public function resetBannedHostImages(int $timeOffset) {
$query = $this->_db->prepare('UPDATE `hostImage` SET `timeBanned` = NULL WHERE `timeBanned` IS NOT NULL AND `timeBanned` > ' . (int) $timeOffset); $query = $this->_db->prepare('UPDATE `hostImage` SET `timeBanned` = NULL WHERE `timeBanned` IS NOT NULL AND `timeBanned` < ' . (int) $timeOffset);
$query->execute();
return $query->rowCount();
}
/**
 * Insert one row of cleaner run statistics into the `logCleaner` table.
 *
 * Every argument is written to the column of the same name through a
 * positional `?` placeholder, so the column list, the placeholder list and
 * the array passed to execute() must keep the same order and length (16).
 *
 * @param int   $timeAdded              Value for `timeAdded` (the column deleteLogCleaner() filters on)
 * @param int   $hostsTotal             Value for `hostsTotal`
 * @param int   $hostsUpdated           Value for `hostsUpdated`
 * @param int   $hostPagesDeleted       Value for `hostPagesDeleted`
 * @param int   $hostPagesBansRemoved   Value for `hostPagesBansRemoved`
 * @param int   $hostImagesDeleted      Value for `hostImagesDeleted`
 * @param int   $hostImagesBansRemoved  Value for `hostImagesBansRemoved`
 * @param int   $manifestsTotal         Value for `manifestsTotal`
 * @param int   $manifestsDeleted       Value for `manifestsDeleted`
 * @param int   $logsCleanerDeleted     Value for `logsCleanerDeleted`
 * @param int   $logsCrawlerDeleted     Value for `logsCrawlerDeleted`
 * @param int   $httpRequestsTotal      Value for `httpRequestsTotal`
 * @param int   $httpRequestsSizeTotal  Value for `httpRequestsSizeTotal`
 * @param int   $httpDownloadSizeTotal  Value for `httpDownloadSizeTotal`
 * @param float $httpRequestsTimeTotal  Value for `httpRequestsTimeTotal`
 * @param float $executionTimeTotal     Value for `executionTimeTotal`
 *
 * @return mixed Whatever `$this->_db->lastInsertId()` returns
 *               (presumably the new row id - `_db` looks like a PDO handle, confirm)
 */
public function addCleanerLog(int $timeAdded,
int $hostsTotal,
int $hostsUpdated,
int $hostPagesDeleted,
int $hostPagesBansRemoved,
int $hostImagesDeleted,
int $hostImagesBansRemoved,
int $manifestsTotal,
int $manifestsDeleted,
int $logsCleanerDeleted,
int $logsCrawlerDeleted,
int $httpRequestsTotal,
int $httpRequestsSizeTotal,
int $httpDownloadSizeTotal,
float $httpRequestsTimeTotal,
float $executionTimeTotal) {
// 16 columns, 16 placeholders - keep both lists aligned with the execute() array below
$query = $this->_db->prepare('INSERT INTO `logCleaner` (`timeAdded`,
`hostsTotal`,
`hostsUpdated`,
`hostPagesDeleted`,
`hostPagesBansRemoved`,
`hostImagesDeleted`,
`hostImagesBansRemoved`,
`manifestsTotal`,
`manifestsDeleted`,
`logsCleanerDeleted`,
`logsCrawlerDeleted`,
`httpRequestsTotal`,
`httpRequestsSizeTotal`,
`httpDownloadSizeTotal`,
`httpRequestsTimeTotal`,
`executionTimeTotal`) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
// Values are bound positionally, in the same order as the column list above
$query->execute([
$timeAdded,
$hostsTotal,
$hostsUpdated,
$hostPagesDeleted,
$hostPagesBansRemoved,
$hostImagesDeleted,
$hostImagesBansRemoved,
$manifestsTotal,
$manifestsDeleted,
$logsCleanerDeleted,
$logsCrawlerDeleted,
$httpRequestsTotal,
$httpRequestsSizeTotal,
$httpDownloadSizeTotal,
$httpRequestsTimeTotal,
$executionTimeTotal
]);
return $this->_db->lastInsertId();
}
public function deleteLogCleaner(int $timeOffset) {
$query = $this->_db->prepare('DELETE FROM `logCleaner` WHERE `timeAdded` < ' . (int) $timeOffset);
$query->execute(); $query->execute();
@ -675,4 +740,72 @@ class MySQL {
return $query->rowCount(); return $query->rowCount();
} }
/**
 * Insert one row of crawler run statistics into the `logCrawler` table.
 *
 * Every argument is written to the column of the same name through a
 * positional `?` placeholder, so the column list, the placeholder list and
 * the array passed to execute() must keep the same order and length (17).
 *
 * Note for positional callers: the image counters are ordered
 * Indexed, Processed - the reverse of the page counters (Processed, Indexed).
 *
 * @param int   $timeAdded              Value for `timeAdded` (the column deleteLogCrawler() filters on)
 * @param int   $hostsAdded             Value for `hostsAdded`
 * @param int   $hostPagesProcessed     Value for `hostPagesProcessed`
 * @param int   $hostPagesIndexed       Value for `hostPagesIndexed`
 * @param int   $hostPagesAdded         Value for `hostPagesAdded`
 * @param int   $hostPagesBanned        Value for `hostPagesBanned`
 * @param int   $hostImagesIndexed      Value for `hostImagesIndexed`
 * @param int   $hostImagesProcessed    Value for `hostImagesProcessed`
 * @param int   $hostImagesAdded        Value for `hostImagesAdded`
 * @param int   $hostImagesBanned       Value for `hostImagesBanned`
 * @param int   $manifestsProcessed     Value for `manifestsProcessed`
 * @param int   $manifestsAdded         Value for `manifestsAdded`
 * @param int   $httpRequestsTotal      Value for `httpRequestsTotal`
 * @param int   $httpRequestsSizeTotal  Value for `httpRequestsSizeTotal`
 * @param int   $httpDownloadSizeTotal  Value for `httpDownloadSizeTotal`
 * @param float $httpRequestsTimeTotal  Value for `httpRequestsTimeTotal`
 * @param float $executionTimeTotal     Value for `executionTimeTotal`
 *
 * @return mixed Whatever `$this->_db->lastInsertId()` returns
 *               (presumably the new row id - `_db` looks like a PDO handle, confirm)
 */
public function addCrawlerLog(int $timeAdded,
int $hostsAdded,
int $hostPagesProcessed,
int $hostPagesIndexed,
int $hostPagesAdded,
int $hostPagesBanned,
int $hostImagesIndexed,
int $hostImagesProcessed,
int $hostImagesAdded,
int $hostImagesBanned,
int $manifestsProcessed,
int $manifestsAdded,
int $httpRequestsTotal,
int $httpRequestsSizeTotal,
int $httpDownloadSizeTotal,
float $httpRequestsTimeTotal,
float $executionTimeTotal) {
// 17 columns, 17 placeholders - keep both lists aligned with the execute() array below
$query = $this->_db->prepare('INSERT INTO `logCrawler` (`timeAdded`,
`hostsAdded`,
`hostPagesProcessed`,
`hostPagesIndexed`,
`hostPagesAdded`,
`hostPagesBanned`,
`hostImagesIndexed`,
`hostImagesProcessed`,
`hostImagesAdded`,
`hostImagesBanned`,
`manifestsProcessed`,
`manifestsAdded`,
`httpRequestsTotal`,
`httpRequestsSizeTotal`,
`httpDownloadSizeTotal`,
`httpRequestsTimeTotal`,
`executionTimeTotal`) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
// Values are bound positionally, in the same order as the column list above
$query->execute([
$timeAdded,
$hostsAdded,
$hostPagesProcessed,
$hostPagesIndexed,
$hostPagesAdded,
$hostPagesBanned,
$hostImagesIndexed,
$hostImagesProcessed,
$hostImagesAdded,
$hostImagesBanned,
$manifestsProcessed,
$manifestsAdded,
$httpRequestsTotal,
$httpRequestsSizeTotal,
$httpDownloadSizeTotal,
$httpRequestsTimeTotal,
$executionTimeTotal
]);
return $this->_db->lastInsertId();
}
/**
 * Delete `logCrawler` rows that were added before the given moment.
 *
 * @param int $timeOffset Rows whose `timeAdded` is strictly less than this value are removed
 *
 * @return int Number of rows affected, as reported by rowCount() on the executed statement
 */
public function deleteLogCrawler(int $timeOffset) {

  // Bind the offset through a placeholder instead of concatenating it into
  // the SQL text, the same way addCrawlerLog() passes its values
  $query = $this->_db->prepare('DELETE FROM `logCrawler` WHERE `timeAdded` < ?');

  $query->execute([$timeOffset]);

  return $query->rowCount();
}
} }

BIN
media/db-prototype.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 110 KiB

After

Width:  |  Height:  |  Size: 126 KiB

Loading…
Cancel
Save