mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-01-24 21:44:59 +00:00
add crawler/cleaner logs
This commit is contained in:
parent
dcdc2c50ad
commit
25b6bce2ec
18
README.md
18
README.md
@ -169,19 +169,35 @@ GET m=SphinxQL
|
||||
##### Crawler
|
||||
|
||||
* [x] Auto crawl links by regular expression rules
|
||||
+ [x] Pages
|
||||
+ [x] Images
|
||||
+ [x] Manifests
|
||||
* [x] Robots.txt / robots meta tags support (#2)
|
||||
* [x] Specific rules configuration for every host
|
||||
* [x] Deprecated index auto cleaner
|
||||
* [x] Auto stop crawling on disk quota reached
|
||||
* [x] Transactions support to prevent data loss on queue failures
|
||||
* [x] Distributed index crawling between YGGo nodes through manifest API
|
||||
* [x] MIME Content-type crawler settings
|
||||
* [x] Ban non-condition links to prevent extra requests
|
||||
* [x] Debug log
|
||||
* [ ] Indexing new sites homepage in higher priority
|
||||
* [ ] Redirect codes extended processing
|
||||
* [ ] Palette image index / filter
|
||||
* [ ] Crawl queue balancer that depends on available CPU
|
||||
|
||||
##### Cleaner
|
||||
* [x] Deprecated DB items auto deletion / host settings update
|
||||
+ [x] Pages
|
||||
+ [x] Images
|
||||
+ [x] Manifests
|
||||
+ [x] Logs
|
||||
+ [x] Crawler
|
||||
+ [x] Cleaner
|
||||
* [x] Banned resources reset by timeout
|
||||
+ [x] Pages
|
||||
+ [x] Images
|
||||
* [x] Debug log
|
||||
|
||||
##### Other
|
||||
|
||||
* [ ] Administrative panel for useful index moderation
|
||||
|
@ -98,6 +98,18 @@ define('PROXY_CURLOPT_USERAGENT', 'YGGo Search Proxy ( https://github.com/YGGver
|
||||
|
||||
// Crawl settings
|
||||
|
||||
/*
|
||||
* Save crawler debug to `logCrawler` table
|
||||
*
|
||||
*/
|
||||
define('CRAWL_LOG_ENABLED', true);
|
||||
|
||||
/*
|
||||
* Auto clean `logCrawler` items older than this offset, in seconds
|
||||
*
|
||||
*/
|
||||
define('CRAWL_LOG_SECONDS_OFFSET', 60*60*24*30);
|
||||
|
||||
/*
|
||||
* Crawler / Bot User Agent name
|
||||
*
|
||||
@ -311,6 +323,18 @@ define('CRAWL_MANIFEST_DEFAULT_STATUS', true);
|
||||
|
||||
// Cleaner settings
|
||||
|
||||
/*
|
||||
* Save cleaner debug to `logCleaner` table
|
||||
*
|
||||
*/
|
||||
define('CLEAN_LOG_ENABLED', true);
|
||||
|
||||
/*
|
||||
* Auto clean `logCleaner` items older than this offset, in seconds
|
||||
*
|
||||
*/
|
||||
define('CLEAN_LOG_SECONDS_OFFSET', 60*60*24*30);
|
||||
|
||||
/*
|
||||
* Hosts limit per crontab execution step (https://github.com/YGGverse/YGGo#crontab)
|
||||
*
|
||||
|
@ -21,20 +21,23 @@ $db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD);
|
||||
// Debug
|
||||
$timeStart = microtime(true);
|
||||
|
||||
$requestsTotal = 0;
|
||||
$requestSizeTotal = 0;
|
||||
$downloadSizeTotal = 0;
|
||||
$requestsTotalTime = 0;
|
||||
$httpRequestsTotal = 0;
|
||||
$httpRequestsSizeTotal = 0;
|
||||
$httpDownloadSizeTotal = 0;
|
||||
$httpRequestsTimeTotal = 0;
|
||||
|
||||
$hostsTotal = $db->getTotalHosts();
|
||||
$manifestsTotal = $db->getTotalManifests();
|
||||
$hostsUpdated = 0;
|
||||
$hostsPagesDeleted = 0;
|
||||
$hostsImagesDeleted = 0;
|
||||
$hostPagesDeleted = 0;
|
||||
$hostImagesDeleted = 0;
|
||||
$manifestsDeleted = 0;
|
||||
$hostPagesBansRemoved = 0;
|
||||
$hostImagesBansRemoved = 0;
|
||||
|
||||
$logsCleanerDeleted = 0;
|
||||
$logsCrawlerDeleted = 0;
|
||||
|
||||
// Begin update
|
||||
$db->beginTransaction();
|
||||
|
||||
@ -50,10 +53,10 @@ try {
|
||||
$curl = new Curl($hostURL . '/robots.txt', CRAWL_CURLOPT_USERAGENT);
|
||||
|
||||
// Update curl stats
|
||||
$requestsTotal++;
|
||||
$requestSizeTotal += $curl->getSizeRequest();
|
||||
$downloadSizeTotal += $curl->getSizeDownload();
|
||||
$requestsTotalTime += $curl->getTotalTime();
|
||||
$httpRequestsTotal++;
|
||||
$httpRequestsSizeTotal += $curl->getSizeRequest();
|
||||
$httpDownloadSizeTotal += $curl->getSizeDownload();
|
||||
$httpRequestsTimeTotal += $curl->getTotalTime();
|
||||
|
||||
if (200 == $curl->getCode() && false !== stripos($curl->getContent(), 'user-agent:')) {
|
||||
$hostRobots = $curl->getContent();
|
||||
@ -76,7 +79,7 @@ try {
|
||||
$db->deleteHostImageToHostPage($hostImage->hostImageId);
|
||||
|
||||
// Delete host image
|
||||
$hostsImagesDeleted += $db->deleteHostImage($hostImage->hostImageId);
|
||||
$hostImagesDeleted += $db->deleteHostImage($hostImage->hostImageId);
|
||||
}
|
||||
}
|
||||
|
||||
@ -91,7 +94,7 @@ try {
|
||||
$db->deleteHostPageToHostImage($hostPage->hostPageId);
|
||||
|
||||
// Delete host page
|
||||
$hostsPagesDeleted += $db->deleteHostPage($hostPage->hostPageId);
|
||||
$hostPagesDeleted += $db->deleteHostPage($hostPage->hostPageId);
|
||||
}
|
||||
}
|
||||
|
||||
@ -107,7 +110,7 @@ try {
|
||||
$db->deleteHostImageToHostPage($hostImage->hostImageId);
|
||||
|
||||
// Delete host image
|
||||
$hostsImagesDeleted += $db->deleteHostImage($hostImage->hostImageId);
|
||||
$hostImagesDeleted += $db->deleteHostImage($hostImage->hostImageId);
|
||||
}
|
||||
}
|
||||
|
||||
@ -119,7 +122,7 @@ try {
|
||||
$db->deleteHostPageToHostImage($hostPage->hostPageId);
|
||||
|
||||
// Delete host page
|
||||
$hostsPagesDeleted += $db->deleteHostPage($hostPage->hostPageId);
|
||||
$hostPagesDeleted += $db->deleteHostPage($hostPage->hostPageId);
|
||||
}
|
||||
}
|
||||
|
||||
@ -131,7 +134,7 @@ try {
|
||||
$db->deleteHostImageToHostPage($hostImage->hostImageId);
|
||||
|
||||
// Delete host image
|
||||
$hostsImagesDeleted += $db->deleteHostImage($hostImage->hostImageId);
|
||||
$hostImagesDeleted += $db->deleteHostImage($hostImage->hostImageId);
|
||||
}
|
||||
}
|
||||
|
||||
@ -143,10 +146,10 @@ try {
|
||||
$curl = new Curl($manifest->url);
|
||||
|
||||
// Update curl stats
|
||||
$requestsTotal++;
|
||||
$requestSizeTotal += $curl->getSizeRequest();
|
||||
$downloadSizeTotal += $curl->getSizeDownload();
|
||||
$requestsTotalTime += $curl->getTotalTime();
|
||||
$httpRequestsTotal++;
|
||||
$httpRequestsSizeTotal += $curl->getSizeRequest();
|
||||
$httpDownloadSizeTotal += $curl->getSizeDownload();
|
||||
$httpRequestsTimeTotal += $curl->getTotalTime();
|
||||
|
||||
// Skip processing non 200 code
|
||||
if (200 != $curl->getCode()) {
|
||||
@ -198,6 +201,10 @@ try {
|
||||
// Reset banned images
|
||||
$hostImagesBansRemoved += $db->resetBannedHostImages(time() - CLEAN_IMAGE_BAN_SECONDS_OFFSET);
|
||||
|
||||
// Delete deprecated logs
|
||||
$logsCleanerDeleted += $db->deleteLogCleaner(time() - CLEAN_LOG_SECONDS_OFFSET);
|
||||
$logsCrawlerDeleted += $db->deleteLogCrawler(time() - CRAWL_LOG_SECONDS_OFFSET);
|
||||
|
||||
$db->commit();
|
||||
|
||||
} catch(Exception $e){
|
||||
@ -208,10 +215,34 @@ try {
|
||||
}
|
||||
|
||||
// Debug
|
||||
$executionTimeTotal = microtime(true) - $timeStart;
|
||||
$httpRequestsTimeTotal = $httpRequestsTimeTotal / 1000000;
|
||||
|
||||
if (CLEAN_LOG_ENABLED) {
|
||||
|
||||
$db->addCleanerLog( time(),
|
||||
$hostsTotal,
|
||||
$hostsUpdated,
|
||||
$hostPagesDeleted,
|
||||
$hostPagesBansRemoved,
|
||||
$hostImagesDeleted,
|
||||
$hostImagesBansRemoved,
|
||||
$manifestsTotal,
|
||||
$manifestsDeleted,
|
||||
$logsCleanerDeleted,
|
||||
$logsCrawlerDeleted,
|
||||
$httpRequestsTotal,
|
||||
$httpRequestsSizeTotal,
|
||||
$httpDownloadSizeTotal,
|
||||
$httpRequestsTimeTotal,
|
||||
$executionTimeTotal);
|
||||
|
||||
}
|
||||
|
||||
echo 'Hosts total: ' . $hostsTotal . PHP_EOL;
|
||||
echo 'Hosts updated: ' . $hostsUpdated . PHP_EOL;
|
||||
echo 'Hosts pages deleted: ' . $hostsPagesDeleted . PHP_EOL;
|
||||
echo 'Hosts images deleted: ' . $hostsImagesDeleted . PHP_EOL;
|
||||
echo 'Hosts pages deleted: ' . $hostPagesDeleted . PHP_EOL;
|
||||
echo 'Hosts images deleted: ' . $hostImagesDeleted . PHP_EOL;
|
||||
|
||||
echo 'Manifests total: ' . $manifestsTotal . PHP_EOL;
|
||||
echo 'Manifests deleted: ' . $manifestsDeleted . PHP_EOL;
|
||||
@ -219,9 +250,12 @@ echo 'Manifests deleted: ' . $manifestsDeleted . PHP_EOL;
|
||||
echo 'Host page bans removed: ' . $hostPagesBansRemoved . PHP_EOL;
|
||||
echo 'Host images bans removed: ' . $hostImagesBansRemoved . PHP_EOL;
|
||||
|
||||
echo 'HTTP Requests total: ' . $requestsTotal . PHP_EOL;
|
||||
echo 'HTTP Requests total size: ' . $requestSizeTotal . PHP_EOL;
|
||||
echo 'HTTP Download total size: ' . $downloadSizeTotal . PHP_EOL;
|
||||
echo 'HTTP Requests total time: ' . $requestsTotalTime / 1000000 . PHP_EOL;
|
||||
echo 'Cleaner logs deleted: ' . $logsCleanerDeleted . PHP_EOL;
|
||||
echo 'Crawler logs deleted: ' . $logsCrawlerDeleted . PHP_EOL;
|
||||
|
||||
echo 'Total time: ' . microtime(true) - $timeStart . PHP_EOL . PHP_EOL;
|
||||
echo 'HTTP Requests total: ' . $httpRequestsTotal . PHP_EOL;
|
||||
echo 'HTTP Requests total size: ' . $httpRequestsSizeTotal . PHP_EOL;
|
||||
echo 'HTTP Download total size: ' . $httpDownloadSizeTotal . PHP_EOL;
|
||||
echo 'HTTP Requests total time: ' . $httpRequestsTimeTotal . PHP_EOL;
|
||||
|
||||
echo 'Total time: ' . $executionTimeTotal . PHP_EOL . PHP_EOL;
|
@ -27,22 +27,22 @@ if (CRAWL_STOP_DISK_QUOTA_MB_LEFT > disk_free_space('/') / 1000000) {
|
||||
// Debug
|
||||
$timeStart = microtime(true);
|
||||
|
||||
$requestsTotal = 0;
|
||||
$requestSizeTotal = 0;
|
||||
$downloadSizeTotal = 0;
|
||||
$requestsTotalTime = 0;
|
||||
$httpRequestsTotal = 0;
|
||||
$httpRequestsSizeTotal = 0;
|
||||
$httpDownloadSizeTotal = 0;
|
||||
$httpRequestsTimeTotal = 0;
|
||||
|
||||
$hostPagesProcessed = 0;
|
||||
$hostImagesProcessed = 0;
|
||||
$manifestsProcessed = 0;
|
||||
$hostPagesIndexed = 0;
|
||||
$hostImagesIndexed = 0;
|
||||
$manifestsIndexed = 0;
|
||||
$hostPagesAdded = 0;
|
||||
$hostImagesAdded = 0;
|
||||
$hostsAdded = 0;
|
||||
$hostPagesBanned = 0;
|
||||
$hostImagesBanned = 0;
|
||||
$hostPagesProcessed = 0;
|
||||
$hostImagesProcessed = 0;
|
||||
$manifestsProcessed = 0;
|
||||
$hostPagesIndexed = 0;
|
||||
$hostImagesIndexed = 0;
|
||||
$manifestsAdded = 0;
|
||||
$hostPagesAdded = 0;
|
||||
$hostImagesAdded = 0;
|
||||
$hostsAdded = 0;
|
||||
$hostPagesBanned = 0;
|
||||
$hostImagesBanned = 0;
|
||||
|
||||
// Connect database
|
||||
$db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD);
|
||||
@ -57,10 +57,10 @@ try {
|
||||
$curl = new Curl($queueManifest->url);
|
||||
|
||||
// Update curl stats
|
||||
$requestsTotal++;
|
||||
$requestSizeTotal += $curl->getSizeRequest();
|
||||
$downloadSizeTotal += $curl->getSizeDownload();
|
||||
$requestsTotalTime += $curl->getTotalTime();
|
||||
$httpRequestsTotal++;
|
||||
$httpRequestsSizeTotal += $curl->getSizeRequest();
|
||||
$httpDownloadSizeTotal += $curl->getSizeDownload();
|
||||
$httpRequestsTimeTotal += $curl->getTotalTime();
|
||||
|
||||
// Update manifest index anyway, with the current time and http code
|
||||
$manifestsProcessed += $db->updateManifestCrawlQueue($queueManifest->manifestId, time(), $curl->getCode());
|
||||
@ -120,10 +120,10 @@ try {
|
||||
$curl = new Curl($remoteManifest->result->api->hosts);
|
||||
|
||||
// Update curl stats
|
||||
$requestsTotal++;
|
||||
$requestSizeTotal += $curl->getSizeRequest();
|
||||
$downloadSizeTotal += $curl->getSizeDownload();
|
||||
$requestsTotalTime += $curl->getTotalTime();
|
||||
$httpRequestsTotal++;
|
||||
$httpRequestsSizeTotal += $curl->getSizeRequest();
|
||||
$httpDownloadSizeTotal += $curl->getSizeDownload();
|
||||
$httpRequestsTimeTotal += $curl->getTotalTime();
|
||||
|
||||
// Skip processing non 200 code
|
||||
if (200 != $curl->getCode()) {
|
||||
@ -184,10 +184,10 @@ try {
|
||||
$curl = new Curl($hostURL . '/robots.txt', CRAWL_CURLOPT_USERAGENT);
|
||||
|
||||
// Update curl stats
|
||||
$requestsTotal++;
|
||||
$requestSizeTotal += $curl->getSizeRequest();
|
||||
$downloadSizeTotal += $curl->getSizeDownload();
|
||||
$requestsTotalTime += $curl->getTotalTime();
|
||||
$httpRequestsTotal++;
|
||||
$httpRequestsSizeTotal += $curl->getSizeRequest();
|
||||
$httpDownloadSizeTotal += $curl->getSizeDownload();
|
||||
$httpRequestsTimeTotal += $curl->getTotalTime();
|
||||
|
||||
if (200 == $curl->getCode() && false !== stripos($curl->getContent(), 'user-agent:')) {
|
||||
$hostRobots = $curl->getContent();
|
||||
@ -254,10 +254,10 @@ try {
|
||||
$curl = new Curl($queueHostImageURL, CRAWL_CURLOPT_USERAGENT);
|
||||
|
||||
// Update curl stats
|
||||
$requestsTotal++;
|
||||
$requestSizeTotal += $curl->getSizeRequest();
|
||||
$downloadSizeTotal += $curl->getSizeDownload();
|
||||
$requestsTotalTime += $curl->getTotalTime();
|
||||
$httpRequestsTotal++;
|
||||
$httpRequestsSizeTotal += $curl->getSizeRequest();
|
||||
$httpDownloadSizeTotal += $curl->getSizeDownload();
|
||||
$httpRequestsTimeTotal += $curl->getTotalTime();
|
||||
|
||||
// Update image index anyway, with the current time and http code
|
||||
$hostImagesProcessed += $db->updateHostImageCrawlQueue($queueHostImage->hostImageId, time(), $curl->getCode());
|
||||
@ -334,10 +334,10 @@ try {
|
||||
$curl = new Curl($queueHostPageURL, CRAWL_CURLOPT_USERAGENT);
|
||||
|
||||
// Update curl stats
|
||||
$requestsTotal++;
|
||||
$requestSizeTotal += $curl->getSizeRequest();
|
||||
$downloadSizeTotal += $curl->getSizeDownload();
|
||||
$requestsTotalTime += $curl->getTotalTime();
|
||||
$httpRequestsTotal++;
|
||||
$httpRequestsSizeTotal += $curl->getSizeRequest();
|
||||
$httpDownloadSizeTotal += $curl->getSizeDownload();
|
||||
$httpRequestsTimeTotal += $curl->getTotalTime();
|
||||
|
||||
// Update page index anyway, with the current time and http code
|
||||
$hostPagesProcessed += $db->updateHostPageCrawlQueue($queueHostPage->hostPageId, time(), $curl->getCode());
|
||||
@ -447,6 +447,8 @@ try {
|
||||
$metaYggoManifest,
|
||||
(string) CRAWL_MANIFEST_DEFAULT_STATUS,
|
||||
time());
|
||||
|
||||
$manifestsAdded++;
|
||||
}
|
||||
}
|
||||
|
||||
@ -504,10 +506,10 @@ try {
|
||||
$curl = new Curl($hostImageURL->string . '/robots.txt', CRAWL_CURLOPT_USERAGENT);
|
||||
|
||||
// Update curl stats
|
||||
$requestsTotal++;
|
||||
$requestSizeTotal += $curl->getSizeRequest();
|
||||
$downloadSizeTotal += $curl->getSizeDownload();
|
||||
$requestsTotalTime += $curl->getTotalTime();
|
||||
$httpRequestsTotal++;
|
||||
$httpRequestsSizeTotal += $curl->getSizeRequest();
|
||||
$httpDownloadSizeTotal += $curl->getSizeDownload();
|
||||
$httpRequestsTimeTotal += $curl->getTotalTime();
|
||||
|
||||
if (200 == $curl->getCode() && false !== stripos($curl->getContent(), 'user-agent:')) {
|
||||
$hostRobots = $curl->getContent();
|
||||
@ -666,10 +668,10 @@ try {
|
||||
$curl = new Curl($hostURL->string . '/robots.txt', CRAWL_CURLOPT_USERAGENT);
|
||||
|
||||
// Update curl stats
|
||||
$requestsTotal++;
|
||||
$requestSizeTotal += $curl->getSizeRequest();
|
||||
$downloadSizeTotal += $curl->getSizeDownload();
|
||||
$requestsTotalTime += $curl->getTotalTime();
|
||||
$httpRequestsTotal++;
|
||||
$httpRequestsSizeTotal += $curl->getSizeRequest();
|
||||
$httpDownloadSizeTotal += $curl->getSizeDownload();
|
||||
$httpRequestsTimeTotal += $curl->getTotalTime();
|
||||
|
||||
if (200 == $curl->getCode() && false !== stripos($curl->getContent(), 'user-agent:')) {
|
||||
$hostRobots = $curl->getContent();
|
||||
@ -745,24 +747,49 @@ try {
|
||||
}
|
||||
|
||||
// Debug
|
||||
$executionTimeTotal = microtime(true) - $timeStart;
|
||||
$httpRequestsTimeTotal = $httpRequestsTimeTotal / 1000000;
|
||||
|
||||
if (CRAWL_LOG_ENABLED) {
|
||||
|
||||
$db->addCrawlerLog(time(),
|
||||
$hostsAdded,
|
||||
$hostPagesProcessed,
|
||||
$hostPagesIndexed,
|
||||
$hostPagesAdded,
|
||||
$hostPagesBanned,
|
||||
$hostImagesIndexed,
|
||||
$hostImagesProcessed,
|
||||
$hostImagesAdded,
|
||||
$hostImagesBanned,
|
||||
$manifestsProcessed,
|
||||
$manifestsAdded,
|
||||
$httpRequestsTotal,
|
||||
$httpRequestsSizeTotal,
|
||||
$httpDownloadSizeTotal,
|
||||
$httpRequestsTimeTotal,
|
||||
$executionTimeTotal);
|
||||
}
|
||||
|
||||
// Debug output
|
||||
echo 'Hosts added: ' . $hostsAdded . PHP_EOL;
|
||||
|
||||
echo 'Pages processed: ' . $hostPagesProcessed . PHP_EOL;
|
||||
echo 'Pages indexed: ' . $hostPagesIndexed . PHP_EOL;
|
||||
echo 'Pages added: ' . $hostPagesAdded . PHP_EOL;
|
||||
echo 'Pages banned: ' . $hostPagesBanned . PHP_EOL;
|
||||
|
||||
echo 'Images processed: ' . $hostImagesProcessed . PHP_EOL;
|
||||
echo 'Images indexed: ' . $hostImagesIndexed . PHP_EOL;
|
||||
echo 'Images added: ' . $hostImagesAdded . PHP_EOL;
|
||||
echo 'Images banned: ' . $hostImagesBanned . PHP_EOL;
|
||||
|
||||
echo 'Manifests processed: ' . $manifestsProcessed . PHP_EOL;
|
||||
echo 'Manifests indexed: ' . $manifestsIndexed . PHP_EOL;
|
||||
echo 'Manifests added: ' . $manifestsAdded . PHP_EOL;
|
||||
|
||||
echo 'Hosts added: ' . $hostsAdded . PHP_EOL;
|
||||
echo 'Hosts pages banned: ' . $hostPagesBanned . PHP_EOL;
|
||||
echo 'Hosts images banned: ' . $hostImagesBanned . PHP_EOL;
|
||||
echo 'HTTP Requests total: ' . $httpRequestsTotal . PHP_EOL;
|
||||
echo 'HTTP Requests total size: ' . $httpRequestsSizeTotal . PHP_EOL;
|
||||
echo 'HTTP Download total size: ' . $httpDownloadSizeTotal . PHP_EOL;
|
||||
echo 'HTTP Requests total time: ' . $httpRequestsTimeTotal . PHP_EOL;
|
||||
|
||||
echo 'HTTP Requests total: ' . $requestsTotal . PHP_EOL;
|
||||
echo 'HTTP Requests total size: ' . $requestSizeTotal . PHP_EOL;
|
||||
echo 'HTTP Download total size: ' . $downloadSizeTotal . PHP_EOL;
|
||||
echo 'HTTP Requests total time: ' . $requestsTotalTime / 1000000 . PHP_EOL;
|
||||
|
||||
echo 'Total time: ' . microtime(true) - $timeStart . PHP_EOL . PHP_EOL;
|
||||
echo 'Total time: ' . $executionTimeTotal . PHP_EOL . PHP_EOL;
|
||||
|
Binary file not shown.
@ -564,7 +564,7 @@ class MySQL {
|
||||
|
||||
public function resetBannedHostPages(int $timeOffset) {
|
||||
|
||||
$query = $this->_db->prepare('UPDATE `hostPage` SET `timeBanned` = NULL WHERE `timeBanned` IS NOT NULL AND `timeBanned` > ' . (int) $timeOffset);
|
||||
$query = $this->_db->prepare('UPDATE `hostPage` SET `timeBanned` = NULL WHERE `timeBanned` IS NOT NULL AND `timeBanned` < ' . (int) $timeOffset);
|
||||
|
||||
$query->execute();
|
||||
|
||||
@ -573,7 +573,72 @@ class MySQL {
|
||||
|
||||
public function resetBannedHostImages(int $timeOffset) {
|
||||
|
||||
$query = $this->_db->prepare('UPDATE `hostImage` SET `timeBanned` = NULL WHERE `timeBanned` IS NOT NULL AND `timeBanned` > ' . (int) $timeOffset);
|
||||
$query = $this->_db->prepare('UPDATE `hostImage` SET `timeBanned` = NULL WHERE `timeBanned` IS NOT NULL AND `timeBanned` < ' . (int) $timeOffset);
|
||||
|
||||
$query->execute();
|
||||
|
||||
return $query->rowCount();
|
||||
}
|
||||
|
||||
/**
 * Insert one statistics row into the `logCleaner` table.
 *
 * Each argument is bound positionally to the column of the same name, so the
 * order of the parameters, the column list and the execute() array must stay
 * in sync (16 columns, 16 placeholders).
 *
 * @param int   $timeAdded             Time the row is recorded at; compared against a time()-based offset by deleteLogCleaner()
 * @param int   $hostsTotal            Value stored in `hostsTotal`
 * @param int   $hostsUpdated          Value stored in `hostsUpdated`
 * @param int   $hostPagesDeleted      Value stored in `hostPagesDeleted`
 * @param int   $hostPagesBansRemoved  Value stored in `hostPagesBansRemoved`
 * @param int   $hostImagesDeleted     Value stored in `hostImagesDeleted`
 * @param int   $hostImagesBansRemoved Value stored in `hostImagesBansRemoved`
 * @param int   $manifestsTotal        Value stored in `manifestsTotal`
 * @param int   $manifestsDeleted      Value stored in `manifestsDeleted`
 * @param int   $logsCleanerDeleted    Value stored in `logsCleanerDeleted`
 * @param int   $logsCrawlerDeleted    Value stored in `logsCrawlerDeleted`
 * @param int   $httpRequestsTotal     Value stored in `httpRequestsTotal`
 * @param int   $httpRequestsSizeTotal Value stored in `httpRequestsSizeTotal`
 * @param int   $httpDownloadSizeTotal Value stored in `httpDownloadSizeTotal`
 * @param float $httpRequestsTimeTotal Value stored in `httpRequestsTimeTotal` (NOTE(review): units are decided by the caller - confirm)
 * @param float $executionTimeTotal    Value stored in `executionTimeTotal`
 *
 * @return mixed Result of lastInsertId() on the connection (NOTE(review): `_db` is presumably a PDO handle - confirm)
 */
public function addCleanerLog(int $timeAdded,
|
||||
int $hostsTotal,
|
||||
int $hostsUpdated,
|
||||
int $hostPagesDeleted,
|
||||
int $hostPagesBansRemoved,
|
||||
int $hostImagesDeleted,
|
||||
int $hostImagesBansRemoved,
|
||||
int $manifestsTotal,
|
||||
int $manifestsDeleted,
|
||||
int $logsCleanerDeleted,
|
||||
int $logsCrawlerDeleted,
|
||||
int $httpRequestsTotal,
|
||||
int $httpRequestsSizeTotal,
|
||||
int $httpDownloadSizeTotal,
|
||||
float $httpRequestsTimeTotal,
|
||||
float $executionTimeTotal) {
|
||||
|
||||
$query = $this->_db->prepare('INSERT INTO `logCleaner` (`timeAdded`,
|
||||
`hostsTotal`,
|
||||
`hostsUpdated`,
|
||||
`hostPagesDeleted`,
|
||||
`hostPagesBansRemoved`,
|
||||
`hostImagesDeleted`,
|
||||
`hostImagesBansRemoved`,
|
||||
`manifestsTotal`,
|
||||
`manifestsDeleted`,
|
||||
`logsCleanerDeleted`,
|
||||
`logsCrawlerDeleted`,
|
||||
`httpRequestsTotal`,
|
||||
`httpRequestsSizeTotal`,
|
||||
`httpDownloadSizeTotal`,
|
||||
`httpRequestsTimeTotal`,
|
||||
`executionTimeTotal`) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
|
||||
|
||||
$query->execute([
|
||||
$timeAdded,
|
||||
$hostsTotal,
|
||||
$hostsUpdated,
|
||||
$hostPagesDeleted,
|
||||
$hostPagesBansRemoved,
|
||||
$hostImagesDeleted,
|
||||
$hostImagesBansRemoved,
|
||||
$manifestsTotal,
|
||||
$manifestsDeleted,
|
||||
$logsCleanerDeleted,
|
||||
$logsCrawlerDeleted,
|
||||
$httpRequestsTotal,
|
||||
$httpRequestsSizeTotal,
|
||||
$httpDownloadSizeTotal,
|
||||
$httpRequestsTimeTotal,
|
||||
$executionTimeTotal
|
||||
]);
|
||||
|
||||
return $this->_db->lastInsertId();
|
||||
}
|
||||
|
||||
public function deleteLogCleaner(int $timeOffset) {
|
||||
|
||||
$query = $this->_db->prepare('DELETE FROM `logCleaner` WHERE `timeAdded` < ' . (int) $timeOffset);
|
||||
|
||||
$query->execute();
|
||||
|
||||
@ -675,4 +740,72 @@ class MySQL {
|
||||
|
||||
return $query->rowCount();
|
||||
}
|
||||
|
||||
/**
 * Insert one statistics row into the `logCrawler` table.
 *
 * Each argument is bound positionally to the column of the same name, so the
 * order of the parameters, the column list and the execute() array must stay
 * in sync (17 columns, 17 placeholders).
 *
 * NOTE: the image counters are ordered Indexed, then Processed - the reverse
 * of the page counters (Processed, then Indexed). Positional callers must
 * follow this exact order.
 *
 * @param int   $timeAdded             Time the row is recorded at; compared against a time()-based offset by deleteLogCrawler()
 * @param int   $hostsAdded            Value stored in `hostsAdded`
 * @param int   $hostPagesProcessed    Value stored in `hostPagesProcessed`
 * @param int   $hostPagesIndexed      Value stored in `hostPagesIndexed`
 * @param int   $hostPagesAdded        Value stored in `hostPagesAdded`
 * @param int   $hostPagesBanned       Value stored in `hostPagesBanned`
 * @param int   $hostImagesIndexed     Value stored in `hostImagesIndexed`
 * @param int   $hostImagesProcessed   Value stored in `hostImagesProcessed`
 * @param int   $hostImagesAdded       Value stored in `hostImagesAdded`
 * @param int   $hostImagesBanned      Value stored in `hostImagesBanned`
 * @param int   $manifestsProcessed    Value stored in `manifestsProcessed`
 * @param int   $manifestsAdded        Value stored in `manifestsAdded`
 * @param int   $httpRequestsTotal     Value stored in `httpRequestsTotal`
 * @param int   $httpRequestsSizeTotal Value stored in `httpRequestsSizeTotal`
 * @param int   $httpDownloadSizeTotal Value stored in `httpDownloadSizeTotal`
 * @param float $httpRequestsTimeTotal Value stored in `httpRequestsTimeTotal` (NOTE(review): units are decided by the caller - confirm)
 * @param float $executionTimeTotal    Value stored in `executionTimeTotal`
 *
 * @return mixed Result of lastInsertId() on the connection (NOTE(review): `_db` is presumably a PDO handle - confirm)
 */
public function addCrawlerLog(int $timeAdded,
|
||||
int $hostsAdded,
|
||||
int $hostPagesProcessed,
|
||||
int $hostPagesIndexed,
|
||||
int $hostPagesAdded,
|
||||
int $hostPagesBanned,
|
||||
int $hostImagesIndexed,
|
||||
int $hostImagesProcessed,
|
||||
int $hostImagesAdded,
|
||||
int $hostImagesBanned,
|
||||
int $manifestsProcessed,
|
||||
int $manifestsAdded,
|
||||
int $httpRequestsTotal,
|
||||
int $httpRequestsSizeTotal,
|
||||
int $httpDownloadSizeTotal,
|
||||
float $httpRequestsTimeTotal,
|
||||
float $executionTimeTotal) {
|
||||
|
||||
$query = $this->_db->prepare('INSERT INTO `logCrawler` (`timeAdded`,
|
||||
`hostsAdded`,
|
||||
`hostPagesProcessed`,
|
||||
`hostPagesIndexed`,
|
||||
`hostPagesAdded`,
|
||||
`hostPagesBanned`,
|
||||
`hostImagesIndexed`,
|
||||
`hostImagesProcessed`,
|
||||
`hostImagesAdded`,
|
||||
`hostImagesBanned`,
|
||||
`manifestsProcessed`,
|
||||
`manifestsAdded`,
|
||||
`httpRequestsTotal`,
|
||||
`httpRequestsSizeTotal`,
|
||||
`httpDownloadSizeTotal`,
|
||||
`httpRequestsTimeTotal`,
|
||||
`executionTimeTotal`) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
|
||||
|
||||
$query->execute([
|
||||
$timeAdded,
|
||||
$hostsAdded,
|
||||
$hostPagesProcessed,
|
||||
$hostPagesIndexed,
|
||||
$hostPagesAdded,
|
||||
$hostPagesBanned,
|
||||
$hostImagesIndexed,
|
||||
$hostImagesProcessed,
|
||||
$hostImagesAdded,
|
||||
$hostImagesBanned,
|
||||
$manifestsProcessed,
|
||||
$manifestsAdded,
|
||||
$httpRequestsTotal,
|
||||
$httpRequestsSizeTotal,
|
||||
$httpDownloadSizeTotal,
|
||||
$httpRequestsTimeTotal,
|
||||
$executionTimeTotal
|
||||
]);
|
||||
|
||||
return $this->_db->lastInsertId();
|
||||
}
|
||||
|
||||
/**
 * Delete `logCrawler` rows whose `timeAdded` is strictly less than the given threshold.
 *
 * The threshold is cast to int before being concatenated into the SQL text,
 * so only an integer literal can reach the query.
 *
 * @param int $timeOffset Threshold compared against `timeAdded`
 *
 * @return mixed Result of rowCount() on the executed statement (NOTE(review): presumably the number of deleted rows - confirm `_db` is PDO)
 */
public function deleteLogCrawler(int $timeOffset) {
|
||||
|
||||
$query = $this->_db->prepare('DELETE FROM `logCrawler` WHERE `timeAdded` < ' . (int) $timeOffset);
|
||||
|
||||
$query->execute();
|
||||
|
||||
return $query->rowCount();
|
||||
}
|
||||
}
|
||||
|
Binary file not shown.
Before Width: | Height: | Size: 110 KiB After Width: | Height: | Size: 126 KiB |
Loading…
x
Reference in New Issue
Block a user