From 000b9ad8dd49401d4a23a6ee31cc9838f546f18a Mon Sep 17 00:00:00 2001 From: ghost Date: Sun, 30 Jul 2023 21:53:30 +0300 Subject: [PATCH] add FS cleaning features, lock execution on active crontab tasks, disable hostPageSnap/localhost untested constructions --- README.md | 12 +++- cli/yggo.php | 149 ++++++++++++++++++++++++++++++++++----------- config/app.php.txt | 10 +-- library/ftp.php | 21 ++++++- library/mysql.php | 9 +++ 5 files changed, 158 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index b11e834..9b9c388 100644 --- a/README.md +++ b/README.md @@ -163,7 +163,7 @@ GET m=SphinxQL * [x] CSS only, JS-less interface * [x] Unique host ident icons -* [x] Content genre tabs (#1) +* [x] Content MIME tabs (#1) * [x] Page index explorer + [x] Meta + [x] Snaps history @@ -225,8 +225,14 @@ GET m=SphinxQL * [x] help * [x] crawl * [x] clean -* [x] snap - + [x] reindex +* [x] hostPageSnap + + [x] repair + + [x] _sync DB-FS relations_ + + [x] _FTP_ + + [ ] _localhost (not tested)_ + + [x] _delete FS missed in the DB_ + + [x] _FTP_ + + [ ] _localhost_ * [x] hostPageDom + [x] generate + [x] truncate diff --git a/cli/yggo.php b/cli/yggo.php index a00cc64..b84c2a1 100644 --- a/cli/yggo.php +++ b/cli/yggo.php @@ -1,28 +1,50 @@ getHosts() as $host) { @@ -74,20 +99,21 @@ switch ($argv[1]) { $snapPath = chunk_split($hostPage->hostPageId, 1, '/'); // Check file exists - foreach (json_decode(SNAP_STORAGE) as $name => $storages) { + foreach (json_decode(SNAP_STORAGE) as $hostPageSnapStorageName => $storages) { foreach ($storages as $i => $storage) { // Generate storage id - $crc32name = crc32(sprintf('%s.%s', $name, $i)); + $crc32name = crc32(sprintf('%s.%s', $hostPageSnapStorageName, $i)); - switch ($name) { + switch ($hostPageSnapStorageName) { case 'localhost': - $filename = $storage->directory . $snapPath . $hostPageSnap->timeAdded . '.zip'; + /* @TODO implemented, not tested + $hostPageSnapFilename = $storage->directory . $snapPath . $hostPageSnap->timeAdded . '.zip'; - if (file_exists($filename)) { + if (file_exists($hostPageSnapFilename)) { $snapFilesExists = true; @@ -95,14 +121,15 @@ switch ($argv[1]) { if ($db->addHostPageSnapStorage($hostPageSnap->hostPageSnapId, $crc32name, $hostPageSnap->timeAdded)) { - CLI::warning(sprintf(_('register snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $filename, $name, $i)); + CLI::warning(sprintf(_('register snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $hostPageSnapFilename, $hostPageSnapStorageName, $i)); } } else { - CLI::success(sprintf(_('skip related snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $filename, $name, $i)); + CLI::success(sprintf(_('skip related snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $hostPageSnapFilename, $hostPageSnapStorageName, $i)); } } + */ break; @@ -112,9 +139,9 @@ switch ($argv[1]) { if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) { - $filename = 'hp/' . $snapPath . $hostPageSnap->timeAdded . '.zip'; + $hostPageSnapFilename = 'hp/' . $snapPath . $hostPageSnap->timeAdded . '.zip'; - if ($ftp->size($filename)) { + if ($ftp->size($hostPageSnapFilename)) { $snapFilesExists = true; @@ -122,13 +149,20 @@ switch ($argv[1]) { if ($db->addHostPageSnapStorage($hostPageSnap->hostPageSnapId, $crc32name, $hostPageSnap->timeAdded)) { - CLI::warning(sprintf(_('register snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $filename, $name, $i)); + CLI::warning(sprintf(_('register snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $hostPageSnapFilename, $hostPageSnapStorageName, $i)); } } else { - CLI::success(sprintf(_('skip related snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $filename, $name, $i)); + CLI::success(sprintf(_('skip related snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $hostPageSnapFilename, $hostPageSnapStorageName, $i)); } } + + // Prevent snap deletion from registry on FTP connection lost + } else { + + CLI::danger(sprintf(_('could not connect to storage %s index %s. operation stopped to prevent the data lose.'), $hostPageSnapStorageName, $i)); + CLI::break(); + exit; } $ftp->close(); @@ -154,7 +188,7 @@ switch ($argv[1]) { $db->deleteHostPageSnapStorages($hostPageSnap->hostPageSnapId); $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId); - CLI::danger(sprintf(_('delete snap index: #%s file not found in the any of storage;'), $hostPageSnap->hostPageSnapId)); + CLI::warning(sprintf(_('delete snap index: #%s file not found in the any of storage;'), $hostPageSnap->hostPageSnapId)); $db->commit(); @@ -169,23 +203,70 @@ switch ($argv[1]) { } } - CLI::notice(_('optimize database tables...')); + // Cleanup FS + CLI::notice(_('scan storage for snap files missed in the DB...')); - $db->optimize(); + foreach (json_decode(SNAP_STORAGE) as $hostPageSnapStorageName => $storages) { - CLI::success(_('tables successfully optimized!')); + foreach ($storages as $i => $storage) { - CLI::notice(_('scan storage locations for snap files not registered in the DB...')); + switch ($hostPageSnapStorageName) { - CLI::success(_('snap index successfully updated!')); + case 'localhost': - // Cleanup FS items on missed DB registry - // @TODO + // @TODO + + break; + + case 'ftp': + + $ftp = new Ftp(); + + if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) { + + foreach ($ftp->nlistr($storage->directory) as $hostPageSnapFilename) { + + if (false !== preg_match(sprintf('!/hp/([\d/]+)/([\d]+)\.zip$!ui', $storage->directory), $hostPageSnapFilename, $matches)) { + + if (!empty($matches[1]) && // hostPageSnapId + !empty($matches[2])) { // timeAdded + + if (!$db->findHostPageSnapByTimeAdded($matches[1], $matches[2])) { + + if ($ftp->delete($hostPageSnapFilename)) { + + CLI::warning(sprintf(_('delete snap file: #%s from storage %s index %s not found in registry;'), $hostPageSnapFilename, $hostPageSnapStorageName, $i)); + + } else { + + CLI::danger(sprintf(_('delete snap file: #%s from storage %s index %s not found in registry;'), $hostPageSnapFilename, $hostPageSnapStorageName, $i)); + } + } + } + } + } + } + + $ftp->close(); + + break; + } + } + } + + CLI::success(_('missed snap files successfully deleted!')); + + // Optimize DB tables + CLI::notice(_('optimize database tables...')); + + $db->optimize(); + + CLI::success(_('tables successfully optimized!')); break; default: - CLI::danger(_('undefined action argument')); + CLI::danger(_('undefined action argument!')); } break; @@ -344,8 +425,8 @@ CLI::default('available options:'); CLI::default(' help - this message'); CLI::default(' crawl - execute crawler step in the crontab queue'); CLI::default(' clean - execute cleaner step in the crontab queue'); -CLI::default(' snap reindex - sync DB/FS relations'); CLI::default(' hostPage rank reindex - generate rank indexes in hostPage table'); +CLI::default(' hostPageSnap repair - sync DB/FS relations'); CLI::default(' hostPageDom generate [selectors] - make hostPageDom index based on related hostPage.data field'); CLI::default(' hostPageDom truncate - flush hostPageDom table'); CLI::break(); diff --git a/config/app.php.txt b/config/app.php.txt index 3e56d50..473140a 100644 --- a/config/app.php.txt +++ b/config/app.php.txt @@ -92,15 +92,15 @@ define('MEMCACHED_PORT', 11211); */ define('SNAP_STORAGE', json_encode((object) [ - 'localhost' => [ + 'localhost' => [ // @TODO see https://github.com/YGGverse/YGGo#roadmap [ 'directory' => __DIR__ . '/../storage/snap/hp/', 'quota' => [ 'mime' => false, 'size' => 10000000024, // @TODO - 'request' => [ + 'request' => [ // @TODO 'download' => [ - 'size' => 10000024, // @TODO + 'size' => 10000024, 'seconds' => 60*60 ] ] @@ -120,9 +120,9 @@ define('SNAP_STORAGE', json_encode((object) 'quota' => [ 'mime' => 'text/html,application/xhtml+xml,text/plain,image/webp,image/png,image/gif,image/jpeg,image/ico', 'size' => 10000000024, // @TODO - 'request' => [ + 'request' => [ // @TODO 'download' => [ - 'size' => 10000024, // @TODO + 'size' => 10000024, 'seconds' => 60*60 ] ] diff --git a/library/ftp.php b/library/ftp.php index e2472f7..ae949bc 100644 --- a/library/ftp.php +++ b/library/ftp.php @@ -84,11 +84,30 @@ class Ftp { return false; } - public function list(string $path) { + public function nlist(string $path) { return ftp_nlist($this->_connection, $path); } + public function nlistr(string $path) { + + $result = []; + + foreach ($this->nlist($path) as $line) { + + if (ftp_size($this->_connection, $line) == -1) { + + $result = array_merge($result, $this->nlistr($line)); + + } else{ + + $result[] = $line; + } + } + + return $result; + } + public function close() { return ftp_close($this->_connection); diff --git a/library/mysql.php b/library/mysql.php index ef39df1..f59e4b1 100644 --- a/library/mysql.php +++ b/library/mysql.php @@ -528,6 +528,15 @@ class MySQL { return $query->fetch(); } + public function findHostPageSnapByTimeAdded(int $hostPageSnapId, int $timeAdded) { + + $query = $this->_db->prepare('SELECT * FROM `hostPageSnap` WHERE `hostPageSnapId` = ? AND `timeAdded` = ? LIMIT 1'); + + $query->execute([$hostPageSnapId, $timeAdded]); + + return $query->fetch(); + } + public function addHostPageSnapDownload(int $hostPageSnapStorageId, string $crc32ip, int $timeAdded) { $query = $this->_db->prepare('INSERT INTO `hostPageSnapDownload` (`hostPageSnapStorageId`,