Browse Source

Add FS cleanup features, lock execution while crontab tasks are active, and disable the untested hostPageSnap/localhost code paths

main
ghost 1 year ago
parent
commit
000b9ad8dd
  1. 12
      README.md
  2. 147
      cli/yggo.php
  3. 10
      config/app.php.txt
  4. 21
      library/ftp.php
  5. 9
      library/mysql.php

12
README.md

@ -163,7 +163,7 @@ GET m=SphinxQL
* [x] CSS only, JS-less interface * [x] CSS only, JS-less interface
* [x] Unique host ident icons * [x] Unique host ident icons
* [x] Content genre tabs (#1) * [x] Content MIME tabs (#1)
* [x] Page index explorer * [x] Page index explorer
+ [x] Meta + [x] Meta
+ [x] Snaps history + [x] Snaps history
@ -225,8 +225,14 @@ GET m=SphinxQL
* [x] help * [x] help
* [x] crawl * [x] crawl
* [x] clean * [x] clean
* [x] snap * [x] hostPageSnap
+ [x] reindex + [x] repair
+ [x] _sync DB-FS relations_
+ [x] _FTP_
+ [ ] _localhost (not tested)_
+ [x] _delete FS missed in the DB_
+ [x] _FTP_
+ [ ] _localhost_
* [x] hostPageDom * [x] hostPageDom
+ [x] generate + [x] generate
+ [x] truncate + [x] truncate

147
cli/yggo.php

@ -1,28 +1,50 @@
<?php <?php
// Load system dependencies
require_once(__DIR__ . '/../config/app.php');
require_once(__DIR__ . '/../library/cli.php');
require_once(__DIR__ . '/../library/mysql.php');
require_once(__DIR__ . '/../library/filter.php');
require_once(__DIR__ . '/../library/ftp.php');
require_once(__DIR__ . '/../library/vendor/simple_html_dom.php');
// CLI only to prevent https server connection timeout // CLI only to prevent https server connection timeout
if (php_sapi_name() != 'cli') { if (php_sapi_name() != 'cli') {
CLI::danger(_('supported command line interface only')); CLI::danger(_('supported command line interface only'));
CLI::break();
exit; exit;
} }
// Lock multi-thread execution // Lock multi-thread execution
$semaphore = sem_get(crc32('cli.yggo'), 1); $semaphore = sem_get(crc32('crontab.crawler'), 1);
if (false === sem_acquire($semaphore, true)) { if (false === sem_acquire($semaphore, true)) {
CLI::danger(_('Process locked by another thread.')); CLI::danger(_('process locked by another thread.'));
CLI::break();
exit; exit;
} }
// Load system dependencies // Stop CLI execution on cleaner process running
require_once(__DIR__ . '/../config/app.php'); $semaphore = sem_get(crc32('crontab.cleaner'), 1);
require_once(__DIR__ . '/../library/cli.php');
require_once(__DIR__ . '/../library/mysql.php'); if (false === sem_acquire($semaphore, true)) {
require_once(__DIR__ . '/../library/filter.php');
require_once(__DIR__ . '/../library/ftp.php'); CLI::danger(_('stop crontab.cleaner is running in another thread.'));
require_once(__DIR__ . '/../library/vendor/simple_html_dom.php'); CLI::break();
exit;
}
// Stop CLI execution while the crawler crontab task is running:
// the acquire is requested in non-blocking mode (second argument true), so it
// fails immediately when the 'crontab.crawler' semaphore is already held,
// and the script reports the conflict and exits.
// NOTE(review): $semaphore was already assigned for 'crontab.cleaner' above;
// overwriting it drops that handle, which presumably lets PHP auto-release the
// cleaner lock - confirm, or keep each handle in its own variable.
$semaphore = sem_get(crc32('crontab.crawler'), 1);
if (false === sem_acquire($semaphore, true)) {
CLI::danger(_('stop crontab.crawler is running in another thread.'));
CLI::break();
exit;
}
// Connect database // Connect database
$db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD); $db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD);
@ -49,17 +71,20 @@ switch ($argv[1]) {
CLI::notice(_('cleaner queue step completed.')); CLI::notice(_('cleaner queue step completed.'));
break; break;
case 'snap': case 'hostPageSnap':
if (empty($argv[2])) { if (empty($argv[2])) {
CLI::danger(_('snap method requires action argument'));
CLI::danger(_('hostPageSnap method requires action argument'));
CLI::break();
exit;
} }
switch ($argv[2]) { switch ($argv[2]) {
case 'reindex': case 'repair':
// Scan for new files/storages // Normalize & cleanup DB
CLI::notice(_('scan database registry for missed snap files...')); CLI::notice(_('scan database registry for missed snap files...'));
foreach ($db->getHosts() as $host) { foreach ($db->getHosts() as $host) {
@ -74,20 +99,21 @@ switch ($argv[1]) {
$snapPath = chunk_split($hostPage->hostPageId, 1, '/'); $snapPath = chunk_split($hostPage->hostPageId, 1, '/');
// Check file exists // Check file exists
foreach (json_decode(SNAP_STORAGE) as $name => $storages) { foreach (json_decode(SNAP_STORAGE) as $hostPageSnapStorageName => $storages) {
foreach ($storages as $i => $storage) { foreach ($storages as $i => $storage) {
// Generate storage id // Generate storage id
$crc32name = crc32(sprintf('%s.%s', $name, $i)); $crc32name = crc32(sprintf('%s.%s', $hostPageSnapStorageName, $i));
switch ($name) { switch ($hostPageSnapStorageName) {
case 'localhost': case 'localhost':
$filename = $storage->directory . $snapPath . $hostPageSnap->timeAdded . '.zip'; /* @TODO implemented, not tested
$hostPageSnapFilename = $storage->directory . $snapPath . $hostPageSnap->timeAdded . '.zip';
if (file_exists($filename)) { if (file_exists($hostPageSnapFilename)) {
$snapFilesExists = true; $snapFilesExists = true;
@ -95,14 +121,15 @@ switch ($argv[1]) {
if ($db->addHostPageSnapStorage($hostPageSnap->hostPageSnapId, $crc32name, $hostPageSnap->timeAdded)) { if ($db->addHostPageSnapStorage($hostPageSnap->hostPageSnapId, $crc32name, $hostPageSnap->timeAdded)) {
CLI::warning(sprintf(_('register snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $filename, $name, $i)); CLI::warning(sprintf(_('register snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $hostPageSnapFilename, $hostPageSnapStorageName, $i));
} }
} else { } else {
CLI::success(sprintf(_('skip related snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $filename, $name, $i)); CLI::success(sprintf(_('skip related snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $hostPageSnapFilename, $hostPageSnapStorageName, $i));
} }
} }
*/
break; break;
@ -112,9 +139,9 @@ switch ($argv[1]) {
if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) { if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {
$filename = 'hp/' . $snapPath . $hostPageSnap->timeAdded . '.zip'; $hostPageSnapFilename = 'hp/' . $snapPath . $hostPageSnap->timeAdded . '.zip';
if ($ftp->size($filename)) { if ($ftp->size($hostPageSnapFilename)) {
$snapFilesExists = true; $snapFilesExists = true;
@ -122,13 +149,20 @@ switch ($argv[1]) {
if ($db->addHostPageSnapStorage($hostPageSnap->hostPageSnapId, $crc32name, $hostPageSnap->timeAdded)) { if ($db->addHostPageSnapStorage($hostPageSnap->hostPageSnapId, $crc32name, $hostPageSnap->timeAdded)) {
CLI::warning(sprintf(_('register snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $filename, $name, $i)); CLI::warning(sprintf(_('register snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $hostPageSnapFilename, $hostPageSnapStorageName, $i));
} }
} else { } else {
CLI::success(sprintf(_('skip related snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $filename, $name, $i)); CLI::success(sprintf(_('skip related snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $hostPageSnapFilename, $hostPageSnapStorageName, $i));
} }
} }
// Prevent snap deletion from registry on FTP connection lost
} else {
CLI::danger(sprintf(_('could not connect to storage %s index %s. operation stopped to prevent the data lose.'), $hostPageSnapStorageName, $i));
CLI::break();
exit;
} }
$ftp->close(); $ftp->close();
@ -154,7 +188,7 @@ switch ($argv[1]) {
$db->deleteHostPageSnapStorages($hostPageSnap->hostPageSnapId); $db->deleteHostPageSnapStorages($hostPageSnap->hostPageSnapId);
$db->deleteHostPageSnap($hostPageSnap->hostPageSnapId); $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId);
CLI::danger(sprintf(_('delete snap index: #%s file not found in the any of storage;'), $hostPageSnap->hostPageSnapId)); CLI::warning(sprintf(_('delete snap index: #%s file not found in the any of storage;'), $hostPageSnap->hostPageSnapId));
$db->commit(); $db->commit();
@ -169,23 +203,70 @@ switch ($argv[1]) {
} }
} }
CLI::notice(_('optimize database tables...')); // Cleanup FS
CLI::notice(_('scan storage for snap files missed in the DB...'));
$db->optimize(); foreach (json_decode(SNAP_STORAGE) as $hostPageSnapStorageName => $storages) {
CLI::success(_('tables successfully optimized!')); foreach ($storages as $i => $storage) {
CLI::notice(_('scan storage locations for snap files not registered in the DB...')); switch ($hostPageSnapStorageName) {
CLI::success(_('snap index successfully updated!')); case 'localhost':
// Cleanup FS items on missed DB registry
// @TODO // @TODO
break;
case 'ftp':

    // Remove snap archives from this FTP storage that have no record in the DB
    $ftp = new Ftp();

    if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {

        foreach ($ftp->nlistr($storage->directory) as $hostPageSnapFilename) {

            // Expected layout: .../hp/<digits separated by "/">/<timeAdded>.zip
            // (the pattern has no placeholders, so no sprintf() is needed)
            if (1 !== preg_match('!/hp/([\d/]+)/([\d]+)\.zip$!ui', $hostPageSnapFilename, $matches)) {
                continue;
            }

            if (empty($matches[1]) || // id part of the path
                empty($matches[2])) { // timeAdded
                continue;
            }

            // NOTE(review): $matches[1] still contains the "/" separators
            // (e.g. "1/2/3") while the lookup declares an int id, and the path
            // is built from hostPageId elsewhere in this script - confirm the
            // value passed here identifies the right record before relying on
            // this check to delete files.
            if ($db->findHostPageSnapByTimeAdded($matches[1], $matches[2])) {
                continue;
            }

            if ($ftp->delete($hostPageSnapFilename)) {

                CLI::warning(sprintf(_('delete snap file: #%s from storage %s index %s not found in registry;'), $hostPageSnapFilename, $hostPageSnapStorageName, $i));

            } else {

                // Report the failure with its own message instead of repeating the success text
                CLI::danger(sprintf(_('could not delete snap file: #%s from storage %s index %s not found in registry;'), $hostPageSnapFilename, $hostPageSnapStorageName, $i));
            }
        }

        // Close only a session that was actually opened
        // (presumably close() expects a live connection - confirm in Ftp::connect)
        $ftp->close();

    } else {

        // Nothing is deleted for this storage; make the skipped cleanup visible
        CLI::danger(sprintf(_('could not connect to storage %s index %s. cleanup skipped.'), $hostPageSnapStorageName, $i));
    }

break;
}
}
}
CLI::success(_('missed snap files successfully deleted!'));
// Optimize DB tables
CLI::notice(_('optimize database tables...'));
$db->optimize();
CLI::success(_('tables successfully optimized!'));
break; break;
default: default:
CLI::danger(_('undefined action argument')); CLI::danger(_('undefined action argument!'));
} }
break; break;
@ -344,8 +425,8 @@ CLI::default('available options:');
CLI::default(' help - this message'); CLI::default(' help - this message');
CLI::default(' crawl - execute crawler step in the crontab queue'); CLI::default(' crawl - execute crawler step in the crontab queue');
CLI::default(' clean - execute cleaner step in the crontab queue'); CLI::default(' clean - execute cleaner step in the crontab queue');
CLI::default(' snap reindex - sync DB/FS relations');
CLI::default(' hostPage rank reindex - generate rank indexes in hostPage table'); CLI::default(' hostPage rank reindex - generate rank indexes in hostPage table');
CLI::default(' hostPageSnap repair - sync DB/FS relations');
CLI::default(' hostPageDom generate [selectors] - make hostPageDom index based on related hostPage.data field'); CLI::default(' hostPageDom generate [selectors] - make hostPageDom index based on related hostPage.data field');
CLI::default(' hostPageDom truncate - flush hostPageDom table'); CLI::default(' hostPageDom truncate - flush hostPageDom table');
CLI::break(); CLI::break();

10
config/app.php.txt

@ -92,15 +92,15 @@ define('MEMCACHED_PORT', 11211);
*/ */
define('SNAP_STORAGE', json_encode((object) define('SNAP_STORAGE', json_encode((object)
[ [
'localhost' => [ 'localhost' => [ // @TODO see https://github.com/YGGverse/YGGo#roadmap
[ [
'directory' => __DIR__ . '/../storage/snap/hp/', 'directory' => __DIR__ . '/../storage/snap/hp/',
'quota' => [ 'quota' => [
'mime' => false, 'mime' => false,
'size' => 10000000024, // @TODO 'size' => 10000000024, // @TODO
'request' => [ 'request' => [ // @TODO
'download' => [ 'download' => [
'size' => 10000024, // @TODO 'size' => 10000024,
'seconds' => 60*60 'seconds' => 60*60
] ]
] ]
@ -120,9 +120,9 @@ define('SNAP_STORAGE', json_encode((object)
'quota' => [ 'quota' => [
'mime' => 'text/html,application/xhtml+xml,text/plain,image/webp,image/png,image/gif,image/jpeg,image/ico', 'mime' => 'text/html,application/xhtml+xml,text/plain,image/webp,image/png,image/gif,image/jpeg,image/ico',
'size' => 10000000024, // @TODO 'size' => 10000000024, // @TODO
'request' => [ 'request' => [ // @TODO
'download' => [ 'download' => [
'size' => 10000024, // @TODO 'size' => 10000024,
'seconds' => 60*60 'seconds' => 60*60
] ]
] ]

21
library/ftp.php

@ -84,11 +84,30 @@ class Ftp {
return false; return false;
} }
public function list(string $path) { public function nlist(string $path) {
return ftp_nlist($this->_connection, $path); return ftp_nlist($this->_connection, $path);
} }
/**
 * Recursively collect the entries found below $path.
 *
 * An entry is descended into as a directory when ftp_size() reports -1
 * for it; every other entry is collected as a file.
 *
 * @param string $path Remote directory to start from.
 * @return array Flat list of entries as reported by the server listings;
 *               empty when the listing of $path fails.
 */
public function nlistr(string $path) {

    $result = [];

    $lines = $this->nlist($path);

    // ftp_nlist() yields false on failure; iterating that would raise a warning
    if (!is_array($lines)) {
        return $result;
    }

    foreach ($lines as $line) {

        // Dot entries point back to the current/parent directory and would
        // make the recursion below never terminate
        if (in_array(basename($line), ['.', '..'], true)) {
            continue;
        }

        // A listing that echoes the requested path back is kept as a plain
        // entry: descending into it again would recurse forever
        if ($line !== $path && ftp_size($this->_connection, $line) === -1) {

            $result = array_merge($result, $this->nlistr($line));

        } else {

            $result[] = $line;
        }
    }

    return $result;
}
public function close() { public function close() {
return ftp_close($this->_connection); return ftp_close($this->_connection);

9
library/mysql.php

@ -528,6 +528,15 @@ class MySQL {
return $query->fetch(); return $query->fetch();
} }
/**
 * Look up a single hostPageSnap row matching both the snap id and its
 * creation time.
 *
 * @param int $hostPageSnapId Value compared with the `hostPageSnapId` column.
 * @param int $timeAdded      Value compared with the `timeAdded` column.
 * @return mixed Whatever fetch() yields for the prepared statement
 *               (presumably a row, or false when nothing matches - confirm
 *               against the driver behind $this->_db).
 */
public function findHostPageSnapByTimeAdded(int $hostPageSnapId, int $timeAdded) {

    $statement = $this->_db->prepare('SELECT * FROM `hostPageSnap` WHERE `hostPageSnapId` = ? AND `timeAdded` = ? LIMIT 1');

    // Positional parameters, in the order of the placeholders above
    $parameters = [$hostPageSnapId, $timeAdded];

    $statement->execute($parameters);

    return $statement->fetch();
}
public function addHostPageSnapDownload(int $hostPageSnapStorageId, string $crc32ip, int $timeAdded) { public function addHostPageSnapDownload(int $hostPageSnapStorageId, string $crc32ip, int $timeAdded) {
$query = $this->_db->prepare('INSERT INTO `hostPageSnapDownload` (`hostPageSnapStorageId`, $query = $this->_db->prepare('INSERT INTO `hostPageSnapDownload` (`hostPageSnapStorageId`,

Loading…
Cancel
Save