mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-02-03 10:25:52 +00:00
add FS cleaning features, lock execution on active crontab tasks, disable hostPageSnap/localhost untested constructions
This commit is contained in:
parent
547cd6717b
commit
000b9ad8dd
12
README.md
12
README.md
@ -163,7 +163,7 @@ GET m=SphinxQL
|
||||
|
||||
* [x] CSS only, JS-less interface
|
||||
* [x] Unique host ident icons
|
||||
* [x] Content genre tabs (#1)
|
||||
* [x] Content MIME tabs (#1)
|
||||
* [x] Page index explorer
|
||||
+ [x] Meta
|
||||
+ [x] Snaps history
|
||||
@ -225,8 +225,14 @@ GET m=SphinxQL
|
||||
* [x] help
|
||||
* [x] crawl
|
||||
* [x] clean
|
||||
* [x] snap
|
||||
+ [x] reindex
|
||||
* [x] hostPageSnap
|
||||
+ [x] repair
|
||||
+ [x] _sync DB-FS relations_
|
||||
+ [x] _FTP_
|
||||
+ [ ] _localhost (not tested)_
|
||||
+ [x] _delete FS files missing from the DB_
|
||||
+ [x] _FTP_
|
||||
+ [ ] _localhost_
|
||||
* [x] hostPageDom
|
||||
+ [x] generate
|
||||
+ [x] truncate
|
||||
|
163
cli/yggo.php
163
cli/yggo.php
@ -1,21 +1,5 @@
|
||||
<?php
|
||||
|
||||
// CLI only to prevent https server connection timeout
|
||||
if (php_sapi_name() != 'cli') {
|
||||
|
||||
CLI::danger(_('supported command line interface only'));
|
||||
exit;
|
||||
}
|
||||
|
||||
// Lock multi-thread execution
|
||||
$semaphore = sem_get(crc32('cli.yggo'), 1);
|
||||
|
||||
if (false === sem_acquire($semaphore, true)) {
|
||||
|
||||
CLI::danger(_('Process locked by another thread.'));
|
||||
exit;
|
||||
}
|
||||
|
||||
// Load system dependencies
|
||||
require_once(__DIR__ . '/../config/app.php');
|
||||
require_once(__DIR__ . '/../library/cli.php');
|
||||
@ -24,6 +8,44 @@ require_once(__DIR__ . '/../library/filter.php');
|
||||
require_once(__DIR__ . '/../library/ftp.php');
|
||||
require_once(__DIR__ . '/../library/vendor/simple_html_dom.php');
|
||||
|
||||
// CLI only to prevent https server connection timeout
if (php_sapi_name() != 'cli') {

  CLI::danger(_('supported command line interface only'));
  CLI::break();

  exit;
}

// NOTE: every semaphore handle below lives in its own variable on purpose.
// A handle returned by sem_get() gives back the semaphore it acquired as soon
// as the handle is destroyed (auto release is enabled by default), so storing
// the next handle in the same variable would silently drop the previous lock.

// Lock multi-thread execution of this CLI tool
// The key is the tool's own one: sharing the 'crontab.crawler' key with the
// check at the bottom would make that second non-blocking acquire fail in
// this very process.
$cliSemaphore = sem_get(crc32('cli.yggo'), 1);

if (false === sem_acquire($cliSemaphore, true)) {

  CLI::danger(_('process locked by another thread.'));
  CLI::break();

  exit;
}

// Stop CLI execution on cleaner process running
$cleanerSemaphore = sem_get(crc32('crontab.cleaner'), 1);

if (false === sem_acquire($cleanerSemaphore, true)) {

  CLI::danger(_('stop crontab.cleaner is running in another thread.'));
  CLI::break();

  exit;
}

// Stop CLI execution on crawler process running
// $semaphore keeps pointing to the crawler lock, exactly as before this change,
// in case code further down the script still refers to it
$semaphore = sem_get(crc32('crontab.crawler'), 1);

if (false === sem_acquire($semaphore, true)) {

  CLI::danger(_('stop crontab.crawler is running in another thread.'));
  CLI::break();

  exit;
}
|
||||
|
||||
// Connect database
|
||||
$db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD);
|
||||
|
||||
@ -49,17 +71,20 @@ switch ($argv[1]) {
|
||||
CLI::notice(_('cleaner queue step completed.'));
|
||||
|
||||
break;
|
||||
case 'snap':
|
||||
case 'hostPageSnap':
|
||||
|
||||
if (empty($argv[2])) {
|
||||
CLI::danger(_('snap method requires action argument'));
|
||||
|
||||
CLI::danger(_('hostPageSnap method requires action argument'));
|
||||
CLI::break();
|
||||
exit;
|
||||
}
|
||||
|
||||
switch ($argv[2]) {
|
||||
|
||||
case 'reindex':
|
||||
case 'repair':
|
||||
|
||||
// Scan for new files/storages
|
||||
// Normalize & cleanup DB
|
||||
CLI::notice(_('scan database registry for missed snap files...'));
|
||||
|
||||
foreach ($db->getHosts() as $host) {
|
||||
@ -74,20 +99,21 @@ switch ($argv[1]) {
|
||||
$snapPath = chunk_split($hostPage->hostPageId, 1, '/');
|
||||
|
||||
// Check file exists
|
||||
foreach (json_decode(SNAP_STORAGE) as $name => $storages) {
|
||||
foreach (json_decode(SNAP_STORAGE) as $hostPageSnapStorageName => $storages) {
|
||||
|
||||
foreach ($storages as $i => $storage) {
|
||||
|
||||
// Generate storage id
|
||||
$crc32name = crc32(sprintf('%s.%s', $name, $i));
|
||||
$crc32name = crc32(sprintf('%s.%s', $hostPageSnapStorageName, $i));
|
||||
|
||||
switch ($name) {
|
||||
switch ($hostPageSnapStorageName) {
|
||||
|
||||
case 'localhost':
|
||||
|
||||
$filename = $storage->directory . $snapPath . $hostPageSnap->timeAdded . '.zip';
|
||||
/* @TODO implemented, not tested
|
||||
$hostPageSnapFilename = $storage->directory . $snapPath . $hostPageSnap->timeAdded . '.zip';
|
||||
|
||||
if (file_exists($filename)) {
|
||||
if (file_exists($hostPageSnapFilename)) {
|
||||
|
||||
$snapFilesExists = true;
|
||||
|
||||
@ -95,14 +121,15 @@ switch ($argv[1]) {
|
||||
|
||||
if ($db->addHostPageSnapStorage($hostPageSnap->hostPageSnapId, $crc32name, $hostPageSnap->timeAdded)) {
|
||||
|
||||
CLI::warning(sprintf(_('register snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $filename, $name, $i));
|
||||
CLI::warning(sprintf(_('register snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $hostPageSnapFilename, $hostPageSnapStorageName, $i));
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
CLI::success(sprintf(_('skip related snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $filename, $name, $i));
|
||||
CLI::success(sprintf(_('skip related snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $hostPageSnapFilename, $hostPageSnapStorageName, $i));
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
break;
|
||||
|
||||
@ -112,9 +139,9 @@ switch ($argv[1]) {
|
||||
|
||||
if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {
|
||||
|
||||
$filename = 'hp/' . $snapPath . $hostPageSnap->timeAdded . '.zip';
|
||||
$hostPageSnapFilename = 'hp/' . $snapPath . $hostPageSnap->timeAdded . '.zip';
|
||||
|
||||
if ($ftp->size($filename)) {
|
||||
if ($ftp->size($hostPageSnapFilename)) {
|
||||
|
||||
$snapFilesExists = true;
|
||||
|
||||
@ -122,13 +149,20 @@ switch ($argv[1]) {
|
||||
|
||||
if ($db->addHostPageSnapStorage($hostPageSnap->hostPageSnapId, $crc32name, $hostPageSnap->timeAdded)) {
|
||||
|
||||
CLI::warning(sprintf(_('register snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $filename, $name, $i));
|
||||
CLI::warning(sprintf(_('register snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $hostPageSnapFilename, $hostPageSnapStorageName, $i));
|
||||
}
|
||||
} else {
|
||||
|
||||
CLI::success(sprintf(_('skip related snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $filename, $name, $i));
|
||||
CLI::success(sprintf(_('skip related snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $hostPageSnapFilename, $hostPageSnapStorageName, $i));
|
||||
}
|
||||
}
|
||||
|
||||
// Prevent snap deletion from registry on FTP connection lost
|
||||
} else {
|
||||
|
||||
CLI::danger(sprintf(_('could not connect to storage %s index %s. operation stopped to prevent the data lose.'), $hostPageSnapStorageName, $i));
|
||||
CLI::break();
|
||||
exit;
|
||||
}
|
||||
|
||||
$ftp->close();
|
||||
@ -154,7 +188,7 @@ switch ($argv[1]) {
|
||||
$db->deleteHostPageSnapStorages($hostPageSnap->hostPageSnapId);
|
||||
$db->deleteHostPageSnap($hostPageSnap->hostPageSnapId);
|
||||
|
||||
CLI::danger(sprintf(_('delete snap index: #%s file not found in the any of storage;'), $hostPageSnap->hostPageSnapId));
|
||||
CLI::warning(sprintf(_('delete snap index: #%s file not found in the any of storage;'), $hostPageSnap->hostPageSnapId));
|
||||
|
||||
$db->commit();
|
||||
|
||||
@ -169,23 +203,70 @@ switch ($argv[1]) {
|
||||
}
|
||||
}
|
||||
|
||||
// Cleanup FS: delete snap files found in the storage but not registered in the DB
CLI::notice(_('scan storage for snap files missed in the DB...'));

foreach (json_decode(SNAP_STORAGE) as $hostPageSnapStorageName => $storages) {

  foreach ($storages as $i => $storage) {

    switch ($hostPageSnapStorageName) {

      case 'localhost':

        // @TODO

      break;

      case 'ftp':

        $ftp = new Ftp();

        if (!$ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {

          // Nothing is deleted without a listing, so this storage is just skipped;
          // close() is not called because there is no open connection to close
          CLI::danger(sprintf(_('could not connect to storage %s index %s. storage skipped.'), $hostPageSnapStorageName, $i));

          break;
        }

        foreach ($ftp->nlistr($storage->directory) as $hostPageSnapFilename) {

          // Expected location: .../hp/{id digits separated by slashes}/{timeAdded}.zip
          // preg_match() returns 0 on "no match" and false on error, only 1 is a hit
          if (1 !== preg_match('!/hp/([\d/]+)/([\d]+)\.zip$!ui', $hostPageSnapFilename, $matches)) {

            continue;
          }

          // The folder part looks like "1/2/3": glue the digits back together,
          // passing it as is to an int argument would keep the first digit only
          $snapFileId        = (int) str_replace('/', '', $matches[1]);
          $snapFileTimeAdded = (int) $matches[2];

          if (empty($snapFileId) || empty($snapFileTimeAdded)) {

            continue;
          }

          // NOTE(review): the folder digits are produced from hostPageId (see $snapPath
          // in the registry scan above), while findHostPageSnapByTimeAdded() filters by
          // the hostPageSnapId column - confirm both mean the same identifier before
          // running this against a production storage, otherwise valid files get deleted
          if ($db->findHostPageSnapByTimeAdded($snapFileId, $snapFileTimeAdded)) {

            continue;
          }

          if ($ftp->delete($hostPageSnapFilename)) {

            CLI::warning(sprintf(_('delete snap file: #%s from storage %s index %s not found in registry;'), $hostPageSnapFilename, $hostPageSnapStorageName, $i));

          } else {

            // Distinct text, so a failed deletion is not reported as a successful one
            CLI::danger(sprintf(_('could not delete snap file: #%s from storage %s index %s not found in registry;'), $hostPageSnapFilename, $hostPageSnapStorageName, $i));
          }
        }

        $ftp->close();

      break;
    }
  }
}

CLI::success(_('missed snap files successfully deleted!'));
|
||||
|
||||
// Optimize DB tables
|
||||
CLI::notice(_('optimize database tables...'));
|
||||
|
||||
$db->optimize();
|
||||
|
||||
CLI::success(_('tables successfully optimized!'));
|
||||
|
||||
CLI::notice(_('scan storage locations for snap files not registered in the DB...'));
|
||||
|
||||
CLI::success(_('snap index successfully updated!'));
|
||||
|
||||
// Cleanup FS items on missed DB registry
|
||||
// @TODO
|
||||
|
||||
break;
|
||||
default:
|
||||
|
||||
CLI::danger(_('undefined action argument'));
|
||||
CLI::danger(_('undefined action argument!'));
|
||||
}
|
||||
|
||||
break;
|
||||
@ -344,8 +425,8 @@ CLI::default('available options:');
|
||||
CLI::default(' help - this message');
|
||||
CLI::default(' crawl - execute crawler step in the crontab queue');
|
||||
CLI::default(' clean - execute cleaner step in the crontab queue');
|
||||
CLI::default(' snap reindex - sync DB/FS relations');
|
||||
CLI::default(' hostPage rank reindex - generate rank indexes in hostPage table');
|
||||
CLI::default(' hostPageSnap repair - sync DB/FS relations');
|
||||
CLI::default(' hostPageDom generate [selectors] - make hostPageDom index based on related hostPage.data field');
|
||||
CLI::default(' hostPageDom truncate - flush hostPageDom table');
|
||||
CLI::break();
|
||||
|
@ -92,15 +92,15 @@ define('MEMCACHED_PORT', 11211);
|
||||
*/
|
||||
define('SNAP_STORAGE', json_encode((object)
|
||||
[
|
||||
'localhost' => [
|
||||
'localhost' => [ // @TODO see https://github.com/YGGverse/YGGo#roadmap
|
||||
[
|
||||
'directory' => __DIR__ . '/../storage/snap/hp/',
|
||||
'quota' => [
|
||||
'mime' => false,
|
||||
'size' => 10000000024, // @TODO
|
||||
'request' => [
|
||||
'request' => [ // @TODO
|
||||
'download' => [
|
||||
'size' => 10000024, // @TODO
|
||||
'size' => 10000024,
|
||||
'seconds' => 60*60
|
||||
]
|
||||
]
|
||||
@ -120,9 +120,9 @@ define('SNAP_STORAGE', json_encode((object)
|
||||
'quota' => [
|
||||
'mime' => 'text/html,application/xhtml+xml,text/plain,image/webp,image/png,image/gif,image/jpeg,image/ico',
|
||||
'size' => 10000000024, // @TODO
|
||||
'request' => [
|
||||
'request' => [ // @TODO
|
||||
'download' => [
|
||||
'size' => 10000024, // @TODO
|
||||
'size' => 10000024,
|
||||
'seconds' => 60*60
|
||||
]
|
||||
]
|
||||
|
@ -84,11 +84,30 @@ class Ftp {
|
||||
return false;
|
||||
}
|
||||
|
||||
public function list(string $path) {
|
||||
/**
 * List the names found in a remote directory
 *
 * Thin wrapper around ftp_nlist() that uses the connection stored in this object.
 *
 * @param string $path remote directory to list
 * @return mixed the ftp_nlist() result, passed through unchanged
 */
public function nlist(string $path) {

  $listing = ftp_nlist($this->_connection, $path);

  return $listing;
}
|
||||
|
||||
/**
 * Recursively list the files located below a remote directory
 *
 * Entries whose size is not available are treated as directories and scanned
 * in turn, every other entry is returned as a file.
 *
 * @param string $path remote directory to start from
 * @return array flat list of file names as reported by the server
 */
public function nlistr(string $path) {

  $result = [];

  $lines = $this->nlist($path);

  // ftp_nlist() gives false instead of an array on failure
  if (!is_array($lines)) {

    return $result;
  }

  foreach ($lines as $line) {

    // Some servers also list the directory itself and its parent,
    // following these entries would never stop
    if (in_array(basename($line), ['.', '..'], true)) {

      continue;
    }

    // ftp_size() returns -1 when the size can not be obtained, which is the case for directories
    if (ftp_size($this->_connection, $line) === -1) {

      foreach ($this->nlistr($line) as $file) {

        $result[] = $file;
      }

    } else {

      $result[] = $line;
    }
  }

  return $result;
}
|
||||
|
||||
public function close() {
|
||||
|
||||
return ftp_close($this->_connection);
|
||||
|
@ -528,6 +528,15 @@ class MySQL {
|
||||
return $query->fetch();
|
||||
}
|
||||
|
||||
/**
 * Find one hostPageSnap row by snap id and creation time
 *
 * NOTE(review): the CLI storage cleanup calls this method with digits taken
 * from a snap folder path - confirm that value really is a hostPageSnapId.
 *
 * @param int $hostPageSnapId value compared with the `hostPageSnapId` column
 * @param int $timeAdded value compared with the `timeAdded` column
 * @return mixed result of fetch() on the executed statement
 */
public function findHostPageSnapByTimeAdded(int $hostPageSnapId, int $timeAdded) {

  $statement = $this->_db->prepare('SELECT * FROM `hostPageSnap` WHERE `hostPageSnapId` = ? AND `timeAdded` = ? LIMIT 1');

  $statement->execute([$hostPageSnapId, $timeAdded]);

  $row = $statement->fetch();

  return $row;
}
|
||||
|
||||
public function addHostPageSnapDownload(int $hostPageSnapStorageId, string $crc32ip, int $timeAdded) {
|
||||
|
||||
$query = $this->_db->prepare('INSERT INTO `hostPageSnapDownload` (`hostPageSnapStorageId`,
|
||||
|
Loading…
x
Reference in New Issue
Block a user