Browse Source

add FS cleaning features, lock execution on active crontab tasks, disable hostPageSnap/localhost untested constructions

main
ghost 1 year ago
parent
commit
000b9ad8dd
  1. 12
      README.md
  2. 149
      cli/yggo.php
  3. 10
      config/app.php.txt
  4. 21
      library/ftp.php
  5. 9
      library/mysql.php

12
README.md

@ -163,7 +163,7 @@ GET m=SphinxQL @@ -163,7 +163,7 @@ GET m=SphinxQL
* [x] CSS only, JS-less interface
* [x] Unique host ident icons
* [x] Content genre tabs (#1)
* [x] Content MIME tabs (#1)
* [x] Page index explorer
+ [x] Meta
+ [x] Snaps history
@ -225,8 +225,14 @@ GET m=SphinxQL @@ -225,8 +225,14 @@ GET m=SphinxQL
* [x] help
* [x] crawl
* [x] clean
* [x] snap
+ [x] reindex
* [x] hostPageSnap
+ [x] repair
+ [x] _sync DB-FS relations_
+ [x] _FTP_
+ [ ] _localhost (not tested)_
+ [x] _delete FS missed in the DB_
+ [x] _FTP_
+ [ ] _localhost_
* [x] hostPageDom
+ [x] generate
+ [x] truncate

149
cli/yggo.php

@ -1,28 +1,50 @@ @@ -1,28 +1,50 @@
<?php
// Load system dependencies
require_once(__DIR__ . '/../config/app.php');
require_once(__DIR__ . '/../library/cli.php');
require_once(__DIR__ . '/../library/mysql.php');
require_once(__DIR__ . '/../library/filter.php');
require_once(__DIR__ . '/../library/ftp.php');
require_once(__DIR__ . '/../library/vendor/simple_html_dom.php');
// CLI only to prevent https server connection timeout
if (php_sapi_name() != 'cli') {
CLI::danger(_('supported command line interface only'));
CLI::break();
exit;
}
// Lock multi-thread execution
$semaphore = sem_get(crc32('cli.yggo'), 1);
$semaphore = sem_get(crc32('crontab.crawler'), 1);
if (false === sem_acquire($semaphore, true)) {
CLI::danger(_('Process locked by another thread.'));
CLI::danger(_('process locked by another thread.'));
CLI::break();
exit;
}
// Load system dependencies
require_once(__DIR__ . '/../config/app.php');
require_once(__DIR__ . '/../library/cli.php');
require_once(__DIR__ . '/../library/mysql.php');
require_once(__DIR__ . '/../library/filter.php');
require_once(__DIR__ . '/../library/ftp.php');
require_once(__DIR__ . '/../library/vendor/simple_html_dom.php');
// Stop CLI execution on cleaner process running
$semaphore = sem_get(crc32('crontab.cleaner'), 1);
if (false === sem_acquire($semaphore, true)) {
CLI::danger(_('stop crontab.cleaner is running in another thread.'));
CLI::break();
exit;
}
// Stop CLI execution on crawler process running
$semaphore = sem_get(crc32('crontab.crawler'), 1);
if (false === sem_acquire($semaphore, true)) {
CLI::danger(_('stop crontab.crawler is running in another thread.'));
CLI::break();
exit;
}
// Connect database
$db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD);
@ -49,17 +71,20 @@ switch ($argv[1]) { @@ -49,17 +71,20 @@ switch ($argv[1]) {
CLI::notice(_('cleaner queue step completed.'));
break;
case 'snap':
case 'hostPageSnap':
if (empty($argv[2])) {
CLI::danger(_('snap method requires action argument'));
CLI::danger(_('hostPageSnap method requires action argument'));
CLI::break();
exit;
}
switch ($argv[2]) {
case 'reindex':
case 'repair':
// Scan for new files/storages
// Normalize & cleanup DB
CLI::notice(_('scan database registry for missed snap files...'));
foreach ($db->getHosts() as $host) {
@ -74,20 +99,21 @@ switch ($argv[1]) { @@ -74,20 +99,21 @@ switch ($argv[1]) {
$snapPath = chunk_split($hostPage->hostPageId, 1, '/');
// Check file exists
foreach (json_decode(SNAP_STORAGE) as $name => $storages) {
foreach (json_decode(SNAP_STORAGE) as $hostPageSnapStorageName => $storages) {
foreach ($storages as $i => $storage) {
// Generate storage id
$crc32name = crc32(sprintf('%s.%s', $name, $i));
$crc32name = crc32(sprintf('%s.%s', $hostPageSnapStorageName, $i));
switch ($name) {
switch ($hostPageSnapStorageName) {
case 'localhost':
$filename = $storage->directory . $snapPath . $hostPageSnap->timeAdded . '.zip';
/* @TODO implemented, not tested
$hostPageSnapFilename = $storage->directory . $snapPath . $hostPageSnap->timeAdded . '.zip';
if (file_exists($filename)) {
if (file_exists($hostPageSnapFilename)) {
$snapFilesExists = true;
@ -95,14 +121,15 @@ switch ($argv[1]) { @@ -95,14 +121,15 @@ switch ($argv[1]) {
if ($db->addHostPageSnapStorage($hostPageSnap->hostPageSnapId, $crc32name, $hostPageSnap->timeAdded)) {
CLI::warning(sprintf(_('register snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $filename, $name, $i));
CLI::warning(sprintf(_('register snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $hostPageSnapFilename, $hostPageSnapStorageName, $i));
}
} else {
CLI::success(sprintf(_('skip related snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $filename, $name, $i));
CLI::success(sprintf(_('skip related snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $hostPageSnapFilename, $hostPageSnapStorageName, $i));
}
}
*/
break;
@ -112,9 +139,9 @@ switch ($argv[1]) { @@ -112,9 +139,9 @@ switch ($argv[1]) {
if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {
$filename = 'hp/' . $snapPath . $hostPageSnap->timeAdded . '.zip';
$hostPageSnapFilename = 'hp/' . $snapPath . $hostPageSnap->timeAdded . '.zip';
if ($ftp->size($filename)) {
if ($ftp->size($hostPageSnapFilename)) {
$snapFilesExists = true;
@ -122,13 +149,20 @@ switch ($argv[1]) { @@ -122,13 +149,20 @@ switch ($argv[1]) {
if ($db->addHostPageSnapStorage($hostPageSnap->hostPageSnapId, $crc32name, $hostPageSnap->timeAdded)) {
CLI::warning(sprintf(_('register snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $filename, $name, $i));
CLI::warning(sprintf(_('register snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $hostPageSnapFilename, $hostPageSnapStorageName, $i));
}
} else {
CLI::success(sprintf(_('skip related snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $filename, $name, $i));
CLI::success(sprintf(_('skip related snap #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $hostPageSnapFilename, $hostPageSnapStorageName, $i));
}
}
// Prevent snap deletion from registry on FTP connection lost
} else {
CLI::danger(sprintf(_('could not connect to storage %s index %s. operation stopped to prevent the data lose.'), $hostPageSnapStorageName, $i));
CLI::break();
exit;
}
$ftp->close();
@ -154,7 +188,7 @@ switch ($argv[1]) { @@ -154,7 +188,7 @@ switch ($argv[1]) {
$db->deleteHostPageSnapStorages($hostPageSnap->hostPageSnapId);
$db->deleteHostPageSnap($hostPageSnap->hostPageSnapId);
CLI::danger(sprintf(_('delete snap index: #%s file not found in the any of storage;'), $hostPageSnap->hostPageSnapId));
CLI::warning(sprintf(_('delete snap index: #%s file not found in the any of storage;'), $hostPageSnap->hostPageSnapId));
$db->commit();
@ -169,23 +203,70 @@ switch ($argv[1]) { @@ -169,23 +203,70 @@ switch ($argv[1]) {
}
}
CLI::notice(_('optimize database tables...'));
// Cleanup FS
CLI::notice(_('scan storage for snap files missed in the DB...'));
$db->optimize();
foreach (json_decode(SNAP_STORAGE) as $hostPageSnapStorageName => $storages) {
CLI::success(_('tables successfully optimized!'));
foreach ($storages as $i => $storage) {
CLI::notice(_('scan storage locations for snap files not registered in the DB...'));
switch ($hostPageSnapStorageName) {
CLI::success(_('snap index successfully updated!'));
case 'localhost':
// Cleanup FS items on missed DB registry
// @TODO
// @TODO
break;
case 'ftp':
$ftp = new Ftp();
if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {
foreach ($ftp->nlistr($storage->directory) as $hostPageSnapFilename) {
if (false !== preg_match(sprintf('!/hp/([\d/]+)/([\d]+)\.zip$!ui', $storage->directory), $hostPageSnapFilename, $matches)) {
if (!empty($matches[1]) && // hostPageSnapId
!empty($matches[2])) { // timeAdded
if (!$db->findHostPageSnapByTimeAdded($matches[1], $matches[2])) {
if ($ftp->delete($hostPageSnapFilename)) {
CLI::warning(sprintf(_('delete snap file: #%s from storage %s index %s not found in registry;'), $hostPageSnapFilename, $hostPageSnapStorageName, $i));
} else {
CLI::danger(sprintf(_('delete snap file: #%s from storage %s index %s not found in registry;'), $hostPageSnapFilename, $hostPageSnapStorageName, $i));
}
}
}
}
}
}
$ftp->close();
break;
}
}
}
CLI::success(_('missed snap files successfully deleted!'));
// Optimize DB tables
CLI::notice(_('optimize database tables...'));
$db->optimize();
CLI::success(_('tables successfully optimized!'));
break;
default:
CLI::danger(_('undefined action argument'));
CLI::danger(_('undefined action argument!'));
}
break;
@ -344,8 +425,8 @@ CLI::default('available options:'); @@ -344,8 +425,8 @@ CLI::default('available options:');
CLI::default(' help - this message');
CLI::default(' crawl - execute crawler step in the crontab queue');
CLI::default(' clean - execute cleaner step in the crontab queue');
CLI::default(' snap reindex - sync DB/FS relations');
CLI::default(' hostPage rank reindex - generate rank indexes in hostPage table');
CLI::default(' hostPageSnap repair - sync DB/FS relations');
CLI::default(' hostPageDom generate [selectors] - make hostPageDom index based on related hostPage.data field');
CLI::default(' hostPageDom truncate - flush hostPageDom table');
CLI::break();

10
config/app.php.txt

@ -92,15 +92,15 @@ define('MEMCACHED_PORT', 11211); @@ -92,15 +92,15 @@ define('MEMCACHED_PORT', 11211);
*/
define('SNAP_STORAGE', json_encode((object)
[
'localhost' => [
'localhost' => [ // @TODO see https://github.com/YGGverse/YGGo#roadmap
[
'directory' => __DIR__ . '/../storage/snap/hp/',
'quota' => [
'mime' => false,
'size' => 10000000024, // @TODO
'request' => [
'request' => [ // @TODO
'download' => [
'size' => 10000024, // @TODO
'size' => 10000024,
'seconds' => 60*60
]
]
@ -120,9 +120,9 @@ define('SNAP_STORAGE', json_encode((object) @@ -120,9 +120,9 @@ define('SNAP_STORAGE', json_encode((object)
'quota' => [
'mime' => 'text/html,application/xhtml+xml,text/plain,image/webp,image/png,image/gif,image/jpeg,image/ico',
'size' => 10000000024, // @TODO
'request' => [
'request' => [ // @TODO
'download' => [
'size' => 10000024, // @TODO
'size' => 10000024,
'seconds' => 60*60
]
]

21
library/ftp.php

@ -84,11 +84,30 @@ class Ftp { @@ -84,11 +84,30 @@ class Ftp {
return false;
}
public function list(string $path) {
public function nlist(string $path) {
return ftp_nlist($this->_connection, $path);
}
public function nlistr(string $path) {
$result = [];
foreach ($this->nlist($path) as $line) {
if (ftp_size($this->_connection, $line) == -1) {
$result = array_merge($result, $this->nlistr($line));
} else{
$result[] = $line;
}
}
return $result;
}
public function close() {
return ftp_close($this->_connection);

9
library/mysql.php

@ -528,6 +528,15 @@ class MySQL { @@ -528,6 +528,15 @@ class MySQL {
return $query->fetch();
}
public function findHostPageSnapByTimeAdded(int $hostPageSnapId, int $timeAdded) {
$query = $this->_db->prepare('SELECT * FROM `hostPageSnap` WHERE `hostPageSnapId` = ? AND `timeAdded` = ? LIMIT 1');
$query->execute([$hostPageSnapId, $timeAdded]);
return $query->fetch();
}
public function addHostPageSnapDownload(int $hostPageSnapStorageId, string $crc32ip, int $timeAdded) {
$query = $this->_db->prepare('INSERT INTO `hostPageSnapDownload` (`hostPageSnapStorageId`,

Loading…
Cancel
Save