Browse Source

delete deprecated constructions

main
ghost 1 year ago
parent
commit
468ef50ee3
  1. 4
      README.md
  2. 194
      src/cli/yggo.php

4
README.md

@ -251,10 +251,6 @@ _*CLI interface still under construction, use it for your own risk!_
+ [x] fs + [x] fs
+ [ ] reindex + [ ] reindex
+ [ ] truncate + [ ] truncate
* [x] hostPageDom
+ [x] generate
+ [x] delete
+ [x] truncate
##### Other ##### Other

194
src/cli/yggo.php

@ -544,195 +544,6 @@ if (!empty($argv[1])) {
} }
break; break;
case 'hostPageDom':

    // Command group operating on hostPageDom records.
    // $argv[2] selects the action: parse, delete or truncate.
    if (empty($argv[2])) {

        CLI::danger(_('action required'));
        exit;
    }

    switch ($argv[2]) {

        // parse [hostId] [selector source] [selector target]
        //
        // For every page of the host, takes the last stored DOM value for the
        // source selector, applies the target CSS selector to that HTML and
        // stores each child node of every match as a new record under the
        // target selector name.
        case 'parse':

            // Validate hostId
            if (empty($argv[3])) {

                CLI::danger(_('hostId required'));
                exit;
            }

            if (!$db->getHost($argv[3])) {

                CLI::danger(_('hostId not found'));
                exit;
            }

            // Validate selector source
            if (empty($argv[4])) {

                CLI::danger(_('CSS selector source required'));
                exit;
            }

            // Validate selector target
            if (empty($argv[5])) {

                CLI::danger(_('CSS selector target required'));
                exit;
            }

            // Init variables
            $hostPagesProcessedTotal = 0;
            $hostPagesSkippedTotal   = 0;
            $hostPageDomAddedTotal   = 0;

            try {

                // All inserts are committed together or not at all
                $db->beginTransaction();

                // Begin selectors values processing by hostId
                foreach ($db->getHostPages($argv[3]) as $hostPage) {

                    $hostPagesProcessedTotal++;

                    if (!$hostPageDomSelectorSource = $db->findLastHostPageDomBySelector($hostPage->hostPageId, $argv[4])) {

                        CLI::warning(sprintf(_('[selector source "%s"] not found for hostPageId "%s", skipped'), $argv[4], $hostPage->hostPageId));

                        $hostPagesSkippedTotal++;

                        continue;
                    }

                    if (empty($hostPageDomSelectorSource->value)) {

                        CLI::warning(sprintf(_('[selector source "%s"] value is empty for hostPageId "%s", skipped'), $argv[4], $hostPage->hostPageId));

                        $hostPagesSkippedTotal++;

                        continue;
                    }

                    // Init crawler
                    $crawler = new Symfony\Component\DomCrawler\Crawler();
                    $crawler->addHtmlContent($hostPageDomSelectorSource->value);

                    // Extract target selector data
                    foreach ($crawler->filter($argv[5]) as $selectorTarget) {

                        foreach ($selectorTarget->childNodes as $node) {

                            // Serialize this child node only. Without the node argument
                            // saveHTML() dumps the whole document, which stored the same
                            // full-page markup for every child and made the empty check
                            // below effectively unreachable.
                            // The cast guards against a false return on serialization failure.
                            $value = trim((string) $node->ownerDocument->saveHTML($node));

                            // Strict comparison: empty() would also discard a node whose content is "0"
                            if ($value === '') {

                                CLI::warning(sprintf(_('[selector target "%s"] value is empty for hostPageId "%s", skipped'), $argv[5], $hostPage->hostPageId));

                                // NOTE(review): incremented once per empty node, so this total can
                                // exceed the number of pages processed - confirm intended meaning
                                $hostPagesSkippedTotal++;

                                continue;
                            }

                            // Save selector value
                            $db->addHostPageDom(
                                $hostPage->hostPageId,
                                $argv[5],
                                $value,
                                time()
                            );

                            $hostPageDomAddedTotal++;
                        }
                    }
                }

                $db->commit();

                CLI::success(sprintf(_('Host pages processed: %s'), $hostPagesProcessedTotal));
                CLI::success(sprintf(_('Host pages skipped: %s'), $hostPagesSkippedTotal));
                CLI::success(sprintf(_('Host page DOM elements added: %s'), $hostPageDomAddedTotal));

            } catch(Exception $e) {

                // Undo partial inserts, dump the exception for debugging and stop
                $db->rollBack();

                var_dump($e);

                exit;
            }

        break;

        // delete [hostId] [selector]
        //
        // Removes hostPageDom records with the given selector for every page of the host.
        case 'delete':

            // Validate hostId
            if (empty($argv[3])) {

                CLI::danger(_('hostId required'));
                exit;
            }

            if (!$db->getHost($argv[3])) {

                CLI::danger(_('hostId not found'));
                exit;
            }

            // Validate selector source
            if (empty($argv[4])) {

                CLI::danger(_('CSS selector required'));
                exit;
            }

            // Init variables
            $hostPagesProcessedTotal = 0;
            $hostPageDomDeletedTotal = 0;

            try {

                $db->beginTransaction();

                // Begin selectors values processing by hostId
                foreach ($db->getHostPages($argv[3]) as $hostPage) {

                    $hostPagesProcessedTotal++;

                    // The return value is accumulated as a count
                    // (presumably the number of deleted rows - verify against the DB class)
                    $hostPageDomDeletedTotal += $db->deleteHostPageDomBySelector($hostPage->hostPageId, $argv[4]);
                }

                $db->commit();

                CLI::success(sprintf(_('Host pages processed: %s'), $hostPagesProcessedTotal));
                CLI::success(sprintf(_('Host page DOM elements deleted: %s'), $hostPageDomDeletedTotal));
                exit;

            } catch(Exception $e) {

                // Undo partial deletes, dump the exception for debugging and stop
                $db->rollBack();

                var_dump($e);

                exit;
            }

        break;

        // truncate
        //
        // Flushes the hostPageDom table; takes no further arguments.
        case 'truncate':

            $db->truncateHostPageDom();

            CLI::success(_('hostPageDom table successfully truncated'));
            exit;

        break;

        default:

            CLI::danger(_('unknown action'));
            exit;
    }

break;
} }
} }
@ -769,11 +580,6 @@ CLI::default(' repair ');
CLI::default(' db - scan database registry for new or deprecated snap files'); CLI::default(' db - scan database registry for new or deprecated snap files');
CLI::default(' fs - check all storages for snap files not registered in hostPageSnapStorage, cleanup filesystem'); CLI::default(' fs - check all storages for snap files not registered in hostPageSnapStorage, cleanup filesystem');
CLI::default(' reindex - search for host pages without snap records, add found pages to the crawl queue'); CLI::default(' reindex - search for host pages without snap records, add found pages to the crawl queue');
CLI::break();
// Usage help for the hostPageDom command group: a heading line followed by
// one entry per action handled in the 'hostPageDom' case (parse, delete, truncate).
// NOTE(review): CLI::default() presumably prints the line in the default style - confirm in the CLI class.
CLI::default(' hostPageDom ');
CLI::default(' parse [hostId] [selector source] [selector target] - parse new hostPageDom.selector target based on hostPageDom.selector source');
CLI::default(' delete [hostId] [selector] - delete DOM records from hostPageDom table by hostId and selector name');
CLI::default(' truncate - flush hostPageDom table');
CLI::break(); CLI::break();

Loading…
Cancel
Save