|
|
@ -544,195 +544,6 @@ if (!empty($argv[1])) { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
break; |
|
|
|
break; |
|
|
|
case 'hostPageDom':

  // Command group "hostPageDom": maintenance of records in the hostPageDom table.
  //
  // Positional arguments:
  //   $argv[2] - action: parse | delete | truncate
  //   $argv[3] - hostId                (parse, delete)
  //   $argv[4] - CSS selector (source) (parse, delete)
  //   $argv[5] - CSS selector (target) (parse)
  //
  // Every branch terminates the script with exit once it has reported its result.

  if (empty($argv[2])) {

    CLI::danger(_('action required'));

    exit;
  }

  switch ($argv[2]) {

    // parse [hostId] [selector source] [selector target]
    //
    // For every page of the host, loads the DOM record returned by
    // findLastHostPageDomBySelector() for the source selector, applies the
    // target selector to its HTML and stores each non-empty child node of the
    // matched elements as a new hostPageDom record under the target selector.
    case 'parse':

      // Validate hostId
      if (empty($argv[3])) {

        CLI::danger(_('hostId required'));

        exit;
      }

      if (!$db->getHost($argv[3])) {

        CLI::danger(_('hostId not found'));

        exit;
      }

      // Validate selector source
      if (empty($argv[4])) {

        CLI::danger(_('CSS selector source required'));

        exit;
      }

      // Validate selector target
      if (empty($argv[5])) {

        CLI::danger(_('CSS selector target required'));

        exit;
      }

      // Init variables
      $hostPagesProcessedTotal = 0;
      $hostPagesSkippedTotal   = 0;
      $hostPageDomAddedTotal   = 0;

      try {

        // All inserts are committed together or not at all
        $db->beginTransaction();

        // Begin selectors values processing by hostId
        foreach ($db->getHostPages($argv[3]) as $hostPage) {

          $hostPagesProcessedTotal++;

          if (!$hostPageDomSelectorSource = $db->findLastHostPageDomBySelector($hostPage->hostPageId, $argv[4])) {

            CLI::warning(sprintf(_('[selector source "%s"] not found for hostPageId "%s", skipped'), $argv[4], $hostPage->hostPageId));

            $hostPagesSkippedTotal++;

            continue;
          }

          if (empty($hostPageDomSelectorSource->value)) {

            CLI::warning(sprintf(_('[selector source "%s"] value is empty for hostPageId "%s", skipped'), $argv[4], $hostPage->hostPageId));

            $hostPagesSkippedTotal++;

            continue;
          }

          // Init crawler
          $crawler = new Symfony\Component\DomCrawler\Crawler();
          $crawler->addHtmlContent($hostPageDomSelectorSource->value);

          // Number of records stored for the current page only
          $hostPageDomAdded = 0;

          // Extract target selector data
          foreach ($crawler->filter($argv[5]) as $selectorTarget) {

            foreach ($selectorTarget->childNodes as $node) {

              // Serialize this node only: saveHTML() without an argument
              // dumps the whole source document for every child node
              $value = trim($node->ownerDocument->saveHTML($node));

              if (empty($value)) {

                // Only the node is skipped here; the page itself is counted
                // as skipped below, and only when nothing was stored for it
                CLI::warning(sprintf(_('[selector target "%s"] value is empty for hostPageId "%s", skipped'), $argv[5], $hostPage->hostPageId));

                continue;
              }

              // Save selector value
              $db->addHostPageDom(
                $hostPage->hostPageId,
                $argv[5],
                $value,
                time()
              );

              $hostPageDomAdded++;
            }
          }

          // Count a page as skipped at most once, so that the skipped total
          // can never exceed the processed total
          if ($hostPageDomAdded === 0) {

            $hostPagesSkippedTotal++;
          }

          $hostPageDomAddedTotal += $hostPageDomAdded;
        }

        $db->commit();

        CLI::success(sprintf(_('Host pages processed: %s'), $hostPagesProcessedTotal));
        CLI::success(sprintf(_('Host pages skipped: %s'), $hostPagesSkippedTotal));
        CLI::success(sprintf(_('Host page DOM elements added: %s'), $hostPageDomAddedTotal));

        // Stop here like the delete and truncate actions do, instead of
        // falling through to whatever follows the switch
        exit;

      } catch (Throwable $e) {

        // Throwable also covers Error, so no failure leaves the transaction open
        $db->rollBack();

        // Report the reason through the CLI helper instead of dumping the object
        CLI::danger($e->getMessage());

        exit;
      }

    break;

    // delete [hostId] [selector]
    //
    // Calls deleteHostPageDomBySelector() for every page of the host and
    // reports the sum of the values it returns as the number of deleted records.
    case 'delete':

      // Validate hostId
      if (empty($argv[3])) {

        CLI::danger(_('hostId required'));

        exit;
      }

      if (!$db->getHost($argv[3])) {

        CLI::danger(_('hostId not found'));

        exit;
      }

      // Validate selector source
      if (empty($argv[4])) {

        CLI::danger(_('CSS selector required'));

        exit;
      }

      // Init variables
      $hostPagesProcessedTotal = 0;
      $hostPageDomDeletedTotal = 0;

      try {

        $db->beginTransaction();

        // Begin selectors values processing by hostId
        foreach ($db->getHostPages($argv[3]) as $hostPage) {

          $hostPagesProcessedTotal++;

          $hostPageDomDeletedTotal += $db->deleteHostPageDomBySelector($hostPage->hostPageId, $argv[4]);
        }

        $db->commit();

        CLI::success(sprintf(_('Host pages processed: %s'), $hostPagesProcessedTotal));
        CLI::success(sprintf(_('Host page DOM elements deleted: %s'), $hostPageDomDeletedTotal));

        exit;

      } catch (Throwable $e) {

        // Throwable also covers Error, so no failure leaves the transaction open
        $db->rollBack();

        // Report the reason through the CLI helper instead of dumping the object
        CLI::danger($e->getMessage());

        exit;
      }

    break;

    // truncate
    //
    // Calls truncateHostPageDom(); takes no further arguments.
    case 'truncate':

      $db->truncateHostPageDom();

      CLI::success(_('hostPageDom table successfully truncated'));

      exit;

    break;

    default:

      CLI::danger(_('unknown action'));

      exit;
  }

break;
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
@ -769,11 +580,6 @@ CLI::default(' repair '); |
|
|
|
CLI::default(' db - scan database registry for new or deprecated snap files'); |
|
|
|
CLI::default(' db - scan database registry for new or deprecated snap files'); |
|
|
|
CLI::default(' fs - check all storages for snap files not registered in hostPageSnapStorage, cleanup filesystem'); |
|
|
|
CLI::default(' fs - check all storages for snap files not registered in hostPageSnapStorage, cleanup filesystem'); |
|
|
|
CLI::default(' reindex - search for host pages without snap records, add found pages to the crawl queue'); |
|
|
|
CLI::default(' reindex - search for host pages without snap records, add found pages to the crawl queue'); |
|
|
|
CLI::break(); |
|
|
|
|
|
|
|
// Usage text for the hostPageDom command group: one line for the group name,
// then one line per action (parse, delete, truncate) with its positional arguments.
CLI::default(' hostPageDom '); |
|
|
|
|
|
|
|
CLI::default(' parse [hostId] [selector source] [selector target] - parse new hostPageDom.selector target based on hostPageDom.selector source'); |
|
|
|
|
|
|
|
CLI::default(' delete [hostId] [selector] - delete DOM records from hostPageDom table by hostId and selector name'); |
|
|
|
|
|
|
|
CLI::default(' truncate - flush hostPageDom table'); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CLI::break(); |
|
|
|
CLI::break(); |
|
|
|
|
|
|
|
|
|
|
|