mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-08-31 16:21:56 +00:00
refactor hostPageDom tables, add multiple selectors and children values support
This commit is contained in:
parent
42b34d0783
commit
eccb7ea241
Binary file not shown.
Binary file not shown.
Before Width: | Height: | Size: 138 KiB After Width: | Height: | Size: 168 KiB |
@ -997,55 +997,63 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_HOST_PAGE_QUEUE_LIMIT, time() - CRAWL_
|
|||||||
// Process selectors configuration
|
// Process selectors configuration
|
||||||
if ($hostPageDomSelectors = Helper::getHostSettingValue($db, $memory, $queueHostPage->hostId, 'PAGES_DOM_SELECTORS', json_decode(DEFAULT_HOST_PAGES_DOM_SELECTORS))) {
|
if ($hostPageDomSelectors = Helper::getHostSettingValue($db, $memory, $queueHostPage->hostId, 'PAGES_DOM_SELECTORS', json_decode(DEFAULT_HOST_PAGES_DOM_SELECTORS))) {
|
||||||
|
|
||||||
|
$hostPageDomId = $db->addHostPageDom(
|
||||||
|
$queueHostPage->hostPageId,
|
||||||
|
time()
|
||||||
|
);
|
||||||
|
|
||||||
foreach ($hostPageDomSelectors as $selector => $settings) {
|
foreach ($hostPageDomSelectors as $selector => $settings) {
|
||||||
|
|
||||||
// Extract target selector data
|
$hostPageDomSelectorId = $db->addHostPageDomSelector(
|
||||||
foreach ($crawler->filter($selector) as $data) {
|
$hostPageDomId,
|
||||||
|
$selector
|
||||||
|
);
|
||||||
|
|
||||||
foreach ($data->childNodes as $node) {
|
// Extract selectors data
|
||||||
|
foreach ($crawler->filter($selector)->each(function($node) {
|
||||||
|
|
||||||
$value = trim($node->ownerDocument->saveHtml());
|
return $node->html();
|
||||||
|
|
||||||
// Apply selector settings
|
}) as $value) {
|
||||||
foreach ($settings as $key => $setting) {
|
|
||||||
|
|
||||||
switch ($key) {
|
foreach ($settings as $name => $setting) {
|
||||||
|
|
||||||
|
// Apply value settings
|
||||||
|
switch ($name) {
|
||||||
|
|
||||||
case 'strip_tags':
|
case 'strip_tags':
|
||||||
|
|
||||||
if (!isset($setting->enabled)) {
|
if (!isset($setting->enabled)) {
|
||||||
|
|
||||||
continue 2;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (false === $setting->enabled) {
|
if (false === $setting->enabled) {
|
||||||
|
|
||||||
continue 2;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!isset($setting->allowed_tags)) {
|
if (!isset($setting->allowed_tags)) {
|
||||||
|
|
||||||
continue 2;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
$value = strip_tags($value, $setting->allowed_tags);
|
$value = strip_tags($value, $setting->allowed_tags);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Skip empty selector values save
|
$value = trim($value);
|
||||||
|
|
||||||
if (empty($value)) {
|
if (empty($value)) {
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Save selector value
|
// Save selector data
|
||||||
$db->addHostPageDom(
|
$db->addHostPageDomSelectorData(
|
||||||
$queueHostPage->hostPageId,
|
$hostPageDomSelectorId,
|
||||||
$selector,
|
$value
|
||||||
$value,
|
|
||||||
time()
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -620,40 +620,37 @@ class MySQL {
|
|||||||
return $query->rowCount();
|
return $query->rowCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function addHostPageDom(int $hostPageId, string $selector, string $value, int $timeAdded) {
|
public function addHostPageDom(int $hostPageId, int $timeAdded) {
|
||||||
|
|
||||||
$this->_debug->query->insert->total++;
|
$this->_debug->query->insert->total++;
|
||||||
|
|
||||||
$query = $this->_db->prepare('INSERT INTO `hostPageDom` SET `hostPageId` = ?, `timeAdded` = ?, `selector` = ?, `value` = ?');
|
$query = $this->_db->prepare('INSERT INTO `hostPageDom` SET `hostPageId` = ?, `timeAdded` = ?');
|
||||||
|
|
||||||
$query->execute([$hostPageId, $timeAdded, $selector, $value]);
|
$query->execute([$hostPageId, $timeAdded]);
|
||||||
|
|
||||||
|
return $this->_db->lastInsertId();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function deleteHostPageDomBySelector(int $hostPageId, string $selector) {
|
public function addHostPageDomSelector(int $hostPageDomId, string $name) {
|
||||||
|
|
||||||
$this->_debug->query->delete->total++;
|
$this->_debug->query->insert->total++;
|
||||||
|
|
||||||
$query = $this->_db->prepare('DELETE FROM `hostPageDom` WHERE `hostPageId` = ? AND `selector` = ?');
|
$query = $this->_db->prepare('INSERT INTO `hostPageDomSelector` SET `hostPageDomId` = ?, `name` = ?');
|
||||||
|
|
||||||
$query->execute([$hostPageId, $selector]);
|
$query->execute([$hostPageDomId, $name]);
|
||||||
|
|
||||||
return $query->rowCount();
|
return $this->_db->lastInsertId();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function findLastHostPageDomBySelector(int $hostPageId, string $selector) {
|
public function addHostPageDomSelectorData(int $hostPageDomSelectorId, string $value) {
|
||||||
|
|
||||||
$this->_debug->query->select->total++;
|
$this->_debug->query->insert->total++;
|
||||||
|
|
||||||
$query = $this->_db->prepare('SELECT * FROM `hostPageDom` WHERE `hostPageId` = ? AND `selector` = ? ORDER BY `timeAdded` DESC LIMIT 1');
|
$query = $this->_db->prepare('INSERT INTO `hostPageDomSelectorData` SET `hostPageDomSelectorId` = ?, `value` = ?');
|
||||||
|
|
||||||
$query->execute([$hostPageId, $selector]);
|
$query->execute([$hostPageDomSelectorId, $value]);
|
||||||
|
|
||||||
return $query->fetch();
|
return $this->_db->lastInsertId();
|
||||||
}
|
|
||||||
|
|
||||||
public function truncateHostPageDom() {
|
|
||||||
|
|
||||||
$query = $this->_db->query('TRUNCATE `hostPageDom`');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cleaner tools
|
// Cleaner tools
|
||||||
@ -806,6 +803,8 @@ class MySQL {
|
|||||||
$this->_db->query('OPTIMIZE TABLE `hostPage`');
|
$this->_db->query('OPTIMIZE TABLE `hostPage`');
|
||||||
$this->_db->query('OPTIMIZE TABLE `hostPageDescription`');
|
$this->_db->query('OPTIMIZE TABLE `hostPageDescription`');
|
||||||
$this->_db->query('OPTIMIZE TABLE `hostPageDom`');
|
$this->_db->query('OPTIMIZE TABLE `hostPageDom`');
|
||||||
|
$this->_db->query('OPTIMIZE TABLE `hostPageDomSelector`');
|
||||||
|
$this->_db->query('OPTIMIZE TABLE `hostPageDomSelectorData`');
|
||||||
$this->_db->query('OPTIMIZE TABLE `hostPageSnap`');
|
$this->_db->query('OPTIMIZE TABLE `hostPageSnap`');
|
||||||
$this->_db->query('OPTIMIZE TABLE `hostPageSnapStorage`');
|
$this->_db->query('OPTIMIZE TABLE `hostPageSnapStorage`');
|
||||||
$this->_db->query('OPTIMIZE TABLE `hostPageSnapDownload`');
|
$this->_db->query('OPTIMIZE TABLE `hostPageSnapDownload`');
|
||||||
|
Loading…
x
Reference in New Issue
Block a user