1
0
mirror of https://github.com/YGGverse/YGGo.git synced 2025-01-11 15:27:56 +00:00

Refactor the hostPageDom tables; add support for multiple selectors and child-node values

This commit is contained in:
ghost 2023-08-17 18:32:48 +03:00
parent 42b34d0783
commit eccb7ea241
4 changed files with 52 additions and 45 deletions
database
media
src
crontab
library

Binary file not shown.

Binary file not shown.

Before

(image error) Size: 138 KiB

After

(image error) Size: 168 KiB

View File

@ -997,55 +997,63 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_HOST_PAGE_QUEUE_LIMIT, time() - CRAWL_
// Process selectors configuration
if ($hostPageDomSelectors = Helper::getHostSettingValue($db, $memory, $queueHostPage->hostId, 'PAGES_DOM_SELECTORS', json_decode(DEFAULT_HOST_PAGES_DOM_SELECTORS))) {
$hostPageDomId = $db->addHostPageDom(
$queueHostPage->hostPageId,
time()
);
foreach ($hostPageDomSelectors as $selector => $settings) {
// Extract target selector data
foreach ($crawler->filter($selector) as $data) {
$hostPageDomSelectorId = $db->addHostPageDomSelector(
$hostPageDomId,
$selector
);
foreach ($data->childNodes as $node) {
// Extract selectors data
foreach ($crawler->filter($selector)->each(function($node) {
$value = trim($node->ownerDocument->saveHtml());
return $node->html();
// Apply selector settings
foreach ($settings as $key => $setting) {
}) as $value) {
switch ($key) {
foreach ($settings as $name => $setting) {
case 'strip_tags':
// Apply value settings
switch ($name) {
if (!isset($setting->enabled)) {
case 'strip_tags':
continue 2;
}
if (!isset($setting->enabled)) {
if (false === $setting->enabled) {
break;
}
continue 2;
}
if (false === $setting->enabled) {
if (!isset($setting->allowed_tags)) {
break;
}
continue 2;
}
if (!isset($setting->allowed_tags)) {
$value = strip_tags($value, $setting->allowed_tags);
break;
}
break;
}
$value = strip_tags($value, $setting->allowed_tags);
break;
}
// Skip empty selector values save
$value = trim($value);
if (empty($value)) {
continue;
}
// Save selector value
$db->addHostPageDom(
$queueHostPage->hostPageId,
$selector,
$value,
time()
// Save selector data
$db->addHostPageDomSelectorData(
$hostPageDomSelectorId,
$value
);
}
}

View File

@ -620,40 +620,37 @@ class MySQL {
return $query->rowCount();
}
public function addHostPageDom(int $hostPageId, string $selector, string $value, int $timeAdded) {
public function addHostPageDom(int $hostPageId, int $timeAdded) {
$this->_debug->query->insert->total++;
$query = $this->_db->prepare('INSERT INTO `hostPageDom` SET `hostPageId` = ?, `timeAdded` = ?, `selector` = ?, `value` = ?');
$query = $this->_db->prepare('INSERT INTO `hostPageDom` SET `hostPageId` = ?, `timeAdded` = ?');
$query->execute([$hostPageId, $timeAdded, $selector, $value]);
$query->execute([$hostPageId, $timeAdded]);
return $this->_db->lastInsertId();
}
public function deleteHostPageDomBySelector(int $hostPageId, string $selector) {
public function addHostPageDomSelector(int $hostPageDomId, string $name) {
$this->_debug->query->delete->total++;
$this->_debug->query->insert->total++;
$query = $this->_db->prepare('DELETE FROM `hostPageDom` WHERE `hostPageId` = ? AND `selector` = ?');
$query = $this->_db->prepare('INSERT INTO `hostPageDomSelector` SET `hostPageDomId` = ?, `name` = ?');
$query->execute([$hostPageId, $selector]);
$query->execute([$hostPageDomId, $name]);
return $query->rowCount();
return $this->_db->lastInsertId();
}
public function findLastHostPageDomBySelector(int $hostPageId, string $selector) {
public function addHostPageDomSelectorData(int $hostPageDomSelectorId, string $value) {
$this->_debug->query->select->total++;
$this->_debug->query->insert->total++;
$query = $this->_db->prepare('SELECT * FROM `hostPageDom` WHERE `hostPageId` = ? AND `selector` = ? ORDER BY `timeAdded` DESC LIMIT 1');
$query = $this->_db->prepare('INSERT INTO `hostPageDomSelectorData` SET `hostPageDomSelectorId` = ?, `value` = ?');
$query->execute([$hostPageId, $selector]);
$query->execute([$hostPageDomSelectorId, $value]);
return $query->fetch();
}
public function truncateHostPageDom() {
$query = $this->_db->query('TRUNCATE `hostPageDom`');
return $this->_db->lastInsertId();
}
// Cleaner tools
@ -806,6 +803,8 @@ class MySQL {
$this->_db->query('OPTIMIZE TABLE `hostPage`');
$this->_db->query('OPTIMIZE TABLE `hostPageDescription`');
$this->_db->query('OPTIMIZE TABLE `hostPageDom`');
$this->_db->query('OPTIMIZE TABLE `hostPageDomSelector`');
$this->_db->query('OPTIMIZE TABLE `hostPageDomSelectorData`');
$this->_db->query('OPTIMIZE TABLE `hostPageSnap`');
$this->_db->query('OPTIMIZE TABLE `hostPageSnapStorage`');
$this->_db->query('OPTIMIZE TABLE `hostPageSnapDownload`');