mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-01-11 15:27:56 +00:00
refactor hostPageDom tables, add multiple selectors and children values support
This commit is contained in:
parent
42b34d0783
commit
eccb7ea241
Binary file not shown.
Binary file not shown.
Before (image error) Size: 138 KiB After (image error) Size: 168 KiB |
@ -997,55 +997,63 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_HOST_PAGE_QUEUE_LIMIT, time() - CRAWL_
|
||||
// Process selectors configuration
|
||||
if ($hostPageDomSelectors = Helper::getHostSettingValue($db, $memory, $queueHostPage->hostId, 'PAGES_DOM_SELECTORS', json_decode(DEFAULT_HOST_PAGES_DOM_SELECTORS))) {
|
||||
|
||||
$hostPageDomId = $db->addHostPageDom(
|
||||
$queueHostPage->hostPageId,
|
||||
time()
|
||||
);
|
||||
|
||||
foreach ($hostPageDomSelectors as $selector => $settings) {
|
||||
|
||||
// Extract target selector data
|
||||
foreach ($crawler->filter($selector) as $data) {
|
||||
$hostPageDomSelectorId = $db->addHostPageDomSelector(
|
||||
$hostPageDomId,
|
||||
$selector
|
||||
);
|
||||
|
||||
foreach ($data->childNodes as $node) {
|
||||
// Extract selectors data
|
||||
foreach ($crawler->filter($selector)->each(function($node) {
|
||||
|
||||
$value = trim($node->ownerDocument->saveHtml());
|
||||
return $node->html();
|
||||
|
||||
// Apply selector settings
|
||||
foreach ($settings as $key => $setting) {
|
||||
}) as $value) {
|
||||
|
||||
switch ($key) {
|
||||
foreach ($settings as $name => $setting) {
|
||||
|
||||
case 'strip_tags':
|
||||
// Apply value settings
|
||||
switch ($name) {
|
||||
|
||||
if (!isset($setting->enabled)) {
|
||||
case 'strip_tags':
|
||||
|
||||
continue 2;
|
||||
}
|
||||
if (!isset($setting->enabled)) {
|
||||
|
||||
if (false === $setting->enabled) {
|
||||
break;
|
||||
}
|
||||
|
||||
continue 2;
|
||||
}
|
||||
if (false === $setting->enabled) {
|
||||
|
||||
if (!isset($setting->allowed_tags)) {
|
||||
break;
|
||||
}
|
||||
|
||||
continue 2;
|
||||
}
|
||||
if (!isset($setting->allowed_tags)) {
|
||||
|
||||
$value = strip_tags($value, $setting->allowed_tags);
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
$value = strip_tags($value, $setting->allowed_tags);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
// Skip empty selector values save
|
||||
$value = trim($value);
|
||||
|
||||
if (empty($value)) {
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// Save selector value
|
||||
$db->addHostPageDom(
|
||||
$queueHostPage->hostPageId,
|
||||
$selector,
|
||||
$value,
|
||||
time()
|
||||
// Save selector data
|
||||
$db->addHostPageDomSelectorData(
|
||||
$hostPageDomSelectorId,
|
||||
$value
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -620,40 +620,37 @@ class MySQL {
|
||||
return $query->rowCount();
|
||||
}
|
||||
|
||||
public function addHostPageDom(int $hostPageId, string $selector, string $value, int $timeAdded) {
|
||||
public function addHostPageDom(int $hostPageId, int $timeAdded) {
|
||||
|
||||
$this->_debug->query->insert->total++;
|
||||
|
||||
$query = $this->_db->prepare('INSERT INTO `hostPageDom` SET `hostPageId` = ?, `timeAdded` = ?, `selector` = ?, `value` = ?');
|
||||
$query = $this->_db->prepare('INSERT INTO `hostPageDom` SET `hostPageId` = ?, `timeAdded` = ?');
|
||||
|
||||
$query->execute([$hostPageId, $timeAdded, $selector, $value]);
|
||||
$query->execute([$hostPageId, $timeAdded]);
|
||||
|
||||
return $this->_db->lastInsertId();
|
||||
}
|
||||
|
||||
public function deleteHostPageDomBySelector(int $hostPageId, string $selector) {
|
||||
public function addHostPageDomSelector(int $hostPageDomId, string $name) {
|
||||
|
||||
$this->_debug->query->delete->total++;
|
||||
$this->_debug->query->insert->total++;
|
||||
|
||||
$query = $this->_db->prepare('DELETE FROM `hostPageDom` WHERE `hostPageId` = ? AND `selector` = ?');
|
||||
$query = $this->_db->prepare('INSERT INTO `hostPageDomSelector` SET `hostPageDomId` = ?, `name` = ?');
|
||||
|
||||
$query->execute([$hostPageId, $selector]);
|
||||
$query->execute([$hostPageDomId, $name]);
|
||||
|
||||
return $query->rowCount();
|
||||
return $this->_db->lastInsertId();
|
||||
}
|
||||
|
||||
public function findLastHostPageDomBySelector(int $hostPageId, string $selector) {
|
||||
public function addHostPageDomSelectorData(int $hostPageDomSelectorId, string $value) {
|
||||
|
||||
$this->_debug->query->select->total++;
|
||||
$this->_debug->query->insert->total++;
|
||||
|
||||
$query = $this->_db->prepare('SELECT * FROM `hostPageDom` WHERE `hostPageId` = ? AND `selector` = ? ORDER BY `timeAdded` DESC LIMIT 1');
|
||||
$query = $this->_db->prepare('INSERT INTO `hostPageDomSelectorData` SET `hostPageDomSelectorId` = ?, `value` = ?');
|
||||
|
||||
$query->execute([$hostPageId, $selector]);
|
||||
$query->execute([$hostPageDomSelectorId, $value]);
|
||||
|
||||
return $query->fetch();
|
||||
}
|
||||
|
||||
public function truncateHostPageDom() {
|
||||
|
||||
$query = $this->_db->query('TRUNCATE `hostPageDom`');
|
||||
return $this->_db->lastInsertId();
|
||||
}
|
||||
|
||||
// Cleaner tools
|
||||
@ -806,6 +803,8 @@ class MySQL {
|
||||
$this->_db->query('OPTIMIZE TABLE `hostPage`');
|
||||
$this->_db->query('OPTIMIZE TABLE `hostPageDescription`');
|
||||
$this->_db->query('OPTIMIZE TABLE `hostPageDom`');
|
||||
$this->_db->query('OPTIMIZE TABLE `hostPageDomSelector`');
|
||||
$this->_db->query('OPTIMIZE TABLE `hostPageDomSelectorData`');
|
||||
$this->_db->query('OPTIMIZE TABLE `hostPageSnap`');
|
||||
$this->_db->query('OPTIMIZE TABLE `hostPageSnapStorage`');
|
||||
$this->_db->query('OPTIMIZE TABLE `hostPageSnapDownload`');
|
||||
|
Loading…
Reference in New Issue
Block a user