diff --git a/database/yggo.mwb b/database/yggo.mwb index 0c177e9..0358a62 100644 Binary files a/database/yggo.mwb and b/database/yggo.mwb differ diff --git a/media/db-prototype.png b/media/db-prototype.png index 92bf9d1..3210b03 100644 Binary files a/media/db-prototype.png and b/media/db-prototype.png differ diff --git a/src/crontab/crawler.php b/src/crontab/crawler.php index 395fbf5..d7ec768 100644 --- a/src/crontab/crawler.php +++ b/src/crontab/crawler.php @@ -997,55 +997,63 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_HOST_PAGE_QUEUE_LIMIT, time() - CRAWL_ // Process selectors configuration if ($hostPageDomSelectors = Helper::getHostSettingValue($db, $memory, $queueHostPage->hostId, 'PAGES_DOM_SELECTORS', json_decode(DEFAULT_HOST_PAGES_DOM_SELECTORS))) { + $hostPageDomId = $db->addHostPageDom( + $queueHostPage->hostPageId, + time() + ); + foreach ($hostPageDomSelectors as $selector => $settings) { - // Extract target selector data - foreach ($crawler->filter($selector) as $data) { + $hostPageDomSelectorId = $db->addHostPageDomSelector( + $hostPageDomId, + $selector + ); - foreach ($data->childNodes as $node) { + // Extract selectors data + foreach ($crawler->filter($selector)->each(function($node) { - $value = trim($node->ownerDocument->saveHtml()); + return $node->html(); - // Apply selector settings - foreach ($settings as $key => $setting) { + }) as $value) { - switch ($key) { + foreach ($settings as $name => $setting) { - case 'strip_tags': + // Apply value settings + switch ($name) { - if (!isset($setting->enabled)) { + case 'strip_tags': - continue 2; - } + if (!isset($setting->enabled)) { - if (false === $setting->enabled) { + break; + } - continue 2; - } + if (false === $setting->enabled) { - if (!isset($setting->allowed_tags)) { + break; + } - continue 2; - } + if (!isset($setting->allowed_tags)) { - $value = strip_tags($value, $setting->allowed_tags); + break; + } - break; - } + $value = strip_tags($value, $setting->allowed_tags); + + break; } - // Skip empty selector values save + $value = trim($value); + if (empty($value)) { continue; } - // Save selector value - $db->addHostPageDom( - $queueHostPage->hostPageId, - $selector, - $value, - time() + // Save selector data + $db->addHostPageDomSelectorData( + $hostPageDomSelectorId, + $value ); } } diff --git a/src/library/mysql.php b/src/library/mysql.php index e2d26ea..998b995 100644 --- a/src/library/mysql.php +++ b/src/library/mysql.php @@ -620,40 +620,37 @@ class MySQL { return $query->rowCount(); } - public function addHostPageDom(int $hostPageId, string $selector, string $value, int $timeAdded) { + public function addHostPageDom(int $hostPageId, int $timeAdded) { $this->_debug->query->insert->total++; - $query = $this->_db->prepare('INSERT INTO `hostPageDom` SET `hostPageId` = ?, `timeAdded` = ?, `selector` = ?, `value` = ?'); + $query = $this->_db->prepare('INSERT INTO `hostPageDom` SET `hostPageId` = ?, `timeAdded` = ?'); - $query->execute([$hostPageId, $timeAdded, $selector, $value]); + $query->execute([$hostPageId, $timeAdded]); + + return $this->_db->lastInsertId(); } - public function deleteHostPageDomBySelector(int $hostPageId, string $selector) { + public function addHostPageDomSelector(int $hostPageDomId, string $name) { - $this->_debug->query->delete->total++; + $this->_debug->query->insert->total++; - $query = $this->_db->prepare('DELETE FROM `hostPageDom` WHERE `hostPageId` = ? AND `selector` = ?'); + $query = $this->_db->prepare('INSERT INTO `hostPageDomSelector` SET `hostPageDomId` = ?, `name` = ?'); - $query->execute([$hostPageId, $selector]); + $query->execute([$hostPageDomId, $name]); - return $query->rowCount(); + return $this->_db->lastInsertId(); } - public function findLastHostPageDomBySelector(int $hostPageId, string $selector) { + public function addHostPageDomSelectorData(int $hostPageDomSelectorId, string $value) { - $this->_debug->query->select->total++; + $this->_debug->query->insert->total++; - $query = $this->_db->prepare('SELECT * FROM `hostPageDom` WHERE `hostPageId` = ? AND `selector` = ? ORDER BY `timeAdded` DESC LIMIT 1'); + $query = $this->_db->prepare('INSERT INTO `hostPageDomSelectorData` SET `hostPageDomSelectorId` = ?, `value` = ?'); - $query->execute([$hostPageId, $selector]); + $query->execute([$hostPageDomSelectorId, $value]); - return $query->fetch(); - } - - public function truncateHostPageDom() { - - $query = $this->_db->query('TRUNCATE `hostPageDom`'); + return $this->_db->lastInsertId(); } // Cleaner tools @@ -806,6 +803,8 @@ class MySQL { $this->_db->query('OPTIMIZE TABLE `hostPage`'); $this->_db->query('OPTIMIZE TABLE `hostPageDescription`'); $this->_db->query('OPTIMIZE TABLE `hostPageDom`'); + $this->_db->query('OPTIMIZE TABLE `hostPageDomSelector`'); + $this->_db->query('OPTIMIZE TABLE `hostPageDomSelectorData`'); $this->_db->query('OPTIMIZE TABLE `hostPageSnap`'); $this->_db->query('OPTIMIZE TABLE `hostPageSnapStorage`'); $this->_db->query('OPTIMIZE TABLE `hostPageSnapDownload`');