diff --git a/crontab/crawler.php b/crontab/crawler.php index 0258be8..4aa13ba 100644 --- a/crontab/crawler.php +++ b/crontab/crawler.php @@ -71,9 +71,10 @@ foreach ($db->getCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET } // Get optional page meta data - $metaDescription = ''; - $metaKeywords = ''; - $metaRobots = ''; + $metaDescription = null; + $metaKeywords = null; + $metaRobots = null; + $metaYggo = null; foreach (@$dom->getElementsByTagName('meta') as $meta) { @@ -88,6 +89,10 @@ foreach ($db->getCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET if (@$meta->getAttribute('name') == 'robots') { $metaRobots = @$meta->getAttribute('content'); } + + if (@$meta->getAttribute('name') == 'yggo') { + $metaYggo = @$meta->getAttribute('content'); + } } // Update queued page data @@ -95,6 +100,7 @@ foreach ($db->getCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET Filter::pageTitle($title->item(0)->nodeValue), Filter::pageDescription($metaDescription), Filter::pageKeywords($metaKeywords), + Filter::url($metaYggo), CRAWL_HOST_DEFAULT_META_ONLY ? null : Filter::pageData($content)); // Append page with meta robots:noindex value to the robotsPostfix disallow list diff --git a/database/yggo.mwb b/database/yggo.mwb index b63a3fb..ca5f115 100644 Binary files a/database/yggo.mwb and b/database/yggo.mwb differ diff --git a/library/mysql.php b/library/mysql.php index 9383d81..08b5daf 100644 --- a/library/mysql.php +++ b/library/mysql.php @@ -193,14 +193,16 @@ class MySQL { mixed $metaTitle, mixed $metaDescription, mixed $metaKeywords, + mixed $metaYggo, mixed $data) { $query = $this->_db->prepare('UPDATE `hostPage` SET `metaTitle` = ?, `metaDescription` = ?, `metaKeywords` = ?, + `metaYggo` = ?, `data` = ? WHERE `hostPageId` = ? LIMIT 1'); - $query->execute([$metaTitle, $metaDescription, $metaKeywords, $data, $hostPageId]); + $query->execute([$metaTitle, $metaDescription, $metaKeywords, $metaYggo, $data, $hostPageId]); return $query->rowCount(); }