mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-02-03 10:25:52 +00:00
create manifests registry
This commit is contained in:
parent
219a56d6cd
commit
6d8f4f4882
@ -165,6 +165,24 @@ define('CRAWL_ROBOTS_DEFAULT_RULES', null); // string|null
|
||||
*/
|
||||
define('CRAWL_ROBOTS_POSTFIX_RULES', null); // string|null
|
||||
|
||||
/*
|
||||
* Look for third-party manifests to build a distributed index
|
||||
*
|
||||
* API address provided in yggo meta tag
|
||||
* will be stored in the `manifest` DB table
|
||||
*
|
||||
*/
|
||||
define('CRAWL_MANIFEST', true);
|
||||
|
||||
/*
|
||||
* Set the default auto-crawl status for newly added manifests
|
||||
*
|
||||
* true - the crawler starts indexing the manifest automatically
|
||||
* false - requires manual validation by a moderator via the `manifest`.`status` DB field
|
||||
*
|
||||
*/
|
||||
define('CRAWL_MANIFEST_DEFAULT_STATUS', true);
|
||||
|
||||
// Cleaner settings
|
||||
|
||||
/*
|
||||
|
@ -100,9 +100,21 @@ foreach ($db->getCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET
|
||||
Filter::pageTitle($title->item(0)->nodeValue),
|
||||
Filter::pageDescription($metaDescription),
|
||||
Filter::pageKeywords($metaKeywords),
|
||||
Filter::url($metaYggo),
|
||||
CRAWL_HOST_DEFAULT_META_ONLY ? null : Filter::pageData($content));
|
||||
|
||||
// Update manifest registry
//
// When manifest collection is enabled and the page exposes a valid yggo meta URL
// that matches the crawler URL pattern, register that URL in the manifest registry.
if (CRAWL_MANIFEST && !empty($metaYggo) && filter_var($metaYggo, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $metaYggo)) {

  $metaYggoCRC32url = crc32($metaYggo);

  // Register the manifest only when no record exists for this URL checksum
  if (!$db->getManifest($metaYggoCRC32url)) {

    $db->addManifest($metaYggoCRC32url,
                     $metaYggo,
                     // (string) false evaluates to '' in PHP, so map the flag explicitly to '1' / '0'
                     CRAWL_MANIFEST_DEFAULT_STATUS ? '1' : '0',
                     time());
  }
}
|
||||
|
||||
// Append page with meta robots:noindex value to the robotsPostfix disallow list
|
||||
if (false !== stripos($metaRobots, 'noindex')) {
|
||||
|
||||
|
Binary file not shown.
@ -28,6 +28,25 @@ class MySQL {
|
||||
$this->_db->rollBack();
|
||||
}
|
||||
|
||||
// Manifest
|
||||
/*
 * Look up a single manifest record by the CRC32 checksum of its URL
 *
 * Returns the result of fetch() on the executed statement for the first
 * matching row (presumably false when nothing matches - confirm against the driver in use)
 */
public function getManifest(int $crc32url) {

  $sql = 'SELECT * FROM `manifest` WHERE `crc32url` = ? LIMIT 1';

  $statement = $this->_db->prepare($sql);
  $statement->execute([$crc32url]);

  $manifest = $statement->fetch();

  return $manifest;
}
|
||||
|
||||
/*
 * Insert a new record into the `manifest` table
 *
 * $crc32url    - CRC32 checksum of the manifest URL
 * $url         - manifest URL
 * $status      - value for the `status` column
 * $timeAdded   - value for the `timeAdded` column
 * $timeUpdated - value for the `timeUpdated` column, null by default
 *
 * Returns the last insert id reported by the DB connection
 */
public function addManifest(int $crc32url, string $url, string $status, int $timeAdded, mixed $timeUpdated = null) {

  // The number of placeholders must match both the column list and the bound values (five each)
  $query = $this->_db->prepare('INSERT INTO `manifest` (`crc32url`, `url`, `status`, `timeAdded`, `timeUpdated`) VALUES (?, ?, ?, ?, ?)');

  $query->execute([$crc32url, $url, $status, $timeAdded, $timeUpdated]);

  return $this->_db->lastInsertId();
}
|
||||
|
||||
// Host
|
||||
public function getAPIHosts(string $apiHostFields) {
|
||||
|
||||
@ -184,16 +203,14 @@ class MySQL {
|
||||
mixed $metaTitle,
|
||||
mixed $metaDescription,
|
||||
mixed $metaKeywords,
|
||||
mixed $metaYggo,
|
||||
mixed $data) {
|
||||
|
||||
$query = $this->_db->prepare('UPDATE `hostPage` SET `metaTitle` = ?,
|
||||
`metaDescription` = ?,
|
||||
`metaKeywords` = ?,
|
||||
`metaYggo` = ?,
|
||||
`data` = ? WHERE `hostPageId` = ? LIMIT 1');
|
||||
|
||||
$query->execute([$metaTitle, $metaDescription, $metaKeywords, $metaYggo, $data, $hostPageId]);
|
||||
$query->execute([$metaTitle, $metaDescription, $metaKeywords, $data, $hostPageId]);
|
||||
|
||||
return $query->rowCount();
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user