mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-02-03 10:25:52 +00:00
create manifests registry
This commit is contained in:
parent
219a56d6cd
commit
6d8f4f4882
@ -165,6 +165,24 @@ define('CRAWL_ROBOTS_DEFAULT_RULES', null); // string|null
|
|||||||
*/
|
*/
|
||||||
define('CRAWL_ROBOTS_POSTFIX_RULES', null); // string|null
|
define('CRAWL_ROBOTS_POSTFIX_RULES', null); // string|null
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Look for third-party manifests to collect a distributed index
|
||||||
|
*
|
||||||
|
* API address provided in yggo meta tag
|
||||||
|
* will be stored in the `manifest` DB table
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
define('CRAWL_MANIFEST', true); // bool
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set the default auto-crawl status for newly added manifests
|
||||||
|
*
|
||||||
|
* true - the crawler starts indexing the manifest automatically
|
||||||
|
* false - requires manual validation by the moderator in the DB `manifest`.`status` field
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
define('CRAWL_MANIFEST_DEFAULT_STATUS', true); // bool
|
||||||
|
|
||||||
// Cleaner settings
|
// Cleaner settings
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -100,9 +100,21 @@ foreach ($db->getCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET
|
|||||||
Filter::pageTitle($title->item(0)->nodeValue),
|
Filter::pageTitle($title->item(0)->nodeValue),
|
||||||
Filter::pageDescription($metaDescription),
|
Filter::pageDescription($metaDescription),
|
||||||
Filter::pageKeywords($metaKeywords),
|
Filter::pageKeywords($metaKeywords),
|
||||||
Filter::url($metaYggo),
|
|
||||||
CRAWL_HOST_DEFAULT_META_ONLY ? null : Filter::pageData($content));
|
CRAWL_HOST_DEFAULT_META_ONLY ? null : Filter::pageData($content));
|
||||||
|
|
||||||
|
// Update manifest registry: remember the API address found in $metaYggo
// when manifest collection is enabled and the value is an acceptable URL
if (CRAWL_MANIFEST && !empty($metaYggo) && filter_var($metaYggo, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $metaYggo)) {

  // The registry is keyed by the CRC32 checksum of the manifest URL
  $metaYggoCRC32url = crc32($metaYggo);

  // Register each manifest only once
  if (!$db->getManifest($metaYggoCRC32url)) {

    // A plain (string) cast turns `false` into '' instead of '0', which would
    // store an empty status when auto-crawl is disabled; map the flag explicitly.
    // NOTE(review): assumes `manifest`.`status` holds '0'/'1' - confirm against the schema
    $db->addManifest($metaYggoCRC32url,
                     $metaYggo,
                     CRAWL_MANIFEST_DEFAULT_STATUS ? '1' : '0',
                     time());
  }
}
|
||||||
|
|
||||||
// Append page with meta robots:noindex value to the robotsPostfix disallow list
|
// Append page with meta robots:noindex value to the robotsPostfix disallow list
|
||||||
if (false !== stripos($metaRobots, 'noindex')) {
|
if (false !== stripos($metaRobots, 'noindex')) {
|
||||||
|
|
||||||
|
Binary file not shown.
@ -28,6 +28,25 @@ class MySQL {
|
|||||||
$this->_db->rollBack();
|
$this->_db->rollBack();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Manifest
|
||||||
|
/**
 * Look up a single `manifest` row by its `crc32url` value.
 *
 * @param int $crc32url value matched against the `crc32url` column
 *
 * @return mixed the result of PDOStatement::fetch(): the matching row in the
 *               connection's fetch mode, or false when nothing matched
 */
public function getManifest(int $crc32url) {

  $sql = 'SELECT * FROM `manifest` WHERE `crc32url` = ? LIMIT 1';

  $statement = $this->_db->prepare($sql);
  $statement->execute([$crc32url]);

  return $statement->fetch();
}
|
||||||
|
|
||||||
|
/**
 * Insert a new row into the `manifest` table.
 *
 * @param int    $crc32url    value stored in the `crc32url` column
 * @param string $url         value stored in the `url` column
 * @param string $status      value stored in the `status` column
 * @param int    $timeAdded   value stored in the `timeAdded` column
 * @param mixed  $timeUpdated value stored in the `timeUpdated` column, null by default
 *
 * @return mixed the result of lastInsertId() on the database handle
 */
public function addManifest(int $crc32url, string $url, string $status, int $timeAdded, mixed $timeUpdated = null) {

  // Five columns take five placeholders and five bound values; a sixth `?`
  // makes the statement fail with a parameter count mismatch on execute()
  $query = $this->_db->prepare('INSERT INTO `manifest` (`crc32url`, `url`, `status`, `timeAdded`, `timeUpdated`) VALUES (?, ?, ?, ?, ?)');

  $query->execute([$crc32url, $url, $status, $timeAdded, $timeUpdated]);

  return $this->_db->lastInsertId();
}
|
||||||
|
|
||||||
// Host
|
// Host
|
||||||
public function getAPIHosts(string $apiHostFields) {
|
public function getAPIHosts(string $apiHostFields) {
|
||||||
|
|
||||||
@ -184,16 +203,14 @@ class MySQL {
|
|||||||
mixed $metaTitle,
|
mixed $metaTitle,
|
||||||
mixed $metaDescription,
|
mixed $metaDescription,
|
||||||
mixed $metaKeywords,
|
mixed $metaKeywords,
|
||||||
mixed $metaYggo,
|
|
||||||
mixed $data) {
|
mixed $data) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('UPDATE `hostPage` SET `metaTitle` = ?,
|
$query = $this->_db->prepare('UPDATE `hostPage` SET `metaTitle` = ?,
|
||||||
`metaDescription` = ?,
|
`metaDescription` = ?,
|
||||||
`metaKeywords` = ?,
|
`metaKeywords` = ?,
|
||||||
`metaYggo` = ?,
|
|
||||||
`data` = ? WHERE `hostPageId` = ? LIMIT 1');
|
`data` = ? WHERE `hostPageId` = ? LIMIT 1');
|
||||||
|
|
||||||
$query->execute([$metaTitle, $metaDescription, $metaKeywords, $metaYggo, $data, $hostPageId]);
|
$query->execute([$metaTitle, $metaDescription, $metaKeywords, $data, $hostPageId]);
|
||||||
|
|
||||||
return $query->rowCount();
|
return $query->rowCount();
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user