mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-08-31 08:12:19 +00:00
Replace memcached with the Yggverse\Cache\Memory API
This commit is contained in:
parent
30520f6047
commit
a27cb61f69
@ -74,11 +74,10 @@ try {
|
||||
exit;
|
||||
}
|
||||
|
||||
// Connect memcached
|
||||
// Connect Yggverse\Cache\Memory
|
||||
try {
|
||||
|
||||
$memcached = new Memcached();
|
||||
$memcached->addServer(MEMCACHED_HOST, MEMCACHED_PORT);
|
||||
$memory = new Yggverse\Cache\Memory(MEMCACHED_HOST, MEMCACHED_PORT, MEMCACHED_NAMESPACE, MEMCACHED_TIMEOUT + time());
|
||||
|
||||
} catch(Exception $e) {
|
||||
|
||||
@ -100,21 +99,21 @@ if (CRAWL_YGGSTATE) {
|
||||
|
||||
try {
|
||||
|
||||
if (!$memcached->get(sprintf('Crontab.crawler.YGGstate.%s.%s.timeout', $server, $i))) {
|
||||
if (!$memory->get(sprintf('Crontab.crawler.YGGstate.%s.%s.timeout', $server, $i))) {
|
||||
|
||||
$yggStateDB = new YGGstate($node->host, $node->port, $node->database, $node->username, $node->password);
|
||||
|
||||
foreach ($yggStatePeers = $yggStateDB->getPeersByMinLastUptime($node->peer_min_last_uptime) as $yggStatePeer) {
|
||||
|
||||
// Register new host
|
||||
if ($linkToDBresult = Helper::addLinkToDB($db, $memcached, sprintf('http://[%s]/', $yggStatePeer->address))) {
|
||||
if ($linkToDBresult = Helper::addLinkToDB($db, $memory, sprintf('http://[%s]/', $yggStatePeer->address))) {
|
||||
|
||||
$hostsAdded += count($linkToDBresult->new->hostId);
|
||||
$hostPagesAdded += count($linkToDBresult->new->hostPageId);
|
||||
}
|
||||
}
|
||||
|
||||
$memcached->set(sprintf('Crontab.crawler.YGGstate.%s.%s.timeout', $server, $i), true, time() + $node->timeout);
|
||||
$memory->set(sprintf('Crontab.crawler.YGGstate.%s.%s.timeout', $server, $i), true, time() + $node->timeout);
|
||||
}
|
||||
|
||||
} catch(Exception $e) {
|
||||
@ -154,7 +153,7 @@ foreach ($db->getHostCrawlQueue(CRAWL_HOST_LIMIT, time() - CRAWL_HOST_SECONDS_OF
|
||||
// Update robots.txt rules
|
||||
if (200 == $curl->getCode() && false !== stripos(trim(mb_strtolower((string) $curl->getContentType())), 'text/plain')) {
|
||||
|
||||
Helper::setHostSetting($db, $memcached, $queueHost->hostId, 'ROBOTS_TXT', (string) $curl->getContent());
|
||||
Helper::setHostSetting($db, $queueHost->hostId, 'ROBOTS_TXT', (string) $curl->getContent());
|
||||
}
|
||||
}
|
||||
|
||||
@ -163,8 +162,8 @@ foreach ($db->getHostCrawlQueue(CRAWL_HOST_LIMIT, time() - CRAWL_HOST_SECONDS_OF
|
||||
|
||||
// Look for custom sitemap URL served in robots.txt
|
||||
$robots = new Robots(
|
||||
Helper::getHostSetting($db, $memcached, $queueHost->hostId, 'ROBOTS_TXT', NULL) . PHP_EOL .
|
||||
Helper::getHostSetting($db, $memcached, $queueHost->hostId, 'ROBOTS_TXT_POSTFIX', DEFAULT_HOST_ROBOTS_TXT_POSTFIX)
|
||||
Helper::getHostSettingValue($db, $memory, $queueHost->hostId, 'ROBOTS_TXT', NULL) . PHP_EOL .
|
||||
Helper::getHostSettingValue($db, $memory, $queueHost->hostId, 'ROBOTS_TXT_POSTFIX', DEFAULT_HOST_ROBOTS_TXT_POSTFIX)
|
||||
);
|
||||
|
||||
if ($sitemapLink = $robots->getSitemap()) {
|
||||
@ -204,7 +203,7 @@ foreach ($db->getHostCrawlQueue(CRAWL_HOST_LIMIT, time() - CRAWL_HOST_SECONDS_OF
|
||||
}
|
||||
|
||||
// Register new link
|
||||
if ($linkToDBresult = Helper::addLinkToDB($db, $memcached, $loc)) {
|
||||
if ($linkToDBresult = Helper::addLinkToDB($db, $memory, $loc)) {
|
||||
|
||||
$hostsAdded += count($linkToDBresult->new->hostId);
|
||||
$hostPagesAdded += count($linkToDBresult->new->hostPageId);
|
||||
@ -217,7 +216,7 @@ foreach ($db->getHostCrawlQueue(CRAWL_HOST_LIMIT, time() - CRAWL_HOST_SECONDS_OF
|
||||
if (CRAWL_MANIFEST) {
|
||||
|
||||
// Host have manifest provided
|
||||
if ($manifestURL = Helper::getHostSetting($db, $memcached, $queueHost->hostId, 'MANIFEST_URL', NULL)) {
|
||||
if ($manifestURL = Helper::getHostSettingValue($db, $memory, $queueHost->hostId, 'MANIFEST_URL', NULL)) {
|
||||
|
||||
// Get remote manifest
|
||||
$curl = new Curl($manifestURL, CRAWL_CURLOPT_USERAGENT);
|
||||
@ -269,13 +268,13 @@ foreach ($db->getHostCrawlQueue(CRAWL_HOST_LIMIT, time() - CRAWL_HOST_SECONDS_OF
|
||||
|
||||
// Skip processing on remote host URL does not match local condition
|
||||
if ($remoteManifest->result->config->DEFAULT_HOST_URL_REGEXP !=
|
||||
Helper::getHostSetting($db, $memcached, $queueHost->hostId, 'URL_REGEXP', DEFAULT_HOST_URL_REGEXP)) {
|
||||
Helper::getHostSettingValue($db, $memory, $queueHost->hostId, 'URL_REGEXP', DEFAULT_HOST_URL_REGEXP)) {
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip processing on remote host link does not match local condition
|
||||
if (false === preg_match(Helper::getHostSetting($db, $memcached, $queueHost->hostId, 'URL_REGEXP', DEFAULT_HOST_URL_REGEXP),
|
||||
if (false === preg_match(Helper::getHostSettingValue($db, $memory, $queueHost->hostId, 'URL_REGEXP', DEFAULT_HOST_URL_REGEXP),
|
||||
$remoteManifest->result->api->hosts)) {
|
||||
|
||||
continue;
|
||||
@ -324,7 +323,7 @@ foreach ($db->getHostCrawlQueue(CRAWL_HOST_LIMIT, time() - CRAWL_HOST_SECONDS_OF
|
||||
}
|
||||
|
||||
// Register new link
|
||||
if ($linkToDBresult = Helper::addLinkToDB($db, $memcached, $remoteManifestHost->url)) {
|
||||
if ($linkToDBresult = Helper::addLinkToDB($db, $memory, $remoteManifestHost->url)) {
|
||||
|
||||
$hostsAdded += count($linkToDBresult->new->hostId);
|
||||
$hostPagesAdded += count($linkToDBresult->new->hostPageId);
|
||||
@ -431,7 +430,7 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_HOST_PAGE_QUEUE_LIMIT, time() - CRAWL_
|
||||
}
|
||||
|
||||
// Register new link
|
||||
if ($linkToDBresult = Helper::addLinkToDB($db, $memcached, $url)) {
|
||||
if ($linkToDBresult = Helper::addLinkToDB($db, $memory, $url)) {
|
||||
|
||||
$hostsAdded += count($linkToDBresult->new->hostId);
|
||||
$hostPagesAdded += count($linkToDBresult->new->hostPageId);
|
||||
@ -480,7 +479,7 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_HOST_PAGE_QUEUE_LIMIT, time() - CRAWL_
|
||||
// Check for MIME
|
||||
$hostPageInMime = false;
|
||||
|
||||
foreach ((array) explode(',', Helper::getHostSetting($db, $memcached, $queueHostPage->hostId, 'PAGES_MIME', DEFAULT_HOST_PAGES_MIME)) as $mime) {
|
||||
foreach ((array) explode(',', Helper::getHostSettingValue($db, $memory, $queueHostPage->hostId, 'PAGES_MIME', DEFAULT_HOST_PAGES_MIME)) as $mime) {
|
||||
|
||||
// Ban page on MIME type not allowed in settings
|
||||
if (false !== stripos(Filter::mime($contentType), Filter::mime($mime))) {
|
||||
@ -735,11 +734,11 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_HOST_PAGE_QUEUE_LIMIT, time() - CRAWL_
|
||||
$metaTitle,
|
||||
$metaDescription ? Filter::pageDescription($metaDescription) : null,
|
||||
$metaKeywords ? Filter::pageKeywords($metaKeywords) : null,
|
||||
$content ? (Helper::getHostSetting($db, $memcached, $queueHostPage->hostId, 'PAGES_DATA', DEFAULT_HOST_PAGES_DATA) ? base64_encode($content) : null) : null,
|
||||
$content ? (Helper::getHostSettingValue($db, $memory, $queueHostPage->hostId, 'PAGES_DATA', DEFAULT_HOST_PAGES_DATA) ? base64_encode($content) : null) : null,
|
||||
time());
|
||||
|
||||
// Collect page DOM elements data on enabled
|
||||
if ($hostPageDomSelectors = Helper::getHostSetting($db, $memcached, $queueHostPage->hostId, 'PAGES_DOM_SELECTORS', DEFAULT_HOST_PAGES_DOM_SELECTORS)) {
|
||||
if ($hostPageDomSelectors = Helper::getHostSettingValue($db, $memory, $queueHostPage->hostId, 'PAGES_DOM_SELECTORS', DEFAULT_HOST_PAGES_DOM_SELECTORS)) {
|
||||
|
||||
// Begin selectors extraction
|
||||
$html = str_get_html($content);
|
||||
@ -753,7 +752,7 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_HOST_PAGE_QUEUE_LIMIT, time() - CRAWL_
|
||||
$db->addHostPageDom($queueHostPage->hostPageId,
|
||||
time(),
|
||||
$selector,
|
||||
trim(Helper::getHostSetting($db, $memcached, $queueHostPage->hostId, 'PAGE_DOM_STRIP_TAGS', DEFAULT_HOST_PAGE_DOM_STRIP_TAGS) ? strip_tags( preg_replace('/[\s]+/',
|
||||
trim(Helper::getHostSettingValue($db, $memory, $queueHostPage->hostId, 'PAGE_DOM_STRIP_TAGS', DEFAULT_HOST_PAGE_DOM_STRIP_TAGS) ? strip_tags( preg_replace('/[\s]+/',
|
||||
' ',
|
||||
str_replace(['<br />', '<br/>', '<br>', '</'],
|
||||
[' ', ' ', ' ', ' </'],
|
||||
@ -1028,7 +1027,7 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_HOST_PAGE_QUEUE_LIMIT, time() - CRAWL_
|
||||
}
|
||||
|
||||
// Register new link
|
||||
if ($linkToDBresult = Helper::addLinkToDB($db, $memcached, $link['href'])) {
|
||||
if ($linkToDBresult = Helper::addLinkToDB($db, $memory, $link['href'])) {
|
||||
|
||||
// Increase new hosts counters
|
||||
if ($linkToDBresult->new->hostId) {
|
||||
|
@ -5,48 +5,42 @@ require_once __DIR__ . '/../../vendor/autoload.php';
|
||||
|
||||
class Helper {
|
||||
|
||||
public static function getHostSetting(MySQL $db,
|
||||
Memcached $memcached,
|
||||
public static function getHostSettingValue(MySQL $db,
|
||||
Yggverse\Cache\Memory $memory,
|
||||
int $hostId,
|
||||
string $key,
|
||||
mixed $defaultValue) : mixed {
|
||||
|
||||
if ($value = $memcached->get(sprintf('Helper.getHostSetting.%s.%s', $hostId, $key))) {
|
||||
if (false !== $value = $memory->getByMethodCallback(
|
||||
$db, 'findHostSettingValue', [$hostId, $key], time() + 3600
|
||||
)) {
|
||||
|
||||
return $value;
|
||||
|
||||
} else {
|
||||
|
||||
return $defaultValue;
|
||||
}
|
||||
|
||||
if (!$value = $db->findHostSettingValue($hostId, $key)) {
|
||||
|
||||
$value = $defaultValue;
|
||||
}
|
||||
|
||||
$memcached->set(sprintf('Helper.getHostSetting.%s.%s', $hostId, $key), $value, time() + 3600);
|
||||
|
||||
return $value;
|
||||
}
|
||||
|
||||
public static function setHostSetting(MySQL $db,
|
||||
Memcached $memcached,
|
||||
int $hostId,
|
||||
string $key,
|
||||
mixed $value) : int {
|
||||
|
||||
if ($hostSetting = $db->findHostSetting($hostId, $key)) {
|
||||
|
||||
$rowsAffected = $db->updateHostSetting($hostSetting->hostSettingId, $value, time());
|
||||
return $db->updateHostSetting($hostSetting->hostSettingId, $value, time());
|
||||
|
||||
} else {
|
||||
|
||||
$rowsAffected = $db->addHostSetting($hostId, $key, $value, time());
|
||||
return $db->addHostSetting($hostId, $key, $value, time());
|
||||
}
|
||||
|
||||
$memcached->set(sprintf('Helper.getHostSetting.%s.%s', $hostId, $key), $value, time() + 3600);
|
||||
|
||||
return $rowsAffected;
|
||||
// @TODO update cache
|
||||
}
|
||||
|
||||
public static function addLinkToDB(MySQL $db, Memcached $memcached, string $link) : mixed {
|
||||
public static function addLinkToDB(MySQL $db, Yggverse\Cache\Memory $memory, string $link) : mixed {
|
||||
|
||||
// Define variables
|
||||
$result = (object)
|
||||
@ -79,7 +73,7 @@ class Helper {
|
||||
if ($host = $db->findHostByCRC32URL(crc32($link->host->url))) {
|
||||
|
||||
// Make sure host URL compatible with this host rules before continue
|
||||
if (!preg_match(self::getHostSetting($db, $memcached, $host->hostId, 'URL_REGEXP', DEFAULT_HOST_URL_REGEXP), $link->host->url)) {
|
||||
if (!preg_match(self::getHostSettingValue($db, $memory, $host->hostId, 'URL_REGEXP', DEFAULT_HOST_URL_REGEXP), $link->host->url)) {
|
||||
|
||||
return false;
|
||||
}
|
||||
@ -131,21 +125,21 @@ class Helper {
|
||||
} else {
|
||||
|
||||
// Make sure host page URL compatible with this host rules before continue
|
||||
if (!preg_match(self::getHostSetting($db, $memcached, $hostId, 'URL_REGEXP', DEFAULT_HOST_URL_REGEXP), $link->page->url)) {
|
||||
if (!preg_match(self::getHostSettingValue($db, $memory, $hostId, 'URL_REGEXP', DEFAULT_HOST_URL_REGEXP), $link->page->url)) {
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Validate page limits for this host
|
||||
if ($db->getTotalHostPages($hostId) >= self::getHostSetting($db, $memcached, $hostId, 'PAGES_LIMIT', DEFAULT_HOST_PAGES_LIMIT)) {
|
||||
if ($db->getTotalHostPages($hostId) >= self::getHostSettingValue($db, $memory, $hostId, 'PAGES_LIMIT', DEFAULT_HOST_PAGES_LIMIT)) {
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Validate ROBOTS.TXT
|
||||
$robots = new Robots(
|
||||
self::getHostSetting($db, $memcached, $hostId, 'ROBOTS_TXT', NULL) . PHP_EOL .
|
||||
self::getHostSetting($db, $memcached, $hostId, 'ROBOTS_TXT_POSTFIX', DEFAULT_HOST_ROBOTS_TXT_POSTFIX)
|
||||
self::getHostSettingValue($db, $memory, $hostId, 'ROBOTS_TXT', NULL) . PHP_EOL .
|
||||
self::getHostSettingValue($db, $memory, $hostId, 'ROBOTS_TXT_POSTFIX', DEFAULT_HOST_ROBOTS_TXT_POSTFIX)
|
||||
);
|
||||
|
||||
if (!$robots->uriAllowed($link->page->uri)) {
|
||||
|
@ -32,22 +32,6 @@ try {
|
||||
exit;
|
||||
}
|
||||
|
||||
// Connect memcached
|
||||
// @TODO
|
||||
// legacy, upgrade to yggverse/cache instead
|
||||
// https://github.com/YGGverse/cache-php
|
||||
try {
|
||||
|
||||
$memcached = new Memcached();
|
||||
$memcached->addServer(MEMCACHED_HOST, MEMCACHED_PORT);
|
||||
|
||||
} catch(Exception $e) {
|
||||
|
||||
var_dump($e);
|
||||
|
||||
exit;
|
||||
}
|
||||
|
||||
// Connect Yggverse\Cache\Memory
|
||||
try {
|
||||
|
||||
@ -94,7 +78,7 @@ if (Yggverse\Parser\Url::is($q)) {
|
||||
|
||||
$db->beginTransaction();
|
||||
|
||||
if ($linkToDBresult = Helper::addLinkToDB($db, $memcached, $q)) {
|
||||
if ($linkToDBresult = Helper::addLinkToDB($db, $memory, $q)) {
|
||||
|
||||
if (count($linkToDBresult->new->hostPageId)) {
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user