diff --git a/src/crontab/crawler.php b/src/crontab/crawler.php
index 117c4d5..cc86a12 100644
--- a/src/crontab/crawler.php
+++ b/src/crontab/crawler.php
@@ -74,11 +74,10 @@ try {
exit;
}
-// Connect memcached
+// Connect Yggverse\Cache\Memory
try {
- $memcached = new Memcached();
- $memcached->addServer(MEMCACHED_HOST, MEMCACHED_PORT);
+ $memory = new Yggverse\Cache\Memory(MEMCACHED_HOST, MEMCACHED_PORT, MEMCACHED_NAMESPACE, MEMCACHED_TIMEOUT + time());
} catch(Exception $e) {
@@ -100,21 +99,21 @@ if (CRAWL_YGGSTATE) {
try {
- if (!$memcached->get(sprintf('Crontab.crawler.YGGstate.%s.%s.timeout', $server, $i))) {
+ if (!$memory->get(sprintf('Crontab.crawler.YGGstate.%s.%s.timeout', $server, $i))) {
$yggStateDB = new YGGstate($node->host, $node->port, $node->database, $node->username, $node->password);
foreach ($yggStatePeers = $yggStateDB->getPeersByMinLastUptime($node->peer_min_last_uptime) as $yggStatePeer) {
// Register new host
- if ($linkToDBresult = Helper::addLinkToDB($db, $memcached, sprintf('http://[%s]/', $yggStatePeer->address))) {
+ if ($linkToDBresult = Helper::addLinkToDB($db, $memory, sprintf('http://[%s]/', $yggStatePeer->address))) {
$hostsAdded += count($linkToDBresult->new->hostId);
$hostPagesAdded += count($linkToDBresult->new->hostPageId);
}
}
- $memcached->set(sprintf('Crontab.crawler.YGGstate.%s.%s.timeout', $server, $i), true, time() + $node->timeout);
+ $memory->set(sprintf('Crontab.crawler.YGGstate.%s.%s.timeout', $server, $i), true, time() + $node->timeout);
}
} catch(Exception $e) {
@@ -154,7 +153,7 @@ foreach ($db->getHostCrawlQueue(CRAWL_HOST_LIMIT, time() - CRAWL_HOST_SECONDS_OF
// Update robots.txt rules
if (200 == $curl->getCode() && false !== stripos(trim(mb_strtolower((string) $curl->getContentType())), 'text/plain')) {
- Helper::setHostSetting($db, $memcached, $queueHost->hostId, 'ROBOTS_TXT', (string) $curl->getContent());
+ Helper::setHostSetting($db, $queueHost->hostId, 'ROBOTS_TXT', (string) $curl->getContent());
}
}
@@ -163,8 +162,8 @@ foreach ($db->getHostCrawlQueue(CRAWL_HOST_LIMIT, time() - CRAWL_HOST_SECONDS_OF
// Look for custom sitemap URL served in robots.txt
$robots = new Robots(
- Helper::getHostSetting($db, $memcached, $queueHost->hostId, 'ROBOTS_TXT', NULL) . PHP_EOL .
- Helper::getHostSetting($db, $memcached, $queueHost->hostId, 'ROBOTS_TXT_POSTFIX', DEFAULT_HOST_ROBOTS_TXT_POSTFIX)
+ Helper::getHostSettingValue($db, $memory, $queueHost->hostId, 'ROBOTS_TXT', NULL) . PHP_EOL .
+ Helper::getHostSettingValue($db, $memory, $queueHost->hostId, 'ROBOTS_TXT_POSTFIX', DEFAULT_HOST_ROBOTS_TXT_POSTFIX)
);
if ($sitemapLink = $robots->getSitemap()) {
@@ -204,7 +203,7 @@ foreach ($db->getHostCrawlQueue(CRAWL_HOST_LIMIT, time() - CRAWL_HOST_SECONDS_OF
}
// Register new link
- if ($linkToDBresult = Helper::addLinkToDB($db, $memcached, $loc)) {
+ if ($linkToDBresult = Helper::addLinkToDB($db, $memory, $loc)) {
$hostsAdded += count($linkToDBresult->new->hostId);
$hostPagesAdded += count($linkToDBresult->new->hostPageId);
@@ -217,7 +216,7 @@ foreach ($db->getHostCrawlQueue(CRAWL_HOST_LIMIT, time() - CRAWL_HOST_SECONDS_OF
if (CRAWL_MANIFEST) {
// Host have manifest provided
- if ($manifestURL = Helper::getHostSetting($db, $memcached, $queueHost->hostId, 'MANIFEST_URL', NULL)) {
+ if ($manifestURL = Helper::getHostSettingValue($db, $memory, $queueHost->hostId, 'MANIFEST_URL', NULL)) {
// Get remote manifest
$curl = new Curl($manifestURL, CRAWL_CURLOPT_USERAGENT);
@@ -269,13 +268,13 @@ foreach ($db->getHostCrawlQueue(CRAWL_HOST_LIMIT, time() - CRAWL_HOST_SECONDS_OF
// Skip processing on remote host URL does not match local condition
if ($remoteManifest->result->config->DEFAULT_HOST_URL_REGEXP !=
- Helper::getHostSetting($db, $memcached, $queueHost->hostId, 'URL_REGEXP', DEFAULT_HOST_URL_REGEXP)) {
+ Helper::getHostSettingValue($db, $memory, $queueHost->hostId, 'URL_REGEXP', DEFAULT_HOST_URL_REGEXP)) {
continue;
}
// Skip processing on remote host link does not match local condition
- if (false === preg_match(Helper::getHostSetting($db, $memcached, $queueHost->hostId, 'URL_REGEXP', DEFAULT_HOST_URL_REGEXP),
+ if (false === preg_match(Helper::getHostSettingValue($db, $memory, $queueHost->hostId, 'URL_REGEXP', DEFAULT_HOST_URL_REGEXP),
$remoteManifest->result->api->hosts)) {
continue;
@@ -324,7 +323,7 @@ foreach ($db->getHostCrawlQueue(CRAWL_HOST_LIMIT, time() - CRAWL_HOST_SECONDS_OF
}
// Register new link
- if ($linkToDBresult = Helper::addLinkToDB($db, $memcached, $remoteManifestHost->url)) {
+ if ($linkToDBresult = Helper::addLinkToDB($db, $memory, $remoteManifestHost->url)) {
$hostsAdded += count($linkToDBresult->new->hostId);
$hostPagesAdded += count($linkToDBresult->new->hostPageId);
@@ -431,7 +430,7 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_HOST_PAGE_QUEUE_LIMIT, time() - CRAWL_
}
// Register new link
- if ($linkToDBresult = Helper::addLinkToDB($db, $memcached, $url)) {
+ if ($linkToDBresult = Helper::addLinkToDB($db, $memory, $url)) {
$hostsAdded += count($linkToDBresult->new->hostId);
$hostPagesAdded += count($linkToDBresult->new->hostPageId);
@@ -480,7 +479,7 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_HOST_PAGE_QUEUE_LIMIT, time() - CRAWL_
// Check for MIME
$hostPageInMime = false;
- foreach ((array) explode(',', Helper::getHostSetting($db, $memcached, $queueHostPage->hostId, 'PAGES_MIME', DEFAULT_HOST_PAGES_MIME)) as $mime) {
+ foreach ((array) explode(',', Helper::getHostSettingValue($db, $memory, $queueHostPage->hostId, 'PAGES_MIME', DEFAULT_HOST_PAGES_MIME)) as $mime) {
// Ban page on MIME type not allowed in settings
if (false !== stripos(Filter::mime($contentType), Filter::mime($mime))) {
@@ -735,11 +734,11 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_HOST_PAGE_QUEUE_LIMIT, time() - CRAWL_
$metaTitle,
$metaDescription ? Filter::pageDescription($metaDescription) : null,
$metaKeywords ? Filter::pageKeywords($metaKeywords) : null,
- $content ? (Helper::getHostSetting($db, $memcached, $queueHostPage->hostId, 'PAGES_DATA', DEFAULT_HOST_PAGES_DATA) ? base64_encode($content) : null) : null,
+ $content ? (Helper::getHostSettingValue($db, $memory, $queueHostPage->hostId, 'PAGES_DATA', DEFAULT_HOST_PAGES_DATA) ? base64_encode($content) : null) : null,
time());
// Collect page DOM elements data on enabled
- if ($hostPageDomSelectors = Helper::getHostSetting($db, $memcached, $queueHostPage->hostId, 'PAGES_DOM_SELECTORS', DEFAULT_HOST_PAGES_DOM_SELECTORS)) {
+ if ($hostPageDomSelectors = Helper::getHostSettingValue($db, $memory, $queueHostPage->hostId, 'PAGES_DOM_SELECTORS', DEFAULT_HOST_PAGES_DOM_SELECTORS)) {
// Begin selectors extraction
$html = str_get_html($content);
@@ -753,7 +752,7 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_HOST_PAGE_QUEUE_LIMIT, time() - CRAWL_
$db->addHostPageDom($queueHostPage->hostPageId,
time(),
$selector,
- trim(Helper::getHostSetting($db, $memcached, $queueHostPage->hostId, 'PAGE_DOM_STRIP_TAGS', DEFAULT_HOST_PAGE_DOM_STRIP_TAGS) ? strip_tags( preg_replace('/[\s]+/',
+ trim(Helper::getHostSettingValue($db, $memory, $queueHostPage->hostId, 'PAGE_DOM_STRIP_TAGS', DEFAULT_HOST_PAGE_DOM_STRIP_TAGS) ? strip_tags( preg_replace('/[\s]+/',
' ',
str_replace(['
', '
', '
', ''],
[' ', ' ', ' ', ' '],
@@ -1028,7 +1027,7 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_HOST_PAGE_QUEUE_LIMIT, time() - CRAWL_
}
// Register new link
- if ($linkToDBresult = Helper::addLinkToDB($db, $memcached, $link['href'])) {
+ if ($linkToDBresult = Helper::addLinkToDB($db, $memory, $link['href'])) {
// Increase new hosts counters
if ($linkToDBresult->new->hostId) {
diff --git a/src/library/helper.php b/src/library/helper.php
index 14ddfab..6fda9b7 100644
--- a/src/library/helper.php
+++ b/src/library/helper.php
@@ -5,48 +5,42 @@ require_once __DIR__ . '/../../vendor/autoload.php';
class Helper {
- public static function getHostSetting(MySQL $db,
- Memcached $memcached,
+ public static function getHostSettingValue(MySQL $db, // Returns the setting value obtained through $memory for $hostId/$key, or $defaultValue
+ Yggverse\Cache\Memory $memory,
int $hostId,
string $key,
mixed $defaultValue) : mixed {
- if ($value = $memcached->get(sprintf('Helper.getHostSetting.%s.%s', $hostId, $key))) {
+ if (false !== $value = $memory->getByMethodCallback( // presumably invokes $db->findHostSettingValue($hostId, $key) and caches the result - confirm against Yggverse\Cache\Memory
+ $db, 'findHostSettingValue', [$hostId, $key], time() + 3600 // last argument is now + 3600 s; presumably the cache expiry - TODO confirm it expects an absolute timestamp
+ )) {
return $value;
- }
- if (!$value = $db->findHostSettingValue($hostId, $key)) {
+ } else {
- $value = $defaultValue;
+ return $defaultValue; // NOTE(review): reached only on a strict false; the removed code fell back on any falsy value - confirm what getByMethodCallback yields for a missing setting
}
-
- $memcached->set(sprintf('Helper.getHostSetting.%s.%s', $hostId, $key), $value, time() + 3600);
-
- return $value;
}
public static function setHostSetting(MySQL $db,
- Memcached $memcached,
int $hostId,
string $key,
mixed $value) : int {
if ($hostSetting = $db->findHostSetting($hostId, $key)) {
- $rowsAffected = $db->updateHostSetting($hostSetting->hostSettingId, $value, time());
+ return $db->updateHostSetting($hostSetting->hostSettingId, $value, time()); // setting row already exists: update it and return the DB call result
} else {
- $rowsAffected = $db->addHostSetting($hostId, $key, $value, time());
+ return $db->addHostSetting($hostId, $key, $value, time()); // no row found for this host/key: insert one and return the DB call result
}
- $memcached->set(sprintf('Helper.getHostSetting.%s.%s', $hostId, $key), $value, time() + 3600);
-
- return $rowsAffected;
+ // @TODO update cache - NOTE(review): unreachable as placed (both branches above return), and no cache write happens here anymore; getHostSettingValue may presumably keep serving a previously cached value for this key - confirm
}
- public static function addLinkToDB(MySQL $db, Memcached $memcached, string $link) : mixed {
+ public static function addLinkToDB(MySQL $db, Yggverse\Cache\Memory $memory, string $link) : mixed {
// Define variables
$result = (object)
@@ -79,7 +73,7 @@ class Helper {
if ($host = $db->findHostByCRC32URL(crc32($link->host->url))) {
// Make sure host URL compatible with this host rules before continue
- if (!preg_match(self::getHostSetting($db, $memcached, $host->hostId, 'URL_REGEXP', DEFAULT_HOST_URL_REGEXP), $link->host->url)) {
+ if (!preg_match(self::getHostSettingValue($db, $memory, $host->hostId, 'URL_REGEXP', DEFAULT_HOST_URL_REGEXP), $link->host->url)) {
return false;
}
@@ -131,21 +125,21 @@ class Helper {
} else {
// Make sure host page URL compatible with this host rules before continue
- if (!preg_match(self::getHostSetting($db, $memcached, $hostId, 'URL_REGEXP', DEFAULT_HOST_URL_REGEXP), $link->page->url)) {
+ if (!preg_match(self::getHostSettingValue($db, $memory, $hostId, 'URL_REGEXP', DEFAULT_HOST_URL_REGEXP), $link->page->url)) {
return false;
}
// Validate page limits for this host
- if ($db->getTotalHostPages($hostId) >= self::getHostSetting($db, $memcached, $hostId, 'PAGES_LIMIT', DEFAULT_HOST_PAGES_LIMIT)) {
+ if ($db->getTotalHostPages($hostId) >= self::getHostSettingValue($db, $memory, $hostId, 'PAGES_LIMIT', DEFAULT_HOST_PAGES_LIMIT)) {
return false;
}
// Validate ROBOTS.TXT
$robots = new Robots(
- self::getHostSetting($db, $memcached, $hostId, 'ROBOTS_TXT', NULL) . PHP_EOL .
- self::getHostSetting($db, $memcached, $hostId, 'ROBOTS_TXT_POSTFIX', DEFAULT_HOST_ROBOTS_TXT_POSTFIX)
+ self::getHostSettingValue($db, $memory, $hostId, 'ROBOTS_TXT', NULL) . PHP_EOL .
+ self::getHostSettingValue($db, $memory, $hostId, 'ROBOTS_TXT_POSTFIX', DEFAULT_HOST_ROBOTS_TXT_POSTFIX)
);
if (!$robots->uriAllowed($link->page->uri)) {
diff --git a/src/public/search.php b/src/public/search.php
index f27d2f3..773cc62 100644
--- a/src/public/search.php
+++ b/src/public/search.php
@@ -32,22 +32,6 @@ try {
exit;
}
-// Connect memcached
-// @TODO
-// legacy, upgrade to yggverse/cache instead
-// https://github.com/YGGverse/cache-php
-try {
-
- $memcached = new Memcached();
- $memcached->addServer(MEMCACHED_HOST, MEMCACHED_PORT);
-
-} catch(Exception $e) {
-
- var_dump($e);
-
- exit;
-}
-
// Connect Yggverse\Cache\Memory
try {
@@ -94,7 +78,7 @@ if (Yggverse\Parser\Url::is($q)) {
$db->beginTransaction();
- if ($linkToDBresult = Helper::addLinkToDB($db, $memcached, $q)) {
+ if ($linkToDBresult = Helper::addLinkToDB($db, $memory, $q)) {
if (count($linkToDBresult->new->hostPageId)) {