mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-01-09 14:27:55 +00:00
add optional page rank update in the crawl queue
This commit is contained in:
parent
a5a2ec233e
commit
5df59661d8
@ -321,6 +321,16 @@ define('CRAWL_HOST_DEFAULT_NSFW', false);
|
|||||||
*/
|
*/
|
||||||
define('CRAWL_SITEMAPS', true);
|
define('CRAWL_SITEMAPS', true);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Re-calculate page rank on page update
|
||||||
|
*
|
||||||
|
* When enabled, may increase execution time
|
||||||
|
*
|
||||||
|
* true|false
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
define('CRAWL_PAGE_RANK_UPDATE', true);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Renew robots.txt index by timing offset provided
|
* Renew robots.txt index by timing offset provided
|
||||||
*
|
*
|
||||||
|
@ -357,26 +357,29 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
|
|||||||
$httpRequestsTimeTotal += $curl->getTotalTime();
|
$httpRequestsTimeTotal += $curl->getTotalTime();
|
||||||
|
|
||||||
// Update page rank
|
// Update page rank
|
||||||
// @TODO add common method
|
if (CRAWL_PAGE_RANK_UPDATE) {
|
||||||
|
|
||||||
$hostPageRank = 0;
|
// @TODO add common method
|
||||||
|
|
||||||
// Get referrers
|
$hostPageRank = 0;
|
||||||
foreach ($db->getHostPagesToHostPageByHostPageIdTarget($queueHostPage->hostPageId) as $hostPageToHostPageByHostPageIdTarget) {
|
|
||||||
|
|
||||||
// Get source page details
|
// Get referrers
|
||||||
if ($hostPageSource = $db->getHostPage($hostPageToHostPageByHostPageIdTarget->hostPageIdSource)) {
|
foreach ($db->getHostPagesToHostPageByHostPageIdTarget($queueHostPage->hostPageId) as $hostPageToHostPageByHostPageIdTarget) {
|
||||||
|
|
||||||
// Increase PR on external referrer only
|
// Get source page details
|
||||||
if ($hostPageSource->hostId != $queueHostPage->hostId) {
|
if ($hostPageSource = $db->getHostPage($hostPageToHostPageByHostPageIdTarget->hostPageIdSource)) {
|
||||||
|
|
||||||
$hostPageRank++;
|
// Increase PR on external referrer only
|
||||||
}
|
if ($hostPageSource->hostId != $queueHostPage->hostId) {
|
||||||
|
|
||||||
// Delegate page rank value from redirected pages
|
$hostPageRank++;
|
||||||
if (false !== strpos($hostPageSource->httpCode, '30')) {
|
}
|
||||||
|
|
||||||
$hostPageRank += $hostPageSource->rank;
|
// Delegate page rank value from redirected pages
|
||||||
|
if (false !== strpos($hostPageSource->httpCode, '30')) {
|
||||||
|
|
||||||
|
$hostPageRank += $hostPageSource->rank;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user