|
|
@ -44,13 +44,16 @@ $httpRequestsSizeTotal = 0; |
|
|
|
$httpDownloadSizeTotal = 0; |
|
|
|
$httpDownloadSizeTotal = 0; |
|
|
|
$httpRequestsTimeTotal = 0; |
|
|
|
$httpRequestsTimeTotal = 0; |
|
|
|
|
|
|
|
|
|
|
|
$hostPagesProcessed = 0; |
|
|
|
|
|
|
|
$manifestsProcessed = 0; |
|
|
|
|
|
|
|
$hostPagesAdded = 0; |
|
|
|
|
|
|
|
$hostsAdded = 0; |
|
|
|
$hostsAdded = 0; |
|
|
|
$hostPagesBanned = 0; |
|
|
|
$hostPagesBanned = 0; |
|
|
|
$hostPagesSnapAdded = 0; |
|
|
|
$hostPagesSnapAdded = 0; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
$hostPagesProcessed = 0; |
|
|
|
|
|
|
|
$hostPagesAdded = 0; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
$manifestsProcessed = 0; |
|
|
|
|
|
|
|
$sitemapsProcessed = 0; |
|
|
|
|
|
|
|
|
|
|
|
// Connect database |
|
|
|
// Connect database |
|
|
|
try { |
|
|
|
try { |
|
|
|
|
|
|
|
|
|
|
@ -111,8 +114,12 @@ foreach ($db->getHostRobotsCrawlQueue(CRAWL_ROBOTS_LIMIT, time() - CRAWL_ROBOTS_ |
|
|
|
// Init sitemap data |
|
|
|
// Init sitemap data |
|
|
|
$sitemap = new Sitemap($hostSitemapPath); |
|
|
|
$sitemap = new Sitemap($hostSitemapPath); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if ($sitemapLinks = $sitemap->getLinks()) { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
$sitemapsProcessed++; |
|
|
|
|
|
|
|
|
|
|
|
// Process collected sitemap links |
|
|
|
// Process collected sitemap links |
|
|
|
foreach ($sitemap->getLinks() as $link => $attributes) { |
|
|
|
foreach ($sitemapLinks as $link => $attributes) { |
|
|
|
|
|
|
|
|
|
|
|
// Parse formatted link |
|
|
|
// Parse formatted link |
|
|
|
$linkURI = Parser::uri($link); |
|
|
|
$linkURI = Parser::uri($link); |
|
|
@ -129,6 +136,7 @@ foreach ($db->getHostRobotsCrawlQueue(CRAWL_ROBOTS_LIMIT, time() - CRAWL_ROBOTS_ |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// Update manifest if available for this host |
|
|
|
// Update manifest if available for this host |
|
|
|
if ($manifestURL = $db->getHostSetting($host->hostId, 'MANIFEST_URL')) { |
|
|
|
if ($manifestURL = $db->getHostSetting($host->hostId, 'MANIFEST_URL')) { |
|
|
@ -1206,6 +1214,8 @@ echo 'Pages added: ' . $hostPagesAdded . PHP_EOL; |
|
|
|
echo 'Pages snaps added: ' . $hostPagesSnapAdded . PHP_EOL; |
|
|
|
echo 'Pages snaps added: ' . $hostPagesSnapAdded . PHP_EOL; |
|
|
|
echo 'Pages banned: ' . $hostPagesBanned . PHP_EOL; |
|
|
|
echo 'Pages banned: ' . $hostPagesBanned . PHP_EOL; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
echo 'Sitemaps processed: ' . $sitemapsProcessed . PHP_EOL; |
|
|
|
|
|
|
|
|
|
|
|
echo 'Manifests processed: ' . $manifestsProcessed . PHP_EOL; |
|
|
|
echo 'Manifests processed: ' . $manifestsProcessed . PHP_EOL; |
|
|
|
|
|
|
|
|
|
|
|
echo 'HTTP Requests total: ' . $httpRequestsTotal . PHP_EOL; |
|
|
|
echo 'HTTP Requests total: ' . $httpRequestsTotal . PHP_EOL; |
|
|
|