|
|
|
@ -301,6 +301,10 @@ foreach($index->search('')
@@ -301,6 +301,10 @@ foreach($index->search('')
|
|
|
|
|
); |
|
|
|
|
|
|
|
|
|
// Crawl links |
|
|
|
|
$base = new \Yggverse\Net\Address( |
|
|
|
|
$document->get('url') |
|
|
|
|
); |
|
|
|
|
|
|
|
|
|
$documents = []; |
|
|
|
|
|
|
|
|
|
foreach ($body->getLinks() as $line) |
|
|
|
@ -318,10 +322,6 @@ foreach($index->search('')
@@ -318,10 +322,6 @@ foreach($index->search('')
|
|
|
|
|
|
|
|
|
|
if ($address->isRelative()) |
|
|
|
|
{ |
|
|
|
|
$base = new \Yggverse\Net\Address( |
|
|
|
|
$document->get('url') |
|
|
|
|
); |
|
|
|
|
|
|
|
|
|
if ($absolute = $address->getAbsolute($base)) |
|
|
|
|
{ |
|
|
|
|
$url = $absolute; |
|
|
|
@ -335,7 +335,7 @@ foreach($index->search('')
@@ -335,7 +335,7 @@ foreach($index->search('')
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// External host rules |
|
|
|
|
if (!$config->cli->document->crawl->url->external && parse_url($url, PHP_URL_HOST) != $host) |
|
|
|
|
if (!$config->cli->document->crawl->url->external && $address->getHost() != $base->getHost()) |
|
|
|
|
{ |
|
|
|
|
continue; |
|
|
|
|
} |
|
|
|
|