|
|
@ -486,6 +486,10 @@ foreach($index->search('') |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// Crawl documents |
|
|
|
// Crawl documents |
|
|
|
|
|
|
|
$base = new \Yggverse\Net\Address( |
|
|
|
|
|
|
|
$document->get('url') |
|
|
|
|
|
|
|
); |
|
|
|
|
|
|
|
|
|
|
|
$documents = []; |
|
|
|
$documents = []; |
|
|
|
|
|
|
|
|
|
|
|
foreach ($config->cli->document->crawl->selector as $selector => $settings) |
|
|
|
foreach ($config->cli->document->crawl->selector as $selector => $settings) |
|
|
@ -496,6 +500,8 @@ foreach($index->search('') |
|
|
|
|
|
|
|
|
|
|
|
}) as $value) { |
|
|
|
}) as $value) { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
global $base; |
|
|
|
|
|
|
|
|
|
|
|
if ($url = $value->attr($settings->attribute)) |
|
|
|
if ($url = $value->attr($settings->attribute)) |
|
|
|
{ |
|
|
|
{ |
|
|
|
// Convert relative links to absolute |
|
|
|
// Convert relative links to absolute |
|
|
@ -505,10 +511,6 @@ foreach($index->search('') |
|
|
|
|
|
|
|
|
|
|
|
if ($address->isRelative()) |
|
|
|
if ($address->isRelative()) |
|
|
|
{ |
|
|
|
{ |
|
|
|
$base = new \Yggverse\Net\Address( |
|
|
|
|
|
|
|
$document->get('url') |
|
|
|
|
|
|
|
); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if ($absolute = $address->getAbsolute($base)) |
|
|
|
if ($absolute = $address->getAbsolute($base)) |
|
|
|
{ |
|
|
|
{ |
|
|
|
$url = $absolute; |
|
|
|
$url = $absolute; |
|
|
@ -522,7 +524,7 @@ foreach($index->search('') |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// External host rules |
|
|
|
// External host rules |
|
|
|
if (!$settings->external && parse_url($url, PHP_URL_HOST) != $address->getHost()) |
|
|
|
if (!$settings->external && $address->getHost() != $base->getHost()) |
|
|
|
{ |
|
|
|
{ |
|
|
|
continue; |
|
|
|
continue; |
|
|
|
} |
|
|
|
} |
|
|
|