Hoist the per-document base address out of the link loop.

The base \Yggverse\Net\Address is now built once from $document->get('url')
before the selectors are walked. It is reused when a relative link is
converted to an absolute one, and its host is what the external-host rule
compares each link against.

Review notes:
* No `global $base;` is added to the loop body. The removed lines already
  read $document from that same body without `global`, so the body shares
  scope with the hoisted assignment and the statement would be redundant
  (and would rebind $base to an unset global if this script were ever
  included from inside a function).
* Indentation and blank context lines were reconstructed from the hunk
  line counts; apply with whitespace tolerance if needed. The blob ids on
  the index line are carried over unchanged - TODO regenerate.
* NOTE(review): the external-host rule reads $address->getHost(); confirm
  that $address holds the absolute form for links that were relative.

diff --git a/src/cli/document/crawl.php b/src/cli/document/crawl.php
index 778df08..e0952f2 100644
--- a/src/cli/document/crawl.php
+++ b/src/cli/document/crawl.php
@@ -486,6 +486,10 @@ foreach($index->search('')
     }
 
     // Crawl documents
+    $base = new \Yggverse\Net\Address(
+        $document->get('url')
+    );
+
     $documents = [];
 
     foreach ($config->cli->document->crawl->selector as $selector => $settings)
@@ -505,10 +509,6 @@ foreach($index->search('')
 
                 if ($address->isRelative())
                 {
-                    $base = new \Yggverse\Net\Address(
-                        $document->get('url')
-                    );
-
                     if ($absolute = $address->getAbsolute($base))
                     {
                         $url = $absolute;
@@ -522,7 +522,7 @@ foreach($index->search('')
                 }
 
                 // External host rules
-                if (!$settings->external && parse_url($url, PHP_URL_HOST) != $address->getHost())
+                if (!$settings->external && $address->getHost() != $base->getHost())
                 {
                     continue;
                 }