mirror of https://github.com/YGGverse/YGGo.git
phpyggdrasilmysqlcrawlerjs-lessalt-websphinxspiderdistributedwebsearch-engineopen-sourcepdocurlparserfts5privacy-orientedsphinxsearchfederativeweb-archive
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
59 lines
1.1 KiB
59 lines
1.1 KiB
<?php |
|
|
|
/**
 * Collects page links from a sitemap file or from a (possibly nested) sitemap index.
 *
 * The location given to the constructor is loaded with simplexml_load_file().
 * When the document contains <sitemap> entries, each <loc> is followed
 * recursively; when it contains <url> entries, each <loc> is recorded as a link.
 * Loading is best-effort: a location that cannot be loaded or parsed is skipped
 * silently and contributes no links.
 */
class Sitemap {

  /**
   * Sitemap files that contained <url> entries, keyed by their url-decoded location.
   *
   * @var array<string, array>
   */
  private $_files = [];

  /**
   * Links found in all scanned sitemap files, keyed by url-decoded URL.
   *
   * @var array<string, array>
   */
  private $_links = [];

  /**
   * Locations that have already been requested, keyed by location.
   * Prevents endless recursion when a sitemap index references itself
   * (directly or through another index) and avoids fetching a file twice.
   *
   * @var array<string, bool>
   */
  private $_visited = [];

  /**
   * Scans the given sitemap location and everything reachable from it.
   *
   * @param string $filename Path or URL of a sitemap file or sitemap index
   */
  public function __construct(string $filename) {

    $this->_scanFiles($filename);
  }

  /**
   * Loads one sitemap location and either descends into it (sitemap index)
   * or collects its links (target file).
   *
   * @param string $filename Path or URL to load
   */
  private function _scanFiles(string $filename) {

    // Each location is processed at most once per instance
    if (isset($this->_visited[$filename])) {

      return;
    }

    $this->_visited[$filename] = true;

    // Warnings are suppressed on purpose: unreachable or malformed sitemaps are skipped
    $data = @simplexml_load_file($filename);

    if (!$data) {

      return;
    }

    if (!empty($data->sitemap)) { // sitemaps index

      foreach ($data->sitemap as $value) {

        if (!empty($value->loc)) {

          // NOTE(review): urldecode() also turns "+" into a space - confirm this is intended for locations
          $this->_scanFiles(trim(urldecode((string) $value->loc)));
        }
      }

      return;
    }

    if (!empty($data->url)) { // target file

      $this->_files[trim(urldecode($filename))] = []; // @TODO attributes

      // Reuse the document that is already parsed instead of loading the file a second time
      $this->_scanLinks($data);
    }
  }

  /**
   * Records every non-empty <url><loc> of an already parsed sitemap file.
   *
   * @param \SimpleXMLElement $data Root element of a sitemap file
   */
  private function _scanLinks(\SimpleXMLElement $data) {

    foreach ($data->url as $value) {

      if (!empty($value->loc)) {

        $this->_links[trim(urldecode((string) $value->loc))] = []; // @TODO attributes
      }
    }
  }

  /**
   * Returns the links collected during construction.
   *
   * @return array<string, array> Url-decoded, trimmed URLs as keys; values are empty arrays for now
   */
  public function getLinks() {

    return $this->_links;
  }
}