mirror of https://github.com/YGGverse/YGGo.git
phpyggdrasilcrawlermysqljs-lessspideralt-websphinxdistributedwebsearch-engineopen-sourcepdocurlparserfts5privacy-orientedsphinxsearchfederativeweb-archive
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
59 lines
1.1 KiB
59 lines
1.1 KiB
1 year ago
|
<?php
|
||
|
|
||
|
/**
 * Collects page URLs from an XML sitemap or a sitemap index.
 *
 * The constructor loads the given location with simplexml_load_file(). When
 * the document contains <sitemap> entries it is treated as an index and every
 * <loc> it lists is scanned recursively; when it contains <url> entries it is
 * treated as a target file and its <loc> values are collected.
 *
 * Locations that fail to load or parse are skipped silently (best effort).
 *
 * NOTE(review): locations are passed through urldecode(), which also turns
 * "+" into a space and unescapes reserved characters, so the keys returned by
 * getLinks() are decoded forms rather than the literal <loc> text. This is
 * kept as-is because callers may rely on the decoded keys - confirm.
 */
class Sitemap {

  /**
   * Target sitemap files that contained <url> entries, keyed by decoded location.
   *
   * @var array<string, array>
   */
  private $_files = [];

  /**
   * Collected page links, keyed by decoded and trimmed <loc> value.
   *
   * @var array<string, array>
   */
  private $_links = [];

  /**
   * Locations already scanned, used to break self-referencing or cyclic indexes.
   *
   * @var array<string, bool>
   */
  private $_visited = [];

  /**
   * Maximum nesting of sitemap indexes followed from the entry file.
   *
   * Guards against a server that keeps generating new index locations.
   *
   * @var int
   */
  private $_maxDepth = 8;

  /**
   * @param string $filename Path or URL of the sitemap (or sitemap index) to read.
   */
  public function __construct(string $filename) {

    $this->_scanFiles($filename);
  }

  /**
   * Loads one sitemap document and either descends into it (index) or
   * collects its links (target file).
   *
   * @param string $filename Location to load.
   * @param int    $depth    Current index nesting level; the entry file is 0.
   */
  private function _scanFiles(string $filename, int $depth = 0) {

    // Stop on runaway nesting and never load the same location twice:
    // an index that lists itself would otherwise recurse forever.
    if ($depth > $this->_maxDepth || isset($this->_visited[$filename])) {

      return;
    }

    $this->_visited[$filename] = true;

    // Warnings are suppressed on purpose: unreachable or malformed sitemaps
    // are simply ignored.
    $data = @simplexml_load_file($filename);

    if ($data === false) {

      return;
    }

    if (!empty($data->sitemap)) { // sitemaps index

      foreach ($data->sitemap as $value) {

        if (!empty($value->loc)) {

          $this->_scanFiles(trim(urldecode((string) $value->loc)), $depth + 1);
        }
      }

    } else if (!empty($data->url)) { // target file

      $this->_files[trim(urldecode($filename))] = []; // @TODO attributes

      // Reuse the document that is already parsed instead of fetching the
      // same file a second time.
      $this->_scanLinks($data);
    }
  }

  /**
   * Stores every non-empty <url><loc> of a parsed target sitemap.
   *
   * @param SimpleXMLElement $data Parsed sitemap document containing <url> entries.
   */
  private function _scanLinks(SimpleXMLElement $data) {

    foreach ($data->url as $value) {

      if (!empty($value->loc)) {

        $this->_links[trim(urldecode((string) $value->loc))] = []; // @TODO attributes
      }
    }
  }

  /**
   * Returns the collected links.
   *
   * @return array<string, array> Map of link => attributes (attributes are
   *                              currently always an empty array).
   */
  public function getLinks() {

    return $this->_links;
  }
}
|