<?php

/**
 * Minimal robots.txt parser.
 *
 * Collects the Allow / Disallow rules that belong to the "User-agent: *"
 * group(s) and the Sitemap address, and answers whether a URI may be crawled.
 *
 * Matching is case-insensitive: both the rule paths and the tested URI are
 * lowercased. When several rules match a URI, the one that appears last in
 * the file wins.
 */
class Robots {

    /**
     * Parsed rules of the "User-agent: *" group(s), in file order.
     *
     * @var array<int|string, bool> regex fragment => true (allow) | false (disallow)
     */
    private array $_rule = [];

    /**
     * Sitemap address found in the data (URL-decoded), or null when absent.
     */
    private ?string $_sitemap = null;

    /**
     * Raw value passed to the constructor, kept untouched for getData().
     *
     * @var mixed
     */
    private $_data = null;

    /**
     * Parses the robots.txt content.
     *
     * @param mixed $data robots.txt content; it is cast to string for parsing
     */
    public function __construct(mixed $data) {

        $this->_data = $data;

        // True while the current group applies to every crawler (User-agent: *)
        $read = false;

        // \R matches any line break (\n, \r\n, \r), independent of the host platform
        $rows = preg_split('/\R/', (string) $data) ?: [];

        foreach ($rows as $row) {

            $row = trim($row);

            // Parse sitemap address. The directive is group-independent and the
            // URL keeps its original case, as URL paths are case-sensitive.
            if (preg_match('!^sitemap:\s*(.*)$!i', $row, $matches)) {

                $sitemap = trim($matches[1]);

                if ($sitemap !== '') {

                    $this->_sitemap = urldecode($sitemap);
                }

                continue;
            }

            // A User-agent line switches rule reading on (wildcard agent) or off
            // (any other agent). The rest of the file is still scanned, so a
            // Sitemap line or another "User-agent: *" group placed after a
            // foreign group is not lost.
            if (preg_match('!^user-agent:\s*(.*)$!i', $row, $matches)) {

                $read = str_starts_with($matches[1], '*');

                continue;
            }

            if (!$read) {

                continue;
            }

            // Allow / Disallow directive: tolerate any amount of whitespace around
            // the colon and take the first token up to whitespace or a comment.
            if (preg_match('!^(allow|disallow)\s*:\s*([^\s#]*)!i', $row, $matches)) {

                $path = strtolower($matches[2]);

                // An empty path carries no restriction; turning it into a rule
                // would produce a pattern that matches every URI.
                if ($path === '') {

                    continue;
                }

                $this->_rule[$this->_regex($path)] = strtolower($matches[1]) === 'allow';
            }
        }
    }

    /**
     * Tells whether the given URI is allowed by the parsed rules.
     *
     * @param string $uri URI to test against the rules
     *
     * @return bool true when allowed (also the default when no rule matches)
     */
    public function uriAllowed(string $uri) {

        // Unify case match
        $uri = strtolower(trim($uri));

        // Index by default
        $result = true;

        // Begin index rules by ASC priority: a later matching rule overrides an earlier one
        foreach ($this->_rule as $rule => $value) {

            if (preg_match('!^' . $rule . '!', $uri)) {

                $result = $value;
            }
        }

        return $result;
    }

    /* @TODO not in use
    public function append(string $key, string $value) {

        if (!preg_match('!^user-agent:\s?\*!', strtolower(trim($this->_data)))) {

            $this->_data .= PHP_EOL . 'User-agent: *' . PHP_EOL;
        }

        if (false === stripos($this->_data, PHP_EOL . $key . ' ' . $value)) {

            $this->_data .= PHP_EOL . $key . ' ' . $value;
        }
    }
    */

    /**
     * Returns the raw value the object was constructed with.
     *
     * @return mixed
     */
    public function getData() {

        return $this->_data;
    }

    /**
     * Returns the sitemap address found in the data.
     *
     * @return string|null null when no Sitemap directive was found
     */
    public function getSitemap() {

        return $this->_sitemap;
    }

    /**
     * Converts a robots.txt path pattern into a regex fragment that is safe to
     * embed between "!" delimiters.
     *
     * "*" becomes ".*", a trailing "$" stays an end-of-string anchor, and every
     * other character is matched literally.
     *
     * @param string $string robots.txt path pattern
     */
    private function _regex(string $string): string {

        // Only a trailing "$" has a special meaning in robots.txt
        $anchored = str_ends_with($string, '$');

        if ($anchored) {

            $string = substr($string, 0, -1);
        }

        // Quote the literal pieces between wildcards, then join them with ".*"
        $pieces = [];

        foreach (explode('*', $string) as $piece) {

            $pieces[] = preg_quote($piece, '!');
        }

        $pattern = implode('.*', $pieces);

        return $anchored ? $pattern . '$' : $pattern;
    }
}
|