YGGo/library/robots.php

98 lines
1.8 KiB
PHP
Raw Normal View History

2023-04-04 00:27:32 +03:00
<?php
/**
 * Minimal robots.txt reader/writer limited to the wildcard (`User-agent: *`) rules.
 *
 * Matching is case-insensitive: both the rules and the checked URI are lower-cased.
 * Rules are evaluated in file order and the last matching rule decides the result.
 */
class Robots {

    /**
     * Regex fragment => allowed flag, kept in the order the rules were first seen.
     *
     * @var array<int|string, bool>
     */
    private array $_rule = [];

    /** Raw robots.txt content as passed to the constructor, extended by append(). */
    private mixed $_data = null;

    /**
     * Parse the Allow / Disallow rules of every group that applies to `User-agent: *`.
     *
     * @param mixed $data Raw robots.txt content; cast to string for parsing.
     */
    public function __construct(mixed $data) {

        $this->_data = $data;

        // True while the current group lists the wildcard user agent
        $applies = false;

        // True while consecutive User-agent lines are being read (they share one group)
        $agentRun = false;

        // Remote files may use any line-ending style, so do not rely on PHP_EOL
        $rows = preg_split('/\R/', (string) $data);

        if (false === $rows) {
            $rows = [];
        }

        foreach ($rows as $row) {

            // Drop comments so a commented-out directive is never read as a rule
            $comment = strpos($row, '#');

            if (false !== $comment) {
                $row = substr($row, 0, $comment);
            }

            // Unify case match
            $row = strtolower(trim($row));

            // "field: value", the space after the colon is optional,
            // a missing colon is tolerated when whitespace separates the parts
            if (!preg_match('!^([a-z-]+)(?:\s*:\s*|\s+)(\S*)!', $row, $part)) {
                continue;
            }

            [, $field, $value] = $part;

            if ('user-agent' === $field) {

                $wildcard = str_starts_with($value, '*');

                // Several User-agent lines in a row belong to the same group
                $applies  = $agentRun ? ($applies || $wildcard) : $wildcard;
                $agentRun = true;

                continue;
            }

            // Other records (sitemap, crawl-delay, ...) neither add rules nor close the agent list
            if ('allow' !== $field && 'disallow' !== $field) {
                continue;
            }

            $agentRun = false;

            // An empty value carries no path to match, skip it
            if ($applies && '' !== $value) {
                $this->_rule[$this->_regex($value)] = ('allow' === $field);
            }
        }
    }

    /**
     * Check whether the wildcard rules permit the given URI.
     *
     * @param string $uri URI to test; it is matched from its beginning against each rule.
     *
     * @return bool True when allowed (also when no rule matches), false when disallowed.
     */
    public function uriAllowed(string $uri): bool {

        // Unify case match
        $uri = strtolower(trim($uri));

        // Index by default
        $result = true;

        // Begin index rules by ASC priority, the last matching rule wins
        foreach ($this->_rule as $rule => $value) {

            if (1 === preg_match('!^' . $rule . '!', $uri)) {
                $result = $value;
            }
        }

        return $result;
    }

    /**
     * Append a rule line ("$key $value") to the raw data inside a wildcard group.
     *
     * A `User-agent: *` header is added first unless the trailing group of the data
     * already is the wildcard group. The rule is skipped when that trailing group
     * already contains the identical line (compared case-insensitively).
     * Only the raw data is changed; the rules parsed by the constructor are not refreshed.
     *
     * @param string $key   Directive including its colon, e.g. "Disallow:".
     * @param string $value Directive value, e.g. "/private".
     */
    public function append(string $key, string $value): void {

        // The data may still be null or a non-string value at this point
        $data = (string) $this->_data;
        $rule = $key . ' ' . $value;

        $wildcardTail = false; // the last User-agent line names the wildcard
        $exists       = false; // the rule is already present in the trailing wildcard group

        $lines = preg_split('/\R/', $data);

        if (false === $lines) {
            $lines = [];
        }

        foreach ($lines as $line) {

            $line = trim($line);

            if (preg_match('!^user-agent\s*:\s*(\S*)!i', $line, $agent)) {

                // A new agent line starts over: only the trailing group matters
                $wildcardTail = str_starts_with($agent[1], '*');
                $exists       = false;

                continue;
            }

            // Whole-line comparison, so "/a" is not mistaken for an existing "/ab"
            if ($wildcardTail && 0 === strcasecmp($line, $rule)) {
                $exists = true;
            }
        }

        if (!$wildcardTail) {
            $data .= PHP_EOL . 'User-agent: *' . PHP_EOL;
        }

        if (!$exists) {
            $data .= PHP_EOL . $rule;
        }

        $this->_data = $data;
    }

    /**
     * Return the raw robots.txt data, including anything added by append().
     *
     * @return mixed The value given to the constructor, or a string once append() was called.
     */
    public function getData(): mixed {
        return $this->_data;
    }

    /**
     * Convert a robots.txt path pattern into a regex fragment for a "!"-delimited pattern.
     *
     * Every character is matched literally, except "*" (any sequence of characters)
     * and a trailing "$" (end of the URI).
     *
     * @param string $string Path pattern taken from an Allow / Disallow line.
     *
     * @return string Regex fragment without delimiters and without the leading "^".
     */
    private function _regex(string $string): string {

        // A trailing "$" anchors the rule to the end of the URI
        $anchored = str_ends_with($string, '$');

        if ($anchored) {
            $string = substr($string, 0, -1);
        }

        // Escape all regex metacharacters (and the "!" delimiter), then restore the wildcard
        $regex = str_replace('\*', '.*', preg_quote($string, '!'));

        return $anchored ? $regex . '$' : $regex;
    }
}