mirror of
https://github.com/YGGverse/Yo.git
synced 2025-02-09 21:34:22 +00:00
use yo-tools-php library
This commit is contained in:
parent
672b789328
commit
c492a98094
@ -20,6 +20,7 @@
|
|||||||
"jdenticon/jdenticon": "^1.0",
|
"jdenticon/jdenticon": "^1.0",
|
||||||
"yggverse/ftp": "^1.0",
|
"yggverse/ftp": "^1.0",
|
||||||
"gregwar/captcha": "^1.2",
|
"gregwar/captcha": "^1.2",
|
||||||
"yggverse/net": "^1.2"
|
"yggverse/net": "^1.2",
|
||||||
|
"yggverse/yo-tools": "^0.1.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -6,37 +6,6 @@ $microtime = microtime(true);
|
|||||||
// Load dependencies
|
// Load dependencies
|
||||||
require_once __DIR__ . '/../../../vendor/autoload.php';
|
require_once __DIR__ . '/../../../vendor/autoload.php';
|
||||||
|
|
||||||
// Define helpers
|
|
||||||
function getLastSnapTime(array $files): int
|
|
||||||
{
|
|
||||||
$time = [];
|
|
||||||
|
|
||||||
foreach ($files as $file)
|
|
||||||
{
|
|
||||||
if (!str_ends_with($file, '.tar.gz'))
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
$time[] = preg_replace(
|
|
||||||
'/^([\d]+)\.tar\.gz$/',
|
|
||||||
'$1',
|
|
||||||
basename(
|
|
||||||
$file
|
|
||||||
)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($time)
|
|
||||||
{
|
|
||||||
return max(
|
|
||||||
$time
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Init config
|
// Init config
|
||||||
$config = json_decode(
|
$config = json_decode(
|
||||||
file_get_contents(
|
file_get_contents(
|
||||||
@ -519,10 +488,6 @@ foreach($index->search('')
|
|||||||
// Crawl documents
|
// Crawl documents
|
||||||
$documents = [];
|
$documents = [];
|
||||||
|
|
||||||
$scheme = parse_url($document->get('url'), PHP_URL_SCHEME);
|
|
||||||
$host = parse_url($document->get('url'), PHP_URL_HOST);
|
|
||||||
$port = parse_url($document->get('url'), PHP_URL_PORT);
|
|
||||||
|
|
||||||
foreach ($config->cli->document->crawl->selector as $selector => $settings)
|
foreach ($config->cli->document->crawl->selector as $selector => $settings)
|
||||||
{
|
{
|
||||||
foreach ($crawler->filter($selector)->each(function($node) {
|
foreach ($crawler->filter($selector)->each(function($node) {
|
||||||
@ -534,25 +499,13 @@ foreach($index->search('')
|
|||||||
if ($url = $value->attr($settings->attribute))
|
if ($url = $value->attr($settings->attribute))
|
||||||
{
|
{
|
||||||
//Make relative links absolute
|
//Make relative links absolute
|
||||||
if (!parse_url($url, PHP_URL_HOST))
|
$url = \Yggverse\YoTools\Link::relative2absolute(
|
||||||
{
|
$document->get('url'),
|
||||||
$url = $scheme . '://' . $host . ($port ? ':' . $port : null) .
|
$url,
|
||||||
'/' .
|
$scheme,
|
||||||
trim(
|
$host,
|
||||||
ltrim(
|
$port,
|
||||||
str_replace(
|
);
|
||||||
[
|
|
||||||
'./',
|
|
||||||
'../'
|
|
||||||
],
|
|
||||||
'',
|
|
||||||
$url
|
|
||||||
),
|
|
||||||
'/'
|
|
||||||
),
|
|
||||||
'.'
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Regex rules
|
// Regex rules
|
||||||
if (!preg_match($settings->regex, $url))
|
if (!preg_match($settings->regex, $url))
|
||||||
@ -834,7 +787,7 @@ foreach($index->search('')
|
|||||||
@mkdir($filepath, 0755, true);
|
@mkdir($filepath, 0755, true);
|
||||||
|
|
||||||
// Check latest snap older than defined in settings
|
// Check latest snap older than defined in settings
|
||||||
if (time() - getLastSnapTime((array) scandir($filepath)) > $config->cli->document->crawl->snap->timeout)
|
if (time() - \Yggverse\YoTools\Snap::getTimeLast((array) scandir($filepath)) > $config->cli->document->crawl->snap->timeout)
|
||||||
{
|
{
|
||||||
$filename = sprintf(
|
$filename = sprintf(
|
||||||
'%s/%s',
|
'%s/%s',
|
||||||
@ -1003,7 +956,7 @@ foreach($index->search('')
|
|||||||
);
|
);
|
||||||
|
|
||||||
// Check latest snap older than defined in settings
|
// Check latest snap older than defined in settings
|
||||||
if (time() - getLastSnapTime((array) $remote->nlist($filepath)) > $config->cli->document->crawl->snap->timeout)
|
if (time() - \Yggverse\YoTools\Snap::getTimeLast((array) $remote->nlist($filepath)) > $config->cli->document->crawl->snap->timeout)
|
||||||
{
|
{
|
||||||
if ($remote->copy($tmp, $filename))
|
if ($remote->copy($tmp, $filename))
|
||||||
{
|
{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user