mirror of
https://github.com/YGGverse/Yo.git
synced 2025-02-05 03:14:33 +00:00
Add URL skip filter based on stripos conditions
This commit is contained in:
parent
ee074b684a
commit
3306dc1961
@ -99,6 +99,20 @@
|
||||
"regex":"/.*/ui"
|
||||
}
|
||||
},
|
||||
"skip":
|
||||
{
|
||||
"stripos":
|
||||
{
|
||||
"url":
|
||||
[
|
||||
"#",
|
||||
"javascript:",
|
||||
"mailto:",
|
||||
"magnet:",
|
||||
"xmpp:"
|
||||
]
|
||||
}
|
||||
},
|
||||
"snap":
|
||||
{
|
||||
"enabled":true
|
||||
|
@ -79,6 +79,8 @@ foreach ($delete as $crc32url => $ids)
|
||||
// Free mem
|
||||
$delete = [];
|
||||
|
||||
// @TODO handle $config->cli->document->crawl->skip->stripos->url here (not implemented yet)
|
||||
|
||||
// Dump operation result
|
||||
echo sprintf(
|
||||
_('duplicated URLs deleted: %s') . PHP_EOL,
|
||||
|
@ -298,6 +298,22 @@ foreach($search->get() as $document)
|
||||
{
|
||||
foreach (array_unique($documents) as $url)
|
||||
{
|
||||
// Skip this URL if it contains any configured substring (case-insensitive stripos match)
|
||||
$skip = false;
|
||||
|
||||
foreach ($config->cli->document->crawl->skip->stripos->url as $condition)
|
||||
{
|
||||
if (false !== stripos($url, $condition)) {
|
||||
|
||||
$skip = true;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ($skip) continue;
|
||||
|
||||
// Save index
|
||||
$url = trim($url);
|
||||
$crc32url = crc32($url);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user