mirror of
https://github.com/YGGverse/Yo.git
synced 2025-02-05 11:24:20 +00:00
add skip url filter by stripos condition
This commit is contained in:
parent
ee074b684a
commit
3306dc1961
@ -99,6 +99,20 @@
|
|||||||
"regex":"/.*/ui"
|
"regex":"/.*/ui"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"skip":
|
||||||
|
{
|
||||||
|
"stripos":
|
||||||
|
{
|
||||||
|
"url":
|
||||||
|
[
|
||||||
|
"#",
|
||||||
|
"javascript:",
|
||||||
|
"mailto:",
|
||||||
|
"magnet:",
|
||||||
|
"xmpp:"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
"snap":
|
"snap":
|
||||||
{
|
{
|
||||||
"enabled":true
|
"enabled":true
|
||||||
|
@ -79,6 +79,8 @@ foreach ($delete as $crc32url => $ids)
|
|||||||
// Free mem
|
// Free mem
|
||||||
$delete = [];
|
$delete = [];
|
||||||
|
|
||||||
|
// @TODO $config->cli->document->crawl->skip->stripos->url
|
||||||
|
|
||||||
// Dump operation result
|
// Dump operation result
|
||||||
echo sprintf(
|
echo sprintf(
|
||||||
_('duplicated URLs deleted: %s') . PHP_EOL,
|
_('duplicated URLs deleted: %s') . PHP_EOL,
|
||||||
|
@ -298,6 +298,22 @@ foreach($search->get() as $document)
|
|||||||
{
|
{
|
||||||
foreach (array_unique($documents) as $url)
|
foreach (array_unique($documents) as $url)
|
||||||
{
|
{
|
||||||
|
// Apply stripos condition
|
||||||
|
$skip = false;
|
||||||
|
|
||||||
|
foreach ($config->cli->document->crawl->skip->stripos->url as $condition)
|
||||||
|
{
|
||||||
|
if (false !== stripos($url, $condition)) {
|
||||||
|
|
||||||
|
$skip = true;
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($skip) continue;
|
||||||
|
|
||||||
|
// Save index
|
||||||
$url = trim($url);
|
$url = trim($url);
|
||||||
$crc32url = crc32($url);
|
$crc32url = crc32($url);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user