Browse Source

add stripos url rules for crawl snaps

main
ghost 1 year ago
parent
commit
35ad144a9e
  1. 66
      example/config.json
  2. 60
      src/cli/document/crawl.php

66
example/config.json

@@ -192,24 +192,34 @@
"directory":"storage/tmp/snap"
},
"local":{
"enabled":true,
"enabled":false,
"directory":"storage/snap",
"size":
{
"max":10000024
},
"mime":
[
"application/xhtml+xml",
"application/javascript",
"text/html",
"text/plain",
"text/css",
"image/webp",
"image/png",
"image/gif",
"image/ico"
]
{
"stripos":
[
"application/xhtml+xml",
"application/javascript",
"text/html",
"text/plain",
"text/css",
"image/webp",
"image/png",
"image/gif",
"image/ico"
]
},
"url":
{
"stripos":
[
"http"
]
}
},
"remote":
{
@@ -237,17 +247,27 @@
"max":10000024
},
"mime":
[
"application/xhtml+xml",
"application/javascript",
"text/html",
"text/plain",
"text/css",
"image/webp",
"image/png",
"image/gif",
"image/ico"
]
{
"stripos":
[
"application/xhtml+xml",
"application/javascript",
"text/html",
"text/plain",
"text/css",
"image/webp",
"image/png",
"image/gif",
"image/ico"
]
},
"url":
{
"stripos":
[
"http"
]
}
}
]
}

60
src/cli/document/crawl.php

@@ -479,10 +479,10 @@ foreach($search->get() as $document)
// Copy to local storage on enabled
if ($config->snap->storage->local->enabled)
{
// Check for mime allowed
$allowed = false;
// Check for mime allowed
foreach ($config->snap->storage->local->mime as $whitelist)
foreach ($config->snap->storage->local->mime->stripos as $whitelist)
{
if (false !== stripos($mime, $whitelist))
{
@@ -491,10 +491,30 @@ foreach($search->get() as $document)
}
}
// Check size limits
if ($size > $config->snap->storage->local->size->max)
// Check for url allowed
if ($allowed)
{
$allowed = false;
foreach ($config->snap->storage->local->url->stripos as $whitelist)
{
if (false !== stripos($document->get('url'), $whitelist))
{
$allowed = true;
break;
}
}
// Check size limits
if ($allowed)
{
$allowed = false;
if ($size <= $config->snap->storage->local->size->max)
{
$allowed = true;
}
}
}
// Copy snap to the permanent storage
@@ -558,10 +578,10 @@ foreach($search->get() as $document)
continue;
}
// Check for mime allowed
$allowed = false;
// Check for mime allowed
foreach ($ftp->mime as $whitelist)
foreach ($ftp->mime->stripos as $whitelist)
{
if (false !== stripos($mime, $whitelist))
{
@@ -570,10 +590,34 @@ foreach($search->get() as $document)
}
}
if (!$allowed)
{
continue;
}
// Check for url allowed
$allowed = false;
foreach ($ftp->url->stripos as $whitelist)
{
if (false !== stripos($document->get('url'), $whitelist))
{
$allowed = true;
break;
}
}
if (!$allowed)
{
continue;
}
// Check size limits
if ($size > $ftp->size->max)
$allowed = false;
if ($size <= $ftp->size->max)
{
$allowed = false;
$allowed = true;
}
if (!$allowed)

Loading…
Cancel
Save