diff --git a/example/config.json b/example/config.json index 36a44fd..f4dc7bb 100644 --- a/example/config.json +++ b/example/config.json @@ -192,24 +192,34 @@ "directory":"storage/tmp/snap" }, "local":{ - "enabled":true, + "enabled":false, "directory":"storage/snap", "size": { "max":10000024 }, "mime": - [ - "application/xhtml+xml", - "application/javascript", - "text/html", - "text/plain", - "text/css", - "image/webp", - "image/png", - "image/gif", - "image/ico" - ] + { + "stripos": + [ + "application/xhtml+xml", + "application/javascript", + "text/html", + "text/plain", + "text/css", + "image/webp", + "image/png", + "image/gif", + "image/ico" + ] + }, + "url": + { + "stripos": + [ + "http" + ] + } }, "remote": { @@ -237,17 +247,27 @@ "max":10000024 }, "mime": - [ - "application/xhtml+xml", - "application/javascript", - "text/html", - "text/plain", - "text/css", - "image/webp", - "image/png", - "image/gif", - "image/ico" - ] + { + "stripos": + [ + "application/xhtml+xml", + "application/javascript", + "text/html", + "text/plain", + "text/css", + "image/webp", + "image/png", + "image/gif", + "image/ico" + ] + }, + "url": + { + "stripos": + [ + "http" + ] + } } ] } diff --git a/src/cli/document/crawl.php b/src/cli/document/crawl.php index d368366..471c86a 100644 --- a/src/cli/document/crawl.php +++ b/src/cli/document/crawl.php @@ -479,10 +479,10 @@ foreach($search->get() as $document) // Copy to local storage on enabled if ($config->snap->storage->local->enabled) { + // Check for mime allowed $allowed = false; - // Check for mime allowed - foreach ($config->snap->storage->local->mime as $whitelist) + foreach ($config->snap->storage->local->mime->stripos as $whitelist) { if (false !== stripos($mime, $whitelist)) { @@ -491,10 +491,30 @@ foreach($search->get() as $document) } } - // Check size limits - if ($size > $config->snap->storage->local->size->max) + // Check for url allowed + if ($allowed) { $allowed = false; + + foreach ($config->snap->storage->local->url->stripos as $whitelist) + { + if (false !== stripos($document->get('url'), $whitelist)) + { + $allowed = true; + break; + } + } + + // Check size limits + if ($allowed) + { + $allowed = false; + + if ($size <= $config->snap->storage->local->size->max) + { + $allowed = true; + } + } } // Copy snap to the permanent storage @@ -558,10 +578,10 @@ foreach($search->get() as $document) continue; } + // Check for mime allowed $allowed = false; - // Check for mime allowed - foreach ($ftp->mime as $whitelist) + foreach ($ftp->mime->stripos as $whitelist) { if (false !== stripos($mime, $whitelist)) { @@ -570,10 +590,34 @@ foreach($search->get() as $document) } } + if (!$allowed) + { + continue; + } + + // Check for url allowed + $allowed = false; + + foreach ($ftp->url->stripos as $whitelist) + { + if (false !== stripos($document->get('url'), $whitelist)) + { + $allowed = true; + break; + } + } + + if (!$allowed) + { + continue; + } + // Check size limits - if ($size > $ftp->size->max) + $allowed = false; + + if ($size <= $ftp->size->max) { - $allowed = false; + $allowed = true; } if (!$allowed)