Browse Source

add crc32url filter

main
ghost 1 year ago
parent
commit
dfb2c06738
  1. 14
      src/cli/document/add.php
  2. 7
      src/cli/document/crawl.php
  3. 4
      src/cli/index/init.php
  4. 44
      src/webui/search.php

14
src/cli/document/add.php

@ -23,8 +23,13 @@ $index = $client->index(
$config->manticore->index->document->name $config->manticore->index->document->name
); );
// Prepare URL
$url = trim($argv[1]);
$crc32url = crc32($url);
// Check URL for exist // Check URL for exist
$result = $index->search('@url "' . trim($argv[1]) . '"') $result = $index->search('@url "' . $url . '"')
->filter('crc32url', $crc32url)
->limit(1) ->limit(1)
->get(); ->get();
@ -32,7 +37,7 @@ if ($result->getTotal())
{ {
echo sprintf( echo sprintf(
'URL "%s" already exists in "%s" index!' . PHP_EOL, 'URL "%s" already exists in "%s" index!' . PHP_EOL,
$argv[1], $url,
$config->manticore->index->document->name $config->manticore->index->document->name
); );
@ -42,13 +47,14 @@ if ($result->getTotal())
// Add // Add
$result = $index->addDocument( $result = $index->addDocument(
[ [
'url' => trim($argv[1]) 'url' => $url,
'crc32url' => $crc32url
] ]
); );
echo sprintf( echo sprintf(
'URL "%s" added to "%s" index: %s' . PHP_EOL, 'URL "%s" added to "%s" index: %s' . PHP_EOL,
$argv[1], $url,
$config->manticore->index->document->name, $config->manticore->index->document->name,
print_r( print_r(
$result, $result,

7
src/cli/document/crawl.php

@ -252,16 +252,19 @@ foreach($search->get() as $document)
{ {
foreach (array_unique($documents) as $url) foreach (array_unique($documents) as $url)
{ {
$url = trim($url); $url = trim($url);
$crc32url = crc32($url);
if (!$index->search('@url "' . $url . '"') if (!$index->search('@url "' . $url . '"')
->filter('crc32url', $crc32url)
->limit(1) ->limit(1)
->get() ->get()
->getTotal()) ->getTotal())
{ {
$index->addDocument( $index->addDocument(
[ [
'url' => $url 'url' => $url,
'crc32url' => $crc32url
] ]
); );

4
src/cli/index/init.php

@ -79,6 +79,10 @@ $result = $index->create(
'time' => 'time' =>
[ [
'type' => 'integer' 'type' => 'integer'
],
'crc32url' =>
[
'type' => 'bigint'
] ]
], ],
(array) $config->manticore->index->document->settings (array) $config->manticore->index->document->settings

44
src/webui/search.php

@ -69,7 +69,7 @@ $placeholder = plural(
$response = false; $response = false;
// Request // Request
$q = !empty($_GET['q']) ? $_GET['q'] : ''; $q = !empty($_GET['q']) ? trim($_GET['q']) : '';
$p = !empty($_GET['p']) ? (int) $_GET['p'] : 1; $p = !empty($_GET['p']) ? (int) $_GET['p'] : 1;
// Register new URL by request on enabled // Register new URL by request on enabled
@ -77,20 +77,23 @@ if ($config->webui->search->index->request->url->enabled)
{ {
if (filter_var($q, FILTER_VALIDATE_URL) && preg_match($config->webui->search->index->request->url->regex, $q)) if (filter_var($q, FILTER_VALIDATE_URL) && preg_match($config->webui->search->index->request->url->regex, $q))
{ {
$url = trim($q); // Prepare URL
$url = $q;
$crc32url = crc32($url);
// Check URL for exist // Check URL for exist
$exist = $index->search('@url "' . trim($url) . '"') $exist = $index->search('@url "' . $url . '"')
->limit(1) ->filter('crc32url', $crc32url)
->get() ->limit(1)
->getTotal(); ->get()
->getTotal();
if ($exist) if ($exist)
{ {
/* disable as regular search request possible /* disable as regular search request possible
$response = sprintf( $response = sprintf(
_('URL "%s" exists in search index'), _('URL "%s" exists in search index'),
htmlentities($url) htmlentities($q)
); );
*/ */
} }
@ -98,36 +101,39 @@ if ($config->webui->search->index->request->url->enabled)
// Add URL // Add URL
else else
{ {
// @TODO check http code
$index->addDocument( $index->addDocument(
[ [
'url' => trim($url) 'url' => $url,
'crc32url' => $crc32url
] ]
); );
$response = sprintf( $response = sprintf(
_('URL "%s" added to the crawl queue!'), _('URL "%s" added to the crawl queue!'),
htmlentities($url) htmlentities($q)
); );
} }
} }
} }
// Extended syntax corrections // Extended syntax corrections
$query = trim($q); switch (true)
if (filter_var($q, FILTER_VALIDATE_URL))
{ {
$query = '@url "' . $q . '"'; case filter_var($q, FILTER_VALIDATE_URL):
}
elseif (false === strpos($q, '"')) $query = $index->search('@url "' . $q . '"')->filter('crc32url', crc32($q));
{
$query = '"' . $q . '"'; break;
default:
$query = $index->search($q);
} }
// Search request begin // Search request begin
$results = $index->search($query) $results = $query->offset($p * $config->webui->pagination->limit - $config->webui->pagination->limit)
->offset($p * $config->webui->pagination->limit - $config->webui->pagination->limit)
->limit($config->webui->pagination->limit) ->limit($config->webui->pagination->limit)
->get(); ->get();

Loading…
Cancel
Save