Browse Source

add crc32url filter

main
ghost 1 year ago
parent
commit
dfb2c06738
  1. 14
      src/cli/document/add.php
  2. 7
      src/cli/document/crawl.php
  3. 4
      src/cli/index/init.php
  4. 44
      src/webui/search.php

14
src/cli/document/add.php

@@ -23,8 +23,13 @@ $index = $client->index(
$config->manticore->index->document->name
);
// Prepare URL
$url = trim($argv[1]);
$crc32url = crc32($url);
// Check URL for exist
$result = $index->search('@url "' . trim($argv[1]) . '"')
$result = $index->search('@url "' . $url . '"')
->filter('crc32url', $crc32url)
->limit(1)
->get();
@@ -32,7 +37,7 @@ if ($result->getTotal())
{
echo sprintf(
'URL "%s" already exists in "%s" index!' . PHP_EOL,
$argv[1],
$url,
$config->manticore->index->document->name
);
@@ -42,13 +47,14 @@ if ($result->getTotal())
// Add
$result = $index->addDocument(
[
'url' => trim($argv[1])
'url' => $url,
'crc32url' => $crc32url
]
);
echo sprintf(
'URL "%s" added to "%s" index: %s' . PHP_EOL,
$argv[1],
$url,
$config->manticore->index->document->name,
print_r(
$result,

7
src/cli/document/crawl.php

@@ -252,16 +252,19 @@ foreach($search->get() as $document)
{
foreach (array_unique($documents) as $url)
{
$url = trim($url);
$url = trim($url);
$crc32url = crc32($url);
if (!$index->search('@url "' . $url . '"')
->filter('crc32url', $crc32url)
->limit(1)
->get()
->getTotal())
{
$index->addDocument(
[
'url' => $url
'url' => $url,
'crc32url' => $crc32url
]
);

4
src/cli/index/init.php

@@ -79,6 +79,10 @@ $result = $index->create(
'time' =>
[
'type' => 'integer'
],
'crc32url' =>
[
'type' => 'bigint'
]
],
(array) $config->manticore->index->document->settings

44
src/webui/search.php

@@ -69,7 +69,7 @@ $placeholder = plural(
$response = false;
// Request
$q = !empty($_GET['q']) ? $_GET['q'] : '';
$q = !empty($_GET['q']) ? trim($_GET['q']) : '';
$p = !empty($_GET['p']) ? (int) $_GET['p'] : 1;
// Register new URL by request on enabled
@@ -77,20 +77,23 @@ if ($config->webui->search->index->request->url->enabled)
{
if (filter_var($q, FILTER_VALIDATE_URL) && preg_match($config->webui->search->index->request->url->regex, $q))
{
$url = trim($q);
// Prepare URL
$url = $q;
$crc32url = crc32($url);
// Check URL for exist
$exist = $index->search('@url "' . trim($url) . '"')
->limit(1)
->get()
->getTotal();
$exist = $index->search('@url "' . $url . '"')
->filter('crc32url', $crc32url)
->limit(1)
->get()
->getTotal();
if ($exist)
{
/* disable as regular search request possible
$response = sprintf(
_('URL "%s" exists in search index'),
htmlentities($url)
htmlentities($q)
);
*/
}
@@ -98,36 +101,39 @@ if ($config->webui->search->index->request->url->enabled)
// Add URL
else
{
// @TODO check http code
$index->addDocument(
[
'url' => trim($url)
'url' => $url,
'crc32url' => $crc32url
]
);
$response = sprintf(
_('URL "%s" added to the crawl queue!'),
htmlentities($url)
htmlentities($q)
);
}
}
}
// Extended syntax corrections
$query = trim($q);
if (filter_var($q, FILTER_VALIDATE_URL))
switch (true)
{
$query = '@url "' . $q . '"';
}
case filter_var($q, FILTER_VALIDATE_URL):
elseif (false === strpos($q, '"'))
{
$query = '"' . $q . '"';
$query = $index->search('@url "' . $q . '"')->filter('crc32url', crc32($q));
break;
default:
$query = $index->search($q);
}
// Search request begin
$results = $index->search($query)
->offset($p * $config->webui->pagination->limit - $config->webui->pagination->limit)
$results = $query->offset($p * $config->webui->pagination->limit - $config->webui->pagination->limit)
->limit($config->webui->pagination->limit)
->get();

Loading…
Cancel
Save