Browse Source

add collision events debug

main
yggverse 2 months ago
parent
commit
27564c4fbc
  1. 57
      src/cli/document/crawl.php

57
src/cli/document/crawl.php

@ -603,19 +603,22 @@ foreach($index->search('') @@ -603,19 +603,22 @@ foreach($index->search('')
}
// Save index
$url = trim($url);
$crc32url = crc32($url);
if (!$index->search('')
->filter('id', $crc32url)
->limit(1)
->get()
->getTotal())
{
$url = trim(
$url
);
$crc32url = crc32(
$url
);
// Check that the URL is not registered yet
$results = $index->search('')->filter('id', $crc32url)->get();
if (!$results->getTotal())
{
$index->addDocument(
[
'url' => $url
'url' => $url
],
$crc32url
);
@ -630,6 +633,40 @@ foreach($index->search('') @@ -630,6 +633,40 @@ foreach($index->search('')
);
}
}
// URL already exists
else
{
// Print a notice-level message
if ($config->cli->document->crawl->debug->level->notice)
{
echo sprintf(
_('[%s] [notice] URL "%s" already registered with CRC32 "%d"') . PHP_EOL,
date('c'),
$url,
$crc32url
);
}
// Check for event details
foreach ($results as $result)
{
// Is collision
if ($url != $result->get('url'))
{
if ($config->cli->document->crawl->debug->level->warning)
{
echo sprintf(
_('[%s] [warning] ID "%d" collision for target url "%s" stored as "%s"') . PHP_EOL,
date('c'),
$crc32url,
$url,
$result->get('url')
);
}
}
}
}
}
}
}

Loading…
Cancel
Save