Browse Source

add collision events debug

main
yggverse 8 months ago
parent
commit
27564c4fbc
  1. 57
      src/cli/document/crawl.php

57
src/cli/document/crawl.php

@ -603,19 +603,22 @@ foreach($index->search('')
} }
// Save index // Save index
$url = trim($url); $url = trim(
$crc32url = crc32($url); $url
);
if (!$index->search('')
->filter('id', $crc32url) $crc32url = crc32(
->limit(1) $url
->get() );
->getTotal())
{
// Check that the URL is not registered yet
$results = $index->search('')->filter('id', $crc32url)->get();
if (!$results->getTotal())
{
$index->addDocument( $index->addDocument(
[ [
'url' => $url 'url' => $url
], ],
$crc32url $crc32url
); );
@ -630,6 +633,40 @@ foreach($index->search('')
); );
} }
} }
// URL already exists
else
{
// Print notice-level message
if ($config->cli->document->crawl->debug->level->notice)
{
echo sprintf(
_('[%s] [notice] URL "%s" already registered with CRC32 "%d"') . PHP_EOL,
date('c'),
$url,
$crc32url
);
}
// Inspect each stored document with this ID for a URL mismatch
foreach ($results as $result)
{
// Stored URL differs from the target URL: ID collision
if ($url != $result->get('url'))
{
if ($config->cli->document->crawl->debug->level->warning)
{
echo sprintf(
_('[%s] [warning] ID "%d" collision for target url "%s" stored as "%s"') . PHP_EOL,
date('c'),
$crc32url,
$url,
$result->get('url')
);
}
}
}
}
} }
} }
} }

Loading…
Cancel
Save