Browse Source

add collision events debug

main
yggverse 8 months ago
parent
commit
27564c4fbc
  1. 53
      src/cli/document/crawl.php

53
src/cli/document/crawl.php

@ -603,16 +603,19 @@ foreach($index->search('')
} }
// Save index // Save index
$url = trim($url); $url = trim(
$crc32url = crc32($url); $url
);
if (!$index->search('') $crc32url = crc32(
->filter('id', $crc32url) $url
->limit(1) );
->get()
->getTotal()) // Check url does not registered yet
{ $results = $index->search('')->filter('id', $crc32url)->get();
if (!$results->getTotal())
{
$index->addDocument( $index->addDocument(
[ [
'url' => $url 'url' => $url
@ -630,6 +633,40 @@ foreach($index->search('')
); );
} }
} }
// URL already exists: the index lookup by CRC32 ID returned at least one document
else
{
    // Print a notice-level message when notice debugging is enabled in the config
    if ($config->cli->document->crawl->debug->level->notice)
    {
        echo sprintf(
            _('[%s] [notice] URL "%s" already registered with CRC32 "%d"') . PHP_EOL,
            date('c'),
            $url,
            $crc32url
        );
    }

    // Check for event details: inspect every document stored under this ID
    foreach ($results as $result)
    {
        // Read the stored URL once; it is used for both the check and the message
        $storedUrl = $result->get('url');

        // Is collision: same CRC32 ID, but the stored URL is a different string.
        // Strict comparison is required here, loose "!=" compares numeric-looking
        // strings by value and could hide a real collision.
        if ($url !== $storedUrl)
        {
            // Print a warning-level message when warning debugging is enabled
            if ($config->cli->document->crawl->debug->level->warning)
            {
                echo sprintf(
                    _('[%s] [warning] ID "%d" collision for target url "%s" stored as "%s"') . PHP_EOL,
                    date('c'),
                    $crc32url,
                    $url,
                    $storedUrl
                );
            }
        }
    }
}
} }
} }
} }

Loading…
Cancel
Save