Yo/src/cli/document/clean.php

76 lines
1.6 KiB
PHP
Raw Normal View History

2023-11-27 19:29:17 +02:00
<?php
2023-11-27 19:31:03 +02:00
// Prevent multi-thread execution
// The semaphore key is derived from this script's directory plus a fixed suffix;
// max_acquire = 1 means only one process can hold it at a time
$semaphore = sem_get(
    crc32(
        __DIR__ . '.yo.cli.document.clean'
    ),
    1
);

// sem_get() returns false on failure; stop here instead of handing false to sem_acquire()
if (false === $semaphore)
{
    exit ('could not initialize process lock!' . PHP_EOL);
}

// Non-blocking acquire: if the semaphore is already held, quit instead of waiting for it
if (false === sem_acquire($semaphore, true))
{
    exit ('process execution locked by another thread!' . PHP_EOL);
}
2023-11-27 19:29:17 +02:00
// Load dependencies
require_once __DIR__ . '/../../../vendor/autoload.php';

// Init config
// Read and decode config.json in separate steps so that a missing or malformed file
// stops the script with a clear message instead of failing later on a null $config
$configFile = __DIR__ . '/../../../config.json';

$configJson = file_get_contents($configFile);

if (false === $configJson)
{
    exit (
        sprintf(
            'could not read config file "%s"!',
            $configFile
        ) . PHP_EOL
    );
}

// Decoded as an object (no associative flag): the code below reads it with -> access
$config = json_decode($configJson);

if (!is_object($config))
{
    exit (
        sprintf(
            'could not decode config file "%s" into an object (%s)!',
            $configFile,
            json_last_error_msg()
        ) . PHP_EOL
    );
}

// Init client
$client = new \Manticoresearch\Client(
    [
        'host' => $config->manticore->server->host,
        'port' => $config->manticore->server->port,
    ]
);

// Init index
$index = $client->index(
    $config->manticore->index->document->name
);
// Apply new configuration rules
2024-03-20 22:41:12 +02:00
/*
@TODO this approach removes some unrelated records; a proper solution is still wanted:
https://github.com/manticoresoftware/manticoresearch-php/discussions/196
echo _('apply new configuration rules...') . PHP_EOL;
foreach ($config->cli->document->crawl->skip->stripos->url as $condition)
{
echo sprintf(
_('cleanup documents with url that contain substring "%s"...') . PHP_EOL,
$condition
);
$result = $index->deleteDocuments(
2024-03-20 22:35:33 +02:00
[
'match' =>
[
'url' => $condition
]
]
);
echo sprintf(
_('documents deleted: %d') . PHP_EOL,
$result['deleted']
);
}
echo _('new configuration rules apply completed.') . PHP_EOL;
2024-03-20 22:41:12 +02:00
*/
2023-11-27 19:29:17 +02:00
// Optimize indexes
// Progress messages are passed through gettext's _() and printed before and after the call
echo _('indexes optimization begin...'), PHP_EOL;

$index->optimize();

echo _('indexes optimization completed.'), PHP_EOL;