Browse Source

add debug levels

main
ghost 1 year ago
parent
commit
87ca594860
  1. 9
      example/config.json
  2. 302
      src/cli/document/crawl.php

9
example/config.json

@ -102,6 +102,15 @@ @@ -102,6 +102,15 @@
{
"crawl":
{
"debug":
{
"level":
{
"notice":true,
"warning":true,
"error":true
}
},
"curl":
{
"connection":

302
src/cli/document/crawl.php

@ -1,5 +1,18 @@ @@ -1,5 +1,18 @@
<?php
// Debug
$microtime = microtime(true);
// Load dependencies
require_once __DIR__ . '/../../../vendor/autoload.php';
// Init config
$config = json_decode(
file_get_contents(
__DIR__ . '/../../../config.json'
)
);
// Prevent multi-thread execution
$semaphore = sem_get(
crc32(
@ -10,18 +23,16 @@ $semaphore = sem_get( @@ -10,18 +23,16 @@ $semaphore = sem_get(
if (false === sem_acquire($semaphore, true))
{
exit ('process execution locked by another thread!' . PHP_EOL);
}
// Load dependencies
require_once __DIR__ . '/../../../vendor/autoload.php';
if ($config->cli->document->crawl->debug->level->warning)
{
echo sprintf(
_('[%s] [warning] process execution locked by another thread!') . PHP_EOL,
date('c')
);
}
// Init config
$config = json_decode(
file_get_contents(
__DIR__ . '/../../../config.json'
)
);
exit;
}
// Set global options
define(
@ -30,40 +41,69 @@ define( @@ -30,40 +41,69 @@ define(
);
// Init client
$client = new \Manticoresearch\Client(
[
'host' => $config->manticore->server->host,
'port' => $config->manticore->server->port,
]
);
try {
// Init search
$search = new \Manticoresearch\Search(
$client
);
$client = new \Manticoresearch\Client(
[
'host' => $config->manticore->server->host,
'port' => $config->manticore->server->port,
]
);
$search->setIndex(
$config->manticore->index->document->name
);
// Init search
$search = new \Manticoresearch\Search(
$client
);
$search->match(
'*',
'url'
);
$search->setIndex(
$config->manticore->index->document->name
);
$search->sort(
'time',
'asc'
);
$search->match(
'*',
'url'
);
$search->limit(
$config->cli->document->crawl->queue->limit
);
$search->sort(
'time',
'asc'
);
// Init index
$index = $client->index(
$config->manticore->index->document->name
);
$search->limit(
$config->cli->document->crawl->queue->limit
);
// Init index
$index = $client->index(
$config->manticore->index->document->name
);
}
catch (Exception $exception)
{
if ($config->cli->document->crawl->debug->level->error)
{
echo sprintf(
_('[%s] [error] %s') . PHP_EOL,
date('c'),
print_r(
$exception,
true
)
);
}
exit;
}
// Debug totals
if ($config->cli->document->crawl->debug->level->notice)
{
echo sprintf(
_('[%s] [notice] crawl queue begin...') . PHP_EOL,
date('c')
);
}
// Begin queue
foreach($search->get() as $document)
@ -86,7 +126,8 @@ foreach($search->get() as $document) @@ -86,7 +126,8 @@ foreach($search->get() as $document)
// Debug target
echo sprintf(
'index "%s" in "%s"' . PHP_EOL,
_('[%s] index "%s" in "%s"') . PHP_EOL,
date('c'),
$document->get('url'),
$config->manticore->index->document->name
);
@ -296,14 +337,18 @@ foreach($search->get() as $document) @@ -296,14 +337,18 @@ foreach($search->get() as $document)
if ($skip)
{
echo sprintf(
'skip "%s" by stripos condition "%s"' . PHP_EOL,
$url,
print_r(
$config->cli->document->crawl->skip->stripos->url,
true
)
);
if ($config->cli->document->crawl->debug->level->notice)
{
echo sprintf(
_('[%s] [notice] skip "%s" by stripos condition "%s"') . PHP_EOL,
date('c'),
$url,
print_r(
$config->cli->document->crawl->skip->stripos->url,
true
)
);
}
continue;
}
@ -325,11 +370,15 @@ foreach($search->get() as $document) @@ -325,11 +370,15 @@ foreach($search->get() as $document)
]
);
echo sprintf(
'add "%s" to "%s"' . PHP_EOL,
$url,
$config->manticore->index->document->name
);
if ($config->cli->document->crawl->debug->level->notice)
{
echo sprintf(
_('[%s] [notice] add "%s" to "%s"') . PHP_EOL,
date('c'),
$url,
$config->manticore->index->document->name
);
}
}
}
}
@ -343,18 +392,22 @@ foreach($search->get() as $document) @@ -343,18 +392,22 @@ foreach($search->get() as $document)
);
// Debug result
echo sprintf(
'index "%s" updated: %s %s' . PHP_EOL,
$config->manticore->index->document->name,
print_r(
$result,
true
),
print_r(
$data,
true
),
);
// Report the index update result at the notice debug level.
// The message is passed through _() like every other log line in this script,
// so that it takes part in gettext translation consistently.
if ($config->cli->document->crawl->debug->level->notice)
{
    echo sprintf(
        _('[%s] [notice] index "%s" updated: %s %s') . PHP_EOL,
        date('c'),
        $config->manticore->index->document->name,
        print_r(
            $result,
            true
        ),
        print_r(
            $data,
            true
        ),
    );
}
// Create snap
if ($config->cli->document->crawl->snap->enabled && $code === 200)
@ -477,10 +530,18 @@ foreach($search->get() as $document) @@ -477,10 +530,18 @@ foreach($search->get() as $document)
)
);
copy(
$tmp,
$filename
);
// Copy the temporary snap file to its local destination.
// copy() returns false on failure; that case is reported at the error debug level.
if (!copy($tmp, $filename))
{
    if ($config->cli->document->crawl->debug->level->error)
    {
        // Both paths need a complete "%s" conversion: a bare "%" followed by a quote
        // is not a valid sprintf specifier, so the paths were never printed as intended.
        echo sprintf(
            _('[%s] [error] could not copy "%s" to "%s" on local storage') . PHP_EOL,
            date('c'),
            $tmp,
            $filename
        );
    }
}
}
}
@ -557,10 +618,19 @@ foreach($search->get() as $document) @@ -557,10 +618,19 @@ foreach($search->get() as $document)
true
);
$remote->copy(
$tmp,
$filename
);
// Copy the temporary snap file to the remote storage.
// A falsy result from $remote->copy() is reported at the error debug level.
if (!$remote->copy($tmp, $filename))
{
    if ($config->cli->document->crawl->debug->level->error)
    {
        // Both paths need a complete "%s" conversion: a bare "%" followed by a quote
        // is not a valid sprintf specifier, so the arguments did not line up with the message.
        echo sprintf(
            _('[%s] [error] could not copy "%s" to "%s" on destination "%s"') . PHP_EOL,
            date('c'),
            $tmp,
            $filename,
            $ftp->connection->host,
        );
    }
}
$remote->close();
@ -574,15 +644,28 @@ foreach($search->get() as $document) @@ -574,15 +644,28 @@ foreach($search->get() as $document)
}
// Log event
echo sprintf(
_('[attempt: %s] wait for remote storage "%s" reconnection...') . PHP_EOL,
$attempt++,
$ftp->connection->host,
);
// Report the reconnection attempt at the warning debug level
if ($config->cli->document->crawl->debug->level->warning)
{
    echo sprintf(
        _('[%s] [warning] attempt: %s, wait for remote storage "%s" reconnection...') . PHP_EOL,
        date('c'),
        $attempt,
        $ftp->connection->host,
    );
}

// Advance the attempt counter regardless of the debug level: incrementing it only
// inside the logging branch would freeze the counter whenever warnings are disabled.
// NOTE(review): the loop that consumes $attempt is outside this view — confirm it is
// not incremented elsewhere as well.
$attempt++;
// Delay next attempt
if ($ftp->connection->attempts->delay)
{
if ($config->cli->document->crawl->debug->level->warning)
{
echo sprintf(
_('[%s] [warning] pending %s seconds to reconnect...') . PHP_EOL,
date('c'),
$ftp->connection->attempts->delay
);
}
sleep(
$ftp->connection->attempts->delay
);
@ -593,25 +676,72 @@ foreach($search->get() as $document) @@ -593,25 +676,72 @@ foreach($search->get() as $document)
}
// Remove tmp data
@unlink(
$tmp
);
if (unlink($tmp))
{
if ($config->cli->document->crawl->debug->level->notice)
{
echo sprintf(
_('[%s] [notice] remove tmp snap file %s') . PHP_EOL,
date('c'),
$tmp
);
}
}
else
{
if ($config->cli->document->crawl->debug->level->error)
{
echo sprintf(
_('[%s] [error] could not remove tmp snap file %s') . PHP_EOL,
date('c'),
$tmp
);
}
}
}
catch (Exception $exception)
{
var_dump(
$exception
);
if ($config->cli->document->crawl->debug->level->error)
{
echo sprintf(
_('[%s] [error] %s') . PHP_EOL,
date('c'),
print_r(
$exception,
true
)
);
}
}
}
}
// Crawl queue delay
if ($config->cli->document->crawl->queue->limit)
if ($config->cli->document->crawl->queue->delay)
{
if ($config->cli->document->crawl->debug->level->notice)
{
echo sprintf(
_('[%s] [notice] pending %s seconds...') . PHP_EOL,
date('c'),
$config->cli->document->crawl->queue->delay
);
}
sleep(
$config->cli->document->crawl->queue->limit
$config->cli->document->crawl->queue->delay
);
}
// Debug totals
if ($config->cli->document->crawl->debug->level->notice)
{
echo sprintf(
_('[%s] [notice] crawl queue completed in %s') . PHP_EOL,
date('c'),
microtime(true) - $microtime
);
}
}
Loading…
Cancel
Save