Browse Source

add debug levels

main
ghost 12 months ago
parent
commit
87ca594860
  1. 9
      example/config.json
  2. 210
      src/cli/document/crawl.php

9
example/config.json

@ -102,6 +102,15 @@
{ {
"crawl": "crawl":
{ {
"debug":
{
"level":
{
"notice":true,
"warning":true,
"error":true
}
},
"curl": "curl":
{ {
"connection": "connection":

210
src/cli/document/crawl.php

@ -1,5 +1,18 @@
<?php <?php
// Debug
$microtime = microtime(true);
// Load dependencies
require_once __DIR__ . '/../../../vendor/autoload.php';
// Init config
$config = json_decode(
file_get_contents(
__DIR__ . '/../../../config.json'
)
);
// Prevent multi-thread execution // Prevent multi-thread execution
$semaphore = sem_get( $semaphore = sem_get(
crc32( crc32(
@ -10,18 +23,16 @@ $semaphore = sem_get(
if (false === sem_acquire($semaphore, true)) if (false === sem_acquire($semaphore, true))
{ {
exit ('process execution locked by another thread!' . PHP_EOL); if ($config->cli->document->crawl->debug->level->warning)
} {
echo sprintf(
// Load dependencies _('[%s] [warning] process execution locked by another thread!') . PHP_EOL,
require_once __DIR__ . '/../../../vendor/autoload.php'; date('c')
);
}
// Init config exit;
$config = json_decode( }
file_get_contents(
__DIR__ . '/../../../config.json'
)
);
// Set global options // Set global options
define( define(
@ -30,40 +41,69 @@ define(
); );
// Init client // Init client
$client = new \Manticoresearch\Client( try {
$client = new \Manticoresearch\Client(
[ [
'host' => $config->manticore->server->host, 'host' => $config->manticore->server->host,
'port' => $config->manticore->server->port, 'port' => $config->manticore->server->port,
] ]
); );
// Init search // Init search
$search = new \Manticoresearch\Search( $search = new \Manticoresearch\Search(
$client $client
); );
$search->setIndex( $search->setIndex(
$config->manticore->index->document->name $config->manticore->index->document->name
); );
$search->match( $search->match(
'*', '*',
'url' 'url'
); );
$search->sort( $search->sort(
'time', 'time',
'asc' 'asc'
); );
$search->limit( $search->limit(
$config->cli->document->crawl->queue->limit $config->cli->document->crawl->queue->limit
); );
// Init index // Init index
$index = $client->index( $index = $client->index(
$config->manticore->index->document->name $config->manticore->index->document->name
); );
}
catch (Exception $exception)
{
if ($config->cli->document->crawl->debug->level->error)
{
echo sprintf(
_('[%s] [error] %s') . PHP_EOL,
date('c'),
print_r(
$exception,
true
)
);
}
exit;
}
// Debug totals
if ($config->cli->document->crawl->debug->level->notice)
{
echo sprintf(
_('[%s] [notice] crawl queue begin...') . PHP_EOL,
date('c')
);
}
// Begin queue // Begin queue
foreach($search->get() as $document) foreach($search->get() as $document)
@ -86,7 +126,8 @@ foreach($search->get() as $document)
// Debug target // Debug target
echo sprintf( echo sprintf(
'index "%s" in "%s"' . PHP_EOL, _('[%s] index "%s" in "%s"') . PHP_EOL,
date('c'),
$document->get('url'), $document->get('url'),
$config->manticore->index->document->name $config->manticore->index->document->name
); );
@ -295,15 +336,19 @@ foreach($search->get() as $document)
} }
if ($skip) if ($skip)
{
if ($config->cli->document->crawl->debug->level->notice)
{ {
echo sprintf( echo sprintf(
'skip "%s" by stripos condition "%s"' . PHP_EOL, _('[%s] [notice] skip "%s" by stripos condition "%s"') . PHP_EOL,
date('c'),
$url, $url,
print_r( print_r(
$config->cli->document->crawl->skip->stripos->url, $config->cli->document->crawl->skip->stripos->url,
true true
) )
); );
}
continue; continue;
} }
@ -325,8 +370,11 @@ foreach($search->get() as $document)
] ]
); );
if ($config->cli->document->crawl->debug->level->notice)
{
echo sprintf( echo sprintf(
'add "%s" to "%s"' . PHP_EOL, _('[%s] [notice] add "%s" to "%s"') . PHP_EOL,
date('c'),
$url, $url,
$config->manticore->index->document->name $config->manticore->index->document->name
); );
@ -334,6 +382,7 @@ foreach($search->get() as $document)
} }
} }
} }
}
// Replace document data // Replace document data
// https://github.com/manticoresoftware/manticoresearch-php/issues/10#issuecomment-612685916 // https://github.com/manticoresoftware/manticoresearch-php/issues/10#issuecomment-612685916
@ -343,8 +392,11 @@ foreach($search->get() as $document)
); );
// Debug result // Debug result
if ($config->cli->document->crawl->debug->level->notice)
{
echo sprintf( echo sprintf(
'index "%s" updated: %s %s' . PHP_EOL, '[%s] [notice] index "%s" updated: %s %s' . PHP_EOL,
date('c'),
$config->manticore->index->document->name, $config->manticore->index->document->name,
print_r( print_r(
$result, $result,
@ -355,6 +407,7 @@ foreach($search->get() as $document)
true true
), ),
); );
}
// Create snap // Create snap
if ($config->cli->document->crawl->snap->enabled && $code === 200) if ($config->cli->document->crawl->snap->enabled && $code === 200)
@ -477,12 +530,20 @@ foreach($search->get() as $document)
) )
); );
copy( if (!copy($tmp, $filename))
{
if ($config->cli->document->crawl->debug->level->error)
{
echo sprintf(
_('[%s] [error] could not copy "%s" to "%s" on local storage') . PHP_EOL,
date('c'),
$tmp, $tmp,
$filename $filename
); );
} }
} }
}
}
// Copy to FTP storage on enabled // Copy to FTP storage on enabled
foreach ($config->snap->storage->remote->ftp as $ftp) foreach ($config->snap->storage->remote->ftp as $ftp)
@ -557,10 +618,19 @@ foreach($search->get() as $document)
true true
); );
$remote->copy( if (!$remote->copy($tmp, $filename))
{
if ($config->cli->document->crawl->debug->level->error)
{
echo sprintf(
_('[%s] [error] could not copy "%s" to "%s" on destination "%s"') . PHP_EOL,
date('c'),
$tmp, $tmp,
$filename $filename,
$ftp->connection->host,
); );
}
}
$remote->close(); $remote->close();
@ -574,15 +644,28 @@ foreach($search->get() as $document)
} }
// Log event // Log event
if ($config->cli->document->crawl->debug->level->warning)
{
echo sprintf( echo sprintf(
_('[attempt: %s] wait for remote storage "%s" reconnection...') . PHP_EOL, _('[%s] [warning] attempt: %s, wait for remote storage "%s" reconnection...') . PHP_EOL,
date('c'),
$attempt++, $attempt++,
$ftp->connection->host, $ftp->connection->host,
); );
}
// Delay next attempt // Delay next attempt
if ($ftp->connection->attempts->delay) if ($ftp->connection->attempts->delay)
{ {
if ($config->cli->document->crawl->debug->level->warning)
{
echo sprintf(
_('[%s] [warning] pending %s seconds to reconnect...') . PHP_EOL,
date('c'),
$ftp->connection->attempts->delay
);
}
sleep( sleep(
$ftp->connection->attempts->delay $ftp->connection->attempts->delay
); );
@ -593,25 +676,72 @@ foreach($search->get() as $document)
} }
// Remove tmp data // Remove tmp data
@unlink( if (unlink($tmp))
{
if ($config->cli->document->crawl->debug->level->notice)
{
echo sprintf(
_('[%s] [notice] remove tmp snap file %s') . PHP_EOL,
date('c'),
$tmp $tmp
); );
} }
}
else
{
if ($config->cli->document->crawl->debug->level->error)
{
echo sprintf(
_('[%s] [error] could not remove tmp snap file %s') . PHP_EOL,
date('c'),
$tmp
);
}
}
}
catch (Exception $exception) catch (Exception $exception)
{ {
var_dump( if ($config->cli->document->crawl->debug->level->error)
$exception {
echo sprintf(
_('[%s] [error] %s') . PHP_EOL,
date('c'),
print_r(
$exception,
true
)
); );
} }
} }
} }
}
// Crawl queue delay // Crawl queue delay
if ($config->cli->document->crawl->queue->limit) if ($config->cli->document->crawl->queue->delay)
{
if ($config->cli->document->crawl->debug->level->notice)
{ {
echo sprintf(
_('[%s] [notice] pending %s seconds...') . PHP_EOL,
date('c'),
$config->cli->document->crawl->queue->delay
);
}
sleep( sleep(
$config->cli->document->crawl->queue->limit $config->cli->document->crawl->queue->delay
);
}
// Debug totals
if ($config->cli->document->crawl->debug->level->notice)
{
echo sprintf(
_('[%s] [notice] crawl queue completed in %s') . PHP_EOL,
date('c'),
microtime(true) - $microtime
); );
} }
} }
Loading…
Cancel
Save