Browse Source

add debug levels

main
ghost 12 months ago
parent
commit
87ca594860
  1. 9
      example/config.json
  2. 302
      src/cli/document/crawl.php

9
example/config.json

@ -102,6 +102,15 @@
{ {
"crawl": "crawl":
{ {
"debug":
{
"level":
{
"notice":true,
"warning":true,
"error":true
}
},
"curl": "curl":
{ {
"connection": "connection":

302
src/cli/document/crawl.php

@ -1,5 +1,18 @@
<?php <?php
// Debug: remember the start time so the elapsed run time can be reported later
$microtime = microtime(true);

// Load dependencies
require_once __DIR__ . '/../../../vendor/autoload.php';

// Init config: read and decode config.json, stopping early when the file is
// unreadable or does not decode to an object, because every later step
// dereferences $config properties
$configJson = file_get_contents(
    __DIR__ . '/../../../config.json'
);

$config = false === $configJson ? null : json_decode($configJson);

if (!is_object($config))
{
    // Debug levels live inside the config itself, so this error is always printed
    echo sprintf(
        _('[%s] [error] could not load config.json (file is unreadable or is not a JSON object)') . PHP_EOL,
        date('c')
    );

    exit(1);
}
// Prevent multi-thread execution // Prevent multi-thread execution
$semaphore = sem_get( $semaphore = sem_get(
crc32( crc32(
@ -10,18 +23,16 @@ $semaphore = sem_get(
if (false === sem_acquire($semaphore, true)) if (false === sem_acquire($semaphore, true))
{ {
exit ('process execution locked by another thread!' . PHP_EOL); if ($config->cli->document->crawl->debug->level->warning)
} {
echo sprintf(
// Load dependencies _('[%s] [warning] process execution locked by another thread!') . PHP_EOL,
require_once __DIR__ . '/../../../vendor/autoload.php'; date('c')
);
}
// Init config exit;
$config = json_decode( }
file_get_contents(
__DIR__ . '/../../../config.json'
)
);
// Set global options // Set global options
define( define(
@ -30,40 +41,69 @@ define(
); );
// Init client // Init client
$client = new \Manticoresearch\Client( try {
[
'host' => $config->manticore->server->host,
'port' => $config->manticore->server->port,
]
);
// Init search $client = new \Manticoresearch\Client(
$search = new \Manticoresearch\Search( [
$client 'host' => $config->manticore->server->host,
); 'port' => $config->manticore->server->port,
]
);
$search->setIndex( // Init search
$config->manticore->index->document->name $search = new \Manticoresearch\Search(
); $client
);
$search->match( $search->setIndex(
'*', $config->manticore->index->document->name
'url' );
);
$search->sort( $search->match(
'time', '*',
'asc' 'url'
); );
$search->limit( $search->sort(
$config->cli->document->crawl->queue->limit 'time',
); 'asc'
);
// Init index $search->limit(
$index = $client->index( $config->cli->document->crawl->queue->limit
$config->manticore->index->document->name );
);
// Init index
$index = $client->index(
$config->manticore->index->document->name
);
}
catch (Exception $exception)
{
if ($config->cli->document->crawl->debug->level->error)
{
echo sprintf(
_('[%s] [error] %s') . PHP_EOL,
date('c'),
print_r(
$exception,
true
)
);
}
exit;
}
// Announce the start of the crawl queue when notice-level output is enabled
if ($config->cli->document->crawl->debug->level->notice)
{
    printf(
        _('[%s] [notice] crawl queue begin...') . PHP_EOL,
        date('c')
    );
}
// Begin queue // Begin queue
foreach($search->get() as $document) foreach($search->get() as $document)
@ -86,7 +126,8 @@ foreach($search->get() as $document)
// Debug target // Debug target
echo sprintf( echo sprintf(
'index "%s" in "%s"' . PHP_EOL, _('[%s] index "%s" in "%s"') . PHP_EOL,
date('c'),
$document->get('url'), $document->get('url'),
$config->manticore->index->document->name $config->manticore->index->document->name
); );
@ -296,14 +337,18 @@ foreach($search->get() as $document)
if ($skip) if ($skip)
{ {
echo sprintf( if ($config->cli->document->crawl->debug->level->notice)
'skip "%s" by stripos condition "%s"' . PHP_EOL, {
$url, echo sprintf(
print_r( _('[%s] [notice] skip "%s" by stripos condition "%s"') . PHP_EOL,
$config->cli->document->crawl->skip->stripos->url, date('c'),
true $url,
) print_r(
); $config->cli->document->crawl->skip->stripos->url,
true
)
);
}
continue; continue;
} }
@ -325,11 +370,15 @@ foreach($search->get() as $document)
] ]
); );
echo sprintf( if ($config->cli->document->crawl->debug->level->notice)
'add "%s" to "%s"' . PHP_EOL, {
$url, echo sprintf(
$config->manticore->index->document->name _('[%s] [notice] add "%s" to "%s"') . PHP_EOL,
); date('c'),
$url,
$config->manticore->index->document->name
);
}
} }
} }
} }
@ -343,18 +392,22 @@ foreach($search->get() as $document)
); );
// Debug result // Debug result
echo sprintf( if ($config->cli->document->crawl->debug->level->notice)
'index "%s" updated: %s %s' . PHP_EOL, {
$config->manticore->index->document->name, echo sprintf(
print_r( '[%s] [notice] index "%s" updated: %s %s' . PHP_EOL,
$result, date('c'),
true $config->manticore->index->document->name,
), print_r(
print_r( $result,
$data, true
true ),
), print_r(
); $data,
true
),
);
}
// Create snap // Create snap
if ($config->cli->document->crawl->snap->enabled && $code === 200) if ($config->cli->document->crawl->snap->enabled && $code === 200)
@ -477,10 +530,18 @@ foreach($search->get() as $document)
) )
); );
copy( if (!copy($tmp, $filename))
$tmp, {
$filename if ($config->cli->document->crawl->debug->level->error)
); {
// Report the failed local copy with timestamp, source and destination path.
// Each path needs a complete "%s" conversion: a bare "%" followed by a quote
// is not a valid sprintf specifier and the paths would never be printed.
echo sprintf(
    _('[%s] [error] could not copy "%s" to "%s" on local storage') . PHP_EOL,
    date('c'),
    $tmp,
    $filename
);
}
}
} }
} }
@ -557,10 +618,19 @@ foreach($search->get() as $document)
true true
); );
$remote->copy( if (!$remote->copy($tmp, $filename))
$tmp, {
$filename if ($config->cli->document->crawl->debug->level->error)
); {
// Report the failed remote copy with timestamp, source path, destination path
// and remote host. Each path needs a complete "%s" conversion: a bare "%"
// followed by a quote is not a valid sprintf specifier, and it would also
// leave the four arguments mismatched against the placeholders.
echo sprintf(
    _('[%s] [error] could not copy "%s" to "%s" on destination "%s"') . PHP_EOL,
    date('c'),
    $tmp,
    $filename,
    $ftp->connection->host,
);
}
}
$remote->close(); $remote->close();
@ -574,15 +644,28 @@ foreach($search->get() as $document)
} }
// Log event // Log event
echo sprintf( if ($config->cli->document->crawl->debug->level->warning)
_('[attempt: %s] wait for remote storage "%s" reconnection...') . PHP_EOL, {
$attempt++, echo sprintf(
$ftp->connection->host, _('[%s] [warning] attempt: %s, wait for remote storage "%s" reconnection...') . PHP_EOL,
); date('c'),
$attempt++,
$ftp->connection->host,
);
}
// Delay next attempt // Delay next attempt
if ($ftp->connection->attempts->delay) if ($ftp->connection->attempts->delay)
{ {
// Tell the operator how long the reconnect pause will last (warning level only)
if ($config->cli->document->crawl->debug->level->warning)
{
    printf(
        _('[%s] [warning] pending %s seconds to reconnect...') . PHP_EOL,
        date('c'),
        $ftp->connection->attempts->delay
    );
}
sleep( sleep(
$ftp->connection->attempts->delay $ftp->connection->attempts->delay
); );
@ -593,25 +676,72 @@ foreach($search->get() as $document)
} }
// Remove tmp data // Remove tmp data
@unlink( if (unlink($tmp))
$tmp {
); if ($config->cli->document->crawl->debug->level->notice)
{
echo sprintf(
_('[%s] [notice] remove tmp snap file %s') . PHP_EOL,
date('c'),
$tmp
);
}
}
else
{
if ($config->cli->document->crawl->debug->level->error)
{
echo sprintf(
_('[%s] [error] could not remove tmp snap file %s') . PHP_EOL,
date('c'),
$tmp
);
}
}
} }
catch (Exception $exception) catch (Exception $exception)
{ {
var_dump( if ($config->cli->document->crawl->debug->level->error)
$exception {
); echo sprintf(
_('[%s] [error] %s') . PHP_EOL,
date('c'),
print_r(
$exception,
true
)
);
}
} }
} }
} }
// Crawl queue delay // Crawl queue delay
if ($config->cli->document->crawl->queue->limit) if ($config->cli->document->crawl->queue->delay)
{ {
// Log the configured queue delay before sleeping (notice level only)
if ($config->cli->document->crawl->debug->level->notice)
{
    printf(
        _('[%s] [notice] pending %s seconds...') . PHP_EOL,
        date('c'),
        $config->cli->document->crawl->queue->delay
    );
}
sleep( sleep(
$config->cli->document->crawl->queue->limit $config->cli->document->crawl->queue->delay
);
}
// Debug totals
if ($config->cli->document->crawl->debug->level->notice)
{
echo sprintf(
_('[%s] [notice] crawl queue completed in %s') . PHP_EOL,
date('c'),
microtime(true) - $microtime
); );
} }
} }
Loading…
Cancel
Save