mirror of
https://github.com/YGGverse/Yo.git
synced 2025-02-05 19:34:14 +00:00
crawl newest pages by rand in queue
This commit is contained in:
parent
811c700049
commit
33cc778999
@ -50,30 +50,6 @@ try {
|
|||||||
]
|
]
|
||||||
);
|
);
|
||||||
|
|
||||||
// Init search
|
|
||||||
$search = new \Manticoresearch\Search(
|
|
||||||
$client
|
|
||||||
);
|
|
||||||
|
|
||||||
$search->setIndex(
|
|
||||||
$config->manticore->index->document->name
|
|
||||||
);
|
|
||||||
|
|
||||||
$search->match(
|
|
||||||
'*',
|
|
||||||
'url'
|
|
||||||
);
|
|
||||||
|
|
||||||
$search->sort(
|
|
||||||
'time',
|
|
||||||
'asc'
|
|
||||||
);
|
|
||||||
|
|
||||||
$search->limit(
|
|
||||||
$config->cli->document->crawl->queue->limit
|
|
||||||
);
|
|
||||||
|
|
||||||
// Init index
|
|
||||||
$index = $client->index(
|
$index = $client->index(
|
||||||
$config->manticore->index->document->name
|
$config->manticore->index->document->name
|
||||||
);
|
);
|
||||||
@ -105,8 +81,16 @@ if ($config->cli->document->crawl->debug->level->notice)
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Begin queue
|
// Begin crawl queue
|
||||||
foreach($search->get() as $document)
|
// thanks to @manticoresearch for help with random feature implementation:
|
||||||
|
// https://github.com/manticoresoftware/manticoresearch-php/discussions/176
|
||||||
|
|
||||||
|
foreach($index->search('')
|
||||||
|
->expression('random', 'rand()')
|
||||||
|
->sort('time', 'asc')
|
||||||
|
->sort('random', 'asc')
|
||||||
|
->limit($config->cli->document->crawl->queue->limit)
|
||||||
|
->get() as $document)
|
||||||
{
|
{
|
||||||
// Define data
|
// Define data
|
||||||
$time = time();
|
$time = time();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user