mirror of
https://github.com/YGGverse/Yo.git
synced 2025-02-05 11:24:20 +00:00
save document body text to index
This commit is contained in:
parent
1f27a7e105
commit
3884f375d4
@ -335,6 +335,45 @@ foreach($index->search('')
|
||||
$data['keywords'] = implode(',', $keywords);
|
||||
}
|
||||
|
||||
// Save document body text to index
//
// Takes the inner HTML of <body>, reduces it to plain text and stores the
// result in $data['body']. The order of the steps matters:
//   1. remove <script> and <style> elements together with their content;
//   2. pad every tag with spaces so adjacent elements do not glue words;
//   3. strip the remaining tags;
//   4. decode entities only AFTER tags are gone, so visible text written as
//      "&lt;b&gt;" survives as "<b>" instead of being stripped as markup;
//   5. collapse every whitespace run (spaces, tabs, any line ending) into
//      a single space and trim.
foreach ($crawler->filter('html > body')->each(function($node) {

    return $node->html();

}) as $value)
{
    if (empty($value))
    {
        continue;
    }

    // Lazy `.*?` with the `s` modifier matches script/style bodies that
    // contain `<` (e.g. `if (a < b)`); the back-reference makes sure the
    // closing tag matches the opening one. Replaced with a space so the
    // text on both sides of the element stays separated.
    $html = preg_replace(
        '/<(script|style)\b[^>]*>.*?<\/\1\s*>/is',
        ' ',
        $value
    );

    // preg_replace() returns null on PCRE failure (e.g. backtrack limit);
    // fall back to the raw markup and let strip_tags() do what it can.
    if (!is_string($html))
    {
        $html = $value;
    }

    $text = html_entity_decode(
        strip_tags(
            str_replace( // make text separators before strip any closing tag
                [
                    '<',
                    '>',
                ],
                [
                    ' <',
                    '> ',
                ],
                $html
            )
        ),
        ENT_QUOTES | ENT_HTML5,
        'UTF-8'
    );

    // Normalise whitespace independently of the host platform's PHP_EOL
    $clean = preg_replace(
        '/\s+/',
        ' ',
        $text
    );

    $data['body'] = trim(
        is_string($clean) ? $clean : $text
    );
}
|
||||
|
||||
// Crawl documents
|
||||
$documents = [];
|
||||
|
||||
|
@ -64,6 +64,10 @@ $result = $index->create(
|
||||
[
|
||||
'type' => 'text'
|
||||
],
|
||||
'body' =>
|
||||
[
|
||||
'type' => 'text'
|
||||
],
|
||||
'mime' =>
|
||||
[
|
||||
'type' => 'text'
|
||||
|
Loading…
x
Reference in New Issue
Block a user