Browse Source

update DEFAULT_HOST_PAGES_DOM_SELECTORS syntax

main
ghost 1 year ago
parent
commit
0b4abd2b50
  1. 24
      src/config/app.php.example

24
src/config/app.php.example

@@ -187,28 +187,20 @@ define('DEFAULT_HOST_PAGES_LIMIT', 100000);
define('DEFAULT_HOST_PAGES_MIME', 'text/html,application/xhtml+xml,application/javascript,text/plain,text/css,image/webp,image/png,image/gif,image/jpeg,image/ico,image/svg+xml,video/mp4,video/ogg,video/webm,audio/mpeg,audio/ogg,audio/wav,audio/mp4,audio/aac,audio/aacp,audio/webm,audio/x-caf,audio/x-mpegurl,audio/flac,font/ttf'); define('DEFAULT_HOST_PAGES_MIME', 'text/html,application/xhtml+xml,application/javascript,text/plain,text/css,image/webp,image/png,image/gif,image/jpeg,image/ico,image/svg+xml,video/mp4,video/ogg,video/webm,audio/mpeg,audio/ogg,audio/wav,audio/mp4,audio/aac,audio/aacp,audio/webm,audio/x-caf,audio/x-mpegurl,audio/flac,font/ttf');
/* /*
* Generate hostPageDom table by CSS selectors * Collect hostPageDom table by selectors
* *
* Allows building a semantic search index (config/sphinx.conf.txt) * Allows building a semantic search index (config/sphinx.conf.txt) or caching the document data
* *
* Supported for text/html MIME documents only
* Leave empty array to skip DOM elements crawling * Leave empty array to skip DOM elements crawling
*/
define('DEFAULT_HOST_PAGES_DOM_SELECTORS', json_encode((object)
[
'h1',
// ...
]
));
/*
* Strip tags rules for DEFAULT_HOST_PAGES_DOM_SELECTORS condition
* *
* Leave empty array to strip all tags * Provide a CSS selector as the key and the allowed tags as the value for each selector; leave the value null to strip all child tags
* https://www.php.net/manual/en/function.strip-tags.php
*
*/ */
define('DEFAULT_HOST_PAGES_DOM_SELECTORS_STRIP_TAGS_ALLOWED', json_encode((object) define('DEFAULT_HOST_PAGES_DOM_SELECTORS', json_encode((object)
[ [
'<p>', 'h1' => null,
'<br>',
// ... // ...
] ]
)); ));

Loading…
Cancel
Save