Browse Source

update Filter::searchQuery method, fix search by URL

main
ghost 1 year ago
parent
commit
5791877a4e
  1. 10
      config/sphinx.conf.txt
  2. 57
      library/filter.php

10
config/sphinx.conf.txt

@ -22,10 +22,16 @@ source hostPage : common
`host`.`name`, \ `host`.`name`, \
IF (`host`.`port` IS NOT NULL, \ IF (`host`.`port` IS NOT NULL, \
CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`), \ CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`), \
CONCAT(`host`.`scheme`, '://', `host`.`name`)) AS `hostURL`, \ CONCAT(`host`.`scheme`, '://', `host`.`name`)), \
CRC32 (IF (`host`.`port` IS NOT NULL, \
CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`), \
CONCAT(`host`.`scheme`, '://', `host`.`name`))), \
IF (`host`.`port` IS NOT NULL, \ IF (`host`.`port` IS NOT NULL, \
CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`, `hostPage`.`uri`), \ CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`, `hostPage`.`uri`), \
CONCAT(`host`.`scheme`, '://', `host`.`name`, `hostPage`.`uri`)) AS `hostPageURL`, \ CONCAT(`host`.`scheme`, '://', `host`.`name`, `hostPage`.`uri`)), \
CRC32 (IF (`host`.`port` IS NOT NULL, \
CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`, `hostPage`.`uri`), \
CONCAT(`host`.`scheme`, '://', `host`.`name`, `hostPage`.`uri`))), \
REGEXP_REPLACE(`hostPage`.`mime`, '^([A-z-]+)/[A-z-]+.*', '$1') AS `mime`, \ REGEXP_REPLACE(`hostPage`.`mime`, '^([A-z-]+)/[A-z-]+.*', '$1') AS `mime`, \
(SELECT GROUP_CONCAT(CONCAT_WS(' ', `hostPageDescription`.`title`, \ (SELECT GROUP_CONCAT(CONCAT_WS(' ', `hostPageDescription`.`title`, \
`hostPageDescription`.`description`, \ `hostPageDescription`.`description`, \

57
library/filter.php

@ -79,6 +79,9 @@ class Filter {
static public function searchQuery(string $query, string $mode = 'default') { static public function searchQuery(string $query, string $mode = 'default') {
// Create query CRC32
$crc32query = crc32($query);
// Prepare user-friendly search request (default mode) // Prepare user-friendly search request (default mode)
// https://sphinxsearch.com/docs/current.html#extended-syntax // https://sphinxsearch.com/docs/current.html#extended-syntax
if ($mode == 'default') { if ($mode == 'default') {
@ -88,19 +91,6 @@ class Filter {
$query = trim($query); $query = trim($query);
// Remove single char words
$words = [];
foreach ((array) explode(' ', $query) as $word) {
if (mb_strlen($word) > 1) {
$words[] = $word;
}
}
if ($words) {
$query = implode(' ', $words);
}
// Quote reserved keyword operators // Quote reserved keyword operators
$operators = [ $operators = [
'MAYBE', 'MAYBE',
@ -119,19 +109,44 @@ class Filter {
foreach ($operators as $operator) { foreach ($operators as $operator) {
$query = str_ireplace($operator, '\\' . $operator, $query); $query = str_ireplace($operator, '\\' . $operator, $query);
} }
}
// Apply query semantics // Apply separators
$query = str_replace(['-', '_', '/'], ' ', $query);
// Apply query MATCH rules
$words = [];
// Long queries // URL request
// @TODO seems that queries longer than 68 chars cropping if (false !== strpos($query, '\:\ \ ')) {
if (mb_strlen($query) > 68) {
$query = sprintf('%s*', substr($query, 0, 67)); $query = sprintf('"%s"', $crc32query);
} else { // @TODO Queries longer than 68 chars unreachable in search index
} else if (mb_strlen($query) > 68) {
$query = sprintf('"%s" | (%s)', $query, str_replace(' ', '* MAYBE ', $query) . '*'); $query = sprintf('"%s" | (%s*)', $crc32query, substr($query, 0, 67));
// Default condition
} else {
// Remove single char words
foreach ((array) explode(' ', $query) as $word) {
if (mb_strlen($word) > 1) {
$words[] = sprintf('%s*', $word);
}
}
if ($words) {
$query = implode(' | ', $words);
}
$query = sprintf('"%s" | "%s" | %s', $query,
$crc32query,
$query);
}
} }
return trim($query); return trim($query);

Loading…
Cancel
Save