diff --git a/config/sphinx.conf.txt b/config/sphinx.conf.txt index 4b3eb99..58587d1 100644 --- a/config/sphinx.conf.txt +++ b/config/sphinx.conf.txt @@ -22,10 +22,16 @@ source hostPage : common `host`.`name`, \ IF (`host`.`port` IS NOT NULL, \ CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`), \ - CONCAT(`host`.`scheme`, '://', `host`.`name`)) AS `hostURL`, \ + CONCAT(`host`.`scheme`, '://', `host`.`name`)), \ + CRC32 (IF (`host`.`port` IS NOT NULL, \ + CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`), \ + CONCAT(`host`.`scheme`, '://', `host`.`name`))), \ IF (`host`.`port` IS NOT NULL, \ CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`, `hostPage`.`uri`), \ - CONCAT(`host`.`scheme`, '://', `host`.`name`, `hostPage`.`uri`)) AS `hostPageURL`, \ + CONCAT(`host`.`scheme`, '://', `host`.`name`, `hostPage`.`uri`)), \ + CRC32 (IF (`host`.`port` IS NOT NULL, \ + CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`, `hostPage`.`uri`), \ + CONCAT(`host`.`scheme`, '://', `host`.`name`, `hostPage`.`uri`))), \ REGEXP_REPLACE(`hostPage`.`mime`, '^([A-z-]+)/[A-z-]+.*', '$1') AS `mime`, \ (SELECT GROUP_CONCAT(CONCAT_WS(' ', `hostPageDescription`.`title`, \ `hostPageDescription`.`description`, \ diff --git a/library/filter.php b/library/filter.php index 76cb20c..f1e151c 100644 --- a/library/filter.php +++ b/library/filter.php @@ -79,6 +79,9 @@ class Filter { static public function searchQuery(string $query, string $mode = 'default') { + // Create query CRC32 + $crc32query = crc32($query); + // Prepare user-friendly search request (default mode) // https://sphinxsearch.com/docs/current.html#extended-syntax if ($mode == 'default') { @@ -88,19 +91,6 @@ class Filter { $query = trim($query); - // Remove single char words - $words = []; - foreach ((array) explode(' ', $query) as $word) { - - if (mb_strlen($word) > 1) { - $words[] = $word; - } - } - - if ($words) { - $query = implode(' ', $words); - } - // Quote reserved keyword operators $operators = [ 'MAYBE', @@ -119,19 +109,44 @@ class Filter { foreach ($operators as $operator) { $query = str_ireplace($operator, '\\' . $operator, $query); } - } - // Apply query semantics + // Apply separators + $query = str_replace(['-', '_', '/'], ' ', $query); + + // Apply query MATCH rules + $words = []; - // Long queries - // @TODO seems that queries longer than 68 chars cropping - if (mb_strlen($query) > 68) { + // URL request + if (false !== strpos($query, '\:\ \ ')) { - $query = sprintf('%s*', substr($query, 0, 67)); + $query = sprintf('"%s"', $crc32query); - } else { + // @TODO Queries longer than 68 chars unreachable in search index + } else if (mb_strlen($query) > 68) { - $query = sprintf('"%s" | (%s)', $query, str_replace(' ', '* MAYBE ', $query) . '*'); + $query = sprintf('"%s" | (%s*)', $crc32query, substr($query, 0, 67)); + + // Default condition + } else { + + // Remove single char words + foreach ((array) explode(' ', $query) as $word) { + + if (mb_strlen($word) > 1) { + + $words[] = sprintf('%s*', $word); + } + } + + if ($words) { + + $query = implode(' | ', $words); + } + + $query = sprintf('"%s" | "%s" | %s', $query, + $crc32query, + $query); + } } return trim($query);