From 5b03f386fe6d1a94e46b30a6dd8ec685022dd0d6 Mon Sep 17 00:00:00 2001 From: ghost Date: Tue, 29 Aug 2023 15:38:32 +0300 Subject: [PATCH] implement semantic search by stopwords settings --- src/config/app.php.example | 9 +++++++++ src/library/sphinx.php | 27 ++++++++++++++++----------- src/public/magnet.php | 5 +++-- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/src/config/app.php.example b/src/config/app.php.example index c459053..d336e1b 100644 --- a/src/config/app.php.example +++ b/src/config/app.php.example @@ -83,6 +83,15 @@ define('MAGNET_EDITOR_LOCK_TIMEOUT', 60*60); define('MAGNET_META_TITLE_MIN_LENGTH', 10); define('MAGNET_META_DESCRIPTION_MIN_LENGTH', 0); +define('MAGNET_STOP_WORDS_SIMILAR', + [ + 'series', + 'season', + 'discography', + // ... + ] +); + // Comment define('COMMENT_DEFAULT_APPROVED', false); define('COMMENT_DEFAULT_PUBLIC', false); diff --git a/src/library/sphinx.php b/src/library/sphinx.php index e36a8ef..b56eee0 100644 --- a/src/library/sphinx.php +++ b/src/library/sphinx.php @@ -11,20 +11,20 @@ class Sphinx { $this->_sphinx->setAttribute(PDO::ATTR_DEFAULT_FETCH_MODE, PDO::FETCH_OBJ); } - public function searchMagnetsTotal(string $keyword, string $mode = 'default') : int + public function searchMagnetsTotal(string $keyword, string $mode = 'default', array $stopWords = []) : int { $query = $this->_sphinx->prepare('SELECT COUNT(*) AS `total` FROM `magnet` WHERE MATCH(?)'); $query->execute( [ - self::_match($keyword, $mode) + self::_match($keyword, $mode, $stopWords) ] ); return $query->fetch()->total; } - public function searchMagnets(string $keyword, int $start, int $limit, int $maxMatches, string $mode = 'default') + public function searchMagnets(string $keyword, int $start, int $limit, int $maxMatches, string $mode = 'default', array $stopWords = []) { $query = $this->_sphinx->prepare("SELECT * @@ -40,14 +40,14 @@ class Sphinx { $query->execute( [ - self::_match($keyword, $mode) + self::_match($keyword, $mode, $stopWords) ] ); return $query->fetchAll(); } - private static function _match(string $keyword, string $mode = 'default') : string + private static function _match(string $keyword, string $mode = 'default', array $stopWords = []) : string { $keyword = trim($keyword); @@ -67,15 +67,17 @@ class Sphinx { $result = []; - foreach ((array) explode(' ', $keyword) as $i => $value) + $keyword = preg_replace('/[\d]/ui', ' ', $keyword); + $keyword = preg_replace('/[\s]+/ui', ' ', $keyword); + $keyword = trim($keyword); + + foreach ((array) explode(' ', $keyword) as $value) { if (mb_strlen($value) > 5) { - $result[] = sprintf('@metaTitle "%s" | @dn "%s"', $value, $value); - - if ($i > 3) + if (!in_array(mb_strtolower($value), $stopWords)) { - break; + $result[] = sprintf('@metaTitle "%s" | @dn "%s"', $value, $value); } } } @@ -97,7 +99,10 @@ class Sphinx { foreach ((array) explode(' ', $keyword) as $value) { - $result[] = sprintf('@"*%s*"', $value); + if (!in_array(mb_strtolower($value), $stopWords)) + { + $result[] = sprintf('@"*%s*"', $value); + } } return implode(' | ', $result); diff --git a/src/public/magnet.php b/src/public/magnet.php index 9b0eb42..8655968 100644 --- a/src/public/magnet.php +++ b/src/public/magnet.php @@ -371,7 +371,7 @@ echo '' . PHP_EOL ?> - searchMagnetsTotal($magnet->metaTitle ? $magnet->metaTitle : $magnet->dn, 'similar')) { ?> + searchMagnetsTotal($magnet->metaTitle ? $magnet->metaTitle : $magnet->dn, 'similar', MAGNET_STOP_WORDS_SIMILAR)) { ?> 1) { // skip current magnet ?>
@@ -384,7 +384,8 @@ echo '' . PHP_EOL ?> 0, 10, $similarMagnetsTotal, - 'similar' + 'similar', + MAGNET_STOP_WORDS_SIMILAR ) as $result) { ?> getMagnet($result->magnetid)) { ?> magnetid != $response->magnet->magnetId && // skip current magnet