Browse Source

Implement semantic search filtering based on stop-word settings

main
ghost 1 year ago
parent
commit
5b03f386fe
  1. 9
      src/config/app.php.example
  2. 25
      src/library/sphinx.php
  3. 5
      src/public/magnet.php

9
src/config/app.php.example

@ -83,6 +83,15 @@ define('MAGNET_EDITOR_LOCK_TIMEOUT', 60*60);
define('MAGNET_META_TITLE_MIN_LENGTH', 10); define('MAGNET_META_TITLE_MIN_LENGTH', 10);
define('MAGNET_META_DESCRIPTION_MIN_LENGTH', 0); define('MAGNET_META_DESCRIPTION_MIN_LENGTH', 0);
// Stop words for the "similar" magnets search: keyword parts found in this list
// are left out of the generated search query.
// Keyword parts are lower-cased before the lookup, so entries should be lower case.
define('MAGNET_STOP_WORDS_SIMILAR',
[
'series',
'season',
'discography',
// ...
]
);
// Comment // Comment
define('COMMENT_DEFAULT_APPROVED', false); define('COMMENT_DEFAULT_APPROVED', false);
define('COMMENT_DEFAULT_PUBLIC', false); define('COMMENT_DEFAULT_PUBLIC', false);

25
src/library/sphinx.php

@ -11,20 +11,20 @@ class Sphinx {
$this->_sphinx->setAttribute(PDO::ATTR_DEFAULT_FETCH_MODE, PDO::FETCH_OBJ); $this->_sphinx->setAttribute(PDO::ATTR_DEFAULT_FETCH_MODE, PDO::FETCH_OBJ);
} }
// Returns the number of rows in the `magnet` index that match $keyword.
// The MATCH() expression is built by self::_match() from $keyword and $mode;
// in the new revision (right-hand side) the optional $stopWords list is forwarded to it as well.
public function searchMagnetsTotal(string $keyword, string $mode = 'default') : int public function searchMagnetsTotal(string $keyword, string $mode = 'default', array $stopWords = []) : int
{ {
$query = $this->_sphinx->prepare('SELECT COUNT(*) AS `total` FROM `magnet` WHERE MATCH(?)'); $query = $this->_sphinx->prepare('SELECT COUNT(*) AS `total` FROM `magnet` WHERE MATCH(?)');
$query->execute( $query->execute(
[ [
self::_match($keyword, $mode) self::_match($keyword, $mode, $stopWords)
] ]
); );
return $query->fetch()->total; return $query->fetch()->total;
} }
public function searchMagnets(string $keyword, int $start, int $limit, int $maxMatches, string $mode = 'default') public function searchMagnets(string $keyword, int $start, int $limit, int $maxMatches, string $mode = 'default', array $stopWords = [])
{ {
$query = $this->_sphinx->prepare("SELECT * $query = $this->_sphinx->prepare("SELECT *
@ -40,14 +40,14 @@ class Sphinx {
$query->execute( $query->execute(
[ [
self::_match($keyword, $mode) self::_match($keyword, $mode, $stopWords)
] ]
); );
return $query->fetchAll(); return $query->fetchAll();
} }
private static function _match(string $keyword, string $mode = 'default') : string private static function _match(string $keyword, string $mode = 'default', array $stopWords = []) : string
{ {
$keyword = trim($keyword); $keyword = trim($keyword);
@ -67,15 +67,17 @@ class Sphinx {
$result = []; $result = [];
foreach ((array) explode(' ', $keyword) as $i => $value) $keyword = preg_replace('/[\d]/ui', ' ', $keyword);
$keyword = preg_replace('/[\s]+/ui', ' ', $keyword);
$keyword = trim($keyword);
foreach ((array) explode(' ', $keyword) as $value)
{ {
if (mb_strlen($value) > 5) if (mb_strlen($value) > 5)
{ {
$result[] = sprintf('@metaTitle "%s" | @dn "%s"', $value, $value); if (!in_array(mb_strtolower($value), $stopWords))
if ($i > 3)
{ {
break; $result[] = sprintf('@metaTitle "%s" | @dn "%s"', $value, $value);
} }
} }
} }
@ -96,9 +98,12 @@ class Sphinx {
$result = []; $result = [];
foreach ((array) explode(' ', $keyword) as $value) foreach ((array) explode(' ', $keyword) as $value)
{
if (!in_array(mb_strtolower($value), $stopWords))
{ {
$result[] = sprintf('@"*%s*"', $value); $result[] = sprintf('@"*%s*"', $value);
} }
}
return implode(' | ', $result); return implode(' | ', $result);
} }

5
src/public/magnet.php

@ -371,7 +371,7 @@ echo '<?xml version="1.0" encoding="UTF-8"?>' . PHP_EOL ?>
</span> </span>
</div> </div>
</div> </div>
<?php if ($similarMagnetsTotal = $sphinx->searchMagnetsTotal($magnet->metaTitle ? $magnet->metaTitle : $magnet->dn, 'similar')) { ?> <?php if ($similarMagnetsTotal = $sphinx->searchMagnetsTotal($magnet->metaTitle ? $magnet->metaTitle : $magnet->dn, 'similar', MAGNET_STOP_WORDS_SIMILAR)) { ?>
<?php if ($similarMagnetsTotal > 1) { // skip current magnet ?> <?php if ($similarMagnetsTotal > 1) { // skip current magnet ?>
<div class="padding-y-8 padding-x-16"> <div class="padding-y-8 padding-x-16">
<a name="similar"></a> <a name="similar"></a>
@ -384,7 +384,8 @@ echo '<?xml version="1.0" encoding="UTF-8"?>' . PHP_EOL ?>
0, 0,
10, 10,
$similarMagnetsTotal, $similarMagnetsTotal,
'similar' 'similar',
MAGNET_STOP_WORDS_SIMILAR
) as $result) { ?> ) as $result) { ?>
<?php if ($magnet = $db->getMagnet($result->magnetid)) { ?> <?php if ($magnet = $db->getMagnet($result->magnetid)) { ?>
<?php if ($result->magnetid != $response->magnet->magnetId && // skip current magnet <?php if ($result->magnetid != $response->magnet->magnetId && // skip current magnet

Loading…
Cancel
Save