mirror of
https://github.com/YGGverse/YGGtracker.git
synced 2025-02-09 05:24:22 +00:00
implement transliteration word forms in search #33
This commit is contained in:
parent
c7c5d7340c
commit
997666ab8e
17
.env
17
.env
@ -92,28 +92,25 @@ APP_TORRENT_WANTED_FTP_FOLDER=/yggtracker
|
||||
APP_TORRENT_WANTED_FTP_APPROVED_ONLY=1
|
||||
|
||||
# Enable search index for torrent name
|
||||
APP_INDEX_TORRENT_NAME=1
|
||||
APP_INDEX_TORRENT_NAME_ENABLED=1
|
||||
|
||||
# Enable search index for torrent info hash v1
|
||||
APP_INDEX_TORRENT_HASH_V1=1
|
||||
APP_INDEX_TORRENT_HASH_V1_ENABLED=1
|
||||
|
||||
# Enable search index for torrent info hash v2
|
||||
APP_INDEX_TORRENT_HASH_V2=1
|
||||
APP_INDEX_TORRENT_HASH_V2_ENABLED=1
|
||||
|
||||
# Enable search index for torrent filenames
|
||||
APP_INDEX_TORRENT_FILENAMES=1
|
||||
APP_INDEX_TORRENT_FILENAMES_ENABLED=1
|
||||
|
||||
# Enable search index for torrent source
|
||||
APP_INDEX_TORRENT_SOURCE=1
|
||||
APP_INDEX_TORRENT_SOURCE_ENABLED=1
|
||||
|
||||
# Enable search index for torrent comment
|
||||
APP_INDEX_TORRENT_COMMENT=1
|
||||
APP_INDEX_TORRENT_COMMENT_ENABLED=1
|
||||
|
||||
# Index only words longer than N chars
|
||||
APP_INDEX_WORD_LENGTH_MIN=3
|
||||
|
||||
# Index only words no longer than N chars
|
||||
APP_INDEX_WORD_LENGTH_MAX=255
|
||||
|
||||
# Enable search index transliteration @TODO
|
||||
APP_INDEX_TRANSLITERATION=1
|
||||
APP_INDEX_WORD_LENGTH_MAX=255
|
@ -108,6 +108,7 @@ git checkout -b my-pr-branch-name
|
||||
* [SVG icons](https://icons.getbootstrap.com)
|
||||
* [Scraper](https://github.com/medariox/scrapeer) / [Composer Edition](https://github.com/YGGverse/scrapeer)
|
||||
* [Bencode Library](https://github.com/Rhilip/Bencode)
|
||||
* [Transliteration Library](https://github.com/ashtokalo/php-translit)
|
||||
* [Identicons](https://github.com/dmester/jdenticon-php)
|
||||
|
||||
#### Support
|
||||
|
@ -9,6 +9,7 @@
|
||||
"php": ">=8.1",
|
||||
"ext-ctype": "*",
|
||||
"ext-iconv": "*",
|
||||
"ashtokalo/php-translit": "^0.2.0",
|
||||
"doctrine/annotations": "^2.0",
|
||||
"doctrine/doctrine-bundle": "^2.10",
|
||||
"doctrine/doctrine-migrations-bundle": "^3.2",
|
||||
|
45
composer.lock
generated
45
composer.lock
generated
@ -4,8 +4,51 @@
|
||||
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
|
||||
"This file is @generated automatically"
|
||||
],
|
||||
"content-hash": "3770ffcd80695bc10a22f8ece4f68d1f",
|
||||
"content-hash": "4d930a43cf9a80e1622029c4a4048a6b",
|
||||
"packages": [
|
||||
{
|
||||
"name": "ashtokalo/php-translit",
|
||||
"version": "0.2.0",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/ashtokalo/php-translit.git",
|
||||
"reference": "8ced36cbcd0ed8befb1388ef51935eb53bcfe5b3"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/ashtokalo/php-translit/zipball/8ced36cbcd0ed8befb1388ef51935eb53bcfe5b3",
|
||||
"reference": "8ced36cbcd0ed8befb1388ef51935eb53bcfe5b3",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
"php": ">=7.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"phpunit/phpunit": "~7.0"
|
||||
},
|
||||
"type": "library",
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"ashtokalo\\translit\\": "src/"
|
||||
}
|
||||
},
|
||||
"notification-url": "https://packagist.org/downloads/",
|
||||
"license": [
|
||||
"MIT"
|
||||
],
|
||||
"description": "PHP library to convert text from one script to another.",
|
||||
"keywords": [
|
||||
"latinization",
|
||||
"romanization",
|
||||
"translit",
|
||||
"transliteration"
|
||||
],
|
||||
"support": {
|
||||
"issues": "https://github.com/ashtokalo/php-translit/issues",
|
||||
"source": "https://github.com/ashtokalo/php-translit/tree/0.2.0"
|
||||
},
|
||||
"time": "2022-09-26T09:05:24+00:00"
|
||||
},
|
||||
{
|
||||
"name": "doctrine/annotations",
|
||||
"version": "2.0.1",
|
||||
|
@ -21,15 +21,14 @@ parameters:
|
||||
app.torrent.wanted.ftp.enabled: '%env(APP_TORRENT_WANTED_FTP_ENABLED)%'
|
||||
app.torrent.wanted.ftp.folder: '%env(APP_TORRENT_WANTED_FTP_FOLDER)%'
|
||||
app.torrent.wanted.ftp.approved: '%env(APP_TORRENT_WANTED_FTP_APPROVED_ONLY)%'
|
||||
app.index.torrent.name: '%env(APP_INDEX_TORRENT_NAME)%'
|
||||
app.index.torrent.filenames: '%env(APP_INDEX_TORRENT_FILENAMES)%'
|
||||
app.index.torrent.hash.v1: '%env(APP_INDEX_TORRENT_HASH_V1)%'
|
||||
app.index.torrent.hash.v2: '%env(APP_INDEX_TORRENT_HASH_V2)%'
|
||||
app.index.torrent.source: '%env(APP_INDEX_TORRENT_SOURCE)%'
|
||||
app.index.torrent.comment: '%env(APP_INDEX_TORRENT_COMMENT)%'
|
||||
app.index.torrent.name.enabled: '%env(APP_INDEX_TORRENT_NAME_ENABLED)%'
|
||||
app.index.torrent.filenames.enabled: '%env(APP_INDEX_TORRENT_FILENAMES_ENABLED)%'
|
||||
app.index.torrent.hash.v1.enabled: '%env(APP_INDEX_TORRENT_HASH_V1_ENABLED)%'
|
||||
app.index.torrent.hash.v2.enabled: '%env(APP_INDEX_TORRENT_HASH_V2_ENABLED)%'
|
||||
app.index.torrent.source.enabled: '%env(APP_INDEX_TORRENT_SOURCE_ENABLED)%'
|
||||
app.index.torrent.comment.enabled: '%env(APP_INDEX_TORRENT_COMMENT_ENABLED)%'
|
||||
app.index.word.length.min: '%env(APP_INDEX_WORD_LENGTH_MIN)%'
|
||||
app.index.word.length.max: '%env(APP_INDEX_WORD_LENGTH_MAX)%'
|
||||
app.index.transliteration: '%env(APP_INDEX_TRANSLITERATION)%'
|
||||
|
||||
services:
|
||||
# default configuration for services in *this* file
|
||||
|
@ -229,8 +229,6 @@ class TorrentController extends AbstractController
|
||||
$activityService
|
||||
);
|
||||
|
||||
//
|
||||
|
||||
// Init request
|
||||
$query = $request->get('query') ? explode(' ', urldecode($request->get('query'))) : [];
|
||||
$page = $request->get('page') ? (int) $request->get('page') : 1;
|
||||
@ -883,13 +881,12 @@ class TorrentController extends AbstractController
|
||||
|
||||
$file->getPathName(),
|
||||
|
||||
(bool) $this->getParameter('app.index.torrent.name'),
|
||||
(bool) $this->getParameter('app.index.torrent.filenames'),
|
||||
(bool) $this->getParameter('app.index.torrent.hash.v1'),
|
||||
(bool) $this->getParameter('app.index.torrent.hash.v2'),
|
||||
(bool) $this->getParameter('app.index.torrent.source'),
|
||||
(bool) $this->getParameter('app.index.torrent.comment'),
|
||||
(bool) $this->getParameter('app.index.transliteration'),
|
||||
(bool) $this->getParameter('app.index.torrent.name.enabled'),
|
||||
(bool) $this->getParameter('app.index.torrent.filenames.enabled'),
|
||||
(bool) $this->getParameter('app.index.torrent.hash.v1.enabled'),
|
||||
(bool) $this->getParameter('app.index.torrent.hash.v2.enabled'),
|
||||
(bool) $this->getParameter('app.index.torrent.source.enabled'),
|
||||
(bool) $this->getParameter('app.index.torrent.comment.enabled'),
|
||||
(int) $this->getParameter('app.index.word.length.min'),
|
||||
(int) $this->getParameter('app.index.word.length.max'),
|
||||
|
||||
@ -2453,13 +2450,12 @@ class TorrentController extends AbstractController
|
||||
{
|
||||
// Reindex keywords
|
||||
$torrentService->reindexTorrentKeywordsAll(
|
||||
(bool) $this->getParameter('app.index.torrent.name'),
|
||||
(bool) $this->getParameter('app.index.torrent.filenames'),
|
||||
(bool) $this->getParameter('app.index.torrent.hash.v1'),
|
||||
(bool) $this->getParameter('app.index.torrent.hash.v2'),
|
||||
(bool) $this->getParameter('app.index.torrent.source'),
|
||||
(bool) $this->getParameter('app.index.torrent.comment'),
|
||||
(bool) $this->getParameter('app.index.transliteration'),
|
||||
(bool) $this->getParameter('app.index.torrent.name.enabled'),
|
||||
(bool) $this->getParameter('app.index.torrent.filenames.enabled'),
|
||||
(bool) $this->getParameter('app.index.torrent.hash.v1.enabled'),
|
||||
(bool) $this->getParameter('app.index.torrent.hash.v2.enabled'),
|
||||
(bool) $this->getParameter('app.index.torrent.source.enabled'),
|
||||
(bool) $this->getParameter('app.index.torrent.comment.enabled'),
|
||||
(int) $this->getParameter('app.index.word.length.min'),
|
||||
(int) $this->getParameter('app.index.word.length.max')
|
||||
);
|
||||
|
@ -73,27 +73,36 @@ class TorrentRepository extends ServiceEntityRepository
|
||||
int $userId,
|
||||
array $keywords,
|
||||
array $locales,
|
||||
?bool $sensitive = null,
|
||||
?bool $approved = null,
|
||||
?bool $status = null,
|
||||
?bool $sensitive = null,
|
||||
?bool $approved = null,
|
||||
?bool $status = null
|
||||
): \Doctrine\ORM\QueryBuilder
|
||||
{
|
||||
$query = $this->createQueryBuilder('t');
|
||||
|
||||
if ($keywords)
|
||||
{
|
||||
$andKeywords = $query->expr()->andX();
|
||||
|
||||
foreach ($keywords as $i => $keyword)
|
||||
{
|
||||
$keyword = mb_strtolower($keyword); // all keywords stored in lowercase
|
||||
// Make query to the index case insensitive
|
||||
$keyword = mb_strtolower($keyword);
|
||||
|
||||
$andKeywords->add("t.keywords LIKE :keyword{$i}");
|
||||
// Init OR condition for each word form
|
||||
$orKeywords = $query->expr()->orX();
|
||||
|
||||
$orKeywords->add("t.keywords LIKE :keyword{$i}");
|
||||
$query->setParameter(":keyword{$i}", "%{$keyword}%");
|
||||
}
|
||||
|
||||
$query->andWhere($andKeywords);
|
||||
// Generate word forms for each transliteration locale #33
|
||||
foreach ($this->generateWordForms($keyword) as $j => $wordForm)
|
||||
{
|
||||
$orKeywords->add("t.keywords LIKE :keyword{$i}{$j}");
|
||||
$query->setParameter(":keyword{$i}{$j}", "%{$wordForm}%");
|
||||
}
|
||||
|
||||
// Append AND condition
|
||||
$query->andWhere($orKeywords);
|
||||
}
|
||||
}
|
||||
|
||||
if ($locales)
|
||||
@ -153,4 +162,59 @@ class TorrentRepository extends ServiceEntityRepository
|
||||
|
||||
return $query;
|
||||
}
|
||||
|
||||
/**
 * Word forms generator to improve search results,
 * e.g. transliteration rules for latin filenames (#33).
 *
 * The keyword is converted once per locale in $transliteration through
 * \ashtokalo\translit\Translit::convert(). Each converted form is then
 * expanded with the substitutions from $charForms; every pair is applied
 * on its own to the converted form, the pairs are not chained.
 *
 * The search query wraps every returned form in "%...%" for a LIKE match,
 * so empty forms are dropped here: "%%" would match every row and make the
 * whole OR group for that keyword a tautology.
 *
 * @param string                $keyword         Search keyword to expand
 * @param list<string>          $transliteration Locale codes passed to Translit::convert()
 * @param array<string, string> $charForms       Additional substitutions (search => replace)
 *
 * @return list<string> Unique, non-empty word forms in generation order,
 *                      re-indexed from 0 (array_unique() alone keeps gaps)
 */
private function generateWordForms(
    string $keyword,
    // #33 supported locales:
    // https://github.com/ashtokalo/php-translit
    array $transliteration = [
        'be',
        'bg',
        'el',
        'hy',
        'kk',
        'mk',
        'ru',
        'ka',
        'uk'
    ],
    // Additional char forms
    array $charForms =
    [
        'c' => 'k',
        'k' => 'c',
    ]
): array
{
    // Apply transliteration
    $transliterated = [];

    foreach ($transliteration as $locale)
    {
        $transliterated[] = \ashtokalo\translit\Translit::object()->convert(
            $keyword,
            $locale
        );
    }

    // Apply char forms to the transliterated forms only.
    // Results are collected in a separate list so the array being iterated
    // is never the one being appended to.
    $substituted = [];

    foreach ($transliterated as $wordForm)
    {
        foreach ($charForms as $from => $to)
        {
            $substituted[] = str_replace(
                $from,
                $to,
                $wordForm
            );
        }
    }

    // Keep the original order: transliterations first, char forms after
    $wordForms = [];

    foreach ([...$transliterated, ...$substituted] as $wordForm)
    {
        // An empty form would become LIKE '%%' in the query
        if ('' === (string) $wordForm)
        {
            continue;
        }

        $wordForms[] = $wordForm;
    }

    // Remove duplicates and return a gap-free list
    return array_values(
        array_unique(
            $wordForms
        )
    );
}
|
||||
}
|
||||
|
@ -64,7 +64,6 @@ class TorrentService
|
||||
|
||||
public function generateTorrentKeywordsByString(
|
||||
string $string,
|
||||
bool $transliteration,
|
||||
int $wordLengthMin,
|
||||
int $wordLengthMax,
|
||||
): array
|
||||
@ -97,11 +96,6 @@ class TorrentService
|
||||
{
|
||||
// Apply case insensitive search conversion
|
||||
$words[$key] = mb_strtolower($value);
|
||||
|
||||
if ($transliteration)
|
||||
{
|
||||
// @TODO
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -129,7 +123,6 @@ class TorrentService
|
||||
bool $extractSource,
|
||||
bool $extractComment,
|
||||
|
||||
bool $wordTransliteration,
|
||||
int $wordLengthMin,
|
||||
int $wordLengthMax
|
||||
|
||||
@ -147,7 +140,6 @@ class TorrentService
|
||||
$keywords,
|
||||
$this->generateTorrentKeywordsByString(
|
||||
$name,
|
||||
$wordTransliteration,
|
||||
$wordLengthMin,
|
||||
$wordLengthMax
|
||||
)
|
||||
@ -163,7 +155,6 @@ class TorrentService
|
||||
$keywords,
|
||||
$this->generateTorrentKeywordsByString(
|
||||
$list['path'],
|
||||
$wordTransliteration,
|
||||
$wordLengthMin,
|
||||
$wordLengthMax
|
||||
)
|
||||
@ -179,7 +170,6 @@ class TorrentService
|
||||
$keywords,
|
||||
$this->generateTorrentKeywordsByString(
|
||||
$source,
|
||||
$wordTransliteration,
|
||||
$wordLengthMin,
|
||||
$wordLengthMax
|
||||
)
|
||||
@ -195,7 +185,6 @@ class TorrentService
|
||||
$keywords,
|
||||
$this->generateTorrentKeywordsByString(
|
||||
$comment,
|
||||
$wordTransliteration,
|
||||
$wordLengthMin,
|
||||
$wordLengthMax
|
||||
)
|
||||
@ -301,7 +290,6 @@ class TorrentService
|
||||
bool $extractSource,
|
||||
bool $extractComment,
|
||||
|
||||
bool $wordTransliteration,
|
||||
int $wordLengthMin,
|
||||
int $wordLengthMax,
|
||||
|
||||
@ -326,7 +314,6 @@ class TorrentService
|
||||
$extractInfoHashV2,
|
||||
$extractSource,
|
||||
$extractComment,
|
||||
$wordTransliteration,
|
||||
$wordLengthMin,
|
||||
$wordLengthMax
|
||||
),
|
||||
@ -623,7 +610,6 @@ class TorrentService
|
||||
bool $extractInfoHashV2,
|
||||
bool $extractSource,
|
||||
bool $extractComment,
|
||||
bool $wordTransliteration,
|
||||
int $wordLengthMin,
|
||||
int $wordLengthMax
|
||||
): void
|
||||
@ -643,7 +629,6 @@ class TorrentService
|
||||
$extractInfoHashV2,
|
||||
$extractSource,
|
||||
$extractComment,
|
||||
$wordTransliteration,
|
||||
$wordLengthMin,
|
||||
$wordLengthMax
|
||||
)
|
||||
|
Loading…
x
Reference in New Issue
Block a user