Browse Source

implement transliteration word forms in search #33

main
ghost 1 year ago
parent
commit
997666ab8e
  1. 17
      .env
  2. 1
      README.md
  3. 1
      composer.json
  4. 45
      composer.lock
  5. 13
      config/services.yaml
  6. 28
      src/Controller/TorrentController.php
  7. 82
      src/Repository/TorrentRepository.php
  8. 15
      src/Service/TorrentService.php

17
.env

@ -92,28 +92,25 @@ APP_TORRENT_WANTED_FTP_FOLDER=/yggtracker @@ -92,28 +92,25 @@ APP_TORRENT_WANTED_FTP_FOLDER=/yggtracker
APP_TORRENT_WANTED_FTP_APPROVED_ONLY=1
# Enable search index for torrent name
APP_INDEX_TORRENT_NAME=1
APP_INDEX_TORRENT_NAME_ENABLED=1
# Enable search index for torrent info hash v1
APP_INDEX_TORRENT_HASH_V1=1
APP_INDEX_TORRENT_HASH_V1_ENABLED=1
# Enable search index for torrent info hash v2
APP_INDEX_TORRENT_HASH_V2=1
APP_INDEX_TORRENT_HASH_V2_ENABLED=1
# Enable search index for torrent filenames
APP_INDEX_TORRENT_FILENAMES=1
APP_INDEX_TORRENT_FILENAMES_ENABLED=1
# Enable search index for torrent source
APP_INDEX_TORRENT_SOURCE=1
APP_INDEX_TORRENT_SOURCE_ENABLED=1
# Enable search index for torrent comment
APP_INDEX_TORRENT_COMMENT=1
APP_INDEX_TORRENT_COMMENT_ENABLED=1
# Enable search index for words length greater than N chars
APP_INDEX_WORD_LENGTH_MIN=3
# Enable search index for words length not greater than N chars
APP_INDEX_WORD_LENGTH_MAX=255
# Enable search index transliteration @TODO
APP_INDEX_TRANSLITERATION=1
APP_INDEX_WORD_LENGTH_MAX=255

1
README.md

@ -108,6 +108,7 @@ git checkout -b my-pr-branch-name @@ -108,6 +108,7 @@ git checkout -b my-pr-branch-name
* [SVG icons](https://icons.getbootstrap.com)
* [Scrapper](https://github.com/medariox/scrapeer) / [Composer Edition](https://github.com/YGGverse/scrapeer)
* [Bencode Library](https://github.com/Rhilip/Bencode)
* [Transliteration Library](https://github.com/ashtokalo/php-translit)
* [Identicons](https://github.com/dmester/jdenticon-php)
#### Support

1
composer.json

@ -9,6 +9,7 @@ @@ -9,6 +9,7 @@
"php": ">=8.1",
"ext-ctype": "*",
"ext-iconv": "*",
"ashtokalo/php-translit": "^0.2.0",
"doctrine/annotations": "^2.0",
"doctrine/doctrine-bundle": "^2.10",
"doctrine/doctrine-migrations-bundle": "^3.2",

45
composer.lock generated

@ -4,8 +4,51 @@ @@ -4,8 +4,51 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "3770ffcd80695bc10a22f8ece4f68d1f",
"content-hash": "4d930a43cf9a80e1622029c4a4048a6b",
"packages": [
{
"name": "ashtokalo/php-translit",
"version": "0.2.0",
"source": {
"type": "git",
"url": "https://github.com/ashtokalo/php-translit.git",
"reference": "8ced36cbcd0ed8befb1388ef51935eb53bcfe5b3"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/ashtokalo/php-translit/zipball/8ced36cbcd0ed8befb1388ef51935eb53bcfe5b3",
"reference": "8ced36cbcd0ed8befb1388ef51935eb53bcfe5b3",
"shasum": ""
},
"require": {
"php": ">=7.0"
},
"require-dev": {
"phpunit/phpunit": "~7.0"
},
"type": "library",
"autoload": {
"psr-4": {
"ashtokalo\\translit\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"description": "PHP library to convert text from one script to another.",
"keywords": [
"latinization",
"romanization",
"translit",
"transliteration"
],
"support": {
"issues": "https://github.com/ashtokalo/php-translit/issues",
"source": "https://github.com/ashtokalo/php-translit/tree/0.2.0"
},
"time": "2022-09-26T09:05:24+00:00"
},
{
"name": "doctrine/annotations",
"version": "2.0.1",

13
config/services.yaml

@ -21,15 +21,14 @@ parameters: @@ -21,15 +21,14 @@ parameters:
app.torrent.wanted.ftp.enabled: '%env(APP_TORRENT_WANTED_FTP_ENABLED)%'
app.torrent.wanted.ftp.folder: '%env(APP_TORRENT_WANTED_FTP_FOLDER)%'
app.torrent.wanted.ftp.approved: '%env(APP_TORRENT_WANTED_FTP_APPROVED_ONLY)%'
app.index.torrent.name: '%env(APP_INDEX_TORRENT_NAME)%'
app.index.torrent.filenames: '%env(APP_INDEX_TORRENT_FILENAMES)%'
app.index.torrent.hash.v1: '%env(APP_INDEX_TORRENT_HASH_V1)%'
app.index.torrent.hash.v2: '%env(APP_INDEX_TORRENT_HASH_V2)%'
app.index.torrent.source: '%env(APP_INDEX_TORRENT_SOURCE)%'
app.index.torrent.comment: '%env(APP_INDEX_TORRENT_COMMENT)%'
app.index.torrent.name.enabled: '%env(APP_INDEX_TORRENT_NAME_ENABLED)%'
app.index.torrent.filenames.enabled: '%env(APP_INDEX_TORRENT_FILENAMES_ENABLED)%'
app.index.torrent.hash.v1.enabled: '%env(APP_INDEX_TORRENT_HASH_V1_ENABLED)%'
app.index.torrent.hash.v2.enabled: '%env(APP_INDEX_TORRENT_HASH_V2_ENABLED)%'
app.index.torrent.source.enabled: '%env(APP_INDEX_TORRENT_SOURCE_ENABLED)%'
app.index.torrent.comment.enabled: '%env(APP_INDEX_TORRENT_COMMENT_ENABLED)%'
app.index.word.length.min: '%env(APP_INDEX_WORD_LENGTH_MIN)%'
app.index.word.length.max: '%env(APP_INDEX_WORD_LENGTH_MAX)%'
app.index.transliteration: '%env(APP_INDEX_TRANSLITERATION)%'
services:
# default configuration for services in *this* file

28
src/Controller/TorrentController.php

@ -229,8 +229,6 @@ class TorrentController extends AbstractController @@ -229,8 +229,6 @@ class TorrentController extends AbstractController
$activityService
);
//
// Init request
$query = $request->get('query') ? explode(' ', urldecode($request->get('query'))) : [];
$page = $request->get('page') ? (int) $request->get('page') : 1;
@ -883,13 +881,12 @@ class TorrentController extends AbstractController @@ -883,13 +881,12 @@ class TorrentController extends AbstractController
$file->getPathName(),
(bool) $this->getParameter('app.index.torrent.name'),
(bool) $this->getParameter('app.index.torrent.filenames'),
(bool) $this->getParameter('app.index.torrent.hash.v1'),
(bool) $this->getParameter('app.index.torrent.hash.v2'),
(bool) $this->getParameter('app.index.torrent.source'),
(bool) $this->getParameter('app.index.torrent.comment'),
(bool) $this->getParameter('app.index.transliteration'),
(bool) $this->getParameter('app.index.torrent.name.enabled'),
(bool) $this->getParameter('app.index.torrent.filenames.enabled'),
(bool) $this->getParameter('app.index.torrent.hash.v1.enabled'),
(bool) $this->getParameter('app.index.torrent.hash.v2.enabled'),
(bool) $this->getParameter('app.index.torrent.source.enabled'),
(bool) $this->getParameter('app.index.torrent.comment.enabled'),
(int) $this->getParameter('app.index.word.length.min'),
(int) $this->getParameter('app.index.word.length.max'),
@ -2453,13 +2450,12 @@ class TorrentController extends AbstractController @@ -2453,13 +2450,12 @@ class TorrentController extends AbstractController
{
// Reindex keywords
$torrentService->reindexTorrentKeywordsAll(
(bool) $this->getParameter('app.index.torrent.name'),
(bool) $this->getParameter('app.index.torrent.filenames'),
(bool) $this->getParameter('app.index.torrent.hash.v1'),
(bool) $this->getParameter('app.index.torrent.hash.v2'),
(bool) $this->getParameter('app.index.torrent.source'),
(bool) $this->getParameter('app.index.torrent.comment'),
(bool) $this->getParameter('app.index.transliteration'),
(bool) $this->getParameter('app.index.torrent.name.enabled'),
(bool) $this->getParameter('app.index.torrent.filenames.enabled'),
(bool) $this->getParameter('app.index.torrent.hash.v1.enabled'),
(bool) $this->getParameter('app.index.torrent.hash.v2.enabled'),
(bool) $this->getParameter('app.index.torrent.source.enabled'),
(bool) $this->getParameter('app.index.torrent.comment.enabled'),
(int) $this->getParameter('app.index.word.length.min'),
(int) $this->getParameter('app.index.word.length.max')
);

82
src/Repository/TorrentRepository.php

@ -73,27 +73,36 @@ class TorrentRepository extends ServiceEntityRepository @@ -73,27 +73,36 @@ class TorrentRepository extends ServiceEntityRepository
int $userId,
array $keywords,
array $locales,
?bool $sensitive = null,
?bool $approved = null,
?bool $status = null,
?bool $sensitive = null,
?bool $approved = null,
?bool $status = null
): \Doctrine\ORM\QueryBuilder
{
$query = $this->createQueryBuilder('t');
if ($keywords)
{
$andKeywords = $query->expr()->andX();
foreach ($keywords as $i => $keyword)
{
$keyword = mb_strtolower($keyword); // all keywords stored in lowercase
// Make query to the index case insensitive
$keyword = mb_strtolower($keyword);
$andKeywords->add("t.keywords LIKE :keyword{$i}");
// Init OR condition for each word form
$orKeywords = $query->expr()->orX();
$orKeywords->add("t.keywords LIKE :keyword{$i}");
$query->setParameter(":keyword{$i}", "%{$keyword}%");
}
$query->andWhere($andKeywords);
// Generate word forms for each transliteration locale #33
foreach ($this->generateWordForms($keyword) as $j => $wordForm)
{
$orKeywords->add("t.keywords LIKE :keyword{$i}{$j}");
$query->setParameter(":keyword{$i}{$j}", "%{$wordForm}%");
}
// Append AND condition
$query->andWhere($orKeywords);
}
}
if ($locales)
@ -153,4 +162,59 @@ class TorrentRepository extends ServiceEntityRepository @@ -153,4 +162,59 @@ class TorrentRepository extends ServiceEntityRepository
return $query;
}
/**
 * Word forms generator to improve search results,
 * e.g. transliteration rules for latin filenames (#33).
 *
 * The keyword is passed to the transliteration library once per locale in
 * $transliteration. Every transliterated form then gets one extra variant
 * per $charForms entry; each replacement is applied on its own, they are
 * not chained.
 *
 * @param string                $keyword         Search keyword to expand
 * @param string[]              $transliteration Locale codes passed to Translit::convert(),
 *                                               see https://github.com/ashtokalo/php-translit
 * @param array<string, string> $charForms       Additional "from => to" substring replacements
 *
 * @return string[] Unique, non-empty word forms as a zero-based list
 */
private function generateWordForms(
    string $keyword,
    // #33 supported locales:
    // https://github.com/ashtokalo/php-translit
    array $transliteration = [
        'be',
        'bg',
        'el',
        'hy',
        'kk',
        'mk',
        'ru',
        'ka',
        'uk'
    ],
    // Additional char forms
    array $charForms =
    [
        'c' => 'k',
        'k' => 'c',
    ]
): array
{
    // The converter does not depend on the locale, resolve it once
    $translit = \ashtokalo\translit\Translit::object();

    // Apply transliteration
    $transliterated = [];

    foreach ($transliteration as $locale)
    {
        $transliterated[] = $translit->convert(
            $keyword,
            $locale
        );
    }

    // Apply char forms to the transliterated forms only
    $charVariants = [];

    foreach ($transliterated as $wordForm)
    {
        foreach ($charForms as $from => $to)
        {
            $charVariants[] = str_replace(
                (string) $from,
                (string) $to,
                (string) $wordForm
            );
        }
    }

    // Drop empty forms: the search query wraps every form in LIKE wildcards,
    // so an empty string would become the pattern "%%" and match every row
    $wordForms = array_filter(
        [...$transliterated, ...$charVariants],
        static fn ($wordForm): bool => (string) $wordForm !== ''
    );

    // Remove duplicates; array_unique() preserves keys, so reindex
    // to hand the caller a gap-free list
    return array_values(
        array_unique(
            $wordForms
        )
    );
}
}

15
src/Service/TorrentService.php

@ -64,7 +64,6 @@ class TorrentService @@ -64,7 +64,6 @@ class TorrentService
public function generateTorrentKeywordsByString(
string $string,
bool $transliteration,
int $wordLengthMin,
int $wordLengthMax,
): array
@ -97,11 +96,6 @@ class TorrentService @@ -97,11 +96,6 @@ class TorrentService
{
// Apply case insensitive search conversion
$words[$key] = mb_strtolower($value);
if ($transliteration)
{
// @TODO
}
}
}
@ -129,7 +123,6 @@ class TorrentService @@ -129,7 +123,6 @@ class TorrentService
bool $extractSource,
bool $extractComment,
bool $wordTransliteration,
int $wordLengthMin,
int $wordLengthMax
@ -147,7 +140,6 @@ class TorrentService @@ -147,7 +140,6 @@ class TorrentService
$keywords,
$this->generateTorrentKeywordsByString(
$name,
$wordTransliteration,
$wordLengthMin,
$wordLengthMax
)
@ -163,7 +155,6 @@ class TorrentService @@ -163,7 +155,6 @@ class TorrentService
$keywords,
$this->generateTorrentKeywordsByString(
$list['path'],
$wordTransliteration,
$wordLengthMin,
$wordLengthMax
)
@ -179,7 +170,6 @@ class TorrentService @@ -179,7 +170,6 @@ class TorrentService
$keywords,
$this->generateTorrentKeywordsByString(
$source,
$wordTransliteration,
$wordLengthMin,
$wordLengthMax
)
@ -195,7 +185,6 @@ class TorrentService @@ -195,7 +185,6 @@ class TorrentService
$keywords,
$this->generateTorrentKeywordsByString(
$comment,
$wordTransliteration,
$wordLengthMin,
$wordLengthMax
)
@ -301,7 +290,6 @@ class TorrentService @@ -301,7 +290,6 @@ class TorrentService
bool $extractSource,
bool $extractComment,
bool $wordTransliteration,
int $wordLengthMin,
int $wordLengthMax,
@ -326,7 +314,6 @@ class TorrentService @@ -326,7 +314,6 @@ class TorrentService
$extractInfoHashV2,
$extractSource,
$extractComment,
$wordTransliteration,
$wordLengthMin,
$wordLengthMax
),
@ -623,7 +610,6 @@ class TorrentService @@ -623,7 +610,6 @@ class TorrentService
bool $extractInfoHashV2,
bool $extractSource,
bool $extractComment,
bool $wordTransliteration,
int $wordLengthMin,
int $wordLengthMax
): void
@ -643,7 +629,6 @@ class TorrentService @@ -643,7 +629,6 @@ class TorrentService
$extractInfoHashV2,
$extractSource,
$extractComment,
$wordTransliteration,
$wordLengthMin,
$wordLengthMax
)

Loading…
Cancel
Save