Browse Source

implement transliteration word forms in search #33

main
ghost 1 year ago
parent
commit
997666ab8e
  1. 17
      .env
  2. 1
      README.md
  3. 1
      composer.json
  4. 45
      composer.lock
  5. 13
      config/services.yaml
  6. 28
      src/Controller/TorrentController.php
  7. 82
      src/Repository/TorrentRepository.php
  8. 15
      src/Service/TorrentService.php

17
.env

@ -92,28 +92,25 @@ APP_TORRENT_WANTED_FTP_FOLDER=/yggtracker
APP_TORRENT_WANTED_FTP_APPROVED_ONLY=1 APP_TORRENT_WANTED_FTP_APPROVED_ONLY=1
# Enable search index for torrent name # Enable search index for torrent name
APP_INDEX_TORRENT_NAME=1 APP_INDEX_TORRENT_NAME_ENABLED=1
# Enable search index for torrent info hash v1 # Enable search index for torrent info hash v1
APP_INDEX_TORRENT_HASH_V1=1 APP_INDEX_TORRENT_HASH_V1_ENABLED=1
# Enable search index for torrent info hash v2 # Enable search index for torrent info hash v2
APP_INDEX_TORRENT_HASH_V2=1 APP_INDEX_TORRENT_HASH_V2_ENABLED=1
# Enable search index for torrent filenames # Enable search index for torrent filenames
APP_INDEX_TORRENT_FILENAMES=1 APP_INDEX_TORRENT_FILENAMES_ENABLED=1
# Enable search index for torrent source # Enable search index for torrent source
APP_INDEX_TORRENT_SOURCE=1 APP_INDEX_TORRENT_SOURCE_ENABLED=1
# Enable search index for torrent comment # Enable search index for torrent comment
APP_INDEX_TORRENT_COMMENT=1 APP_INDEX_TORRENT_COMMENT_ENABLED=1
# Enable search index for words length greater than N chars # Enable search index for words length greater than N chars
APP_INDEX_WORD_LENGTH_MIN=3 APP_INDEX_WORD_LENGTH_MIN=3
# Enable search index for words length not greater than N chars # Enable search index for words length not greater than N chars
APP_INDEX_WORD_LENGTH_MAX=255 APP_INDEX_WORD_LENGTH_MAX=255
# Enable search index transliteration @TODO
APP_INDEX_TRANSLITERATION=1

1
README.md

@ -108,6 +108,7 @@ git checkout -b my-pr-branch-name
* [SVG icons](https://icons.getbootstrap.com) * [SVG icons](https://icons.getbootstrap.com)
* [Scrapper](https://github.com/medariox/scrapeer) / [Composer Edition](https://github.com/YGGverse/scrapeer) * [Scrapper](https://github.com/medariox/scrapeer) / [Composer Edition](https://github.com/YGGverse/scrapeer)
* [Bencode Library](https://github.com/Rhilip/Bencode) * [Bencode Library](https://github.com/Rhilip/Bencode)
* [Transliteration Library](https://github.com/ashtokalo/php-translit)
* [Identicons](https://github.com/dmester/jdenticon-php) * [Identicons](https://github.com/dmester/jdenticon-php)
#### Support #### Support

1
composer.json

@ -9,6 +9,7 @@
"php": ">=8.1", "php": ">=8.1",
"ext-ctype": "*", "ext-ctype": "*",
"ext-iconv": "*", "ext-iconv": "*",
"ashtokalo/php-translit": "^0.2.0",
"doctrine/annotations": "^2.0", "doctrine/annotations": "^2.0",
"doctrine/doctrine-bundle": "^2.10", "doctrine/doctrine-bundle": "^2.10",
"doctrine/doctrine-migrations-bundle": "^3.2", "doctrine/doctrine-migrations-bundle": "^3.2",

45
composer.lock generated

@ -4,8 +4,51 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically" "This file is @generated automatically"
], ],
"content-hash": "3770ffcd80695bc10a22f8ece4f68d1f", "content-hash": "4d930a43cf9a80e1622029c4a4048a6b",
"packages": [ "packages": [
{
"name": "ashtokalo/php-translit",
"version": "0.2.0",
"source": {
"type": "git",
"url": "https://github.com/ashtokalo/php-translit.git",
"reference": "8ced36cbcd0ed8befb1388ef51935eb53bcfe5b3"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/ashtokalo/php-translit/zipball/8ced36cbcd0ed8befb1388ef51935eb53bcfe5b3",
"reference": "8ced36cbcd0ed8befb1388ef51935eb53bcfe5b3",
"shasum": ""
},
"require": {
"php": ">=7.0"
},
"require-dev": {
"phpunit/phpunit": "~7.0"
},
"type": "library",
"autoload": {
"psr-4": {
"ashtokalo\\translit\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"description": "PHP library to convert text from one script to another.",
"keywords": [
"latinization",
"romanization",
"translit",
"transliteration"
],
"support": {
"issues": "https://github.com/ashtokalo/php-translit/issues",
"source": "https://github.com/ashtokalo/php-translit/tree/0.2.0"
},
"time": "2022-09-26T09:05:24+00:00"
},
{ {
"name": "doctrine/annotations", "name": "doctrine/annotations",
"version": "2.0.1", "version": "2.0.1",

13
config/services.yaml

@ -21,15 +21,14 @@ parameters:
app.torrent.wanted.ftp.enabled: '%env(APP_TORRENT_WANTED_FTP_ENABLED)%' app.torrent.wanted.ftp.enabled: '%env(APP_TORRENT_WANTED_FTP_ENABLED)%'
app.torrent.wanted.ftp.folder: '%env(APP_TORRENT_WANTED_FTP_FOLDER)%' app.torrent.wanted.ftp.folder: '%env(APP_TORRENT_WANTED_FTP_FOLDER)%'
app.torrent.wanted.ftp.approved: '%env(APP_TORRENT_WANTED_FTP_APPROVED_ONLY)%' app.torrent.wanted.ftp.approved: '%env(APP_TORRENT_WANTED_FTP_APPROVED_ONLY)%'
app.index.torrent.name: '%env(APP_INDEX_TORRENT_NAME)%' app.index.torrent.name.enabled: '%env(APP_INDEX_TORRENT_NAME_ENABLED)%'
app.index.torrent.filenames: '%env(APP_INDEX_TORRENT_FILENAMES)%' app.index.torrent.filenames.enabled: '%env(APP_INDEX_TORRENT_FILENAMES_ENABLED)%'
app.index.torrent.hash.v1: '%env(APP_INDEX_TORRENT_HASH_V1)%' app.index.torrent.hash.v1.enabled: '%env(APP_INDEX_TORRENT_HASH_V1_ENABLED)%'
app.index.torrent.hash.v2: '%env(APP_INDEX_TORRENT_HASH_V2)%' app.index.torrent.hash.v2.enabled: '%env(APP_INDEX_TORRENT_HASH_V2_ENABLED)%'
app.index.torrent.source: '%env(APP_INDEX_TORRENT_SOURCE)%' app.index.torrent.source.enabled: '%env(APP_INDEX_TORRENT_SOURCE_ENABLED)%'
app.index.torrent.comment: '%env(APP_INDEX_TORRENT_COMMENT)%' app.index.torrent.comment.enabled: '%env(APP_INDEX_TORRENT_COMMENT_ENABLED)%'
app.index.word.length.min: '%env(APP_INDEX_WORD_LENGTH_MIN)%' app.index.word.length.min: '%env(APP_INDEX_WORD_LENGTH_MIN)%'
app.index.word.length.max: '%env(APP_INDEX_WORD_LENGTH_MAX)%' app.index.word.length.max: '%env(APP_INDEX_WORD_LENGTH_MAX)%'
app.index.transliteration: '%env(APP_INDEX_TRANSLITERATION)%'
services: services:
# default configuration for services in *this* file # default configuration for services in *this* file

28
src/Controller/TorrentController.php

@ -229,8 +229,6 @@ class TorrentController extends AbstractController
$activityService $activityService
); );
//
// Init request // Init request
$query = $request->get('query') ? explode(' ', urldecode($request->get('query'))) : []; $query = $request->get('query') ? explode(' ', urldecode($request->get('query'))) : [];
$page = $request->get('page') ? (int) $request->get('page') : 1; $page = $request->get('page') ? (int) $request->get('page') : 1;
@ -883,13 +881,12 @@ class TorrentController extends AbstractController
$file->getPathName(), $file->getPathName(),
(bool) $this->getParameter('app.index.torrent.name'), (bool) $this->getParameter('app.index.torrent.name.enabled'),
(bool) $this->getParameter('app.index.torrent.filenames'), (bool) $this->getParameter('app.index.torrent.filenames.enabled'),
(bool) $this->getParameter('app.index.torrent.hash.v1'), (bool) $this->getParameter('app.index.torrent.hash.v1.enabled'),
(bool) $this->getParameter('app.index.torrent.hash.v2'), (bool) $this->getParameter('app.index.torrent.hash.v2.enabled'),
(bool) $this->getParameter('app.index.torrent.source'), (bool) $this->getParameter('app.index.torrent.source.enabled'),
(bool) $this->getParameter('app.index.torrent.comment'), (bool) $this->getParameter('app.index.torrent.comment.enabled'),
(bool) $this->getParameter('app.index.transliteration'),
(int) $this->getParameter('app.index.word.length.min'), (int) $this->getParameter('app.index.word.length.min'),
(int) $this->getParameter('app.index.word.length.max'), (int) $this->getParameter('app.index.word.length.max'),
@ -2453,13 +2450,12 @@ class TorrentController extends AbstractController
{ {
// Reindex keywords // Reindex keywords
$torrentService->reindexTorrentKeywordsAll( $torrentService->reindexTorrentKeywordsAll(
(bool) $this->getParameter('app.index.torrent.name'), (bool) $this->getParameter('app.index.torrent.name.enabled'),
(bool) $this->getParameter('app.index.torrent.filenames'), (bool) $this->getParameter('app.index.torrent.filenames.enabled'),
(bool) $this->getParameter('app.index.torrent.hash.v1'), (bool) $this->getParameter('app.index.torrent.hash.v1.enabled'),
(bool) $this->getParameter('app.index.torrent.hash.v2'), (bool) $this->getParameter('app.index.torrent.hash.v2.enabled'),
(bool) $this->getParameter('app.index.torrent.source'), (bool) $this->getParameter('app.index.torrent.source.enabled'),
(bool) $this->getParameter('app.index.torrent.comment'), (bool) $this->getParameter('app.index.torrent.comment.enabled'),
(bool) $this->getParameter('app.index.transliteration'),
(int) $this->getParameter('app.index.word.length.min'), (int) $this->getParameter('app.index.word.length.min'),
(int) $this->getParameter('app.index.word.length.max') (int) $this->getParameter('app.index.word.length.max')
); );

82
src/Repository/TorrentRepository.php

@ -73,27 +73,36 @@ class TorrentRepository extends ServiceEntityRepository
int $userId, int $userId,
array $keywords, array $keywords,
array $locales, array $locales,
?bool $sensitive = null, ?bool $sensitive = null,
?bool $approved = null, ?bool $approved = null,
?bool $status = null, ?bool $status = null
): \Doctrine\ORM\QueryBuilder ): \Doctrine\ORM\QueryBuilder
{ {
$query = $this->createQueryBuilder('t'); $query = $this->createQueryBuilder('t');
if ($keywords) if ($keywords)
{ {
$andKeywords = $query->expr()->andX();
foreach ($keywords as $i => $keyword) foreach ($keywords as $i => $keyword)
{ {
$keyword = mb_strtolower($keyword); // all keywords stored in lowercase // Make query to the index case insensitive
$keyword = mb_strtolower($keyword);
$andKeywords->add("t.keywords LIKE :keyword{$i}"); // Init OR condition for each word form
$orKeywords = $query->expr()->orX();
$orKeywords->add("t.keywords LIKE :keyword{$i}");
$query->setParameter(":keyword{$i}", "%{$keyword}%"); $query->setParameter(":keyword{$i}", "%{$keyword}%");
}
$query->andWhere($andKeywords); // Generate word forms for each transliteration locale #33
foreach ($this->generateWordForms($keyword) as $j => $wordForm)
{
$orKeywords->add("t.keywords LIKE :keyword{$i}{$j}");
$query->setParameter(":keyword{$i}{$j}", "%{$wordForm}%");
}
// Append AND condition
$query->andWhere($orKeywords);
}
} }
if ($locales) if ($locales)
@ -153,4 +162,59 @@ class TorrentRepository extends ServiceEntityRepository
return $query; return $query;
} }
/**
 * Word forms generator to improve search results,
 * e.g. transliteration rules for latin filenames (#33).
 *
 * Every locale in $transliteration is passed to Translit::convert() together
 * with the keyword; each entry of $charForms is then applied, one substitution
 * at a time, to every transliterated form.
 *
 * NOTE(review): presumably Translit::convert() maps the locale's script to
 * latin - confirm against https://github.com/ashtokalo/php-translit
 *
 * Char forms are derived from the transliterated forms only, so an empty
 * $transliteration list yields an empty result.
 *
 * @param string $keyword         Search keyword
 * @param array  $transliteration Locale codes handed to Translit::convert()
 * @param array  $charForms       Additional char forms as [from => to]
 *
 * @return array Unique, non-empty word forms indexed sequentially from 0.
 *               May still contain $keyword itself when a conversion leaves
 *               it unchanged.
 */
private function generateWordForms(
    string $keyword,
    // #33 supported locales:
    // https://github.com/ashtokalo/php-translit
    array $transliteration = [
        'be',
        'bg',
        'el',
        'hy',
        'kk',
        'mk',
        'ru',
        'ka',
        'uk'
    ],
    // Additional char forms
    array $charForms =
    [
        'c' => 'k',
        'k' => 'c',
    ]
): array
{
    $wordForms = [];

    // Apply transliteration
    foreach ($transliteration as $locale)
    {
        $wordForms[] = \ashtokalo\translit\Translit::object()->convert(
            $keyword,
            $locale
        );
    }

    // Collapse identical transliterations first, so the char forms below
    // are computed once per distinct form instead of once per locale
    $wordForms = array_unique(
        $wordForms
    );

    // Apply char forms
    // (foreach by value walks a snapshot, so appending here does not extend the loop)
    foreach ($wordForms as $wordForm)
    {
        foreach ($charForms as $from => $to)
        {
            $wordForms[] = str_replace(
                $from,
                $to,
                $wordForm
            );
        }
    }

    // Drop empty forms: the caller wraps each form as "%{$wordForm}%",
    // so an empty string would become LIKE '%%' and match every record
    $wordForms = array_filter(
        $wordForms,
        function ($wordForm)
        {
            return '' !== (string) $wordForm;
        }
    );

    // Remove duplicates and reindex, array_unique keeps the original keys
    return array_values(
        array_unique(
            $wordForms
        )
    );
}
} }

15
src/Service/TorrentService.php

@ -64,7 +64,6 @@ class TorrentService
public function generateTorrentKeywordsByString( public function generateTorrentKeywordsByString(
string $string, string $string,
bool $transliteration,
int $wordLengthMin, int $wordLengthMin,
int $wordLengthMax, int $wordLengthMax,
): array ): array
@ -97,11 +96,6 @@ class TorrentService
{ {
// Apply case insensitive search conversion // Apply case insensitive search conversion
$words[$key] = mb_strtolower($value); $words[$key] = mb_strtolower($value);
if ($transliteration)
{
// @TODO
}
} }
} }
@ -129,7 +123,6 @@ class TorrentService
bool $extractSource, bool $extractSource,
bool $extractComment, bool $extractComment,
bool $wordTransliteration,
int $wordLengthMin, int $wordLengthMin,
int $wordLengthMax int $wordLengthMax
@ -147,7 +140,6 @@ class TorrentService
$keywords, $keywords,
$this->generateTorrentKeywordsByString( $this->generateTorrentKeywordsByString(
$name, $name,
$wordTransliteration,
$wordLengthMin, $wordLengthMin,
$wordLengthMax $wordLengthMax
) )
@ -163,7 +155,6 @@ class TorrentService
$keywords, $keywords,
$this->generateTorrentKeywordsByString( $this->generateTorrentKeywordsByString(
$list['path'], $list['path'],
$wordTransliteration,
$wordLengthMin, $wordLengthMin,
$wordLengthMax $wordLengthMax
) )
@ -179,7 +170,6 @@ class TorrentService
$keywords, $keywords,
$this->generateTorrentKeywordsByString( $this->generateTorrentKeywordsByString(
$source, $source,
$wordTransliteration,
$wordLengthMin, $wordLengthMin,
$wordLengthMax $wordLengthMax
) )
@ -195,7 +185,6 @@ class TorrentService
$keywords, $keywords,
$this->generateTorrentKeywordsByString( $this->generateTorrentKeywordsByString(
$comment, $comment,
$wordTransliteration,
$wordLengthMin, $wordLengthMin,
$wordLengthMax $wordLengthMax
) )
@ -301,7 +290,6 @@ class TorrentService
bool $extractSource, bool $extractSource,
bool $extractComment, bool $extractComment,
bool $wordTransliteration,
int $wordLengthMin, int $wordLengthMin,
int $wordLengthMax, int $wordLengthMax,
@ -326,7 +314,6 @@ class TorrentService
$extractInfoHashV2, $extractInfoHashV2,
$extractSource, $extractSource,
$extractComment, $extractComment,
$wordTransliteration,
$wordLengthMin, $wordLengthMin,
$wordLengthMax $wordLengthMax
), ),
@ -623,7 +610,6 @@ class TorrentService
bool $extractInfoHashV2, bool $extractInfoHashV2,
bool $extractSource, bool $extractSource,
bool $extractComment, bool $extractComment,
bool $wordTransliteration,
int $wordLengthMin, int $wordLengthMin,
int $wordLengthMax int $wordLengthMax
): void ): void
@ -643,7 +629,6 @@ class TorrentService
$extractInfoHashV2, $extractInfoHashV2,
$extractSource, $extractSource,
$extractComment, $extractComment,
$wordTransliteration,
$wordLengthMin, $wordLengthMin,
$wordLengthMax $wordLengthMax
) )

Loading…
Cancel
Save