From ca92db88264fc19e7fae58b447ae2a5306437688 Mon Sep 17 00:00:00 2001 From: ghost Date: Sat, 28 Oct 2023 01:00:00 +0300 Subject: [PATCH] add transliteration support in search requests #13 --- README.md | 1 + composer.json | 3 +- src/library/filter.php | 62 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 64 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 474b4ad..f172535 100644 --- a/README.md +++ b/README.md @@ -282,6 +282,7 @@ See also: [SQLite tree](https://github.com/YGGverse/YGGo/tree/sqliteway) * Engine sources [MIT License](https://github.com/YGGverse/YGGo/blob/main/LICENSE) * Home page animation by [alvarotrigo](https://codepen.io/alvarotrigo/pen/GRvYNax) * CLI logo by [patorjk.com](https://patorjk.com/software/taag/#p=display&f=Slant&t=YGGo!) +* Transliteration by [php-translit](https://github.com/ashtokalo/php-translit) * Identicons by [jdenticon](https://github.com/dmester/jdenticon-php) #### Feedback diff --git a/composer.json b/composer.json index a1b4111..af3b2e8 100644 --- a/composer.json +++ b/composer.json @@ -8,7 +8,8 @@ "yggverse/cache": ">=0.3.0", "symfony/dom-crawler": "^6.3", "symfony/css-selector": "^6.3", - "jdenticon/jdenticon": "^1.0" + "jdenticon/jdenticon": "^1.0", + "ashtokalo/php-translit": "^0.2.0" }, "license": "MIT", "autoload": { diff --git a/src/library/filter.php b/src/library/filter.php index d066b80..fcee673 100644 --- a/src/library/filter.php +++ b/src/library/filter.php @@ -113,7 +113,12 @@ class Filter { if (mb_strlen($word) > 1) { - $words[] = sprintf('(%s*)', $word); + $words[] = sprintf('%s*', $word); + + foreach (self::_generateWordForms($word) as $wordForm) + { + $words[] = sprintf('(*%s*)', $wordForm); + } } } @@ -132,4 +137,59 @@ class Filter { return $texts[(($number % 100) > 4 && ($number % 100) < 20) ? 2 : $cases[min($number % 10, 5)]]; } + + // Word forms generator to improve search results + // e.g. 
// transliteration rules for latin filenames
    /**
     * Builds alternative spellings of a search keyword.
     *
     * Every locale in $transliteration yields one form through the
     * php-translit converter. Every form obtained that way then yields one
     * more variant per $charForms pair. Each pair is applied on its own to the
     * transliterated form, so pairs are never chained and the default c/k
     * pairs cannot undo each other.
     *
     * The raw $keyword is never added explicitly: it is only part of the
     * result if one of the produced forms happens to equal it.
     *
     * NOTE(review): the exact output of Translit::convert() for a given locale
     * is defined by the library, not by this file - verify against its docs.
     *
     * @param string $keyword         Word to expand
     * @param array  $transliteration Locale codes passed to Translit::convert() (#13)
     * @param array  $charForms       Map of substring => replacement
     *
     * @return array Unique, non-empty word forms as a sequentially indexed list
     */
    private static function _generateWordForms(
        string $keyword,
        // #13 supported locales:
        // https://github.com/ashtokalo/php-translit
        array $transliteration = [
            'be',
            'bg',
            'el',
            'hy',
            'kk',
            'mk',
            'ru',
            'ka',
            'uk'
        ],
        // Additional char forms
        array $charForms =
        [
            'c' => 'k',
            'k' => 'c',
        ]
    ): array
    {
        $wordForms = [];

        // Apply transliteration
        if ($transliteration !== [])
        {
            // Resolve the converter once instead of on every iteration;
            // it is not touched at all when no locale is requested
            $translit = \ashtokalo\translit\Translit::object();

            foreach ($transliteration as $locale)
            {
                $wordForms[] = $translit->convert(
                    $keyword,
                    $locale
                );
            }
        }

        // Apply char forms to the transliterated forms only; variants are
        // collected separately so the array being iterated is never modified
        $charVariants = [];

        foreach ($wordForms as $wordForm)
        {
            foreach ($charForms as $from => $to)
            {
                // PHP stores numeric-looking array keys as int, which
                // str_replace() rejects under strict_types - cast explicitly
                $charVariants[] = str_replace(
                    (string) $from,
                    (string) $to,
                    $wordForm
                );
            }
        }

        // Remove duplicates, keeping the first occurrence of every form
        $uniqueForms = array_unique(
            array_merge(
                $wordForms,
                $charVariants
            )
        );

        // Drop empty forms: the caller would turn them into a bare wildcard term
        $uniqueForms = array_filter(
            $uniqueForms,
            function ($wordForm)
            {
                return '' !== (string) $wordForm;
            }
        );

        // array_unique() and array_filter() preserve keys; return a clean list
        return array_values(
            $uniqueForms
        );
    }
}