From c7c5d7340cda57a8f49b4e8bc3340d169e8d6fc7 Mon Sep 17 00:00:00 2001 From: ghost Date: Fri, 27 Oct 2023 21:22:16 +0300 Subject: [PATCH] implement additional torrent fields index, add indexer configuration #31 --- .env | 29 +++- config/services.yaml | 9 ++ src/Controller/TorrentController.php | 24 ++- src/Service/TorrentService.php | 226 ++++++++++++++++++++++----- 4 files changed, 246 insertions(+), 42 deletions(-) diff --git a/.env b/.env index af22220..5afd327 100644 --- a/.env +++ b/.env @@ -89,4 +89,31 @@ APP_TORRENT_FILE_SIZE_MAX=1024000 # Store wanted torrent files in /app/var/ftp by /app/crontab/torrent/scrape/{key} APP_TORRENT_WANTED_FTP_ENABLED=1 APP_TORRENT_WANTED_FTP_FOLDER=/yggtracker -APP_TORRENT_WANTED_FTP_APPROVED_ONLY=1 \ No newline at end of file +APP_TORRENT_WANTED_FTP_APPROVED_ONLY=1 + +# Enable search index for torrent name +APP_INDEX_TORRENT_NAME=1 + +# Enable search index for torrent info hash v1 +APP_INDEX_TORRENT_HASH_V1=1 + +# Enable search index for torrent info hash v2 +APP_INDEX_TORRENT_HASH_V2=1 + +# Enable search index for torrent filenames +APP_INDEX_TORRENT_FILENAMES=1 + +# Enable search index for torrent source +APP_INDEX_TORRENT_SOURCE=1 + +# Enable search index for torrent comment +APP_INDEX_TORRENT_COMMENT=1 + +# Enable search index for words length not less than N chars +APP_INDEX_WORD_LENGTH_MIN=3 + +# Enable search index for words length not greater than N chars +APP_INDEX_WORD_LENGTH_MAX=255 + +# Enable search index transliteration @TODO +APP_INDEX_TRANSLITERATION=1 \ No newline at end of file diff --git a/config/services.yaml b/config/services.yaml index e53a8b1..a63214a 100644 --- a/config/services.yaml +++ b/config/services.yaml @@ -21,6 +21,15 @@ parameters: app.torrent.wanted.ftp.enabled: '%env(APP_TORRENT_WANTED_FTP_ENABLED)%' app.torrent.wanted.ftp.folder: '%env(APP_TORRENT_WANTED_FTP_FOLDER)%' app.torrent.wanted.ftp.approved: '%env(APP_TORRENT_WANTED_FTP_APPROVED_ONLY)%' + app.index.torrent.name:
'%env(APP_INDEX_TORRENT_NAME)%' + app.index.torrent.filenames: '%env(APP_INDEX_TORRENT_FILENAMES)%' + app.index.torrent.hash.v1: '%env(APP_INDEX_TORRENT_HASH_V1)%' + app.index.torrent.hash.v2: '%env(APP_INDEX_TORRENT_HASH_V2)%' + app.index.torrent.source: '%env(APP_INDEX_TORRENT_SOURCE)%' + app.index.torrent.comment: '%env(APP_INDEX_TORRENT_COMMENT)%' + app.index.word.length.min: '%env(APP_INDEX_WORD_LENGTH_MIN)%' + app.index.word.length.max: '%env(APP_INDEX_WORD_LENGTH_MAX)%' + app.index.transliteration: '%env(APP_INDEX_TRANSLITERATION)%' services: # default configuration for services in *this* file diff --git a/src/Controller/TorrentController.php b/src/Controller/TorrentController.php index a775048..713f020 100644 --- a/src/Controller/TorrentController.php +++ b/src/Controller/TorrentController.php @@ -880,7 +880,19 @@ class TorrentController extends AbstractController { // Save data $torrent = $torrentService->add( + $file->getPathName(), + + (bool) $this->getParameter('app.index.torrent.name'), + (bool) $this->getParameter('app.index.torrent.filenames'), + (bool) $this->getParameter('app.index.torrent.hash.v1'), + (bool) $this->getParameter('app.index.torrent.hash.v2'), + (bool) $this->getParameter('app.index.torrent.source'), + (bool) $this->getParameter('app.index.torrent.comment'), + (bool) $this->getParameter('app.index.transliteration'), + (int) $this->getParameter('app.index.word.length.min'), + (int) $this->getParameter('app.index.word.length.max'), + $user->getId(), time(), (array) $locales, @@ -2440,7 +2452,17 @@ class TorrentController extends AbstractController ): Response { // Reindex keywords - $torrentService->reindexTorrentKeywordsAll(); + $torrentService->reindexTorrentKeywordsAll( + (bool) $this->getParameter('app.index.torrent.name'), + (bool) $this->getParameter('app.index.torrent.filenames'), + (bool) $this->getParameter('app.index.torrent.hash.v1'), + (bool) $this->getParameter('app.index.torrent.hash.v2'), + (bool) 
$this->getParameter('app.index.torrent.source'), + (bool) $this->getParameter('app.index.torrent.comment'), + (bool) $this->getParameter('app.index.transliteration'), + (int) $this->getParameter('app.index.word.length.min'), + (int) $this->getParameter('app.index.word.length.max') + ); // Render response return new Response(); // @TODO diff --git a/src/Service/TorrentService.php b/src/Service/TorrentService.php index b7129fe..7546e1b 100644 --- a/src/Service/TorrentService.php +++ b/src/Service/TorrentService.php @@ -62,63 +62,167 @@ class TorrentService ); } + public function generateTorrentKeywordsByString( + string $string, + bool $transliteration, + int $wordLengthMin, + int $wordLengthMax, + ): array + { + $words = explode( + ' ', + preg_replace( + '/[\s]+/', + ' ', + preg_replace( + '/[\W_]+/u', + ' ', + $string + ) + ) + ); + + // Apply words filter + foreach ((array) $words as $key => $value) + { + // Apply word length filter + $length = mb_strlen($value); + + if ($length < $wordLengthMin || $length > $wordLengthMax) + { + unset($words[$key]); + } + + else + { + // Apply case insensitive search conversion + $words[$key] = mb_strtolower($value); + + if ($transliteration) + { + // @TODO + } + } + } + + // Build simple array + $keywords = []; + foreach ((array) $words as $word) + { + $keywords[] = $word; + } + + // Return unique keywords + return array_unique( + $keywords + ); + } + public function generateTorrentKeywordsByTorrentFilepath( + string $filepath, - int $minLength = 3 + + bool $extractName, + bool $extractFilenames, + bool $extractInfoHashV1, + bool $extractInfoHashV2, + bool $extractSource, + bool $extractComment, + + bool $wordTransliteration, + int $wordLengthMin, + int $wordLengthMax + ): array { $keywords = []; if ($file = $this->readTorrentFileByFilepath($filepath)) { - foreach ($file->getFileList() as $list) + if ($extractName) { - $words = explode( - ' ', - preg_replace( - '/[\s]+/', - ' ', - preg_replace( - '/[\W_]+/u', - ' ', - 
$list['path'] + if ($name = $file->getName(false)) + { + $keywords = array_merge( + $keywords, + $this->generateTorrentKeywordsByString( + $name, + $wordTransliteration, + $wordLengthMin, + $wordLengthMax ) - ) - ); + ); + } + } - foreach ($words as $key => $value) + if ($extractFilenames) + { + foreach ($file->getFileList() as $list) { - if (mb_strlen($value) < $minLength) - { - unset($words[$key]); - } - - else - { - $words[$key] = mb_strtolower($value); - } + $keywords = array_merge( + $keywords, + $this->generateTorrentKeywordsByString( + $list['path'], + $wordTransliteration, + $wordLengthMin, + $wordLengthMax + ) + ); } + } - if ($hash = $file->getInfoHashV1(false)) + if ($extractSource) + { + if ($source = $file->getSource(false)) { - $keywords[] = $hash; + $keywords = array_merge( + $keywords, + $this->generateTorrentKeywordsByString( + $source, + $wordTransliteration, + $wordLengthMin, + $wordLengthMax + ) + ); } + } - if ($hash = $file->getInfoHashV2(false)) + if ($extractComment) + { + if ($comment = $file->getComment(false)) { - $keywords[] = $hash; + $keywords = array_merge( + $keywords, + $this->generateTorrentKeywordsByString( + $comment, + $wordTransliteration, + $wordLengthMin, + $wordLengthMax + ) + ); } + } - if ($name = $file->getName(false)) + if ($extractInfoHashV1) + { + if ($hash = $file->getInfoHashV1(false)) { - $keywords[] = $name; + $keywords[] = $hash; } + } - $keywords = array_merge($keywords, $words); + if ($extractInfoHashV2) + { + if ($hash = $file->getInfoHashV2(false)) + { + $keywords[] = $hash; + } } } - return array_unique($keywords); + return array_unique( + $keywords + ); } public function getStorageFilepathByTorrentId(int $torrentId): string @@ -187,13 +291,27 @@ class TorrentService } public function add( + string $filepath, - int $userId, - int $added, - array $locales, - bool $sensitive, - bool $approved, - bool $status + + bool $extractName, + bool $extractFilenames, + bool $extractInfoHashV1, + bool $extractInfoHashV2, + 
bool $extractSource, + bool $extractComment, + + bool $wordTransliteration, + int $wordLengthMin, + int $wordLengthMax, + + int $userId, + int $added, + array $locales, + bool $sensitive, + bool $approved, + bool $status + ): ?Torrent { $torrent = $this->addTorrent( @@ -201,7 +319,16 @@ class TorrentService $added, md5_file($filepath), $this->generateTorrentKeywordsByTorrentFilepath( - $filepath + $filepath, + $extractName, + $extractFilenames, + $extractInfoHashV1, + $extractInfoHashV2, + $extractSource, + $extractComment, + $wordTransliteration, + $wordLengthMin, + $wordLengthMax ), $locales, $sensitive, @@ -489,7 +616,17 @@ class TorrentService } } - public function reindexTorrentKeywordsAll(): void + public function reindexTorrentKeywordsAll( + bool $extractName, + bool $extractFilenames, + bool $extractInfoHashV1, + bool $extractInfoHashV2, + bool $extractSource, + bool $extractComment, + bool $wordTransliteration, + int $wordLengthMin, + int $wordLengthMax + ): void { foreach ($this->entityManagerInterface ->getRepository(Torrent::class) @@ -499,7 +636,16 @@ class TorrentService $this->generateTorrentKeywordsByTorrentFilepath( $this->getStorageFilepathByTorrentId( $torrent->getId() - ) + ), + $extractName, + $extractFilenames, + $extractInfoHashV1, + $extractInfoHashV2, + $extractSource, + $extractComment, + $wordTransliteration, + $wordLengthMin, + $wordLengthMax ) );