diff --git a/src/helpers/cleanSearchText.ts b/src/helpers/cleanSearchText.ts index beb51fcc..6c646b70 100644 --- a/src/helpers/cleanSearchText.ts +++ b/src/helpers/cleanSearchText.ts @@ -11,29 +11,69 @@ import LatinizeMap from "../config/latinizeMap"; -const badCharsRe = /[`~!@#$%^&*()\-_=+\[\]\\|{}'";:\/?.>,<]+/g; +export const badCharsRe = /[`~!@#$%^&*()\-_=+\[\]\\|{}'";:\/?.>,<]+/g; const trimRe = /^\s+|\s$/g; +const C2L: {[k: string]: string} = { + й: 'q', + ц: 'w', + у: 'e', + к: 'r', + е: 't', + н: 'y', + г: 'u', + ш: 'i', + щ: 'o', + з: 'p', + х: '[', + ъ: ']', + ф: 'a', + ы: 's', + в: 'd', + а: 'f', + п: 'g', + р: 'h', + о: 'j', + л: 'k', + д: 'l', + ж: ';', + э: '\'', + я: 'z', + ч: 'x', + с: 'c', + м: 'v', + и: 'b', + т: 'n', + ь: 'm', + б: ',', + ю: '.', + '.': '/' +}; + export function clearBadCharsAndTrim(text: string) { return text.replace(badCharsRe, '').replace(trimRe, ''); } +export function fixCyrillic(text: string) { + return text.toLowerCase().replace(/[\wа-я]/g, (ch) => { + const latinizeCh = C2L[ch]; + return latinizeCh ?? ch; + }); +} + export function latinizeString(text: string) { return text.replace(/[^A-Za-z0-9]/g, (ch) => { const latinizeCh = LatinizeMap[ch]; - return latinizeCh !== undefined ? latinizeCh : ch; + return latinizeCh ?? ch; }); } export default function cleanSearchText(text: string, latinize = true) { - const hasTag = text.charAt(0) === '%'; - text = clearBadCharsAndTrim(text); - if(latinize) text = latinizeString(text); - - text = text.toLowerCase(); - if(hasTag) text = '%' + text; - - return text; + return processSearchText(text, { + clearBadChars: true, + latinize, + ignoreCase: true + }); } export type ProcessSearchTextOptions = Partial<{ @@ -45,9 +85,11 @@ export type ProcessSearchTextOptions = Partial<{ export function processSearchText(text: string, options: ProcessSearchTextOptions = {}) { const hasTag = options.includeTag && text.charAt(0) === '%'; + const originalText = text; if(options.clearBadChars) text = clearBadCharsAndTrim(text); if(options.latinize) text = latinizeString(text); if(options.ignoreCase) text = text.toLowerCase(); if(hasTag) text = '%' + text; + if(options.latinize) text += '\x01' + fixCyrillic(originalText); return text; } diff --git a/src/lib/searchIndex.ts b/src/lib/searchIndex.ts index 2311b722..9292efa8 100644 --- a/src/lib/searchIndex.ts +++ b/src/lib/searchIndex.ts @@ -10,6 +10,7 @@ */ import { processSearchText, ProcessSearchTextOptions } from '../helpers/cleanSearchText'; +import flatten from '../helpers/array/flatten'; export default class SearchIndex { private fullTexts: Map = new Map(); @@ -49,15 +50,9 @@ export default class SearchIndex { }); */ } - public search(query: string) { - const fullTexts = this.fullTexts; - //const shortIndexes = searchIndex.shortIndexes; - - if(this.options) { - query = processSearchText(query, this.options); - } - + private _search(query: string) { const newFoundObjs: Array<{fullText: string, fullTextLength: number, what: SearchWhat, foundChars: number}> = []; + const fullTexts = this.fullTexts; const queryWords = query.split(' '); const queryWordsLength = queryWords.length; fullTexts.forEach((fullText, what) => { @@ -66,7 +61,7 @@ export default class SearchIndex { for(let i = 0; i < queryWordsLength; ++i) { // * verify that all words are found const word = queryWords[i]; const idx = fullText.indexOf(word); - if(idx === -1 || (idx !== 0 && fullText[idx - 1] !== ' ')) { // * search only from word beginning + if(idx === -1 || (idx !== 0 && fullText[idx - 1] !== ' '/* && !badCharsRe.test(fullText[idx - 1]) */)) { // * search only from word beginning found = false; break; } @@ -83,39 +78,21 @@ export default class SearchIndex { } }); - newFoundObjs.sort((a, b) => a.fullTextLength - b.fullTextLength || b.foundChars - a.foundChars); - - //newFoundObjs.sort((a, b) => a.fullText.localeCompare(b.fullText)); - const newFoundObjs2: Set = new Set(newFoundObjs.map(o => o.what)); + return newFoundObjs; + } - /* const queryWords = query.split(' '); - let foundArr: number[]; - for(let i = 0; i < queryWords.length; i++) { - const newFound = shortIndexes[queryWords[i].substr(0, 3)]; - if(!newFound) { - foundArr = []; - break; - } - - if(foundArr === undefined || foundArr.length > newFound.length) { - foundArr = newFound; - } + public search(query: string) { + if(this.options) { + query = processSearchText(query, this.options); } - for(let j = 0; j < foundArr.length; j++) { - let found = true; - let searchText = fullTexts[foundArr[j]]; - for(let i = 0; i < queryWords.length; i++) { - if(searchText.indexOf(queryWords[i]) === -1) { - found = false; - break; - } - } + const queries = query.split('\x01'); + const newFoundObjs = flatten(queries.map(query => this._search(query))); - if(found) { - newFoundObjs[foundArr[j]] = true; - } - } */ + newFoundObjs.sort((a, b) => a.fullTextLength - b.fullTextLength || b.foundChars - a.foundChars); + + //newFoundObjs.sort((a, b) => a.fullText.localeCompare(b.fullText)); + const newFoundObjs2: Set = new Set(newFoundObjs.map(o => o.what)); return newFoundObjs2; }