Translate text typed in the wrong keyboard layout from Cyrillic to Latin and backwards
This commit is contained in:
parent
af3a7ee341
commit
62e2fc927e
@ -11,29 +11,69 @@
|
||||
|
||||
import LatinizeMap from "../config/latinizeMap";
|
||||
|
||||
/**
 * Matches runs of ASCII punctuation characters that are removed from search text.
 *
 * NOTE: the pattern carries the `g` flag, so the object keeps `lastIndex` state
 * between `.test()` / `.exec()` calls. Use it with `String.prototype.replace`,
 * or reset `lastIndex` before testing single characters with it.
 */
export const badCharsRe = /[`~!@#$%^&*()\-_=+\[\]\\|{}'";:\/?.>,<]+/g;
|
||||
// Matches ALL leading and ALL trailing whitespace. The trailing alternative needs
// the `+` quantifier: with a bare `\s$` only the very last whitespace character
// of the string would be removed.
const trimRe = /^\s+|\s+$/g;
|
||||
|
||||
/**
 * Character translation table used by `fixCyrillic`: each key is replaced by its value.
 *
 * The pairs appear to follow key positions of the Russian (ЙЦУКЕН) layout versus
 * the US (QWERTY) layout, i.e. what a user gets when typing Latin text with the
 * Cyrillic layout switched on — TODO confirm against the intended layouts.
 *
 * Only lowercase letters are listed; there is no entry for `ё`.
 */
const C2L: Record<string, string> = {
  // upper letter row
  й: 'q',
  ц: 'w',
  у: 'e',
  к: 'r',
  е: 't',
  н: 'y',
  г: 'u',
  ш: 'i',
  щ: 'o',
  з: 'p',
  х: '[',
  ъ: ']',
  // home row
  ф: 'a',
  ы: 's',
  в: 'd',
  а: 'f',
  п: 'g',
  р: 'h',
  о: 'j',
  л: 'k',
  д: 'l',
  ж: ';',
  э: '\'',
  // lower row
  я: 'z',
  ч: 'x',
  с: 'c',
  м: 'v',
  и: 'b',
  т: 'n',
  ь: 'm',
  б: ',',
  ю: '.',
  '.': '/'
};
|
||||
|
||||
/**
 * Removes every match of `badCharsRe` from `text`, then applies `trimRe`
 * to what is left.
 *
 * @param text - Raw text.
 * @returns The text after both replacements.
 */
export function clearBadCharsAndTrim(text: string): string {
  const withoutBadChars = text.replace(badCharsRe, '');
  return withoutBadChars.replace(trimRe, '');
}
|
||||
|
||||
/**
 * Lower-cases `text` and replaces every character that has an entry in `C2L`
 * with the mapped value. Characters without an entry are kept as they are.
 *
 * Only characters matched by `[\wа-я]` are looked up, so punctuation keys of
 * `C2L` (such as `'.'`) and letters outside the `а-я` range (such as `ё`) are
 * never translated by this function.
 *
 * @param text - Text to translate.
 * @returns The lower-cased, translated text.
 */
export function fixCyrillic(text: string): string {
  const lowerCased = text.toLowerCase();
  return lowerCased.replace(/[\wа-я]/g, (ch) => C2L[ch] ?? ch);
}
|
||||
|
||||
/**
 * Replaces every character outside `A-Z`, `a-z`, `0-9` that has an entry in
 * `LatinizeMap` with that entry; characters without an entry are kept.
 *
 * @param text - Text to latinize.
 * @returns The text with mapped characters substituted.
 */
export function latinizeString(text: string): string {
  // Single return: the earlier `!== undefined ? … : …` form and the `??` form were
  // both present back to back, which left the second statement unreachable.
  return text.replace(/[^A-Za-z0-9]/g, (ch) => LatinizeMap[ch] ?? ch);
}
|
||||
|
||||
/**
 * Normalizes text for searching by delegating to `processSearchText` with
 * bad-character removal and lower-casing always enabled.
 *
 * @param text - Raw text.
 * @param latinize - Whether to latinize the text (default `true`). When enabled,
 *   `processSearchText` also appends a `'\x01'`-separated variant produced by
 *   `fixCyrillic`.
 * @returns The processed text.
 */
export default function cleanSearchText(text: string, latinize = true) {
  return processSearchText(text, {
    clearBadChars: true,
    latinize,
    ignoreCase: true,
    // The inline implementation this call replaces always re-attached a leading
    // '%' after cleaning; keep that behavior for existing callers.
    includeTag: true
  });
}
|
||||
|
||||
export type ProcessSearchTextOptions = Partial<{
|
||||
@ -45,9 +85,11 @@ export type ProcessSearchTextOptions = Partial<{
|
||||
|
||||
/**
 * Applies the enabled normalization steps to `text`, in this order:
 * bad-character removal + trim, latinization, lower-casing, and re-attaching a
 * leading `%` tag (only when `options.includeTag` is set and the input starts
 * with `%`).
 *
 * When `options.latinize` is set, a second variant is appended after a `'\x01'`
 * separator: the input run through `fixCyrillic`.
 *
 * @param text - Raw text.
 * @param options - Which steps to apply; every step is off by default.
 * @returns The processed text, optionally followed by `'\x01'` and the
 *   `fixCyrillic` variant.
 */
export function processSearchText(text: string, options: ProcessSearchTextOptions = {}): string {
  const hasTag = options.includeTag && text.charAt(0) === '%';

  let processed = text;
  if(options.clearBadChars) processed = clearBadCharsAndTrim(processed);
  if(options.latinize) processed = latinizeString(processed);
  if(options.ignoreCase) processed = processed.toLowerCase();
  if(hasTag) processed = '%' + processed;

  // NOTE(review): the variant is built from the raw input, so it is lower-cased by
  // fixCyrillic but NOT stripped of bad characters or trimmed — confirm this is intended.
  if(options.latinize) processed += '\x01' + fixCyrillic(text);

  return processed;
}
|
||||
|
@ -10,6 +10,7 @@
|
||||
*/
|
||||
|
||||
import { processSearchText, ProcessSearchTextOptions } from '../helpers/cleanSearchText';
|
||||
import flatten from '../helpers/array/flatten';
|
||||
|
||||
export default class SearchIndex<SearchWhat> {
|
||||
private fullTexts: Map<SearchWhat, string> = new Map();
|
||||
@ -49,15 +50,9 @@ export default class SearchIndex<SearchWhat> {
|
||||
}); */
|
||||
}
|
||||
|
||||
public search(query: string) {
|
||||
const fullTexts = this.fullTexts;
|
||||
//const shortIndexes = searchIndex.shortIndexes;
|
||||
|
||||
if(this.options) {
|
||||
query = processSearchText(query, this.options);
|
||||
}
|
||||
|
||||
private _search(query: string) {
|
||||
const newFoundObjs: Array<{fullText: string, fullTextLength: number, what: SearchWhat, foundChars: number}> = [];
|
||||
const fullTexts = this.fullTexts;
|
||||
const queryWords = query.split(' ');
|
||||
const queryWordsLength = queryWords.length;
|
||||
fullTexts.forEach((fullText, what) => {
|
||||
@ -66,7 +61,7 @@ export default class SearchIndex<SearchWhat> {
|
||||
for(let i = 0; i < queryWordsLength; ++i) { // * verify that all words are found
|
||||
const word = queryWords[i];
|
||||
const idx = fullText.indexOf(word);
|
||||
if(idx === -1 || (idx !== 0 && fullText[idx - 1] !== ' ')) { // * search only from word beginning
|
||||
if(idx === -1 || (idx !== 0 && fullText[idx - 1] !== ' '/* && !badCharsRe.test(fullText[idx - 1]) */)) { // * search only from word beginning
|
||||
found = false;
|
||||
break;
|
||||
}
|
||||
@ -83,40 +78,22 @@ export default class SearchIndex<SearchWhat> {
|
||||
}
|
||||
});
|
||||
|
||||
return newFoundObjs;
|
||||
}
|
||||
|
||||
/**
 * Searches the index for `query`.
 *
 * When `this.options` is set, the query is first run through `processSearchText`,
 * which may return several variants separated by `'\x01'`. Every distinct variant
 * is searched with `_search`, the hits are merged and ordered by ascending
 * `fullTextLength`, then by descending `foundChars`.
 *
 * @param query - Raw user query.
 * @returns The matched objects as a `Set` (duplicates across variants collapse),
 *   in ranking order.
 */
public search(query: string): Set<SearchWhat> {
  if(this.options) {
    query = processSearchText(query, this.options);
  }

  // Variants are often identical (e.g. a query without Cyrillic letters);
  // de-duplicate them so the whole index is not scanned twice for the same string.
  const queries = Array.from(new Set(query.split('\x01')));
  const foundObjs = flatten(queries.map((variant) => this._search(variant)));

  foundObjs.sort((a, b) => a.fullTextLength - b.fullTextLength || b.foundChars - a.foundChars);

  //foundObjs.sort((a, b) => a.fullText.localeCompare(b.fullText));
  return new Set(foundObjs.map((o) => o.what));
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user