Browse Source

Translate text typed in the wrong keyboard layout from Cyrillic to Latin and back

master
Eduard Kuzmenko 2 years ago
parent
commit
62e2fc927e
  1. 62
      src/helpers/cleanSearchText.ts
  2. 53
      src/lib/searchIndex.ts

62
src/helpers/cleanSearchText.ts

@ -11,29 +11,69 @@
import LatinizeMap from "../config/latinizeMap"; import LatinizeMap from "../config/latinizeMap";
/** Runs of punctuation/symbol characters that are removed from text before it is indexed or searched. */
export const badCharsRe = /[`~!@#$%^&*()\-_=+\[\]\\|{}'";:\/?.>,<]+/g;
/**
 * Leading and trailing whitespace runs.
 * The trailing alternative uses `\s+` so that every trailing whitespace character is
 * removed, not just the last one.
 */
const trimRe = /^\s+|\s+$/g;
/**
 * Cyrillic -> Latin lookup used to undo text typed with the wrong keyboard layout.
 * Each pair below lists one keyboard row: the Cyrillic characters and, at the same
 * position, the Latin characters they are translated to.
 */
const C2L: {[k: string]: string} = (() => {
  const keyboardRows: Array<[string, string]> = [
    ['йцукенгшщзхъ', 'qwertyuiop[]'],
    ['фывапролджэ', 'asdfghjkl;\''],
    ['ячсмитьбю.', 'zxcvbnm,./']
  ];

  const table: {[k: string]: string} = {};
  for(const [cyrillicRow, latinRow] of keyboardRows) {
    for(let i = 0; i < cyrillicRow.length; ++i) {
      table[cyrillicRow.charAt(i)] = latinRow.charAt(i);
    }
  }

  return table;
})();
/**
 * Removes every run of characters matched by `badCharsRe` from `text`, then strips
 * the whitespace matched by `trimRe` from its ends.
 *
 * @param text Raw text to clean.
 * @returns The cleaned text; inner whitespace is left untouched.
 */
export function clearBadCharsAndTrim(text: string) {
  return text.replace(badCharsRe, '').replace(trimRe, '');
}
/**
 * Lower-cases `text` and replaces every character that has an entry in `C2L`
 * with its Latin counterpart; all other characters are kept as they are.
 *
 * The matcher covers exactly the kinds of keys `C2L` contains: the lower-case
 * Cyrillic letters а-я and the '.' entry. Latin letters, digits and '_' have no
 * entry in the table, so they do not need to be matched at all.
 *
 * @param text Text that may have been typed with the Cyrillic layout active.
 * @returns The lower-cased text with mapped characters translated.
 */
export function fixCyrillic(text: string) {
  return text.toLowerCase().replace(/[а-я.]/g, (ch) => C2L[ch] ?? ch);
}
/**
 * Replaces each character outside `[A-Za-z0-9]` with its `LatinizeMap` entry.
 * Characters that have no entry in the map are kept unchanged.
 *
 * @param text Text to latinize.
 * @returns The text with every mapped character substituted.
 */
export function latinizeString(text: string) {
  return text.replace(/[^A-Za-z0-9]/g, (ch) => LatinizeMap[ch] ?? ch);
}
/**
 * Convenience wrapper around `processSearchText` that always clears bad characters
 * and ignores case; latinization is controlled by `latinize`.
 *
 * `includeTag` is not passed, so a leading '%' gets no special treatment here.
 * NOTE(review): the earlier inline implementation re-attached a leading '%' after
 * cleaning - confirm that dropping it is intended.
 *
 * @param text Raw text to normalize.
 * @param latinize Whether to latinize the text (defaults to `true`).
 * @returns The result of `processSearchText` for these options.
 */
export default function cleanSearchText(text: string, latinize = true) {
  return processSearchText(text, {
    clearBadChars: true,
    latinize,
    ignoreCase: true
  });
}
export type ProcessSearchTextOptions = Partial<{ export type ProcessSearchTextOptions = Partial<{
@ -45,9 +85,11 @@ export type ProcessSearchTextOptions = Partial<{
/**
 * Normalizes `text` according to `options`.
 *
 * Steps, each applied only when its option is set: strip bad characters and trim,
 * latinize, lower-case. When `includeTag` is set and the input starts with '%',
 * the '%' is put back in front of the normalized text.
 *
 * When `latinize` is set, a second variant is appended after a '\x01' separator:
 * `fixCyrillic` applied to the untouched input. That variant therefore does not go
 * through the bad-character/trim/tag steps, and it is lower-cased by `fixCyrillic`
 * regardless of `ignoreCase`.
 *
 * @param text Raw text to normalize.
 * @param options Which normalization steps to apply; none by default.
 * @returns The normalized text, optionally followed by '\x01' and the layout-fixed variant.
 */
export function processSearchText(text: string, options: ProcessSearchTextOptions = {}) {
  const hasTag = options.includeTag && text.charAt(0) === '%';
  const originalText = text; // kept for the layout-fixed variant appended below

  if(options.clearBadChars) text = clearBadCharsAndTrim(text);
  if(options.latinize) text = latinizeString(text);
  if(options.ignoreCase) text = text.toLowerCase();
  if(hasTag) text = '%' + text;
  if(options.latinize) text += '\x01' + fixCyrillic(originalText);

  return text;
}

53
src/lib/searchIndex.ts

@ -10,6 +10,7 @@
*/ */
import { processSearchText, ProcessSearchTextOptions } from '../helpers/cleanSearchText'; import { processSearchText, ProcessSearchTextOptions } from '../helpers/cleanSearchText';
import flatten from '../helpers/array/flatten';
export default class SearchIndex<SearchWhat> { export default class SearchIndex<SearchWhat> {
private fullTexts: Map<SearchWhat, string> = new Map(); private fullTexts: Map<SearchWhat, string> = new Map();
@ -49,15 +50,9 @@ export default class SearchIndex<SearchWhat> {
}); */ }); */
} }
public search(query: string) { private _search(query: string) {
const fullTexts = this.fullTexts;
//const shortIndexes = searchIndex.shortIndexes;
if(this.options) {
query = processSearchText(query, this.options);
}
const newFoundObjs: Array<{fullText: string, fullTextLength: number, what: SearchWhat, foundChars: number}> = []; const newFoundObjs: Array<{fullText: string, fullTextLength: number, what: SearchWhat, foundChars: number}> = [];
const fullTexts = this.fullTexts;
const queryWords = query.split(' '); const queryWords = query.split(' ');
const queryWordsLength = queryWords.length; const queryWordsLength = queryWords.length;
fullTexts.forEach((fullText, what) => { fullTexts.forEach((fullText, what) => {
@ -66,7 +61,7 @@ export default class SearchIndex<SearchWhat> {
for(let i = 0; i < queryWordsLength; ++i) { // * verify that all words are found for(let i = 0; i < queryWordsLength; ++i) { // * verify that all words are found
const word = queryWords[i]; const word = queryWords[i];
const idx = fullText.indexOf(word); const idx = fullText.indexOf(word);
if(idx === -1 || (idx !== 0 && fullText[idx - 1] !== ' ')) { // * search only from word beginning if(idx === -1 || (idx !== 0 && fullText[idx - 1] !== ' '/* && !badCharsRe.test(fullText[idx - 1]) */)) { // * search only from word beginning
found = false; found = false;
break; break;
} }
@ -83,39 +78,21 @@ export default class SearchIndex<SearchWhat> {
} }
}); });
newFoundObjs.sort((a, b) => a.fullTextLength - b.fullTextLength || b.foundChars - a.foundChars); return newFoundObjs;
}
//newFoundObjs.sort((a, b) => a.fullText.localeCompare(b.fullText));
const newFoundObjs2: Set<SearchWhat> = new Set(newFoundObjs.map(o => o.what));
/**
 * Searches the index for `query` and returns the matching objects.
 *
 * When `options` is set the query is first run through `processSearchText`, which
 * may join several alternative spellings with '\x01'. Each alternative is searched
 * with `_search`; the combined hits are ordered by ascending `fullTextLength`, then
 * by descending `foundChars`.
 *
 * @param query Raw query text.
 * @returns A set of matched objects, in ranking order, without duplicates.
 */
public search(query: string) {
  if(this.options) {
    query = processSearchText(query, this.options);
  }

  // Identical alternatives would return the very same hits, so each distinct
  // string is searched only once.
  const queries = Array.from(new Set(query.split('\x01')));
  const newFoundObjs = flatten(queries.map((alternative) => this._search(alternative)));

  newFoundObjs.sort((a, b) => a.fullTextLength - b.fullTextLength || b.foundChars - a.foundChars);

  //newFoundObjs.sort((a, b) => a.fullText.localeCompare(b.fullText));
  // The Set drops objects that were matched by more than one alternative.
  const newFoundObjs2: Set<SearchWhat> = new Set(newFoundObjs.map((o) => o.what));
  return newFoundObjs2;
}

Loading…
Cancel
Save