Browse Source

Translate wrong Cyrillic to Latin and backwards

master
Eduard Kuzmenko 2 years ago
parent
commit
62e2fc927e
  1. 62
      src/helpers/cleanSearchText.ts
  2. 53
      src/lib/searchIndex.ts

62
src/helpers/cleanSearchText.ts

@ -11,29 +11,69 @@ @@ -11,29 +11,69 @@
import LatinizeMap from "../config/latinizeMap";
const badCharsRe = /[`~!@#$%^&*()\-_=+\[\]\\|{}'";:\/?.>,<]+/g;
export const badCharsRe = /[`~!@#$%^&*()\-_=+\[\]\\|{}'";:\/?.>,<]+/g;
// Matches the whole run of leading whitespace and the whole run of trailing whitespace.
// The trailing branch needs `\s+`: a bare `\s$` only matches the very last whitespace
// character, so "abc  " would be left as "abc ".
const trimRe = /^\s+|\s+$/g;
/**
 * Lookup table from a lowercase Cyrillic character (plus '.') to a Latin/punctuation character.
 * The pairs read like keyboard rows: presumably each Cyrillic key is mapped to the character
 * found on the same physical key of a QWERTY layout — confirm against the intended layouts.
 */
const C2L: {[k: string]: string} = (() => {
  // Each entry is one row: source characters, then the target characters at the same positions.
  const rows: Array<[string, string]> = [
    ['йцукенгшщзхъ', 'qwertyuiop[]'],
    ['фывапролджэ', 'asdfghjkl;\''],
    ['ячсмитьбю.', 'zxcvbnm,./']
  ];

  const table: {[k: string]: string} = {};
  for(const [from, to] of rows) {
    for(let i = 0; i < from.length; ++i) {
      table[from[i]] = to[i];
    }
  }

  return table;
})();
/**
 * Removes every match of `badCharsRe` from `text`, then removes every match of `trimRe`
 * from the result.
 * @param text the string to clean
 * @returns the cleaned string
 */
export function clearBadCharsAndTrim(text: string) {
  const withoutBadChars = text.replace(badCharsRe, '');
  const trimmed = withoutBadChars.replace(trimRe, '');
  return trimmed;
}
/**
 * Lowercases `text` and replaces each character that has an entry in `C2L` with that entry.
 * Characters without an entry are kept unchanged.
 * @param text the string to convert
 * @returns the lowercased, converted string
 */
export function fixCyrillic(text: string) {
  const lowered = text.toLowerCase();

  // NOTE(review): `C2L` has a '.' key, but '.' is not matched by this pattern, so that
  // entry is never applied here — confirm whether that is intended.
  return lowered.replace(/[\wа-я]/g, (char) => {
    const mapped = C2L[char];
    if(mapped === undefined || mapped === null) {
      return char;
    }

    return mapped;
  });
}
/**
 * Replaces each character of `text` that is not A-Z, a-z or 0-9 with its `LatinizeMap`
 * entry; a character with no entry is kept as is.
 * @param text the string to latinize
 * @returns the string with mapped characters substituted
 */
export function latinizeString(text: string) {
return text.replace(/[^A-Za-z0-9]/g, (ch) => {
const latinizeCh = LatinizeMap[ch];
// NOTE(review): the two returns below look like the before/after versions of a single
// line from a diff view. Only the first one is reachable — confirm which should remain.
return latinizeCh !== undefined ? latinizeCh : ch;
return latinizeCh ?? ch;
});
}
/**
 * Normalizes `text` for searching.
 * As written, the reachable path strips bad characters and trims, optionally latinizes,
 * lowercases, and puts a leading '%' back when the input started with one.
 * @param text the raw search text
 * @param latinize whether to run `latinizeString` on the text (defaults to true)
 * @returns the normalized text
 */
export default function cleanSearchText(text: string, latinize = true) {
// Remember the leading '%' before cleaning, because `badCharsRe` includes '%'.
const hasTag = text.charAt(0) === '%';
text = clearBadCharsAndTrim(text);
if(latinize) text = latinizeString(text);
text = text.toLowerCase();
if(hasTag) text = '%' + text;
return text;
// NOTE(review): everything below is unreachable after the return above. It appears to be
// the post-change body from a diff view (delegating to processSearchText) — confirm which
// body is the current one.
return processSearchText(text, {
clearBadChars: true,
latinize,
ignoreCase: true
});
}
export type ProcessSearchTextOptions = Partial<{
@ -45,9 +85,11 @@ export type ProcessSearchTextOptions = Partial<{ @@ -45,9 +85,11 @@ export type ProcessSearchTextOptions = Partial<{
/**
 * Applies the enabled normalization steps to `text`, in this order: bad-character
 * removal and trimming, latinization, lowercasing, then re-adding a leading '%' tag.
 * When `latinize` is enabled, the result is followed by '\x01' and `fixCyrillic` applied to
 * the unmodified input, so a caller can split on '\x01' to get both variants.
 * @param text the raw search text
 * @param options which steps to apply; every step is off when omitted
 * @returns the processed text, optionally followed by '\x01' and the alternative variant
 */
export function processSearchText(text: string, options: ProcessSearchTextOptions = {}) {
  const {includeTag, clearBadChars, latinize, ignoreCase} = options;

  const source = text;
  // The tag is detected on the raw input because cleaning may strip the '%'.
  const tagged = includeTag && source.charAt(0) === '%';

  let processed = source;
  if(clearBadChars) {
    processed = clearBadCharsAndTrim(processed);
  }

  if(latinize) {
    processed = latinizeString(processed);
  }

  if(ignoreCase) {
    processed = processed.toLowerCase();
  }

  if(tagged) {
    processed = '%' + processed;
  }

  return latinize ? processed + '\x01' + fixCyrillic(source) : processed;
}

53
src/lib/searchIndex.ts

@ -10,6 +10,7 @@ @@ -10,6 +10,7 @@
*/
import { processSearchText, ProcessSearchTextOptions } from '../helpers/cleanSearchText';
import flatten from '../helpers/array/flatten';
export default class SearchIndex<SearchWhat> {
private fullTexts: Map<SearchWhat, string> = new Map();
@ -49,15 +50,9 @@ export default class SearchIndex<SearchWhat> { @@ -49,15 +50,9 @@ export default class SearchIndex<SearchWhat> {
}); */
}
public search(query: string) {
const fullTexts = this.fullTexts;
//const shortIndexes = searchIndex.shortIndexes;
if(this.options) {
query = processSearchText(query, this.options);
}
private _search(query: string) {
const newFoundObjs: Array<{fullText: string, fullTextLength: number, what: SearchWhat, foundChars: number}> = [];
const fullTexts = this.fullTexts;
const queryWords = query.split(' ');
const queryWordsLength = queryWords.length;
fullTexts.forEach((fullText, what) => {
@ -66,7 +61,7 @@ export default class SearchIndex<SearchWhat> { @@ -66,7 +61,7 @@ export default class SearchIndex<SearchWhat> {
for(let i = 0; i < queryWordsLength; ++i) { // * verify that all words are found
const word = queryWords[i];
const idx = fullText.indexOf(word);
if(idx === -1 || (idx !== 0 && fullText[idx - 1] !== ' ')) { // * search only from word beginning
if(idx === -1 || (idx !== 0 && fullText[idx - 1] !== ' '/* && !badCharsRe.test(fullText[idx - 1]) */)) { // * search only from word beginning
found = false;
break;
}
@ -83,39 +78,21 @@ export default class SearchIndex<SearchWhat> { @@ -83,39 +78,21 @@ export default class SearchIndex<SearchWhat> {
}
});
newFoundObjs.sort((a, b) => a.fullTextLength - b.fullTextLength || b.foundChars - a.foundChars);
//newFoundObjs.sort((a, b) => a.fullText.localeCompare(b.fullText));
const newFoundObjs2: Set<SearchWhat> = new Set(newFoundObjs.map(o => o.what));
return newFoundObjs;
}
/* const queryWords = query.split(' ');
let foundArr: number[];
for(let i = 0; i < queryWords.length; i++) {
const newFound = shortIndexes[queryWords[i].substr(0, 3)];
if(!newFound) {
foundArr = [];
break;
}
if(foundArr === undefined || foundArr.length > newFound.length) {
foundArr = newFound;
}
public search(query: string) {
if(this.options) {
query = processSearchText(query, this.options);
}
for(let j = 0; j < foundArr.length; j++) {
let found = true;
let searchText = fullTexts[foundArr[j]];
for(let i = 0; i < queryWords.length; i++) {
if(searchText.indexOf(queryWords[i]) === -1) {
found = false;
break;
}
}
const queries = query.split('\x01');
const newFoundObjs = flatten(queries.map(query => this._search(query)));
if(found) {
newFoundObjs[foundArr[j]] = true;
}
} */
newFoundObjs.sort((a, b) => a.fullTextLength - b.fullTextLength || b.foundChars - a.foundChars);
//newFoundObjs.sort((a, b) => a.fullText.localeCompare(b.fullText));
const newFoundObjs2: Set<SearchWhat> = new Set(newFoundObjs.map(o => o.what));
return newFoundObjs2;
}

Loading…
Cancel
Save