Translate wrong-layout Cyrillic to Latin and back
This commit is contained in:
parent
af3a7ee341
commit
62e2fc927e
@ -11,29 +11,69 @@
|
|||||||
|
|
||||||
import LatinizeMap from "../config/latinizeMap";
|
import LatinizeMap from "../config/latinizeMap";
|
||||||
|
|
||||||
/** Runs of punctuation/symbol characters that are removed from search text. */
export const badCharsRe = /[`~!@#$%^&*()\-_=+\[\]\\|{}'";:\/?.>,<]+/g;

/**
 * Leading and trailing whitespace runs.
 * The trailing branch uses `\s+$`; with a bare `\s$` only the very last
 * whitespace character was removed, so "abc  " became "abc ".
 */
const trimRe = /^\s+|\s+$/g;
|
||||||
|
|
||||||
|
/**
 * Key-for-key table from characters produced by a Russian (ЙЦУКЕН) keyboard
 * layout to the characters the same physical keys produce on a Latin QWERTY
 * layout. Keys are lower-case; callers are expected to lower-case input first.
 */
const C2L: {[k: string]: string} = {
  // 'ё' shares the backtick key; it was missing from the table
  ё: '`',
  й: 'q',
  ц: 'w',
  у: 'e',
  к: 'r',
  е: 't',
  н: 'y',
  г: 'u',
  ш: 'i',
  щ: 'o',
  з: 'p',
  х: '[',
  ъ: ']',
  ф: 'a',
  ы: 's',
  в: 'd',
  а: 'f',
  п: 'g',
  р: 'h',
  о: 'j',
  л: 'k',
  д: 'l',
  ж: ';',
  э: '\'',
  я: 'z',
  ч: 'x',
  с: 'c',
  м: 'v',
  и: 'b',
  т: 'n',
  ь: 'm',
  б: ',',
  ю: '.',
  '.': '/'
};
|
||||||
|
|
||||||
/**
 * Strips unwanted characters from search text in two passes.
 *
 * @param text - Raw text.
 * @returns The text with every `badCharsRe` match removed and, after that,
 *   every `trimRe` match removed.
 */
export function clearBadCharsAndTrim(text: string) {
  // Bad characters go first so that whitespace they leave at the edges is
  // still seen by the trim pass.
  const withoutBadChars = text.replace(badCharsRe, '');
  const trimmed = withoutBadChars.replace(trimRe, '');
  return trimmed;
}
|
||||||
|
|
||||||
|
/**
 * Re-interprets text that was typed with the wrong (Cyrillic) keyboard layout
 * by replacing each character with its counterpart from the `C2L` table.
 *
 * The input is lower-cased first because `C2L` only has lower-case keys.
 * Characters without an entry in `C2L` are kept unchanged.
 *
 * @param text - Text as typed by the user.
 * @returns The lower-cased text with every mapped character substituted.
 */
export function fixCyrillic(text: string) {
  // Look up every non-whitespace character instead of only /[\wа-я]/:
  // that class never matched '.', which has an explicit entry in C2L,
  // and 'ё' lies outside the а-я range.
  return text.toLowerCase().replace(/\S/g, (ch) => C2L[ch] ?? ch);
}
|
||||||
|
|
||||||
/**
 * Replaces characters outside `[A-Za-z0-9]` with their `LatinizeMap` entry.
 *
 * @param text - Text to convert.
 * @returns The text where each non-alphanumeric-ASCII character that has a
 *   non-nullish entry in `LatinizeMap` is replaced by that entry; all other
 *   characters are left as they are.
 */
export function latinizeString(text: string) {
  const toLatin = (ch: string) => {
    const mapped = LatinizeMap[ch];
    // `== null` covers both null and undefined, same as `??`
    return mapped == null ? ch : mapped;
  };

  return text.replace(/[^A-Za-z0-9]/g, toLatin);
}
|
||||||
|
|
||||||
/**
 * Normalizes text for searching: removes bad characters, optionally
 * latinizes, and lower-cases, delegating to `processSearchText`.
 *
 * A leading '%' tag is preserved. The previous implementation of this
 * function re-attached it explicitly, so `includeTag` is passed here;
 * without it the '%' is removed together with the other bad characters.
 *
 * Note: when `latinize` is true, `processSearchText` appends '\x01' followed
 * by the keyboard-layout-fixed variant of the original text.
 *
 * @param text - Raw text.
 * @param latinize - Whether to latinize the text (default `true`).
 * @returns The processed search text.
 */
export default function cleanSearchText(text: string, latinize = true) {
  return processSearchText(text, {
    clearBadChars: true,
    latinize,
    ignoreCase: true,
    includeTag: true
  });
}
|
||||||
|
|
||||||
export type ProcessSearchTextOptions = Partial<{
|
export type ProcessSearchTextOptions = Partial<{
|
||||||
@ -45,9 +85,11 @@ export type ProcessSearchTextOptions = Partial<{
|
|||||||
|
|
||||||
/**
 * Applies the processing steps selected in `options` to search text.
 *
 * Steps, in order: bad-character removal and trimming (`clearBadChars`),
 * latinization (`latinize`), lower-casing (`ignoreCase`), and re-attaching a
 * leading '%' (`includeTag`, only if the original text started with '%').
 * When `latinize` is set, '\x01' plus `fixCyrillic` of the untouched original
 * text is appended, so the result carries two variants separated by '\x01'.
 *
 * @param text - Raw text.
 * @param options - Which steps to apply; none by default.
 * @returns The processed text.
 */
export function processSearchText(text: string, options: ProcessSearchTextOptions = {}) {
  const rawText = text;
  const tagged = options.includeTag && rawText.charAt(0) === '%';

  let processed = rawText;
  if(options.clearBadChars) {
    processed = clearBadCharsAndTrim(processed);
  }

  if(options.latinize) {
    processed = latinizeString(processed);
  }

  if(options.ignoreCase) {
    processed = processed.toLowerCase();
  }

  if(tagged) {
    processed = '%' + processed;
  }

  // The layout-fixed variant is built from the raw input, not from `processed`
  return options.latinize ? processed + '\x01' + fixCyrillic(rawText) : processed;
}
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import { processSearchText, ProcessSearchTextOptions } from '../helpers/cleanSearchText';
|
import { processSearchText, ProcessSearchTextOptions } from '../helpers/cleanSearchText';
|
||||||
|
import flatten from '../helpers/array/flatten';
|
||||||
|
|
||||||
export default class SearchIndex<SearchWhat> {
|
export default class SearchIndex<SearchWhat> {
|
||||||
private fullTexts: Map<SearchWhat, string> = new Map();
|
private fullTexts: Map<SearchWhat, string> = new Map();
|
||||||
@ -49,15 +50,9 @@ export default class SearchIndex<SearchWhat> {
|
|||||||
}); */
|
}); */
|
||||||
}
|
}
|
||||||
|
|
||||||
public search(query: string) {
|
private _search(query: string) {
|
||||||
const fullTexts = this.fullTexts;
|
|
||||||
//const shortIndexes = searchIndex.shortIndexes;
|
|
||||||
|
|
||||||
if(this.options) {
|
|
||||||
query = processSearchText(query, this.options);
|
|
||||||
}
|
|
||||||
|
|
||||||
const newFoundObjs: Array<{fullText: string, fullTextLength: number, what: SearchWhat, foundChars: number}> = [];
|
const newFoundObjs: Array<{fullText: string, fullTextLength: number, what: SearchWhat, foundChars: number}> = [];
|
||||||
|
const fullTexts = this.fullTexts;
|
||||||
const queryWords = query.split(' ');
|
const queryWords = query.split(' ');
|
||||||
const queryWordsLength = queryWords.length;
|
const queryWordsLength = queryWords.length;
|
||||||
fullTexts.forEach((fullText, what) => {
|
fullTexts.forEach((fullText, what) => {
|
||||||
@ -66,7 +61,7 @@ export default class SearchIndex<SearchWhat> {
|
|||||||
for(let i = 0; i < queryWordsLength; ++i) { // * verify that all words are found
|
for(let i = 0; i < queryWordsLength; ++i) { // * verify that all words are found
|
||||||
const word = queryWords[i];
|
const word = queryWords[i];
|
||||||
const idx = fullText.indexOf(word);
|
const idx = fullText.indexOf(word);
|
||||||
if(idx === -1 || (idx !== 0 && fullText[idx - 1] !== ' ')) { // * search only from word beginning
|
if(idx === -1 || (idx !== 0 && fullText[idx - 1] !== ' '/* && !badCharsRe.test(fullText[idx - 1]) */)) { // * search only from word beginning
|
||||||
found = false;
|
found = false;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -83,40 +78,22 @@ export default class SearchIndex<SearchWhat> {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
return newFoundObjs;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Searches the index for `query`.
 *
 * If options are set, the query is first run through `processSearchText`,
 * which may return several variants joined by '\x01'. Each distinct variant
 * is searched separately and the matches are merged.
 *
 * @param query - Text entered by the user.
 * @returns The matched items, ordered by ascending full-text length and then
 *   by descending number of found characters.
 */
public search(query: string) {
  if(this.options) {
    query = processSearchText(query, this.options);
  }

  // Skip repeated variants: for a plain lower-case Latin query both variants
  // are the same string, and searching it twice only doubled the work.
  const queries = query.split('\x01').filter((q, idx, all) => all.indexOf(q) === idx);
  const newFoundObjs = flatten(queries.map((q) => this._search(q)));

  newFoundObjs.sort((a, b) => a.fullTextLength - b.fullTextLength || b.foundChars - a.foundChars);

  //newFoundObjs.sort((a, b) => a.fullText.localeCompare(b.fullText));
  // A Set drops items matched by more than one variant, keeping first position
  const newFoundObjs2: Set<SearchWhat> = new Set(newFoundObjs.map((o) => o.what));

  return newFoundObjs2;
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user