diff --git a/js/mobile_abstract.js b/js/mobile_abstract.js index d37ef37..a2406ea 100644 --- a/js/mobile_abstract.js +++ b/js/mobile_abstract.js @@ -550,8 +550,8 @@ function filterLang(string) { langFilterPass = (langFilterMode === 'whitelist') ? false : true; langFilterReason = polyglot.t('this doesnt contain that', {'this': polyglot.t(langFilterMode), 'that': polyglot.t('language of this')}); - // before detection attempts we cut out any mentions and links, and replace _ with space - langFilterSubj = string.replace(/@\S\w*|https?:\/\/\S*|twist:\S\S\S\S\S\S\S\S\S\S\S=/g, '').replace(/_+/g, ' ') + // before detection attempts we cut out any mentions, links and /me directives and replace _ with space + langFilterSubj = string.replace(/@\S\w*|https?:\/\/\S*|twist:\S\S\S\S\S\S\S\S\S\S\S=|^\/me\s/g, '').replace(/_+/g, ' ') // cut out common frequently used words FIXME I believe there is a list of similar international stuff somewhere outside which is waiting for us, we should just find it .replace(/\btwister|tox|github|linux|ubuntu|debian|windows|google|twitter|facebook|microsoft|ping|pong|email|javascript\b/ig, '') // replace zero-width word boundaries, such as between letters from different alphabets [or other symbols], with spaces