From c149e8bbc1f6c4a608aaf391cf92acbaf37a795c Mon Sep 17 00:00:00 2001 From: Simon Grim Date: Tue, 12 May 2015 20:38:41 +0500 Subject: [PATCH] fixes of RegExps and behaviour of htmlFormatMsg() and slightly filterLang() --- js/mobile_abstract.js | 4 +-- js/twister_formatpost.js | 55 +++++++++++++++++++++++++--------------- 2 files changed, 36 insertions(+), 23 deletions(-) diff --git a/js/mobile_abstract.js b/js/mobile_abstract.js index 2c1bcb6..bc165c1 100644 --- a/js/mobile_abstract.js +++ b/js/mobile_abstract.js @@ -506,13 +506,13 @@ function filterLang(string) { // before detection attempts we cut out any mentions and links, and replace _ with space langFilterSubj = string.replace(/@\S\w*|https?:\/\/\S*/g, '').replace(/_+/g, ' ') // cut out common frequently used words FIXME I believe there is a list of similar international stuff somewhere outside which is waiting for us, we should just find it - .replace(/\btwister|github|google|twitter\b/g, '') + .replace(/\btwister|tox|github|linux|ubuntu|debian|windows|google|twitter|facebook|microsoft|ping|pong|email|javascript\b/ig, '') // replace zero-width word boundaries, such as between letters from different alphabets [or other symbols], with spaces // FIXME not so good idea because 'Za pomocą białej listy' may turn into 'Za pomoc ą bia ł ej listy' for e.g. // FIXME but first one was recognized as 'hrv' and second as 'pol' and you know it's 'pol' actually .replace(/\b/g, ' ') // cut out some more symbols - .replace(/[#\[\]\(\)\{\}\-\+\=\^\:\;\\\/0-9]/g, '') + .replace(/[#<>\.,:;\?\!\*\[\]\(\)\{\}\-\+\=\^\\\/0-9\u201C\u201D\u2026\u2014\u4E00\u3002\uFF0C\uFF1A\uFF1F\uFF01\u3010\u3011]/g, '') // unicode escaped stuff is '“”…—一。,:?!【】' // clear unwanted spaces .replace(/\s+/g, ' ').trim(); diff --git a/js/twister_formatpost.js b/js/twister_formatpost.js index 45c4f65..c962a71 100644 --- a/js/twister_formatpost.js +++ b/js/twister_formatpost.js @@ -183,60 +183,73 @@ function htmlFormatMsg(msg, mentions) { function htmlMention(str, pre) { str = str.replace(new RegExp(['^', pre, '@'].join('')), '').toLowerCase(); - mentions.push(str); // FIXME feel the scope + mentions.push(str); // FIXME feel the pain of the scope chain // FIXME we're trying to not interact with DOM, coz' we want to run really fast [to hell of RegExps] // FIXME actually we should avoid it by dropping a template idea and construct html right here - return $('#msg-user-link-template')[0].outerHTML + html.push($('#msg-user-link-template')[0].outerHTML .replace(/\bid\s*=\s*"[^]*?"+/ig, '') // $().removeAttr('id') //.replace(/\bhref\s*=\s*"[^]*?"+/ig, '') // $().removeAttr('href') - .replace(/]*?\bclass\s*=\s*"(?=[^"]*?\bopen-profile-modal\b))/ig, [pre, ']*?\bclass\s*=\s*"(?=[^"]*?\bopen-profile-modal\b))/ig, [']*?\bclass\s*=\s*"(?=[^"]*?\bopen-profile-modal\b))[^]*?>)[^]*?(<\/a>)/ig, [pre, '$1@', str, '$2'].join('')) // $().closest('a.open-profile-modal').text('@'+username) - ; + ); + + return ['>', html.length - 1, '<'].join(''); } function htmlHashtag(str, pre) { str = str.replace(new RegExp(['^', pre, '#'].join('')), ''); - return $('#hashtag-link-template')[0].outerHTML + html.push($('#hashtag-link-template')[0].outerHTML .replace(/\bid\s*=\s*"[^]*?"+/ig, '') // $().removeAttr('id') //.replace(/\bhref\s*=\s*"[^]*?"+/ig, '') // $().removeAttr('href') .replace(/]*?\bclass\s*=\s*"(?=[^"]*?\bopen-hashtag-modal\b))/ig, [']*?\bclass\s*=\s*"(?=[^"]*?\bopen-hashtag-modal\b))[^]*?>)[^]*?(<\/a>)/ig, [pre, '$1#', str, '$2'].join('')) // $().closest('a.open-profile-modal').text('#'+hashtag) - ; + ); + + return ['>', html.length - 1, '<'].join(''); } function htmlHttp(str) { - return $('#external-page-link-template')[0].outerHTML + html.push($('#external-page-link-template')[0].outerHTML .replace(/\bid\s*=\s*"[^]*?"+/ig, '') // $().removeAttr('id') //.replace(/\bhref\s*=\s*"[^]*?"+/ig, '') // $().removeAttr('href') .replace(/)[^]*?(<\/a>)/ig, ['$1', str, '$2'].join('')) // $().closest('a').text(url) - ; + ); + + return ['>', html.length - 1, '<'].join(''); } - function htmlEmail(str) { - return $('#external-page-link-template')[0].outerHTML + function htmlEmail(str, pre) { + str = str.replace(new RegExp(['^', pre].join('')), ''); + + html.push($('#external-page-link-template')[0].outerHTML .replace(/\bid\s*=\s*"[^]*?"+/ig, '') // $().removeAttr('id') //.replace(/\bhref\s*=\s*"[^]*?"+/ig, '') // $().removeAttr('href') .replace(/)[^]*?(<\/a>)/ig, ['$1', str, '$2'].join('')) // $().closest('a').text(url) - ; + .replace(/()[^]*?(<\/a>)/ig, [pre, '$1', str, '$2'].join('')) // $().closest('a').text(url) + ); + + return ['>', html.length - 1, '<'].join(''); } function htmlSplitCounter(str) { - return ['', str, ''].join(''); + html.push(['', str, ''].join('')); + + return ['>', html.length - 1, '<'].join(''); } - msg = escapeHtmlEntities(msg) - .replace(/(^|\s|\w)@\S\w*/g, htmlMention) - .replace(/(^|\s|\w)#\S\w*/g, htmlHashtag) - .replace(/\bhttps?:\/\/\S+/ig, htmlHttp) - .replace(/\S+@\S+\.\S+/g, htmlEmail) - .replace(/\(\d{1,2}\/\d{1,2}\)$/, htmlSplitCounter) - ; + var html = []; - return _formatText(msg); + return _formatText(escapeHtmlEntities(msg) + .replace(/(^|[^\/]\B(?!\S*:\/\/\S*@))@\w+\b/g, htmlMention) + .replace(/(^|[^<\/]\B(?!\S*:\/\/\S*#))#[^#\\\/\.,:;\?\!\*\[\]\(\)\{\}\-\+\=\^\|%'"\u201C\u201D\u2026\u2014\u4E00\u3002\uFF0C\uFF1A\uFF1F\uFF01\u3010\u3011>\s]+/g, htmlHashtag) // unicode escaped stuff is '“”…—一。,:?!【】' for our chinese friends + .replace(/\bhttps?:\/\/\S[^>\s]+/ig, htmlHttp) + .replace(/([^<\/])\b(?!\S*:\/\/\S*@)\S+@\S+\.\S[^>\s]+/g, htmlEmail) + .replace(/\(\d{1,2}\/\d{1,2}\)$/, htmlSplitCounter) + .replace(/>(\d+)