Browse Source

Fix RegExps and behaviour of htmlFormatMsg() and, to a lesser extent, filterLang()

master
Simon Grim 9 years ago
parent
commit
c149e8bbc1
  1. 4
      js/mobile_abstract.js
  2. 55
      js/twister_formatpost.js

4
js/mobile_abstract.js

@ -506,13 +506,13 @@ function filterLang(string) {
// before detection attempts we cut out any mentions and links, and replace _ with space // before detection attempts we cut out any mentions and links, and replace _ with space
langFilterSubj = string.replace(/@\S\w*|https?:\/\/\S*/g, '').replace(/_+/g, ' ') langFilterSubj = string.replace(/@\S\w*|https?:\/\/\S*/g, '').replace(/_+/g, ' ')
// cut out common frequently used words FIXME I believe there is a list of similar international stuff somewhere outside which is waiting for us, we should just find it // cut out common frequently used words FIXME I believe there is a list of similar international stuff somewhere outside which is waiting for us, we should just find it
.replace(/\btwister|github|google|twitter\b/g, '') .replace(/\btwister|tox|github|linux|ubuntu|debian|windows|google|twitter|facebook|microsoft|ping|pong|email|javascript\b/ig, '')
// replace zero-width word boundaries, such as between letters from different alphabets [or other symbols], with spaces // replace zero-width word boundaries, such as between letters from different alphabets [or other symbols], with spaces
// FIXME not so good idea because 'Za pomocą białej listy' may turn into 'Za pomoc ą bia ł ej listy' for e.g. // FIXME not so good idea because 'Za pomocą białej listy' may turn into 'Za pomoc ą bia ł ej listy' for e.g.
// FIXME but first one was recognized as 'hrv' and second as 'pol' and you know it's 'pol' actually // FIXME but first one was recognized as 'hrv' and second as 'pol' and you know it's 'pol' actually
.replace(/\b/g, ' ') .replace(/\b/g, ' ')
// cut out some more symbols // cut out some more symbols
.replace(/[#\[\]\(\)\{\}\-\+\=\^\:\;\\\/0-9]/g, '') .replace(/[#<>\.,:;\?\!\*\[\]\(\)\{\}\-\+\=\^\\\/0-9\u201C\u201D\u2026\u2014\u4E00\u3002\uFF0C\uFF1A\uFF1F\uFF01\u3010\u3011]/g, '') // unicode escaped stuff is '“”…—一。,:?!【】'
// clear unwanted spaces // clear unwanted spaces
.replace(/\s+/g, ' ').trim(); .replace(/\s+/g, ' ').trim();

55
js/twister_formatpost.js

@ -183,60 +183,73 @@ function htmlFormatMsg(msg, mentions) {
function htmlMention(str, pre) { function htmlMention(str, pre) {
str = str.replace(new RegExp(['^', pre, '@'].join('')), '').toLowerCase(); str = str.replace(new RegExp(['^', pre, '@'].join('')), '').toLowerCase();
mentions.push(str); // FIXME feel the scope mentions.push(str); // FIXME feel the pain of the scope chain
// FIXME we're trying to not interact with DOM, coz' we want to run really fast [to hell of RegExps] // FIXME we're trying to not interact with DOM, coz' we want to run really fast [to hell of RegExps]
// FIXME actually we should avoid it by dropping a template idea and construct html right here // FIXME actually we should avoid it by dropping a template idea and construct html right here
return $('#msg-user-link-template')[0].outerHTML html.push($('#msg-user-link-template')[0].outerHTML
.replace(/\bid\s*=\s*"[^]*?"+/ig, '') // $().removeAttr('id') .replace(/\bid\s*=\s*"[^]*?"+/ig, '') // $().removeAttr('id')
//.replace(/\bhref\s*=\s*"[^]*?"+/ig, '') // $().removeAttr('href') //.replace(/\bhref\s*=\s*"[^]*?"+/ig, '') // $().removeAttr('href')
.replace(/<a\s+(?=[^>]*?\bclass\s*=\s*"(?=[^"]*?\bopen-profile-modal\b))/ig, [pre, '<a href="', $.MAL.userUrl(str), '" '].join('')) // $().closest('a.open-profile-modal').attr('href', $.MAL.userUrl(username)) .replace(/<a\s+(?=[^>]*?\bclass\s*=\s*"(?=[^"]*?\bopen-profile-modal\b))/ig, ['<a href="', $.MAL.userUrl(str), '" '].join('')) // $().closest('a.open-profile-modal').attr('href', $.MAL.userUrl(username))
.replace(/(<a\s+(?=[^>]*?\bclass\s*=\s*"(?=[^"]*?\bopen-profile-modal\b))[^]*?>)[^]*?(<\/a>)/ig, [pre, '$1@', str, '$2'].join('')) // $().closest('a.open-profile-modal').text('@'+username) .replace(/(<a\s+(?=[^>]*?\bclass\s*=\s*"(?=[^"]*?\bopen-profile-modal\b))[^]*?>)[^]*?(<\/a>)/ig, [pre, '$1@', str, '$2'].join('')) // $().closest('a.open-profile-modal').text('@'+username)
; );
return ['>', html.length - 1, '<'].join('');
} }
function htmlHashtag(str, pre) { function htmlHashtag(str, pre) {
str = str.replace(new RegExp(['^', pre, '#'].join('')), ''); str = str.replace(new RegExp(['^', pre, '#'].join('')), '');
return $('#hashtag-link-template')[0].outerHTML html.push($('#hashtag-link-template')[0].outerHTML
.replace(/\bid\s*=\s*"[^]*?"+/ig, '') // $().removeAttr('id') .replace(/\bid\s*=\s*"[^]*?"+/ig, '') // $().removeAttr('id')
//.replace(/\bhref\s*=\s*"[^]*?"+/ig, '') // $().removeAttr('href') //.replace(/\bhref\s*=\s*"[^]*?"+/ig, '') // $().removeAttr('href')
.replace(/<a\s+(?=[^>]*?\bclass\s*=\s*"(?=[^"]*?\bopen-hashtag-modal\b))/ig, ['<a href="', $.MAL.hashtagUrl(str.toLowerCase()), '" '].join('')) // $().closest('a.open-profile-modal').attr('href', $.MAL.hashtagUrl(hashtag)) .replace(/<a\s+(?=[^>]*?\bclass\s*=\s*"(?=[^"]*?\bopen-hashtag-modal\b))/ig, ['<a href="', $.MAL.hashtagUrl(str.toLowerCase()), '" '].join('')) // $().closest('a.open-profile-modal').attr('href', $.MAL.hashtagUrl(hashtag))
.replace(/(<a\s+(?=[^>]*?\bclass\s*=\s*"(?=[^"]*?\bopen-hashtag-modal\b))[^]*?>)[^]*?(<\/a>)/ig, [pre, '$1#', str, '$2'].join('')) // $().closest('a.open-profile-modal').text('#'+hashtag) .replace(/(<a\s+(?=[^>]*?\bclass\s*=\s*"(?=[^"]*?\bopen-hashtag-modal\b))[^]*?>)[^]*?(<\/a>)/ig, [pre, '$1#', str, '$2'].join('')) // $().closest('a.open-profile-modal').text('#'+hashtag)
; );
return ['>', html.length - 1, '<'].join('');
} }
function htmlHttp(str) { function htmlHttp(str) {
return $('#external-page-link-template')[0].outerHTML html.push($('#external-page-link-template')[0].outerHTML
.replace(/\bid\s*=\s*"[^]*?"+/ig, '') // $().removeAttr('id') .replace(/\bid\s*=\s*"[^]*?"+/ig, '') // $().removeAttr('id')
//.replace(/\bhref\s*=\s*"[^]*?"+/ig, '') // $().removeAttr('href') //.replace(/\bhref\s*=\s*"[^]*?"+/ig, '') // $().removeAttr('href')
.replace(/<a\s+/ig, ['<a href="', proxyURL(str), '" '].join('')) // $().closest('a').attr('href', proxyURL(url)) .replace(/<a\s+/ig, ['<a href="', proxyURL(str), '" '].join('')) // $().closest('a').attr('href', proxyURL(url))
.replace(/(<a\s+[^]*?>)[^]*?(<\/a>)/ig, ['$1', str, '$2'].join('')) // $().closest('a').text(url) .replace(/(<a\s+[^]*?>)[^]*?(<\/a>)/ig, ['$1', str, '$2'].join('')) // $().closest('a').text(url)
; );
return ['>', html.length - 1, '<'].join('');
} }
function htmlEmail(str) { function htmlEmail(str, pre) {
return $('#external-page-link-template')[0].outerHTML str = str.replace(new RegExp(['^', pre].join('')), '');
html.push($('#external-page-link-template')[0].outerHTML
.replace(/\bid\s*=\s*"[^]*?"+/ig, '') // $().removeAttr('id') .replace(/\bid\s*=\s*"[^]*?"+/ig, '') // $().removeAttr('id')
//.replace(/\bhref\s*=\s*"[^]*?"+/ig, '') // $().removeAttr('href') //.replace(/\bhref\s*=\s*"[^]*?"+/ig, '') // $().removeAttr('href')
.replace(/<a\s+/ig, ['<a href="mailto:', str.toLowerCase(), '" '].join('')) // $().closest('a').attr('href', 'mailto:'+url) .replace(/<a\s+/ig, ['<a href="mailto:', str.toLowerCase(), '" '].join('')) // $().closest('a').attr('href', 'mailto:'+url)
.replace(/(<a\s+[^]*?>)[^]*?(<\/a>)/ig, ['$1', str, '$2'].join('')) // $().closest('a').text(url) .replace(/(<a\s+[^]*?>)[^]*?(<\/a>)/ig, [pre, '$1', str, '$2'].join('')) // $().closest('a').text(url)
; );
return ['>', html.length - 1, '<'].join('');
} }
function htmlSplitCounter(str) { function htmlSplitCounter(str) {
return ['<span class="splited-post-counter">', str, '</span>'].join(''); html.push(['<span class="splited-post-counter">', str, '</span>'].join(''));
return ['>', html.length - 1, '<'].join('');
} }
msg = escapeHtmlEntities(msg) var html = [];
.replace(/(^|\s|\w)@\S\w*/g, htmlMention)
.replace(/(^|\s|\w)#\S\w*/g, htmlHashtag)
.replace(/\bhttps?:\/\/\S+/ig, htmlHttp)
.replace(/\S+@\S+\.\S+/g, htmlEmail)
.replace(/\(\d{1,2}\/\d{1,2}\)$/, htmlSplitCounter)
;
return _formatText(msg); return _formatText(escapeHtmlEntities(msg)
.replace(/(^|[^\/]\B(?!\S*:\/\/\S*@))@\w+\b/g, htmlMention)
.replace(/(^|[^<\/]\B(?!\S*:\/\/\S*#))#[^#\\\/\.,:;\?\!\*\[\]\(\)\{\}\-\+\=\^\|%'"\u201C\u201D\u2026\u2014\u4E00\u3002\uFF0C\uFF1A\uFF1F\uFF01\u3010\u3011>\s]+/g, htmlHashtag) // unicode escaped stuff is '“”…—一。,:?!【】' for our chinese friends
.replace(/\bhttps?:\/\/\S[^>\s]+/ig, htmlHttp)
.replace(/([^<\/])\b(?!\S*:\/\/\S*@)\S+@\S+\.\S[^>\s]+/g, htmlEmail)
.replace(/\(\d{1,2}\/\d{1,2}\)$/, htmlSplitCounter)
.replace(/>(\d+)</g, function(candy, core) {return html[core]})
);
} }
function proxyURL(url) { function proxyURL(url) {

Loading…
Cancel
Save