@ -40,6 +40,7 @@
@@ -40,6 +40,7 @@
# include <QXmlStreamEntityResolver>
# include <QXmlStreamReader>
# include "base/global.h"
# include "rss_article.h"
namespace
@ -55,303 +56,303 @@ namespace
@@ -55,303 +56,303 @@ namespace
// http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent
static const QHash < QString , QString > HTMLEntities
{
{ " nbsp " , "   " } , // no-break space = non-breaking space, U+00A0 ISOnum
{ " iexcl " , " ¡ " } , // inverted exclamation mark, U+00A1 ISOnum
{ " cent " , " ¢ " } , // cent sign, U+00A2 ISOnum
{ " pound " , " £ " } , // pound sign, U+00A3 ISOnum
{ " curren " , " ¤ " } , // currency sign, U+00A4 ISOnum
{ " yen " , " ¥ " } , // yen sign = yuan sign, U+00A5 ISOnum
{ " brvbar " , " ¦ " } , // broken bar = broken vertical bar, U+00A6 ISOnum
{ " sect " , " § " } , // section sign, U+00A7 ISOnum
{ " uml " , " ¨ " } , // diaeresis = spacing diaeresis, U+00A8 ISOdia
{ " copy " , " © " } , // copyright sign, U+00A9 ISOnum
{ " ordf " , " ª " } , // feminine ordinal indicator, U+00AA ISOnum
{ " laquo " , " « " } , // left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum
{ " not " , " ¬ " } , // not sign = angled dash, U+00AC ISOnum
{ " shy " , " ­ " } , // soft hyphen = discretionary hyphen, U+00AD ISOnum
{ " reg " , " ® " } , // registered sign = registered trade mark sign, U+00AE ISOnum
{ " macr " , " ¯ " } , // macron = spacing macron = overline = APL overbar, U+00AF ISOdia
{ " deg " , " ° " } , // degree sign, U+00B0 ISOnum
{ " plusmn " , " ± " } , // plus-minus sign = plus-or-minus sign, U+00B1 ISOnum
{ " sup2 " , " ² " } , // superscript two = superscript digit two = squared, U+00B2 ISOnum
{ " sup3 " , " ³ " } , // superscript three = superscript digit three = cubed, U+00B3 ISOnum
{ " acute " , " ´ " } , // acute accent = spacing acute, U+00B4 ISOdia
{ " micro " , " µ " } , // micro sign, U+00B5 ISOnum
{ " para " , " ¶ " } , // pilcrow sign = paragraph sign, U+00B6 ISOnum
{ " middot " , " · " } , // middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum
{ " cedil " , " ¸ " } , // cedilla = spacing cedilla, U+00B8 ISOdia
{ " sup1 " , " ¹ " } , // superscript one = superscript digit one, U+00B9 ISOnum
{ " ordm " , " º " } , // masculine ordinal indicator, U+00BA ISOnum
{ " raquo " , " » " } , // right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum
{ " frac14 " , " ¼ " } , // vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum
{ " frac12 " , " ½ " } , // vulgar fraction one half = fraction one half, U+00BD ISOnum
{ " frac34 " , " ¾ " } , // vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
{ " iquest " , " ¿ " } , // inverted question mark = turned question mark, U+00BF ISOnum
{ " Agrave " , " À " } , // latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1
{ " Aacute " , " Á " } , // latin capital letter A with acute, U+00C1 ISOlat1
{ " Acirc " , " Â " } , // latin capital letter A with circumflex, U+00C2 ISOlat1
{ " Atilde " , " Ã " } , // latin capital letter A with tilde, U+00C3 ISOlat1
{ " Auml " , " Ä " } , // latin capital letter A with diaeresis, U+00C4 ISOlat1
{ " Aring " , " Å " } , // latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1
{ " AElig " , " Æ " } , // latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
{ " Ccedil " , " Ç " } , // latin capital letter C with cedilla, U+00C7 ISOlat1
{ " Egrave " , " È " } , // latin capital letter E with grave, U+00C8 ISOlat1
{ " Eacute " , " É " } , // latin capital letter E with acute, U+00C9 ISOlat1
{ " Ecirc " , " Ê " } , // latin capital letter E with circumflex, U+00CA ISOlat1
{ " Euml " , " Ë " } , // latin capital letter E with diaeresis, U+00CB ISOlat1
{ " Igrave " , " Ì " } , // latin capital letter I with grave, U+00CC ISOlat1
{ " Iacute " , " Í " } , // latin capital letter I with acute, U+00CD ISOlat1
{ " Icirc " , " Î " } , // latin capital letter I with circumflex, U+00CE ISOlat1
{ " Iuml " , " Ï " } , // latin capital letter I with diaeresis, U+00CF ISOlat1
{ " ETH " , " Ð " } , // latin capital letter ETH, U+00D0 ISOlat1
{ " Ntilde " , " Ñ " } , // latin capital letter N with tilde, U+00D1 ISOlat1
{ " Ograve " , " Ò " } , // latin capital letter O with grave, U+00D2 ISOlat1
{ " Oacute " , " Ó " } , // latin capital letter O with acute, U+00D3 ISOlat1
{ " Ocirc " , " Ô " } , // latin capital letter O with circumflex, U+00D4 ISOlat1
{ " Otilde " , " Õ " } , // latin capital letter O with tilde, U+00D5 ISOlat1
{ " Ouml " , " Ö " } , // latin capital letter O with diaeresis, U+00D6 ISOlat1
{ " times " , " × " } , // multiplication sign, U+00D7 ISOnum
{ " Oslash " , " Ø " } , // latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1
{ " Ugrave " , " Ù " } , // latin capital letter U with grave, U+00D9 ISOlat1
{ " Uacute " , " Ú " } , // latin capital letter U with acute, U+00DA ISOlat1
{ " Ucirc " , " Û " } , // latin capital letter U with circumflex, U+00DB ISOlat1
{ " Uuml " , " Ü " } , // latin capital letter U with diaeresis, U+00DC ISOlat1
{ " Yacute " , " Ý " } , // latin capital letter Y with acute, U+00DD ISOlat1
{ " THORN " , " Þ " } , // latin capital letter THORN, U+00DE ISOlat1
{ " szlig " , " ß " } , // latin small letter sharp s = ess-zed, U+00DF ISOlat1
{ " agrave " , " à " } , // latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1
{ " aacute " , " á " } , // latin small letter a with acute, U+00E1 ISOlat1
{ " acirc " , " â " } , // latin small letter a with circumflex, U+00E2 ISOlat1
{ " atilde " , " ã " } , // latin small letter a with tilde, U+00E3 ISOlat1
{ " auml " , " ä " } , // latin small letter a with diaeresis, U+00E4 ISOlat1
{ " aring " , " å " } , // latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1
{ " aelig " , " æ " } , // latin small letter ae = latin small ligature ae, U+00E6 ISOlat1
{ " ccedil " , " ç " } , // latin small letter c with cedilla, U+00E7 ISOlat1
{ " egrave " , " è " } , // latin small letter e with grave, U+00E8 ISOlat1
{ " eacute " , " é " } , // latin small letter e with acute, U+00E9 ISOlat1
{ " ecirc " , " ê " } , // latin small letter e with circumflex, U+00EA ISOlat1
{ " euml " , " ë " } , // latin small letter e with diaeresis, U+00EB ISOlat1
{ " igrave " , " ì " } , // latin small letter i with grave, U+00EC ISOlat1
{ " iacute " , " í " } , // latin small letter i with acute, U+00ED ISOlat1
{ " icirc " , " î " } , // latin small letter i with circumflex, U+00EE ISOlat1
{ " iuml " , " ï " } , // latin small letter i with diaeresis, U+00EF ISOlat1
{ " eth " , " ð " } , // latin small letter eth, U+00F0 ISOlat1
{ " ntilde " , " ñ " } , // latin small letter n with tilde, U+00F1 ISOlat1
{ " ograve " , " ò " } , // latin small letter o with grave, U+00F2 ISOlat1
{ " oacute " , " ó " } , // latin small letter o with acute, U+00F3 ISOlat1
{ " ocirc " , " ô " } , // latin small letter o with circumflex, U+00F4 ISOlat1
{ " otilde " , " õ " } , // latin small letter o with tilde, U+00F5 ISOlat1
{ " ouml " , " ö " } , // latin small letter o with diaeresis, U+00F6 ISOlat1
{ " divide " , " ÷ " } , // division sign, U+00F7 ISOnum
{ " oslash " , " ø " } , // latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1
{ " ugrave " , " ù " } , // latin small letter u with grave, U+00F9 ISOlat1
{ " uacute " , " ú " } , // latin small letter u with acute, U+00FA ISOlat1
{ " ucirc " , " û " } , // latin small letter u with circumflex, U+00FB ISOlat1
{ " uuml " , " ü " } , // latin small letter u with diaeresis, U+00FC ISOlat1
{ " yacute " , " ý " } , // latin small letter y with acute, U+00FD ISOlat1
{ " thorn " , " þ " } , // latin small letter thorn, U+00FE ISOlat1
{ " yuml " , " ÿ " } , // latin small letter y with diaeresis, U+00FF ISOlat1
{ u " nbsp " _qs , u "   " _qs } , // no-break space = non-breaking space, U+00A0 ISOnum
{ u " iexcl " _qs , u " ¡ " _qs } , // inverted exclamation mark, U+00A1 ISOnum
{ u " cent " _qs , u " ¢ " _qs } , // cent sign, U+00A2 ISOnum
{ u " pound " _qs , u " £ " _qs } , // pound sign, U+00A3 ISOnum
{ u " curren " _qs , u " ¤ " _qs } , // currency sign, U+00A4 ISOnum
{ u " yen " _qs , u " ¥ " _qs } , // yen sign = yuan sign, U+00A5 ISOnum
{ u " brvbar " _qs , u " ¦ " _qs } , // broken bar = broken vertical bar, U+00A6 ISOnum
{ u " sect " _qs , u " § " _qs } , // section sign, U+00A7 ISOnum
{ u " uml " _qs , u " ¨ " _qs } , // diaeresis = spacing diaeresis, U+00A8 ISOdia
{ u " copy " _qs , u " © " _qs } , // copyright sign, U+00A9 ISOnum
{ u " ordf " _qs , u " ª " _qs } , // feminine ordinal indicator, U+00AA ISOnum
{ u " laquo " _qs , u " « " _qs } , // left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum
{ u " not " _qs , u " ¬ " _qs } , // not sign = angled dash, U+00AC ISOnum
{ u " shy " _qs , u " ­ " _qs } , // soft hyphen = discretionary hyphen, U+00AD ISOnum
{ u " reg " _qs , u " ® " _qs } , // registered sign = registered trade mark sign, U+00AE ISOnum
{ u " macr " _qs , u " ¯ " _qs } , // macron = spacing macron = overline = APL overbar, U+00AF ISOdia
{ u " deg " _qs , u " ° " _qs } , // degree sign, U+00B0 ISOnum
{ u " plusmn " _qs , u " ± " _qs } , // plus-minus sign = plus-or-minus sign, U+00B1 ISOnum
{ u " sup2 " _qs , u " ² " _qs } , // superscript two = superscript digit two = squared, U+00B2 ISOnum
{ u " sup3 " _qs , u " ³ " _qs } , // superscript three = superscript digit three = cubed, U+00B3 ISOnum
{ u " acute " _qs , u " ´ " _qs } , // acute accent = spacing acute, U+00B4 ISOdia
{ u " micro " _qs , u " µ " _qs } , // micro sign, U+00B5 ISOnum
{ u " para " _qs , u " ¶ " _qs } , // pilcrow sign = paragraph sign, U+00B6 ISOnum
{ u " middot " _qs , u " · " _qs } , // middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum
{ u " cedil " _qs , u " ¸ " _qs } , // cedilla = spacing cedilla, U+00B8 ISOdia
{ u " sup1 " _qs , u " ¹ " _qs } , // superscript one = superscript digit one, U+00B9 ISOnum
{ u " ordm " _qs , u " º " _qs } , // masculine ordinal indicator, U+00BA ISOnum
{ u " raquo " _qs , u " » " _qs } , // right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum
{ u " frac14 " _qs , u " ¼ " _qs } , // vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum
{ u " frac12 " _qs , u " ½ " _qs } , // vulgar fraction one half = fraction one half, U+00BD ISOnum
{ u " frac34 " _qs , u " ¾ " _qs } , // vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
{ u " iquest " _qs , u " ¿ " _qs } , // inverted question mark = turned question mark, U+00BF ISOnum
{ u " Agrave " _qs , u " À " _qs } , // latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1
{ u " Aacute " _qs , u " Á " _qs } , // latin capital letter A with acute, U+00C1 ISOlat1
{ u " Acirc " _qs , u " Â " _qs } , // latin capital letter A with circumflex, U+00C2 ISOlat1
{ u " Atilde " _qs , u " Ã " _qs } , // latin capital letter A with tilde, U+00C3 ISOlat1
{ u " Auml " _qs , u " Ä " _qs } , // latin capital letter A with diaeresis, U+00C4 ISOlat1
{ u " Aring " _qs , u " Å " _qs } , // latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1
{ u " AElig " _qs , u " Æ " _qs } , // latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
{ u " Ccedil " _qs , u " Ç " _qs } , // latin capital letter C with cedilla, U+00C7 ISOlat1
{ u " Egrave " _qs , u " È " _qs } , // latin capital letter E with grave, U+00C8 ISOlat1
{ u " Eacute " _qs , u " É " _qs } , // latin capital letter E with acute, U+00C9 ISOlat1
{ u " Ecirc " _qs , u " Ê " _qs } , // latin capital letter E with circumflex, U+00CA ISOlat1
{ u " Euml " _qs , u " Ë " _qs } , // latin capital letter E with diaeresis, U+00CB ISOlat1
{ u " Igrave " _qs , u " Ì " _qs } , // latin capital letter I with grave, U+00CC ISOlat1
{ u " Iacute " _qs , u " Í " _qs } , // latin capital letter I with acute, U+00CD ISOlat1
{ u " Icirc " _qs , u " Î " _qs } , // latin capital letter I with circumflex, U+00CE ISOlat1
{ u " Iuml " _qs , u " Ï " _qs } , // latin capital letter I with diaeresis, U+00CF ISOlat1
{ u " ETH " _qs , u " Ð " _qs } , // latin capital letter ETH, U+00D0 ISOlat1
{ u " Ntilde " _qs , u " Ñ " _qs } , // latin capital letter N with tilde, U+00D1 ISOlat1
{ u " Ograve " _qs , u " Ò " _qs } , // latin capital letter O with grave, U+00D2 ISOlat1
{ u " Oacute " _qs , u " Ó " _qs } , // latin capital letter O with acute, U+00D3 ISOlat1
{ u " Ocirc " _qs , u " Ô " _qs } , // latin capital letter O with circumflex, U+00D4 ISOlat1
{ u " Otilde " _qs , u " Õ " _qs } , // latin capital letter O with tilde, U+00D5 ISOlat1
{ u " Ouml " _qs , u " Ö " _qs } , // latin capital letter O with diaeresis, U+00D6 ISOlat1
{ u " times " _qs , u " × " _qs } , // multiplication sign, U+00D7 ISOnum
{ u " Oslash " _qs , u " Ø " _qs } , // latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1
{ u " Ugrave " _qs , u " Ù " _qs } , // latin capital letter U with grave, U+00D9 ISOlat1
{ u " Uacute " _qs , u " Ú " _qs } , // latin capital letter U with acute, U+00DA ISOlat1
{ u " Ucirc " _qs , u " Û " _qs } , // latin capital letter U with circumflex, U+00DB ISOlat1
{ u " Uuml " _qs , u " Ü " _qs } , // latin capital letter U with diaeresis, U+00DC ISOlat1
{ u " Yacute " _qs , u " Ý " _qs } , // latin capital letter Y with acute, U+00DD ISOlat1
{ u " THORN " _qs , u " Þ " _qs } , // latin capital letter THORN, U+00DE ISOlat1
{ u " szlig " _qs , u " ß " _qs } , // latin small letter sharp s = ess-zed, U+00DF ISOlat1
{ u " agrave " _qs , u " à " _qs } , // latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1
{ u " aacute " _qs , u " á " _qs } , // latin small letter a with acute, U+00E1 ISOlat1
{ u " acirc " _qs , u " â " _qs } , // latin small letter a with circumflex, U+00E2 ISOlat1
{ u " atilde " _qs , u " ã " _qs } , // latin small letter a with tilde, U+00E3 ISOlat1
{ u " auml " _qs , u " ä " _qs } , // latin small letter a with diaeresis, U+00E4 ISOlat1
{ u " aring " _qs , u " å " _qs } , // latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1
{ u " aelig " _qs , u " æ " _qs } , // latin small letter ae = latin small ligature ae, U+00E6 ISOlat1
{ u " ccedil " _qs , u " ç " _qs } , // latin small letter c with cedilla, U+00E7 ISOlat1
{ u " egrave " _qs , u " è " _qs } , // latin small letter e with grave, U+00E8 ISOlat1
{ u " eacute " _qs , u " é " _qs } , // latin small letter e with acute, U+00E9 ISOlat1
{ u " ecirc " _qs , u " ê " _qs } , // latin small letter e with circumflex, U+00EA ISOlat1
{ u " euml " _qs , u " ë " _qs } , // latin small letter e with diaeresis, U+00EB ISOlat1
{ u " igrave " _qs , u " ì " _qs } , // latin small letter i with grave, U+00EC ISOlat1
{ u " iacute " _qs , u " í " _qs } , // latin small letter i with acute, U+00ED ISOlat1
{ u " icirc " _qs , u " î " _qs } , // latin small letter i with circumflex, U+00EE ISOlat1
{ u " iuml " _qs , u " ï " _qs } , // latin small letter i with diaeresis, U+00EF ISOlat1
{ u " eth " _qs , u " ð " _qs } , // latin small letter eth, U+00F0 ISOlat1
{ u " ntilde " _qs , u " ñ " _qs } , // latin small letter n with tilde, U+00F1 ISOlat1
{ u " ograve " _qs , u " ò " _qs } , // latin small letter o with grave, U+00F2 ISOlat1
{ u " oacute " _qs , u " ó " _qs } , // latin small letter o with acute, U+00F3 ISOlat1
{ u " ocirc " _qs , u " ô " _qs } , // latin small letter o with circumflex, U+00F4 ISOlat1
{ u " otilde " _qs , u " õ " _qs } , // latin small letter o with tilde, U+00F5 ISOlat1
{ u " ouml " _qs , u " ö " _qs } , // latin small letter o with diaeresis, U+00F6 ISOlat1
{ u " divide " _qs , u " ÷ " _qs } , // division sign, U+00F7 ISOnum
{ u " oslash " _qs , u " ø " _qs } , // latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1
{ u " ugrave " _qs , u " ù " _qs } , // latin small letter u with grave, U+00F9 ISOlat1
{ u " uacute " _qs , u " ú " _qs } , // latin small letter u with acute, U+00FA ISOlat1
{ u " ucirc " _qs , u " û " _qs } , // latin small letter u with circumflex, U+00FB ISOlat1
{ u " uuml " _qs , u " ü " _qs } , // latin small letter u with diaeresis, U+00FC ISOlat1
{ u " yacute " _qs , u " ý " _qs } , // latin small letter y with acute, U+00FD ISOlat1
{ u " thorn " _qs , u " þ " _qs } , // latin small letter thorn, U+00FE ISOlat1
{ u " yuml " _qs , u " ÿ " _qs } , // latin small letter y with diaeresis, U+00FF ISOlat1
// Latin Extended-A
{ " OElig " , " Œ " } , // latin capital ligature OE, U+0152 ISOlat2
{ " oelig " , " œ " } , // latin small ligature oe, U+0153 ISOlat2
{ u " OElig " _qs , u " Œ " _qs } , // latin capital ligature OE, U+0152 ISOlat2
{ u " oelig " _qs , u " œ " _qs } , // latin small ligature oe, U+0153 ISOlat2
// ligature is a misnomer, this is a separate character in some languages
{ " Scaron " , " Š " } , // latin capital letter S with caron, U+0160 ISOlat2
{ " scaron " , " š " } , // latin small letter s with caron, U+0161 ISOlat2
{ " Yuml " , " Ÿ " } , // latin capital letter Y with diaeresis, U+0178 ISOlat2
{ u " Scaron " _qs , u " Š " _qs } , // latin capital letter S with caron, U+0160 ISOlat2
{ u " scaron " _qs , u " š " _qs } , // latin small letter s with caron, U+0161 ISOlat2
{ u " Yuml " _qs , u " Ÿ " _qs } , // latin capital letter Y with diaeresis, U+0178 ISOlat2
// Spacing Modifier Letters
{ " circ " , " ˆ " } , // modifier letter circumflex accent, U+02C6 ISOpub
{ " tilde " , " ˜ " } , // small tilde, U+02DC ISOdia
{ u " circ " _qs , u " ˆ " _qs } , // modifier letter circumflex accent, U+02C6 ISOpub
{ u " tilde " _qs , u " ˜ " _qs } , // small tilde, U+02DC ISOdia
// General Punctuation
{ " ensp " , "   " } , // en space, U+2002 ISOpub
{ " emsp " , "   " } , // em space, U+2003 ISOpub
{ " thinsp " , "   " } , // thin space, U+2009 ISOpub
{ " zwnj " , " ‌ " } , // zero width non-joiner, U+200C NEW RFC 2070
{ " zwj " , " ‍ " } , // zero width joiner, U+200D NEW RFC 2070
{ " lrm " , " ‎ " } , // left-to-right mark, U+200E NEW RFC 2070
{ " rlm " , " ‏ " } , // right-to-left mark, U+200F NEW RFC 2070
{ " ndash " , " – " } , // en dash, U+2013 ISOpub
{ " mdash " , " — " } , // em dash, U+2014 ISOpub
{ " lsquo " , " ‘ " } , // left single quotation mark, U+2018 ISOnum
{ " rsquo " , " ’ " } , // right single quotation mark, U+2019 ISOnum
{ " sbquo " , " ‚ " } , // single low-9 quotation mark, U+201A NEW
{ " ldquo " , " “ " } , // left double quotation mark, U+201C ISOnum
{ " rdquo " , " ” " } , // right double quotation mark, U+201D ISOnum
{ " bdquo " , " „ " } , // double low-9 quotation mark, U+201E NEW
{ " dagger " , " † " } , // dagger, U+2020 ISOpub
{ " Dagger " , " ‡ " } , // double dagger, U+2021 ISOpub
{ " permil " , " ‰ " } , // per mille sign, U+2030 ISOtech
{ " lsaquo " , " ‹ " } , // single left-pointing angle quotation mark, U+2039 ISO proposed
{ u " ensp " _qs , u "   " _qs } , // en space, U+2002 ISOpub
{ u " emsp " _qs , u "   " _qs } , // em space, U+2003 ISOpub
{ u " thinsp " _qs , u "   " _qs } , // thin space, U+2009 ISOpub
{ u " zwnj " _qs , u " ‌ " _qs } , // zero width non-joiner, U+200C NEW RFC 2070
{ u " zwj " _qs , u " ‍ " _qs } , // zero width joiner, U+200D NEW RFC 2070
{ u " lrm " _qs , u " ‎ " _qs } , // left-to-right mark, U+200E NEW RFC 2070
{ u " rlm " _qs , u " ‏ " _qs } , // right-to-left mark, U+200F NEW RFC 2070
{ u " ndash " _qs , u " – " _qs } , // en dash, U+2013 ISOpub
{ u " mdash " _qs , u " — " _qs } , // em dash, U+2014 ISOpub
{ u " lsquo " _qs , u " ‘ " _qs } , // left single quotation mark, U+2018 ISOnum
{ u " rsquo " _qs , u " ’ " _qs } , // right single quotation mark, U+2019 ISOnum
{ u " sbquo " _qs , u " ‚ " _qs } , // single low-9 quotation mark, U+201A NEW
{ u " ldquo " _qs , u " “ " _qs } , // left double quotation mark, U+201C ISOnum
{ u " rdquo " _qs , u " ” " _qs } , // right double quotation mark, U+201D ISOnum
{ u " bdquo " _qs , u " „ " _qs } , // double low-9 quotation mark, U+201E NEW
{ u " dagger " _qs , u " † " _qs } , // dagger, U+2020 ISOpub
{ u " Dagger " _qs , u " ‡ " _qs } , // double dagger, U+2021 ISOpub
{ u " permil " _qs , u " ‰ " _qs } , // per mille sign, U+2030 ISOtech
{ u " lsaquo " _qs , u " ‹ " _qs } , // single left-pointing angle quotation mark, U+2039 ISO proposed
// lsaquo is proposed but not yet ISO standardized
{ " rsaquo " , " › " } , // single right-pointing angle quotation mark, U+203A ISO proposed
{ u " rsaquo " _qs , u " › " _qs } , // single right-pointing angle quotation mark, U+203A ISO proposed
// rsaquo is proposed but not yet ISO standardized
// Currency Symbols
{ " euro " , " € " } , // euro sign, U+20AC NEW
{ u " euro " _qs , u " € " _qs } , // euro sign, U+20AC NEW
// Latin Extended-B
{ " fnof " , " ƒ " } , // latin small letter f with hook = function = florin, U+0192 ISOtech
{ u " fnof " _qs , u " ƒ " _qs } , // latin small letter f with hook = function = florin, U+0192 ISOtech
// Greek
{ " Alpha " , " Α " } , // greek capital letter alpha, U+0391
{ " Beta " , " Β " } , // greek capital letter beta, U+0392
{ " Gamma " , " Γ " } , // greek capital letter gamma, U+0393 ISOgrk3
{ " Delta " , " Δ " } , // greek capital letter delta, U+0394 ISOgrk3
{ " Epsilon " , " Ε " } , // greek capital letter epsilon, U+0395
{ " Zeta " , " Ζ " } , // greek capital letter zeta, U+0396
{ " Eta " , " Η " } , // greek capital letter eta, U+0397
{ " Theta " , " Θ " } , // greek capital letter theta, U+0398 ISOgrk3
{ " Iota " , " Ι " } , // greek capital letter iota, U+0399
{ " Kappa " , " Κ " } , // greek capital letter kappa, U+039A
{ " Lambda " , " Λ " } , // greek capital letter lamda, U+039B ISOgrk3
{ " Mu " , " Μ " } , // greek capital letter mu, U+039C
{ " Nu " , " Ν " } , // greek capital letter nu, U+039D
{ " Xi " , " Ξ " } , // greek capital letter xi, U+039E ISOgrk3
{ " Omicron " , " Ο " } , // greek capital letter omicron, U+039F
{ " Pi " , " Π " } , // greek capital letter pi, U+03A0 ISOgrk3
{ " Rho " , " Ρ " } , // greek capital letter rho, U+03A1
{ " Sigma " , " Σ " } , // greek capital letter sigma, U+03A3 ISOgrk3
{ " Tau " , " Τ " } , // greek capital letter tau, U+03A4
{ " Upsilon " , " Υ " } , // greek capital letter upsilon, U+03A5 ISOgrk3
{ " Phi " , " Φ " } , // greek capital letter phi, U+03A6 ISOgrk3
{ " Chi " , " Χ " } , // greek capital letter chi, U+03A7
{ " Psi " , " Ψ " } , // greek capital letter psi, U+03A8 ISOgrk3
{ " Omega " , " Ω " } , // greek capital letter omega, U+03A9 ISOgrk3
{ " alpha " , " α " } , // greek small letter alpha, U+03B1 ISOgrk3
{ " beta " , " β " } , // greek small letter beta, U+03B2 ISOgrk3
{ " gamma " , " γ " } , // greek small letter gamma, U+03B3 ISOgrk3
{ " delta " , " δ " } , // greek small letter delta, U+03B4 ISOgrk3
{ " epsilon " , " ε " } , // greek small letter epsilon, U+03B5 ISOgrk3
{ " zeta " , " ζ " } , // greek small letter zeta, U+03B6 ISOgrk3
{ " eta " , " η " } , // greek small letter eta, U+03B7 ISOgrk3
{ " theta " , " θ " } , // greek small letter theta, U+03B8 ISOgrk3
{ " iota " , " ι " } , // greek small letter iota, U+03B9 ISOgrk3
{ " kappa " , " κ " } , // greek small letter kappa, U+03BA ISOgrk3
{ " lambda " , " λ " } , // greek small letter lamda, U+03BB ISOgrk3
{ " mu " , " μ " } , // greek small letter mu, U+03BC ISOgrk3
{ " nu " , " ν " } , // greek small letter nu, U+03BD ISOgrk3
{ " xi " , " ξ " } , // greek small letter xi, U+03BE ISOgrk3
{ " omicron " , " ο " } , // greek small letter omicron, U+03BF NEW
{ " pi " , " π " } , // greek small letter pi, U+03C0 ISOgrk3
{ " rho " , " ρ " } , // greek small letter rho, U+03C1 ISOgrk3
{ " sigmaf " , " ς " } , // greek small letter final sigma, U+03C2 ISOgrk3
{ " sigma " , " σ " } , // greek small letter sigma, U+03C3 ISOgrk3
{ " tau " , " τ " } , // greek small letter tau, U+03C4 ISOgrk3
{ " upsilon " , " υ " } , // greek small letter upsilon, U+03C5 ISOgrk3
{ " phi " , " φ " } , // greek small letter phi, U+03C6 ISOgrk3
{ " chi " , " χ " } , // greek small letter chi, U+03C7 ISOgrk3
{ " psi " , " ψ " } , // greek small letter psi, U+03C8 ISOgrk3
{ " omega " , " ω " } , // greek small letter omega, U+03C9 ISOgrk3
{ " thetasym " , " ϑ " } , // greek theta symbol, U+03D1 NEW
{ " upsih " , " ϒ " } , // greek upsilon with hook symbol, U+03D2 NEW
{ " piv " , " ϖ " } , // greek pi symbol, U+03D6 ISOgrk3
{ u " Alpha " _qs , u " Α " _qs } , // greek capital letter alpha, U+0391
{ u " Beta " _qs , u " Β " _qs } , // greek capital letter beta, U+0392
{ u " Gamma " _qs , u " Γ " _qs } , // greek capital letter gamma, U+0393 ISOgrk3
{ u " Delta " _qs , u " Δ " _qs } , // greek capital letter delta, U+0394 ISOgrk3
{ u " Epsilon " _qs , u " Ε " _qs } , // greek capital letter epsilon, U+0395
{ u " Zeta " _qs , u " Ζ " _qs } , // greek capital letter zeta, U+0396
{ u " Eta " _qs , u " Η " _qs } , // greek capital letter eta, U+0397
{ u " Theta " _qs , u " Θ " _qs } , // greek capital letter theta, U+0398 ISOgrk3
{ u " Iota " _qs , u " Ι " _qs } , // greek capital letter iota, U+0399
{ u " Kappa " _qs , u " Κ " _qs } , // greek capital letter kappa, U+039A
{ u " Lambda " _qs , u " Λ " _qs } , // greek capital letter lamda, U+039B ISOgrk3
{ u " Mu " _qs , u " Μ " _qs } , // greek capital letter mu, U+039C
{ u " Nu " _qs , u " Ν " _qs } , // greek capital letter nu, U+039D
{ u " Xi " _qs , u " Ξ " _qs } , // greek capital letter xi, U+039E ISOgrk3
{ u " Omicron " _qs , u " Ο " _qs } , // greek capital letter omicron, U+039F
{ u " Pi " _qs , u " Π " _qs } , // greek capital letter pi, U+03A0 ISOgrk3
{ u " Rho " _qs , u " Ρ " _qs } , // greek capital letter rho, U+03A1
{ u " Sigma " _qs , u " Σ " _qs } , // greek capital letter sigma, U+03A3 ISOgrk3
{ u " Tau " _qs , u " Τ " _qs } , // greek capital letter tau, U+03A4
{ u " Upsilon " _qs , u " Υ " _qs } , // greek capital letter upsilon, U+03A5 ISOgrk3
{ u " Phi " _qs , u " Φ " _qs } , // greek capital letter phi, U+03A6 ISOgrk3
{ u " Chi " _qs , u " Χ " _qs } , // greek capital letter chi, U+03A7
{ u " Psi " _qs , u " Ψ " _qs } , // greek capital letter psi, U+03A8 ISOgrk3
{ u " Omega " _qs , u " Ω " _qs } , // greek capital letter omega, U+03A9 ISOgrk3
{ u " alpha " _qs , u " α " _qs } , // greek small letter alpha, U+03B1 ISOgrk3
{ u " beta " _qs , u " β " _qs } , // greek small letter beta, U+03B2 ISOgrk3
{ u " gamma " _qs , u " γ " _qs } , // greek small letter gamma, U+03B3 ISOgrk3
{ u " delta " _qs , u " δ " _qs } , // greek small letter delta, U+03B4 ISOgrk3
{ u " epsilon " _qs , u " ε " _qs } , // greek small letter epsilon, U+03B5 ISOgrk3
{ u " zeta " _qs , u " ζ " _qs } , // greek small letter zeta, U+03B6 ISOgrk3
{ u " eta " _qs , u " η " _qs } , // greek small letter eta, U+03B7 ISOgrk3
{ u " theta " _qs , u " θ " _qs } , // greek small letter theta, U+03B8 ISOgrk3
{ u " iota " _qs , u " ι " _qs } , // greek small letter iota, U+03B9 ISOgrk3
{ u " kappa " _qs , u " κ " _qs } , // greek small letter kappa, U+03BA ISOgrk3
{ u " lambda " _qs , u " λ " _qs } , // greek small letter lamda, U+03BB ISOgrk3
{ u " mu " _qs , u " μ " _qs } , // greek small letter mu, U+03BC ISOgrk3
{ u " nu " _qs , u " ν " _qs } , // greek small letter nu, U+03BD ISOgrk3
{ u " xi " _qs , u " ξ " _qs } , // greek small letter xi, U+03BE ISOgrk3
{ u " omicron " _qs , u " ο " _qs } , // greek small letter omicron, U+03BF NEW
{ u " pi " _qs , u " π " _qs } , // greek small letter pi, U+03C0 ISOgrk3
{ u " rho " _qs , u " ρ " _qs } , // greek small letter rho, U+03C1 ISOgrk3
{ u " sigmaf " _qs , u " ς " _qs } , // greek small letter final sigma, U+03C2 ISOgrk3
{ u " sigma " _qs , u " σ " _qs } , // greek small letter sigma, U+03C3 ISOgrk3
{ u " tau " _qs , u " τ " _qs } , // greek small letter tau, U+03C4 ISOgrk3
{ u " upsilon " _qs , u " υ " _qs } , // greek small letter upsilon, U+03C5 ISOgrk3
{ u " phi " _qs , u " φ " _qs } , // greek small letter phi, U+03C6 ISOgrk3
{ u " chi " _qs , u " χ " _qs } , // greek small letter chi, U+03C7 ISOgrk3
{ u " psi " _qs , u " ψ " _qs } , // greek small letter psi, U+03C8 ISOgrk3
{ u " omega " _qs , u " ω " _qs } , // greek small letter omega, U+03C9 ISOgrk3
{ u " thetasym " _qs , u " ϑ " _qs } , // greek theta symbol, U+03D1 NEW
{ u " upsih " _qs , u " ϒ " _qs } , // greek upsilon with hook symbol, U+03D2 NEW
{ u " piv " _qs , u " ϖ " _qs } , // greek pi symbol, U+03D6 ISOgrk3
// General Punctuation
{ " bull " , " • " } , // bullet = black small circle, U+2022 ISOpub
{ u " bull " _qs , u " • " _qs } , // bullet = black small circle, U+2022 ISOpub
// bullet is NOT the same as bullet operator, U+2219
{ " hellip " , " … " } , // horizontal ellipsis = three dot leader, U+2026 ISOpub
{ " prime " , " ′ " } , // prime = minutes = feet, U+2032 ISOtech
{ " Prime " , " ″ " } , // double prime = seconds = inches, U+2033 ISOtech
{ " oline " , " ‾ " } , // overline = spacing overscore, U+203E NEW
{ " frasl " , " ⁄ " } , // fraction slash, U+2044 NEW
{ u " hellip " _qs , u " … " _qs } , // horizontal ellipsis = three dot leader, U+2026 ISOpub
{ u " prime " _qs , u " ′ " _qs } , // prime = minutes = feet, U+2032 ISOtech
{ u " Prime " _qs , u " ″ " _qs } , // double prime = seconds = inches, U+2033 ISOtech
{ u " oline " _qs , u " ‾ " _qs } , // overline = spacing overscore, U+203E NEW
{ u " frasl " _qs , u " ⁄ " _qs } , // fraction slash, U+2044 NEW
// Letterlike Symbols
{ " weierp " , " ℘ " } , // script capital P = power set = Weierstrass p, U+2118 ISOamso
{ " image " , " ℑ " } , // black-letter capital I = imaginary part, U+2111 ISOamso
{ " real " , " ℜ " } , // black-letter capital R = real part symbol, U+211C ISOamso
{ " trade " , " ™ " } , // trade mark sign, U+2122 ISOnum
{ " alefsym " , " ℵ " } , // alef symbol = first transfinite cardinal, U+2135 NEW
{ u " weierp " _qs , u " ℘ " _qs } , // script capital P = power set = Weierstrass p, U+2118 ISOamso
{ u " image " _qs , u " ℑ " _qs } , // black-letter capital I = imaginary part, U+2111 ISOamso
{ u " real " _qs , u " ℜ " _qs } , // black-letter capital R = real part symbol, U+211C ISOamso
{ u " trade " _qs , u " ™ " _qs } , // trade mark sign, U+2122 ISOnum
{ u " alefsym " _qs , u " ℵ " _qs } , // alef symbol = first transfinite cardinal, U+2135 NEW
// alef symbol is NOT the same as hebrew letter alef,
// U+05D0 although the same glyph could be used to depict both characters
// Arrows
{ " larr " , " ← " } , // leftwards arrow, U+2190 ISOnum
{ " uarr " , " ↑ " } , // upwards arrow, U+2191 ISOnum
{ " rarr " , " → " } , // rightwards arrow, U+2192 ISOnum
{ " darr " , " ↓ " } , // downwards arrow, U+2193 ISOnum
{ " harr " , " ↔ " } , // left right arrow, U+2194 ISOamsa
{ " crarr " , " ↵ " } , // downwards arrow with corner leftwards = carriage return, U+21B5 NEW
{ " lArr " , " ⇐ " } , // leftwards double arrow, U+21D0 ISOtech
{ u " larr " _qs , u " ← " _qs } , // leftwards arrow, U+2190 ISOnum
{ u " uarr " _qs , u " ↑ " _qs } , // upwards arrow, U+2191 ISOnum
{ u " rarr " _qs , u " → " _qs } , // rightwards arrow, U+2192 ISOnum
{ u " darr " _qs , u " ↓ " _qs } , // downwards arrow, U+2193 ISOnum
{ u " harr " _qs , u " ↔ " _qs } , // left right arrow, U+2194 ISOamsa
{ u " crarr " _qs , u " ↵ " _qs } , // downwards arrow with corner leftwards = carriage return, U+21B5 NEW
{ u " lArr " _qs , u " ⇐ " _qs } , // leftwards double arrow, U+21D0 ISOtech
// Unicode does not say that lArr is the same as the 'is implied by' arrow
// but also does not have any other character for that function. So lArr can
// be used for 'is implied by' as ISOtech suggests
{ " uArr " , " ⇑ " } , // upwards double arrow, U+21D1 ISOamsa
{ " rArr " , " ⇒ " } , // rightwards double arrow, U+21D2 ISOtech
{ u " uArr " _qs , u " ⇑ " _qs } , // upwards double arrow, U+21D1 ISOamsa
{ u " rArr " _qs , u " ⇒ " _qs } , // rightwards double arrow, U+21D2 ISOtech
// Unicode does not say this is the 'implies' character but does not have
// another character with this function so rArr can be used for 'implies'
// as ISOtech suggests
{ " dArr " , " ⇓ " } , // downwards double arrow, U+21D3 ISOamsa
{ " hArr " , " ⇔ " } , // left right double arrow, U+21D4 ISOamsa
{ u " dArr " _qs , u " ⇓ " _qs } , // downwards double arrow, U+21D3 ISOamsa
{ u " hArr " _qs , u " ⇔ " _qs } , // left right double arrow, U+21D4 ISOamsa
// Mathematical Operators
{ " forall " , " ∀ " } , // for all, U+2200 ISOtech
{ " part " , " ∂ " } , // partial differential, U+2202 ISOtech
{ " exist " , " ∃ " } , // there exists, U+2203 ISOtech
{ " empty " , " ∅ " } , // empty set = null set, U+2205 ISOamso
{ " nabla " , " ∇ " } , // nabla = backward difference, U+2207 ISOtech
{ " isin " , " ∈ " } , // element of, U+2208 ISOtech
{ " notin " , " ∉ " } , // not an element of, U+2209 ISOtech
{ " ni " , " ∋ " } , // contains as member, U+220B ISOtech
{ " prod " , " ∏ " } , // n-ary product = product sign, U+220F ISOamsb
{ u " forall " _qs , u " ∀ " _qs } , // for all, U+2200 ISOtech
{ u " part " _qs , u " ∂ " _qs } , // partial differential, U+2202 ISOtech
{ u " exist " _qs , u " ∃ " _qs } , // there exists, U+2203 ISOtech
{ u " empty " _qs , u " ∅ " _qs } , // empty set = null set, U+2205 ISOamso
{ u " nabla " _qs , u " ∇ " _qs } , // nabla = backward difference, U+2207 ISOtech
{ u " isin " _qs , u " ∈ " _qs } , // element of, U+2208 ISOtech
{ u " notin " _qs , u " ∉ " _qs } , // not an element of, U+2209 ISOtech
{ u " ni " _qs , u " ∋ " _qs } , // contains as member, U+220B ISOtech
{ u " prod " _qs , u " ∏ " _qs } , // n-ary product = product sign, U+220F ISOamsb
// prod is NOT the same character as U+03A0 'greek capital letter pi' though
// the same glyph might be used for both
{ " sum " , " ∑ " } , // n-ary summation, U+2211 ISOamsb
{ u " sum " _qs , u " ∑ " _qs } , // n-ary summation, U+2211 ISOamsb
// sum is NOT the same character as U+03A3 'greek capital letter sigma'
// though the same glyph might be used for both
{ " minus " , " − " } , // minus sign, U+2212 ISOtech
{ " lowast " , " ∗ " } , // asterisk operator, U+2217 ISOtech
{ " radic " , " √ " } , // square root = radical sign, U+221A ISOtech
{ " prop " , " ∝ " } , // proportional to, U+221D ISOtech
{ " infin " , " ∞ " } , // infinity, U+221E ISOtech
{ " ang " , " ∠ " } , // angle, U+2220 ISOamso
{ " and " , " ∧ " } , // logical and = wedge, U+2227 ISOtech
{ " or " , " ∨ " } , // logical or = vee, U+2228 ISOtech
{ " cap " , " ∩ " } , // intersection = cap, U+2229 ISOtech
{ " cup " , " ∪ " } , // union = cup, U+222A ISOtech
{ " int " , " ∫ " } , // integral, U+222B ISOtech
{ " there4 " , " ∴ " } , // therefore, U+2234 ISOtech
{ " sim " , " ∼ " } , // tilde operator = varies with = similar to, U+223C ISOtech
{ u " minus " _qs , u " − " _qs } , // minus sign, U+2212 ISOtech
{ u " lowast " _qs , u " ∗ " _qs } , // asterisk operator, U+2217 ISOtech
{ u " radic " _qs , u " √ " _qs } , // square root = radical sign, U+221A ISOtech
{ u " prop " _qs , u " ∝ " _qs } , // proportional to, U+221D ISOtech
{ u " infin " _qs , u " ∞ " _qs } , // infinity, U+221E ISOtech
{ u " ang " _qs , u " ∠ " _qs } , // angle, U+2220 ISOamso
{ u " and " _qs , u " ∧ " _qs } , // logical and = wedge, U+2227 ISOtech
{ u " or " _qs , u " ∨ " _qs } , // logical or = vee, U+2228 ISOtech
{ u " cap " _qs , u " ∩ " _qs } , // intersection = cap, U+2229 ISOtech
{ u " cup " _qs , u " ∪ " _qs } , // union = cup, U+222A ISOtech
{ u " int " _qs , u " ∫ " _qs } , // integral, U+222B ISOtech
{ u " there4 " _qs , u " ∴ " _qs } , // therefore, U+2234 ISOtech
{ u " sim " _qs , u " ∼ " _qs } , // tilde operator = varies with = similar to, U+223C ISOtech
// tilde operator is NOT the same character as the tilde, U+007E,
// although the same glyph might be used to represent both
{ " cong " , " ≅ " } , // approximately equal to, U+2245 ISOtech
{ " asymp " , " ≈ " } , // almost equal to = asymptotic to, U+2248 ISOamsr
{ " ne " , " ≠ " } , // not equal to, U+2260 ISOtech
{ " equiv " , " ≡ " } , // identical to, U+2261 ISOtech
{ " le " , " ≤ " } , // less-than or equal to, U+2264 ISOtech
{ " ge " , " ≥ " } , // greater-than or equal to, U+2265 ISOtech
{ " sub " , " ⊂ " } , // subset of, U+2282 ISOtech
{ " sup " , " ⊃ " } , // superset of, U+2283 ISOtech
{ " nsub " , " ⊄ " } , // not a subset of, U+2284 ISOamsn
{ " sube " , " ⊆ " } , // subset of or equal to, U+2286 ISOtech
{ " supe " , " ⊇ " } , // superset of or equal to, U+2287 ISOtech
{ " oplus " , " ⊕ " } , // circled plus = direct sum, U+2295 ISOamsb
{ " otimes " , " ⊗ " } , // circled times = vector product, U+2297 ISOamsb
{ " perp " , " ⊥ " } , // up tack = orthogonal to = perpendicular, U+22A5 ISOtech
{ " sdot " , " ⋅ " } , // dot operator, U+22C5 ISOamsb
{ u " cong " _qs , u " ≅ " _qs } , // approximately equal to, U+2245 ISOtech
{ u " asymp " _qs , u " ≈ " _qs } , // almost equal to = asymptotic to, U+2248 ISOamsr
{ u " ne " _qs , u " ≠ " _qs } , // not equal to, U+2260 ISOtech
{ u " equiv " _qs , u " ≡ " _qs } , // identical to, U+2261 ISOtech
{ u " le " _qs , u " ≤ " _qs } , // less-than or equal to, U+2264 ISOtech
{ u " ge " _qs , u " ≥ " _qs } , // greater-than or equal to, U+2265 ISOtech
{ u " sub " _qs , u " ⊂ " _qs } , // subset of, U+2282 ISOtech
{ u " sup " _qs , u " ⊃ " _qs } , // superset of, U+2283 ISOtech
{ u " nsub " _qs , u " ⊄ " _qs } , // not a subset of, U+2284 ISOamsn
{ u " sube " _qs , u " ⊆ " _qs } , // subset of or equal to, U+2286 ISOtech
{ u " supe " _qs , u " ⊇ " _qs } , // superset of or equal to, U+2287 ISOtech
{ u " oplus " _qs , u " ⊕ " _qs } , // circled plus = direct sum, U+2295 ISOamsb
{ u " otimes " _qs , u " ⊗ " _qs } , // circled times = vector product, U+2297 ISOamsb
{ u " perp " _qs , u " ⊥ " _qs } , // up tack = orthogonal to = perpendicular, U+22A5 ISOtech
{ u " sdot " _qs , u " ⋅ " _qs } , // dot operator, U+22C5 ISOamsb
// dot operator is NOT the same character as U+00B7 middle dot
// Miscellaneous Technical
{ " lceil " , " ⌈ " } , // left ceiling = APL upstile, U+2308 ISOamsc
{ " rceil " , " ⌉ " } , // right ceiling, U+2309 ISOamsc
{ " lfloor " , " ⌊ " } , // left floor = APL downstile, U+230A ISOamsc
{ " rfloor " , " ⌋ " } , // right floor, U+230B ISOamsc
{ " lang " , " 〈 " } , // left-pointing angle bracket = bra, U+2329 ISOtech
{ u " lceil " _qs , u " ⌈ " _qs } , // left ceiling = APL upstile, U+2308 ISOamsc
{ u " rceil " _qs , u " ⌉ " _qs } , // right ceiling, U+2309 ISOamsc
{ u " lfloor " _qs , u " ⌊ " _qs } , // left floor = APL downstile, U+230A ISOamsc
{ u " rfloor " _qs , u " ⌋ " _qs } , // right floor, U+230B ISOamsc
{ u " lang " _qs , u " 〈 " _qs } , // left-pointing angle bracket = bra, U+2329 ISOtech
// lang is NOT the same character as U+003C 'less than sign'
// or U+2039 'single left-pointing angle quotation mark'
{ " rang " , " 〉 " } , // right-pointing angle bracket = ket, U+232A ISOtech
{ u " rang " _qs , u " 〉 " _qs } , // right-pointing angle bracket = ket, U+232A ISOtech
// rang is NOT the same character as U+003E 'greater than sign'
// or U+203A 'single right-pointing angle quotation mark'
// Geometric Shapes
{ " loz " , " ◊ " } , // lozenge, U+25CA ISOpub
{ u " loz " _qs , u " ◊ " _qs } , // lozenge, U+25CA ISOpub
// Miscellaneous Symbols
{ " spades " , " ♠ " } , // black spade suit, U+2660 ISOpub
{ " clubs " , " ♣ " } , // black club suit = shamrock, U+2663 ISOpub
{ " hearts " , " ♥ " } , // black heart suit = valentine, U+2665 ISOpub
{ " diams " , " ♦ " } // black diamond suit, U+2666 ISOpub
{ u " spades " _qs , u " ♠ " _qs } , // black spade suit, U+2660 ISOpub
{ u " clubs " _qs , u " ♣ " _qs } , // black club suit = shamrock, U+2663 ISOpub
{ u " hearts " _qs , u " ♥ " _qs } , // black heart suit = valentine, U+2665 ISOpub
{ u " diams " _qs , u " ♦ " _qs } // black diamond suit, U+2666 ISOpub
} ;
return HTMLEntities . value ( name ) ;
}
@ -360,23 +361,23 @@ namespace
@@ -360,23 +361,23 @@ namespace
// Ported to Qt from KDElibs4
QDateTime parseDate ( const QString & string )
{
// Abbreviated English weekday names, listed Monday first.
// NOTE(review): both a `char` and a `char16_t` declaration of this table (and
// both narrow and u"" literal rows) appear here; only one form can remain in
// compilable code - confirm which one is current.
// NOTE(review): presumably compared against the weekday token of the date
// string being parsed; the code that reads this table is not visible here.
const char shortDay [ ] [ 4 ] =
const char16_t shortDay [ ] [ 4 ] =
{
" Mon " , " Tue " , " Wed " ,
" Thu " , " Fri " , " Sat " ,
" Sun "
u " Mon " , u " Tue " , u " Wed " ,
u " Thu " , u " Fri " , u " Sat " ,
u " Sun "
} ;
// Full English weekday names, listed Monday first (same order as the
// abbreviated weekday table declared just before it).
// NOTE(review): both a `char` and a `char16_t` declaration of this table
// appear here; only one form can remain in compilable code - confirm which.
// NOTE(review): presumably used as the fallback when the weekday token is not
// a three-letter abbreviation; the consuming code is not visible here.
const char longDay [ ] [ 10 ] =
const char16_t longDay [ ] [ 10 ] =
{
" Monday " , " Tuesday " , " Wednesday " ,
" Thursday " , " Friday " , " Saturday " ,
" Sunday "
u " Monday " , u " Tuesday " , u " Wednesday " ,
u " Thursday " , u " Friday " , u " Saturday " ,
u " Sunday "
} ;
// Abbreviated English month names, listed January first.
// NOTE(review): both a `char` and a `char16_t` declaration of this table
// appear here; only one form can remain in compilable code - confirm which.
// NOTE(review): presumably the index of a matching entry is turned into the
// month number by the parsing code - verify against the lookup, which is not
// visible here.
const char shortMonth [ ] [ 4 ] =
const char16_t shortMonth [ ] [ 4 ] =
{
" Jan " , " Feb " , " Mar " , " Apr " ,
" May " , " Jun " , " Jul " , " Aug " ,
" Sep " , " Oct " , " Nov " , " Dec "
u " Jan " , u " Feb " , u " Mar " , u " Apr " ,
u " May " , u " Jun " , u " Jul " , u " Aug " ,
u " Sep " , u " Oct " , u " Nov " , u " Dec "
} ;
const QString str = string . trimmed ( ) ;
@ -391,7 +392,7 @@ namespace
@@ -391,7 +392,7 @@ namespace
int nmin = 8 ;
int nsec = 9 ;
// Also accept obsolete form "Weekday, DD-Mon-YY HH:MM:SS ±hhmm"
QRegularExpression rx { " ^(?:([A-Z][a-z]+), \\ s*)?( \\ d{1,2})( \\ s+|-)([^- \\ s]+)( \\ s+|-)( \\ d{2,4}) \\ s+( \\ d \\ d):( \\ d \\ d)(?::( \\ d \\ d))? \\ s+( \\ S+)$ " } ;
QRegularExpression rx { u " ^(?:([A-Z][a-z]+), \\ s*)?( \\ d{1,2})( \\ s+|-)([^- \\ s]+)( \\ s+|-)( \\ d{2,4}) \\ s+( \\ d \\ d):( \\ d \\ d)(?::( \\ d \\ d))? \\ s+( \\ S+)$ " _qs } ;
QRegularExpressionMatch rxMatch ;
QStringList parts ;
if ( str . indexOf ( rx , 0 , & rxMatch ) = = 0 )
@ -406,7 +407,7 @@ namespace
@@ -406,7 +407,7 @@ namespace
else
{
// Check for the obsolete form "Wdy Mon DD HH:MM:SS YYYY"
rx = QRegularExpression { " ^([A-Z][a-z]+) \\ s+( \\ S+) \\ s+( \\ d \\ d) \\ s+( \\ d \\ d):( \\ d \\ d):( \\ d \\ d) \\ s+( \\ d \\ d \\ d \\ d)$ " } ;
rx = QRegularExpression { u " ^([A-Z][a-z]+) \\ s+( \\ S+) \\ s+( \\ d \\ d) \\ s+( \\ d \\ d):( \\ d \\ d):( \\ d \\ d) \\ s+( \\ d \\ d \\ d \\ d)$ " _qs } ;
if ( str . indexOf ( rx , 0 , & rxMatch ) ! = 0 )
return QDateTime : : currentDateTime ( ) ;
@ -465,7 +466,7 @@ namespace
@@ -465,7 +466,7 @@ namespace
bool negOffset = false ;
if ( parts . count ( ) > 10 )
{
rx = QRegularExpression { " ^([+-])( \\ d \\ d)( \\ d \\ d)$ " } ;
rx = QRegularExpression { u " ^([+-])( \\ d \\ d)( \\ d \\ d)$ " _qs } ;
if ( parts [ 10 ] . indexOf ( rx , 0 , & rxMatch ) = = 0 )
{
// It's a UTC offset ±hhmm
@ -628,9 +629,9 @@ void Parser::parseRssArticle(QXmlStreamReader &xml)
@@ -628,9 +629,9 @@ void Parser::parseRssArticle(QXmlStreamReader &xml)
}
else if ( name = = QLatin1String ( " enclosure " ) )
{
if ( xml . attributes ( ) . value ( " type " ) = = QLatin1String ( " application/x-bittorrent " ) )
if ( xml . attributes ( ) . value ( u " type " _qs ) = = QLatin1String ( " application/x-bittorrent " ) )
article [ Article : : KeyTorrentURL ] = xml . attributes ( ) . value ( QLatin1String ( " url " ) ) . toString ( ) ;
else if ( xml . attributes ( ) . value ( " type " ) . isEmpty ( ) )
else if ( xml . attributes ( ) . value ( u " type " _qs ) . isEmpty ( ) )
altTorrentUrl = xml . attributes ( ) . value ( QLatin1String ( " url " ) ) . toString ( ) ;
}
else if ( name = = QLatin1String ( " link " ) )
@ -786,7 +787,7 @@ void Parser::parseAtomArticle(QXmlStreamReader &xml)
@@ -786,7 +787,7 @@ void Parser::parseAtomArticle(QXmlStreamReader &xml)
void Parser : : parseAtomChannel ( QXmlStreamReader & xml )
{
m_baseUrl = xml . attributes ( ) . value ( " xml:base " ) . toString ( ) ;
m_baseUrl = xml . attributes ( ) . value ( u " xml:base " _qs ) . toString ( ) ;
while ( ! xml . atEnd ( ) )
{