|
|
@ -25,24 +25,339 @@ |
|
|
|
* modify file(s), you may extend this exception to your version of the file(s), |
|
|
|
* modify file(s), you may extend this exception to your version of the file(s), |
|
|
|
* but you are not obligated to do so. If you do not wish to do so, delete this |
|
|
|
* but you are not obligated to do so. If you do not wish to do so, delete this |
|
|
|
* exception statement from your version. |
|
|
|
* exception statement from your version. |
|
|
|
* |
|
|
|
|
|
|
|
* Contact : chris@qbittorrent.org |
|
|
|
|
|
|
|
*/ |
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
|
|
#include "rss_parser.h" |
|
|
|
#include "rss_parser.h" |
|
|
|
|
|
|
|
|
|
|
|
#include <QDebug> |
|
|
|
#include <QDebug> |
|
|
|
#include <QDateTime> |
|
|
|
#include <QDateTime> |
|
|
|
|
|
|
|
#include <QGlobalStatic> |
|
|
|
|
|
|
|
#include <QHash> |
|
|
|
#include <QMetaObject> |
|
|
|
#include <QMetaObject> |
|
|
|
#include <QRegExp> |
|
|
|
#include <QRegExp> |
|
|
|
#include <QStringList> |
|
|
|
#include <QStringList> |
|
|
|
#include <QVariant> |
|
|
|
#include <QVariant> |
|
|
|
|
|
|
|
#include <QXmlStreamEntityResolver> |
|
|
|
#include <QXmlStreamReader> |
|
|
|
#include <QXmlStreamReader> |
|
|
|
|
|
|
|
|
|
|
|
#include "../rss_article.h" |
|
|
|
#include "../rss_article.h" |
|
|
|
|
|
|
|
|
|
|
|
namespace |
|
|
|
namespace |
|
|
|
{ |
|
|
|
{ |
|
|
|
|
|
|
|
// (X)HTML entities declared in:
|
|
|
|
|
|
|
|
// http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent
|
|
|
|
|
|
|
|
// http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent
|
|
|
|
|
|
|
|
// http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent
|
|
|
|
|
|
|
|
using StringHash = QHash<QString, QString>; |
|
|
|
|
|
|
|
Q_GLOBAL_STATIC_WITH_ARGS(StringHash, HTML_ENTITIES, ({ |
|
|
|
|
|
|
|
{"nbsp", " "}, // no-break space = non-breaking space, U+00A0 ISOnum
|
|
|
|
|
|
|
|
{"iexcl", "¡"}, // inverted exclamation mark, U+00A1 ISOnum
|
|
|
|
|
|
|
|
{"cent", "¢"}, // cent sign, U+00A2 ISOnum
|
|
|
|
|
|
|
|
{"pound", "£"}, // pound sign, U+00A3 ISOnum
|
|
|
|
|
|
|
|
{"curren", "¤"}, // currency sign, U+00A4 ISOnum
|
|
|
|
|
|
|
|
{"yen", "¥"}, // yen sign = yuan sign, U+00A5 ISOnum
|
|
|
|
|
|
|
|
{"brvbar", "¦"}, // broken bar = broken vertical bar, U+00A6 ISOnum
|
|
|
|
|
|
|
|
{"sect", "§"}, // section sign, U+00A7 ISOnum
|
|
|
|
|
|
|
|
{"uml", "¨"}, // diaeresis = spacing diaeresis, U+00A8 ISOdia
|
|
|
|
|
|
|
|
{"copy", "©"}, // copyright sign, U+00A9 ISOnum
|
|
|
|
|
|
|
|
{"ordf", "ª"}, // feminine ordinal indicator, U+00AA ISOnum
|
|
|
|
|
|
|
|
{"laquo", "«"}, // left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum
|
|
|
|
|
|
|
|
{"not", "¬"}, // not sign = angled dash, U+00AC ISOnum
|
|
|
|
|
|
|
|
{"shy", "­"}, // soft hyphen = discretionary hyphen, U+00AD ISOnum
|
|
|
|
|
|
|
|
{"reg", "®"}, // registered sign = registered trade mark sign, U+00AE ISOnum
|
|
|
|
|
|
|
|
{"macr", "¯"}, // macron = spacing macron = overline = APL overbar, U+00AF ISOdia
|
|
|
|
|
|
|
|
{"deg", "°"}, // degree sign, U+00B0 ISOnum
|
|
|
|
|
|
|
|
{"plusmn", "±"}, // plus-minus sign = plus-or-minus sign, U+00B1 ISOnum
|
|
|
|
|
|
|
|
{"sup2", "²"}, // superscript two = superscript digit two = squared, U+00B2 ISOnum
|
|
|
|
|
|
|
|
{"sup3", "³"}, // superscript three = superscript digit three = cubed, U+00B3 ISOnum
|
|
|
|
|
|
|
|
{"acute", "´"}, // acute accent = spacing acute, U+00B4 ISOdia
|
|
|
|
|
|
|
|
{"micro", "µ"}, // micro sign, U+00B5 ISOnum
|
|
|
|
|
|
|
|
{"para", "¶"}, // pilcrow sign = paragraph sign, U+00B6 ISOnum
|
|
|
|
|
|
|
|
{"middot", "·"}, // middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum
|
|
|
|
|
|
|
|
{"cedil", "¸"}, // cedilla = spacing cedilla, U+00B8 ISOdia
|
|
|
|
|
|
|
|
{"sup1", "¹"}, // superscript one = superscript digit one, U+00B9 ISOnum
|
|
|
|
|
|
|
|
{"ordm", "º"}, // masculine ordinal indicator, U+00BA ISOnum
|
|
|
|
|
|
|
|
{"raquo", "»"}, // right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum
|
|
|
|
|
|
|
|
{"frac14", "¼"}, // vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum
|
|
|
|
|
|
|
|
{"frac12", "½"}, // vulgar fraction one half = fraction one half, U+00BD ISOnum
|
|
|
|
|
|
|
|
{"frac34", "¾"}, // vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
|
|
|
|
|
|
|
|
{"iquest", "¿"}, // inverted question mark = turned question mark, U+00BF ISOnum
|
|
|
|
|
|
|
|
{"Agrave", "À"}, // latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1
|
|
|
|
|
|
|
|
{"Aacute", "Á"}, // latin capital letter A with acute, U+00C1 ISOlat1
|
|
|
|
|
|
|
|
{"Acirc", "Â"}, // latin capital letter A with circumflex, U+00C2 ISOlat1
|
|
|
|
|
|
|
|
{"Atilde", "Ã"}, // latin capital letter A with tilde, U+00C3 ISOlat1
|
|
|
|
|
|
|
|
{"Auml", "Ä"}, // latin capital letter A with diaeresis, U+00C4 ISOlat1
|
|
|
|
|
|
|
|
{"Aring", "Å"}, // latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1
|
|
|
|
|
|
|
|
{"AElig", "Æ"}, // latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
|
|
|
|
|
|
|
|
{"Ccedil", "Ç"}, // latin capital letter C with cedilla, U+00C7 ISOlat1
|
|
|
|
|
|
|
|
{"Egrave", "È"}, // latin capital letter E with grave, U+00C8 ISOlat1
|
|
|
|
|
|
|
|
{"Eacute", "É"}, // latin capital letter E with acute, U+00C9 ISOlat1
|
|
|
|
|
|
|
|
{"Ecirc", "Ê"}, // latin capital letter E with circumflex, U+00CA ISOlat1
|
|
|
|
|
|
|
|
{"Euml", "Ë"}, // latin capital letter E with diaeresis, U+00CB ISOlat1
|
|
|
|
|
|
|
|
{"Igrave", "Ì"}, // latin capital letter I with grave, U+00CC ISOlat1
|
|
|
|
|
|
|
|
{"Iacute", "Í"}, // latin capital letter I with acute, U+00CD ISOlat1
|
|
|
|
|
|
|
|
{"Icirc", "Î"}, // latin capital letter I with circumflex, U+00CE ISOlat1
|
|
|
|
|
|
|
|
{"Iuml", "Ï"}, // latin capital letter I with diaeresis, U+00CF ISOlat1
|
|
|
|
|
|
|
|
{"ETH", "Ð"}, // latin capital letter ETH, U+00D0 ISOlat1
|
|
|
|
|
|
|
|
{"Ntilde", "Ñ"}, // latin capital letter N with tilde, U+00D1 ISOlat1
|
|
|
|
|
|
|
|
{"Ograve", "Ò"}, // latin capital letter O with grave, U+00D2 ISOlat1
|
|
|
|
|
|
|
|
{"Oacute", "Ó"}, // latin capital letter O with acute, U+00D3 ISOlat1
|
|
|
|
|
|
|
|
{"Ocirc", "Ô"}, // latin capital letter O with circumflex, U+00D4 ISOlat1
|
|
|
|
|
|
|
|
{"Otilde", "Õ"}, // latin capital letter O with tilde, U+00D5 ISOlat1
|
|
|
|
|
|
|
|
{"Ouml", "Ö"}, // latin capital letter O with diaeresis, U+00D6 ISOlat1
|
|
|
|
|
|
|
|
{"times", "×"}, // multiplication sign, U+00D7 ISOnum
|
|
|
|
|
|
|
|
{"Oslash", "Ø"}, // latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1
|
|
|
|
|
|
|
|
{"Ugrave", "Ù"}, // latin capital letter U with grave, U+00D9 ISOlat1
|
|
|
|
|
|
|
|
{"Uacute", "Ú"}, // latin capital letter U with acute, U+00DA ISOlat1
|
|
|
|
|
|
|
|
{"Ucirc", "Û"}, // latin capital letter U with circumflex, U+00DB ISOlat1
|
|
|
|
|
|
|
|
{"Uuml", "Ü"}, // latin capital letter U with diaeresis, U+00DC ISOlat1
|
|
|
|
|
|
|
|
{"Yacute", "Ý"}, // latin capital letter Y with acute, U+00DD ISOlat1
|
|
|
|
|
|
|
|
{"THORN", "Þ"}, // latin capital letter THORN, U+00DE ISOlat1
|
|
|
|
|
|
|
|
{"szlig", "ß"}, // latin small letter sharp s = ess-zed, U+00DF ISOlat1
|
|
|
|
|
|
|
|
{"agrave", "à"}, // latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1
|
|
|
|
|
|
|
|
{"aacute", "á"}, // latin small letter a with acute, U+00E1 ISOlat1
|
|
|
|
|
|
|
|
{"acirc", "â"}, // latin small letter a with circumflex, U+00E2 ISOlat1
|
|
|
|
|
|
|
|
{"atilde", "ã"}, // latin small letter a with tilde, U+00E3 ISOlat1
|
|
|
|
|
|
|
|
{"auml", "ä"}, // latin small letter a with diaeresis, U+00E4 ISOlat1
|
|
|
|
|
|
|
|
{"aring", "å"}, // latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1
|
|
|
|
|
|
|
|
{"aelig", "æ"}, // latin small letter ae = latin small ligature ae, U+00E6 ISOlat1
|
|
|
|
|
|
|
|
{"ccedil", "ç"}, // latin small letter c with cedilla, U+00E7 ISOlat1
|
|
|
|
|
|
|
|
{"egrave", "è"}, // latin small letter e with grave, U+00E8 ISOlat1
|
|
|
|
|
|
|
|
{"eacute", "é"}, // latin small letter e with acute, U+00E9 ISOlat1
|
|
|
|
|
|
|
|
{"ecirc", "ê"}, // latin small letter e with circumflex, U+00EA ISOlat1
|
|
|
|
|
|
|
|
{"euml", "ë"}, // latin small letter e with diaeresis, U+00EB ISOlat1
|
|
|
|
|
|
|
|
{"igrave", "ì"}, // latin small letter i with grave, U+00EC ISOlat1
|
|
|
|
|
|
|
|
{"iacute", "í"}, // latin small letter i with acute, U+00ED ISOlat1
|
|
|
|
|
|
|
|
{"icirc", "î"}, // latin small letter i with circumflex, U+00EE ISOlat1
|
|
|
|
|
|
|
|
{"iuml", "ï"}, // latin small letter i with diaeresis, U+00EF ISOlat1
|
|
|
|
|
|
|
|
{"eth", "ð"}, // latin small letter eth, U+00F0 ISOlat1
|
|
|
|
|
|
|
|
{"ntilde", "ñ"}, // latin small letter n with tilde, U+00F1 ISOlat1
|
|
|
|
|
|
|
|
{"ograve", "ò"}, // latin small letter o with grave, U+00F2 ISOlat1
|
|
|
|
|
|
|
|
{"oacute", "ó"}, // latin small letter o with acute, U+00F3 ISOlat1
|
|
|
|
|
|
|
|
{"ocirc", "ô"}, // latin small letter o with circumflex, U+00F4 ISOlat1
|
|
|
|
|
|
|
|
{"otilde", "õ"}, // latin small letter o with tilde, U+00F5 ISOlat1
|
|
|
|
|
|
|
|
{"ouml", "ö"}, // latin small letter o with diaeresis, U+00F6 ISOlat1
|
|
|
|
|
|
|
|
{"divide", "÷"}, // division sign, U+00F7 ISOnum
|
|
|
|
|
|
|
|
{"oslash", "ø"}, // latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1
|
|
|
|
|
|
|
|
{"ugrave", "ù"}, // latin small letter u with grave, U+00F9 ISOlat1
|
|
|
|
|
|
|
|
{"uacute", "ú"}, // latin small letter u with acute, U+00FA ISOlat1
|
|
|
|
|
|
|
|
{"ucirc", "û"}, // latin small letter u with circumflex, U+00FB ISOlat1
|
|
|
|
|
|
|
|
{"uuml", "ü"}, // latin small letter u with diaeresis, U+00FC ISOlat1
|
|
|
|
|
|
|
|
{"yacute", "ý"}, // latin small letter y with acute, U+00FD ISOlat1
|
|
|
|
|
|
|
|
{"thorn", "þ"}, // latin small letter thorn, U+00FE ISOlat1
|
|
|
|
|
|
|
|
{"yuml", "ÿ"}, // latin small letter y with diaeresis, U+00FF ISOlat1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Latin Extended-A
|
|
|
|
|
|
|
|
{"OElig", "Œ"}, // latin capital ligature OE, U+0152 ISOlat2
|
|
|
|
|
|
|
|
{"oelig", "œ"}, // latin small ligature oe, U+0153 ISOlat2
|
|
|
|
|
|
|
|
// ligature is a misnomer, this is a separate character in some languages
|
|
|
|
|
|
|
|
{"Scaron", "Š"}, // latin capital letter S with caron, U+0160 ISOlat2
|
|
|
|
|
|
|
|
{"scaron", "š"}, // latin small letter s with caron, U+0161 ISOlat2
|
|
|
|
|
|
|
|
{"Yuml", "Ÿ"}, // latin capital letter Y with diaeresis, U+0178 ISOlat2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Spacing Modifier Letters
|
|
|
|
|
|
|
|
{"circ", "ˆ"}, // modifier letter circumflex accent, U+02C6 ISOpub
|
|
|
|
|
|
|
|
{"tilde", "˜"}, // small tilde, U+02DC ISOdia
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// General Punctuation
|
|
|
|
|
|
|
|
{"ensp", " "}, // en space, U+2002 ISOpub
|
|
|
|
|
|
|
|
{"emsp", " "}, // em space, U+2003 ISOpub
|
|
|
|
|
|
|
|
{"thinsp", " "}, // thin space, U+2009 ISOpub
|
|
|
|
|
|
|
|
{"zwnj", "‌"}, // zero width non-joiner, U+200C NEW RFC 2070
|
|
|
|
|
|
|
|
{"zwj", "‍"}, // zero width joiner, U+200D NEW RFC 2070
|
|
|
|
|
|
|
|
{"lrm", "‎"}, // left-to-right mark, U+200E NEW RFC 2070
|
|
|
|
|
|
|
|
{"rlm", "‏"}, // right-to-left mark, U+200F NEW RFC 2070
|
|
|
|
|
|
|
|
{"ndash", "–"}, // en dash, U+2013 ISOpub
|
|
|
|
|
|
|
|
{"mdash", "—"}, // em dash, U+2014 ISOpub
|
|
|
|
|
|
|
|
{"lsquo", "‘"}, // left single quotation mark, U+2018 ISOnum
|
|
|
|
|
|
|
|
{"rsquo", "’"}, // right single quotation mark, U+2019 ISOnum
|
|
|
|
|
|
|
|
{"sbquo", "‚"}, // single low-9 quotation mark, U+201A NEW
|
|
|
|
|
|
|
|
{"ldquo", "“"}, // left double quotation mark, U+201C ISOnum
|
|
|
|
|
|
|
|
{"rdquo", "”"}, // right double quotation mark, U+201D ISOnum
|
|
|
|
|
|
|
|
{"bdquo", "„"}, // double low-9 quotation mark, U+201E NEW
|
|
|
|
|
|
|
|
{"dagger", "†"}, // dagger, U+2020 ISOpub
|
|
|
|
|
|
|
|
{"Dagger", "‡"}, // double dagger, U+2021 ISOpub
|
|
|
|
|
|
|
|
{"permil", "‰"}, // per mille sign, U+2030 ISOtech
|
|
|
|
|
|
|
|
{"lsaquo", "‹"}, // single left-pointing angle quotation mark, U+2039 ISO proposed
|
|
|
|
|
|
|
|
// lsaquo is proposed but not yet ISO standardized
|
|
|
|
|
|
|
|
{"rsaquo", "›"}, // single right-pointing angle quotation mark, U+203A ISO proposed
|
|
|
|
|
|
|
|
// rsaquo is proposed but not yet ISO standardized
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Currency Symbols
|
|
|
|
|
|
|
|
{"euro", "€"}, // euro sign, U+20AC NEW
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Latin Extended-B
|
|
|
|
|
|
|
|
{"fnof", "ƒ"}, // latin small letter f with hook = function = florin, U+0192 ISOtech
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Greek
|
|
|
|
|
|
|
|
{"Alpha", "Α"}, // greek capital letter alpha, U+0391
|
|
|
|
|
|
|
|
{"Beta", "Β"}, // greek capital letter beta, U+0392
|
|
|
|
|
|
|
|
{"Gamma", "Γ"}, // greek capital letter gamma, U+0393 ISOgrk3
|
|
|
|
|
|
|
|
{"Delta", "Δ"}, // greek capital letter delta, U+0394 ISOgrk3
|
|
|
|
|
|
|
|
{"Epsilon", "Ε"}, // greek capital letter epsilon, U+0395
|
|
|
|
|
|
|
|
{"Zeta", "Ζ"}, // greek capital letter zeta, U+0396
|
|
|
|
|
|
|
|
{"Eta", "Η"}, // greek capital letter eta, U+0397
|
|
|
|
|
|
|
|
{"Theta", "Θ"}, // greek capital letter theta, U+0398 ISOgrk3
|
|
|
|
|
|
|
|
{"Iota", "Ι"}, // greek capital letter iota, U+0399
|
|
|
|
|
|
|
|
{"Kappa", "Κ"}, // greek capital letter kappa, U+039A
|
|
|
|
|
|
|
|
{"Lambda", "Λ"}, // greek capital letter lamda, U+039B ISOgrk3
|
|
|
|
|
|
|
|
{"Mu", "Μ"}, // greek capital letter mu, U+039C
|
|
|
|
|
|
|
|
{"Nu", "Ν"}, // greek capital letter nu, U+039D
|
|
|
|
|
|
|
|
{"Xi", "Ξ"}, // greek capital letter xi, U+039E ISOgrk3
|
|
|
|
|
|
|
|
{"Omicron", "Ο"}, // greek capital letter omicron, U+039F
|
|
|
|
|
|
|
|
{"Pi", "Π"}, // greek capital letter pi, U+03A0 ISOgrk3
|
|
|
|
|
|
|
|
{"Rho", "Ρ"}, // greek capital letter rho, U+03A1
|
|
|
|
|
|
|
|
{"Sigma", "Σ"}, // greek capital letter sigma, U+03A3 ISOgrk3
|
|
|
|
|
|
|
|
{"Tau", "Τ"}, // greek capital letter tau, U+03A4
|
|
|
|
|
|
|
|
{"Upsilon", "Υ"}, // greek capital letter upsilon, U+03A5 ISOgrk3
|
|
|
|
|
|
|
|
{"Phi", "Φ"}, // greek capital letter phi, U+03A6 ISOgrk3
|
|
|
|
|
|
|
|
{"Chi", "Χ"}, // greek capital letter chi, U+03A7
|
|
|
|
|
|
|
|
{"Psi", "Ψ"}, // greek capital letter psi, U+03A8 ISOgrk3
|
|
|
|
|
|
|
|
{"Omega", "Ω"}, // greek capital letter omega, U+03A9 ISOgrk3
|
|
|
|
|
|
|
|
{"alpha", "α"}, // greek small letter alpha, U+03B1 ISOgrk3
|
|
|
|
|
|
|
|
{"beta", "β"}, // greek small letter beta, U+03B2 ISOgrk3
|
|
|
|
|
|
|
|
{"gamma", "γ"}, // greek small letter gamma, U+03B3 ISOgrk3
|
|
|
|
|
|
|
|
{"delta", "δ"}, // greek small letter delta, U+03B4 ISOgrk3
|
|
|
|
|
|
|
|
{"epsilon", "ε"}, // greek small letter epsilon, U+03B5 ISOgrk3
|
|
|
|
|
|
|
|
{"zeta", "ζ"}, // greek small letter zeta, U+03B6 ISOgrk3
|
|
|
|
|
|
|
|
{"eta", "η"}, // greek small letter eta, U+03B7 ISOgrk3
|
|
|
|
|
|
|
|
{"theta", "θ"}, // greek small letter theta, U+03B8 ISOgrk3
|
|
|
|
|
|
|
|
{"iota", "ι"}, // greek small letter iota, U+03B9 ISOgrk3
|
|
|
|
|
|
|
|
{"kappa", "κ"}, // greek small letter kappa, U+03BA ISOgrk3
|
|
|
|
|
|
|
|
{"lambda", "λ"}, // greek small letter lamda, U+03BB ISOgrk3
|
|
|
|
|
|
|
|
{"mu", "μ"}, // greek small letter mu, U+03BC ISOgrk3
|
|
|
|
|
|
|
|
{"nu", "ν"}, // greek small letter nu, U+03BD ISOgrk3
|
|
|
|
|
|
|
|
{"xi", "ξ"}, // greek small letter xi, U+03BE ISOgrk3
|
|
|
|
|
|
|
|
{"omicron", "ο"}, // greek small letter omicron, U+03BF NEW
|
|
|
|
|
|
|
|
{"pi", "π"}, // greek small letter pi, U+03C0 ISOgrk3
|
|
|
|
|
|
|
|
{"rho", "ρ"}, // greek small letter rho, U+03C1 ISOgrk3
|
|
|
|
|
|
|
|
{"sigmaf", "ς"}, // greek small letter final sigma, U+03C2 ISOgrk3
|
|
|
|
|
|
|
|
{"sigma", "σ"}, // greek small letter sigma, U+03C3 ISOgrk3
|
|
|
|
|
|
|
|
{"tau", "τ"}, // greek small letter tau, U+03C4 ISOgrk3
|
|
|
|
|
|
|
|
{"upsilon", "υ"}, // greek small letter upsilon, U+03C5 ISOgrk3
|
|
|
|
|
|
|
|
{"phi", "φ"}, // greek small letter phi, U+03C6 ISOgrk3
|
|
|
|
|
|
|
|
{"chi", "χ"}, // greek small letter chi, U+03C7 ISOgrk3
|
|
|
|
|
|
|
|
{"psi", "ψ"}, // greek small letter psi, U+03C8 ISOgrk3
|
|
|
|
|
|
|
|
{"omega", "ω"}, // greek small letter omega, U+03C9 ISOgrk3
|
|
|
|
|
|
|
|
{"thetasym", "ϑ"}, // greek theta symbol, U+03D1 NEW
|
|
|
|
|
|
|
|
{"upsih", "ϒ"}, // greek upsilon with hook symbol, U+03D2 NEW
|
|
|
|
|
|
|
|
{"piv", "ϖ"}, // greek pi symbol, U+03D6 ISOgrk3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// General Punctuation
|
|
|
|
|
|
|
|
{"bull", "•"}, // bullet = black small circle, U+2022 ISOpub
|
|
|
|
|
|
|
|
// bullet is NOT the same as bullet operator, U+2219
|
|
|
|
|
|
|
|
{"hellip", "…"}, // horizontal ellipsis = three dot leader, U+2026 ISOpub
|
|
|
|
|
|
|
|
{"prime", "′"}, // prime = minutes = feet, U+2032 ISOtech
|
|
|
|
|
|
|
|
{"Prime", "″"}, // double prime = seconds = inches, U+2033 ISOtech
|
|
|
|
|
|
|
|
{"oline", "‾"}, // overline = spacing overscore, U+203E NEW
|
|
|
|
|
|
|
|
{"frasl", "⁄"}, // fraction slash, U+2044 NEW
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Letterlike Symbols
|
|
|
|
|
|
|
|
{"weierp", "℘"}, // script capital P = power set = Weierstrass p, U+2118 ISOamso
|
|
|
|
|
|
|
|
{"image", "ℑ"}, // black-letter capital I = imaginary part, U+2111 ISOamso
|
|
|
|
|
|
|
|
{"real", "ℜ"}, // black-letter capital R = real part symbol, U+211C ISOamso
|
|
|
|
|
|
|
|
{"trade", "™"}, // trade mark sign, U+2122 ISOnum
|
|
|
|
|
|
|
|
{"alefsym", "ℵ"}, // alef symbol = first transfinite cardinal, U+2135 NEW
|
|
|
|
|
|
|
|
// alef symbol is NOT the same as hebrew letter alef,
|
|
|
|
|
|
|
|
// U+05D0 although the same glyph could be used to depict both characters
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Arrows
|
|
|
|
|
|
|
|
{"larr", "←"}, // leftwards arrow, U+2190 ISOnum
|
|
|
|
|
|
|
|
{"uarr", "↑"}, // upwards arrow, U+2191 ISOnum
|
|
|
|
|
|
|
|
{"rarr", "→"}, // rightwards arrow, U+2192 ISOnum
|
|
|
|
|
|
|
|
{"darr", "↓"}, // downwards arrow, U+2193 ISOnum
|
|
|
|
|
|
|
|
{"harr", "↔"}, // left right arrow, U+2194 ISOamsa
|
|
|
|
|
|
|
|
{"crarr", "↵"}, // downwards arrow with corner leftwards = carriage return, U+21B5 NEW
|
|
|
|
|
|
|
|
{"lArr", "⇐"}, // leftwards double arrow, U+21D0 ISOtech
|
|
|
|
|
|
|
|
// Unicode does not say that lArr is the same as the 'is implied by' arrow
|
|
|
|
|
|
|
|
// but also does not have any other character for that function. So lArr can
|
|
|
|
|
|
|
|
// be used for 'is implied by' as ISOtech suggests
|
|
|
|
|
|
|
|
{"uArr", "⇑"}, // upwards double arrow, U+21D1 ISOamsa
|
|
|
|
|
|
|
|
{"rArr", "⇒"}, // rightwards double arrow, U+21D2 ISOtech
|
|
|
|
|
|
|
|
// Unicode does not say this is the 'implies' character but does not have
|
|
|
|
|
|
|
|
// another character with this function so rArr can be used for 'implies'
|
|
|
|
|
|
|
|
// as ISOtech suggests
|
|
|
|
|
|
|
|
{"dArr", "⇓"}, // downwards double arrow, U+21D3 ISOamsa
|
|
|
|
|
|
|
|
{"hArr", "⇔"}, // left right double arrow, U+21D4 ISOamsa
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Mathematical Operators
|
|
|
|
|
|
|
|
{"forall", "∀"}, // for all, U+2200 ISOtech
|
|
|
|
|
|
|
|
{"part", "∂"}, // partial differential, U+2202 ISOtech
|
|
|
|
|
|
|
|
{"exist", "∃"}, // there exists, U+2203 ISOtech
|
|
|
|
|
|
|
|
{"empty", "∅"}, // empty set = null set, U+2205 ISOamso
|
|
|
|
|
|
|
|
{"nabla", "∇"}, // nabla = backward difference, U+2207 ISOtech
|
|
|
|
|
|
|
|
{"isin", "∈"}, // element of, U+2208 ISOtech
|
|
|
|
|
|
|
|
{"notin", "∉"}, // not an element of, U+2209 ISOtech
|
|
|
|
|
|
|
|
{"ni", "∋"}, // contains as member, U+220B ISOtech
|
|
|
|
|
|
|
|
{"prod", "∏"}, // n-ary product = product sign, U+220F ISOamsb
|
|
|
|
|
|
|
|
// prod is NOT the same character as U+03A0 'greek capital letter pi' though
|
|
|
|
|
|
|
|
// the same glyph might be used for both
|
|
|
|
|
|
|
|
{"sum", "∑"}, // n-ary summation, U+2211 ISOamsb
|
|
|
|
|
|
|
|
// sum is NOT the same character as U+03A3 'greek capital letter sigma'
|
|
|
|
|
|
|
|
// though the same glyph might be used for both
|
|
|
|
|
|
|
|
{"minus", "−"}, // minus sign, U+2212 ISOtech
|
|
|
|
|
|
|
|
{"lowast", "∗"}, // asterisk operator, U+2217 ISOtech
|
|
|
|
|
|
|
|
{"radic", "√"}, // square root = radical sign, U+221A ISOtech
|
|
|
|
|
|
|
|
{"prop", "∝"}, // proportional to, U+221D ISOtech
|
|
|
|
|
|
|
|
{"infin", "∞"}, // infinity, U+221E ISOtech
|
|
|
|
|
|
|
|
{"ang", "∠"}, // angle, U+2220 ISOamso
|
|
|
|
|
|
|
|
{"and", "∧"}, // logical and = wedge, U+2227 ISOtech
|
|
|
|
|
|
|
|
{"or", "∨"}, // logical or = vee, U+2228 ISOtech
|
|
|
|
|
|
|
|
{"cap", "∩"}, // intersection = cap, U+2229 ISOtech
|
|
|
|
|
|
|
|
{"cup", "∪"}, // union = cup, U+222A ISOtech
|
|
|
|
|
|
|
|
{"int", "∫"}, // integral, U+222B ISOtech
|
|
|
|
|
|
|
|
{"there4", "∴"}, // therefore, U+2234 ISOtech
|
|
|
|
|
|
|
|
{"sim", "∼"}, // tilde operator = varies with = similar to, U+223C ISOtech
|
|
|
|
|
|
|
|
// tilde operator is NOT the same character as the tilde, U+007E,
|
|
|
|
|
|
|
|
// although the same glyph might be used to represent both
|
|
|
|
|
|
|
|
{"cong", "≅"}, // approximately equal to, U+2245 ISOtech
|
|
|
|
|
|
|
|
{"asymp", "≈"}, // almost equal to = asymptotic to, U+2248 ISOamsr
|
|
|
|
|
|
|
|
{"ne", "≠"}, // not equal to, U+2260 ISOtech
|
|
|
|
|
|
|
|
{"equiv", "≡"}, // identical to, U+2261 ISOtech
|
|
|
|
|
|
|
|
{"le", "≤"}, // less-than or equal to, U+2264 ISOtech
|
|
|
|
|
|
|
|
{"ge", "≥"}, // greater-than or equal to, U+2265 ISOtech
|
|
|
|
|
|
|
|
{"sub", "⊂"}, // subset of, U+2282 ISOtech
|
|
|
|
|
|
|
|
{"sup", "⊃"}, // superset of, U+2283 ISOtech
|
|
|
|
|
|
|
|
{"nsub", "⊄"}, // not a subset of, U+2284 ISOamsn
|
|
|
|
|
|
|
|
{"sube", "⊆"}, // subset of or equal to, U+2286 ISOtech
|
|
|
|
|
|
|
|
{"supe", "⊇"}, // superset of or equal to, U+2287 ISOtech
|
|
|
|
|
|
|
|
{"oplus", "⊕"}, // circled plus = direct sum, U+2295 ISOamsb
|
|
|
|
|
|
|
|
{"otimes", "⊗"}, // circled times = vector product, U+2297 ISOamsb
|
|
|
|
|
|
|
|
{"perp", "⊥"}, // up tack = orthogonal to = perpendicular, U+22A5 ISOtech
|
|
|
|
|
|
|
|
{"sdot", "⋅"}, // dot operator, U+22C5 ISOamsb
|
|
|
|
|
|
|
|
// dot operator is NOT the same character as U+00B7 middle dot
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Miscellaneous Technical
|
|
|
|
|
|
|
|
{"lceil", "⌈"}, // left ceiling = APL upstile, U+2308 ISOamsc
|
|
|
|
|
|
|
|
{"rceil", "⌉"}, // right ceiling, U+2309 ISOamsc
|
|
|
|
|
|
|
|
{"lfloor", "⌊"}, // left floor = APL downstile, U+230A ISOamsc
|
|
|
|
|
|
|
|
{"rfloor", "⌋"}, // right floor, U+230B ISOamsc
|
|
|
|
|
|
|
|
{"lang", "〈"}, // left-pointing angle bracket = bra, U+2329 ISOtech
|
|
|
|
|
|
|
|
// lang is NOT the same character as U+003C 'less than sign'
|
|
|
|
|
|
|
|
// or U+2039 'single left-pointing angle quotation mark'
|
|
|
|
|
|
|
|
{"rang", "〉"}, // right-pointing angle bracket = ket, U+232A ISOtech
|
|
|
|
|
|
|
|
// rang is NOT the same character as U+003E 'greater than sign'
|
|
|
|
|
|
|
|
// or U+203A 'single right-pointing angle quotation mark'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Geometric Shapes
|
|
|
|
|
|
|
|
{"loz", "◊"}, // lozenge, U+25CA ISOpub
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Miscellaneous Symbols
|
|
|
|
|
|
|
|
{"spades", "♠"}, // black spade suit, U+2660 ISOpub
|
|
|
|
|
|
|
|
{"clubs", "♣"}, // black club suit = shamrock, U+2663 ISOpub
|
|
|
|
|
|
|
|
{"hearts", "♥"}, // black heart suit = valentine, U+2665 ISOpub
|
|
|
|
|
|
|
|
{"diams", "♦"} // black diamond suit, U+2666 ISOpub
|
|
|
|
|
|
|
|
})) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class XmlStreamEntityResolver : public QXmlStreamEntityResolver |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
public: |
|
|
|
|
|
|
|
QString resolveUndeclaredEntity(const QString &name) override |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
return HTML_ENTITIES->value(name); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
const char shortDay[][4] = { |
|
|
|
const char shortDay[][4] = { |
|
|
|
"Mon", "Tue", "Wed", |
|
|
|
"Mon", "Tue", "Wed", |
|
|
|
"Thu", "Fri", "Sat", |
|
|
|
"Thu", "Fri", "Sat", |
|
|
@ -228,6 +543,8 @@ void Parser::parse(const QByteArray &feedData) |
|
|
|
void Parser::parse_impl(const QByteArray &feedData) |
|
|
|
void Parser::parse_impl(const QByteArray &feedData) |
|
|
|
{ |
|
|
|
{ |
|
|
|
QXmlStreamReader xml(feedData); |
|
|
|
QXmlStreamReader xml(feedData); |
|
|
|
|
|
|
|
XmlStreamEntityResolver resolver; |
|
|
|
|
|
|
|
xml.setEntityResolver(&resolver); |
|
|
|
bool foundChannel = false; |
|
|
|
bool foundChannel = false; |
|
|
|
|
|
|
|
|
|
|
|
while (xml.readNextStartElement()) { |
|
|
|
while (xml.readNextStartElement()) { |
|
|
|