Browse Source

Merge pull request #7821 from glassez/fix-rss

Fix RSS Parser. Closes #7751, #7763, #7768, #7786
adaptive-webui-19844
Vladimir Golovnev 7 years ago committed by GitHub
parent
commit
342f5f1a09
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 85
      src/base/rss/private/rss_parser.cpp

85
src/base/rss/private/rss_parser.cpp

@ -227,10 +227,9 @@ void Parser::parse(const QByteArray &feedData)
// read and create items from a rss document // read and create items from a rss document
void Parser::parse_impl(const QByteArray &feedData) void Parser::parse_impl(const QByteArray &feedData)
{ {
qDebug() << Q_FUNC_INFO;
QXmlStreamReader xml(feedData); QXmlStreamReader xml(feedData);
bool foundChannel = false; bool foundChannel = false;
while (xml.readNextStartElement()) { while (xml.readNextStartElement()) {
if (xml.name() == "rss") { if (xml.name() == "rss") {
// Find channels // Find channels
@ -258,11 +257,15 @@ void Parser::parse_impl(const QByteArray &feedData)
} }
} }
if (xml.hasError()) if (!foundChannel) {
m_result.error = xml.errorString();
else if (!foundChannel)
m_result.error = tr("Invalid RSS feed."); m_result.error = tr("Invalid RSS feed.");
else }
else if (xml.hasError()) {
m_result.error = tr("%1 (line: %2, column: %3, offset: %4).")
.arg(xml.errorString()).arg(xml.lineNumber())
.arg(xml.columnNumber()).arg(xml.characterOffset());
}
else {
// Sort article list chronologically // Sort article list chronologically
// NOTE: We don't need to sort it here if articles are always // NOTE: We don't need to sort it here if articles are always
// sorted in fetched XML in reverse chronological order // sorted in fetched XML in reverse chronological order
@ -271,6 +274,7 @@ void Parser::parse_impl(const QByteArray &feedData)
{ {
return a1["date"].toDateTime() < a2["date"].toDateTime(); return a1["date"].toDateTime() < a2["date"].toDateTime();
}); });
}
emit finished(m_result); emit finished(m_result);
m_result.articles.clear(); // clear articles only m_result.articles.clear(); // clear articles only
@ -288,35 +292,34 @@ void Parser::parseRssArticle(QXmlStreamReader &xml)
break; break;
if (xml.isStartElement()) { if (xml.isStartElement()) {
const QString text(xml.readElementText().trimmed());
if (name == QLatin1String("title")) { if (name == QLatin1String("title")) {
article[Article::KeyTitle] = text; article[Article::KeyTitle] = xml.readElementText().trimmed();
} }
else if (name == QLatin1String("enclosure")) { else if (name == QLatin1String("enclosure")) {
if (xml.attributes().value("type") == QLatin1String("application/x-bittorrent")) if (xml.attributes().value("type") == QLatin1String("application/x-bittorrent"))
article[Article::KeyTorrentURL] = xml.attributes().value(QLatin1String("url")).toString(); article[Article::KeyTorrentURL] = xml.attributes().value(QLatin1String("url")).toString();
} }
else if (name == QLatin1String("link")) { else if (name == QLatin1String("link")) {
const QString text {xml.readElementText().trimmed()};
if (text.startsWith(QLatin1String("magnet:"), Qt::CaseInsensitive)) if (text.startsWith(QLatin1String("magnet:"), Qt::CaseInsensitive))
article[Article::KeyTorrentURL] = text; // magnet link instead of a news URL article[Article::KeyTorrentURL] = text; // magnet link instead of a news URL
else else
article[Article::KeyLink] = text; article[Article::KeyLink] = text;
} }
else if (name == QLatin1String("description")) { else if (name == QLatin1String("description")) {
article[Article::KeyDescription] = text; article[Article::KeyDescription] = xml.readElementText(QXmlStreamReader::IncludeChildElements);
} }
else if (name == QLatin1String("pubDate")) { else if (name == QLatin1String("pubDate")) {
article[Article::KeyDate] = parseDate(text); article[Article::KeyDate] = parseDate(xml.readElementText().trimmed());
} }
else if (name == QLatin1String("author")) { else if (name == QLatin1String("author")) {
article[Article::KeyAuthor] = text; article[Article::KeyAuthor] = xml.readElementText().trimmed();
} }
else if (name == QLatin1String("guid")) { else if (name == QLatin1String("guid")) {
article[Article::KeyId] = text; article[Article::KeyId] = xml.readElementText().trimmed();
} }
else { else {
article[name] = text; article[name] = xml.readElementText(QXmlStreamReader::IncludeChildElements);
} }
} }
} }
@ -326,17 +329,14 @@ void Parser::parseRssArticle(QXmlStreamReader &xml)
void Parser::parseRSSChannel(QXmlStreamReader &xml) void Parser::parseRSSChannel(QXmlStreamReader &xml)
{ {
qDebug() << Q_FUNC_INFO;
Q_ASSERT(xml.isStartElement() && xml.name() == "channel");
while (!xml.atEnd()) { while (!xml.atEnd()) {
xml.readNext(); xml.readNext();
if (xml.isStartElement()) { if (xml.isStartElement()) {
if (xml.name() == "title") { if (xml.name() == QLatin1String("title")) {
m_result.title = xml.readElementText(); m_result.title = xml.readElementText();
} }
else if (xml.name() == "lastBuildDate") { else if (xml.name() == QLatin1String("lastBuildDate")) {
QString lastBuildDate = xml.readElementText(); QString lastBuildDate = xml.readElementText();
if (!lastBuildDate.isEmpty()) { if (!lastBuildDate.isEmpty()) {
if (m_result.lastBuildDate == lastBuildDate) { if (m_result.lastBuildDate == lastBuildDate) {
@ -346,7 +346,7 @@ void Parser::parseRSSChannel(QXmlStreamReader &xml)
m_result.lastBuildDate = lastBuildDate; m_result.lastBuildDate = lastBuildDate;
} }
} }
else if (xml.name() == "item") { else if (xml.name() == QLatin1String("item")) {
parseRssArticle(xml); parseRssArticle(xml);
} }
} }
@ -366,14 +366,12 @@ void Parser::parseAtomArticle(QXmlStreamReader &xml)
break; break;
if (xml.isStartElement()) { if (xml.isStartElement()) {
const QString text(xml.readElementText().trimmed());
if (name == QLatin1String("title")) { if (name == QLatin1String("title")) {
article[Article::KeyTitle] = text; article[Article::KeyTitle] = xml.readElementText().trimmed();
} }
else if (name == QLatin1String("link")) { else if (name == QLatin1String("link")) {
QString link = (xml.attributes().isEmpty() QString link = (xml.attributes().isEmpty()
? text ? xml.readElementText().trimmed()
: xml.attributes().value(QLatin1String("href")).toString()); : xml.attributes().value(QLatin1String("href")).toString());
if (link.startsWith(QLatin1String("magnet:"), Qt::CaseInsensitive)) if (link.startsWith(QLatin1String("magnet:"), Qt::CaseInsensitive))
@ -385,42 +383,38 @@ void Parser::parseAtomArticle(QXmlStreamReader &xml)
article[Article::KeyLink] = (m_baseUrl.isEmpty() ? link : m_baseUrl + link); article[Article::KeyLink] = (m_baseUrl.isEmpty() ? link : m_baseUrl + link);
} }
else if ((name == QLatin1String("summary")) || (name == QLatin1String("content"))){ else if ((name == QLatin1String("summary")) || (name == QLatin1String("content"))) {
if (doubleContent) { // Duplicate content -> ignore if (doubleContent) { // Duplicate content -> ignore
xml.readNext(); xml.skipCurrentElement();
while ((xml.name() != QLatin1String("summary")) && (xml.name() != QLatin1String("content")))
xml.readNext();
continue; continue;
} }
// Try to also parse broken articles, which don't use html '&' escapes // Try to also parse broken articles, which don't use html '&' escapes
// Actually works great for non-broken content too // Actually works great for non-broken content too
QString feedText = xml.readElementText(QXmlStreamReader::IncludeChildElements); QString feedText = xml.readElementText(QXmlStreamReader::IncludeChildElements).trimmed();
if (!feedText.isEmpty()) if (!feedText.isEmpty()) {
article[Article::KeyDescription] = feedText.trimmed(); article[Article::KeyDescription] = feedText;
doubleContent = true;
doubleContent = true; }
} }
else if (name == QLatin1String("updated")) { else if (name == QLatin1String("updated")) {
// ATOM uses standard compliant date, don't do fancy stuff // ATOM uses standard compliant date, don't do fancy stuff
QDateTime articleDate = QDateTime::fromString(text, Qt::ISODate); QDateTime articleDate = QDateTime::fromString(xml.readElementText().trimmed(), Qt::ISODate);
article[Article::KeyDate] = (articleDate.isValid() ? articleDate : QDateTime::currentDateTime()); article[Article::KeyDate] = (articleDate.isValid() ? articleDate : QDateTime::currentDateTime());
} }
else if (name == QLatin1String("author")) { else if (name == QLatin1String("author")) {
xml.readNext(); while (xml.readNextStartElement()) {
while (xml.name() != QLatin1String("author")) {
if (xml.name() == QLatin1String("name")) if (xml.name() == QLatin1String("name"))
article[Article::KeyAuthor] = xml.readElementText().trimmed(); article[Article::KeyAuthor] = xml.readElementText().trimmed();
xml.readNext(); else
xml.skipCurrentElement();
} }
} }
else if (name == QLatin1String("id")) { else if (name == QLatin1String("id")) {
article[Article::KeyId] = text; article[Article::KeyId] = xml.readElementText().trimmed();
} }
else { else {
article[name] = text; article[name] = xml.readElementText(QXmlStreamReader::IncludeChildElements);
} }
} }
} }
@ -430,19 +424,16 @@ void Parser::parseAtomArticle(QXmlStreamReader &xml)
void Parser::parseAtomChannel(QXmlStreamReader &xml) void Parser::parseAtomChannel(QXmlStreamReader &xml)
{ {
qDebug() << Q_FUNC_INFO;
Q_ASSERT(xml.isStartElement() && xml.name() == "feed");
m_baseUrl = xml.attributes().value("xml:base").toString(); m_baseUrl = xml.attributes().value("xml:base").toString();
while (!xml.atEnd()) { while (!xml.atEnd()) {
xml.readNext(); xml.readNext();
if (xml.isStartElement()) { if (xml.isStartElement()) {
if (xml.name() == "title") { if (xml.name() == QLatin1String("title")) {
m_result.title = xml.readElementText(); m_result.title = xml.readElementText();
} }
else if (xml.name() == "updated") { else if (xml.name() == QLatin1String("updated")) {
QString lastBuildDate = xml.readElementText(); QString lastBuildDate = xml.readElementText();
if (!lastBuildDate.isEmpty()) { if (!lastBuildDate.isEmpty()) {
if (m_result.lastBuildDate == lastBuildDate) { if (m_result.lastBuildDate == lastBuildDate) {
@ -452,7 +443,7 @@ void Parser::parseAtomChannel(QXmlStreamReader &xml)
m_result.lastBuildDate = lastBuildDate; m_result.lastBuildDate = lastBuildDate;
} }
} }
else if (xml.name() == "entry") { else if (xml.name() == QLatin1String("entry")) {
parseAtomArticle(xml); parseAtomArticle(xml);
} }
} }

Loading…
Cancel
Save