Browse Source

Improve RSS article parsing

Use QLatin1String for string literals.
Use predefined constants for article hash table keys.
adaptive-webui-19844
Vladimir Golovnev (Glassez) 8 years ago
parent
commit
02f77a05dc
  1. 84
      src/base/rss/private/rss_parser.cpp

84
src/base/rss/private/rss_parser.cpp

@ -39,6 +39,8 @@
#include <QVariant> #include <QVariant>
#include <QXmlStreamReader> #include <QXmlStreamReader>
#include "../rss_article.h"
namespace namespace
{ {
const char shortDay[][4] = { const char shortDay[][4] = {
@ -282,37 +284,39 @@ void Parser::parseRssArticle(QXmlStreamReader &xml)
xml.readNext(); xml.readNext();
const QString name(xml.name().toString()); const QString name(xml.name().toString());
if (xml.isEndElement() && (name == "item")) if (xml.isEndElement() && (name == QLatin1String("item")))
break; break;
if (xml.isStartElement()) { if (xml.isStartElement()) {
const QString text(xml.readElementText().trimmed()); const QString text(xml.readElementText().trimmed());
article[name] = text;
if (name == "title") { if (name == QLatin1String("title")) {
article["title"] = text; article[Article::KeyTitle] = text;
} }
else if (name == "enclosure") { else if (name == QLatin1String("enclosure")) {
if (xml.attributes().value("type") == "application/x-bittorrent") if (xml.attributes().value("type") == QLatin1String("application/x-bittorrent"))
article["torrentURL"] = xml.attributes().value("url").toString(); article[Article::KeyTorrentURL] = xml.attributes().value(QLatin1String("url")).toString();
} }
else if (name == "link") { else if (name == QLatin1String("link")) {
if (text.startsWith("magnet:", Qt::CaseInsensitive)) if (text.startsWith(QLatin1String("magnet:"), Qt::CaseInsensitive))
article["torrentURL"] = text; // magnet link instead of a news URL article[Article::KeyTorrentURL] = text; // magnet link instead of a news URL
else else
article["link"] = text; article[Article::KeyLink] = text;
}
else if (name == QLatin1String("description")) {
article[Article::KeyDescription] = text;
} }
else if (name == "description") { else if (name == QLatin1String("pubDate")) {
article["description"] = text; article[Article::KeyDate] = parseDate(text);
} }
else if (name == "pubDate") { else if (name == QLatin1String("author")) {
article["date"] = parseDate(text); article[Article::KeyAuthor] = text;
} }
else if (name == "author") { else if (name == QLatin1String("guid")) {
article["author"] = text; article[Article::KeyId] = text;
} }
else if (name == "guid") { else {
article["id"] = text; article[name] = text;
} }
} }
} }
@ -358,35 +362,34 @@ void Parser::parseAtomArticle(QXmlStreamReader &xml)
xml.readNext(); xml.readNext();
const QString name(xml.name().toString()); const QString name(xml.name().toString());
if (xml.isEndElement() && (name == "entry")) if (xml.isEndElement() && (name == QLatin1String("entry")))
break; break;
if (xml.isStartElement()) { if (xml.isStartElement()) {
const QString text(xml.readElementText().trimmed()); const QString text(xml.readElementText().trimmed());
article[name] = text;
if (name == "title") { if (name == QLatin1String("title")) {
article["title"] = text; article[Article::KeyTitle] = text;
} }
else if (name == "link") { else if (name == QLatin1String("link")) {
QString link = (xml.attributes().isEmpty() QString link = (xml.attributes().isEmpty()
? text ? text
: xml.attributes().value("href").toString()); : xml.attributes().value(QLatin1String("href")).toString());
if (link.startsWith("magnet:", Qt::CaseInsensitive)) if (link.startsWith(QLatin1String("magnet:"), Qt::CaseInsensitive))
article["torrentURL"] = link; // magnet link instead of a news URL article[Article::KeyTorrentURL] = link; // magnet link instead of a news URL
else else
// Atom feeds can have relative links, work around this and // Atom feeds can have relative links, work around this and
// take the stress of figuring article full URI from UI // take the stress of figuring article full URI from UI
// Assemble full URI // Assemble full URI
article["link"] = (m_baseUrl.isEmpty() ? link : m_baseUrl + link); article[Article::KeyLink] = (m_baseUrl.isEmpty() ? link : m_baseUrl + link);
} }
else if ((name == "summary") || (name == "content")){ else if ((name == QLatin1String("summary")) || (name == QLatin1String("content"))){
if (doubleContent) { // Duplicate content -> ignore if (doubleContent) { // Duplicate content -> ignore
xml.readNext(); xml.readNext();
while ((xml.name() != "summary") && (xml.name() != "content")) while ((xml.name() != QLatin1String("summary")) && (xml.name() != QLatin1String("content")))
xml.readNext(); xml.readNext();
continue; continue;
@ -396,25 +399,28 @@ void Parser::parseAtomArticle(QXmlStreamReader &xml)
// Actually works great for non-broken content too // Actually works great for non-broken content too
QString feedText = xml.readElementText(QXmlStreamReader::IncludeChildElements); QString feedText = xml.readElementText(QXmlStreamReader::IncludeChildElements);
if (!feedText.isEmpty()) if (!feedText.isEmpty())
article["description"] = feedText.trimmed(); article[Article::KeyDescription] = feedText.trimmed();
doubleContent = true; doubleContent = true;
} }
else if (name == "updated") { else if (name == QLatin1String("updated")) {
// ATOM uses standard compliant date, don't do fancy stuff // ATOM uses standard compliant date, don't do fancy stuff
QDateTime articleDate = QDateTime::fromString(text, Qt::ISODate); QDateTime articleDate = QDateTime::fromString(text, Qt::ISODate);
article["date"] = (articleDate.isValid() ? articleDate : QDateTime::currentDateTime()); article[Article::KeyDate] = (articleDate.isValid() ? articleDate : QDateTime::currentDateTime());
} }
else if (name == "author") { else if (name == QLatin1String("author")) {
xml.readNext(); xml.readNext();
while (xml.name() != "author") { while (xml.name() != QLatin1String("author")) {
if (xml.name() == "name") if (xml.name() == QLatin1String("name"))
article["author"] = xml.readElementText().trimmed(); article[Article::KeyAuthor] = xml.readElementText().trimmed();
xml.readNext(); xml.readNext();
} }
} }
else if (name == "id") { else if (name == QLatin1String("id")) {
article["id"] = text; article[Article::KeyId] = text;
}
else {
article[name] = text;
} }
} }
} }

Loading…
Cancel
Save