Browse Source

Merge pull request #6864 from glassez/rss

Improve RSS Article handling
adaptive-webui-19844
Vladimir Golovnev 8 years ago committed by GitHub
parent
commit
c503583046
  1. 93
      src/base/rss/private/rss_parser.cpp
  2. 121
      src/base/rss/rss_article.cpp
  3. 18
      src/base/rss/rss_article.h
  4. 33
      src/base/rss/rss_autodownloader.cpp
  5. 28
      src/base/rss/rss_feed.cpp

93
src/base/rss/private/rss_parser.cpp

@ -39,6 +39,8 @@
#include <QVariant> #include <QVariant>
#include <QXmlStreamReader> #include <QXmlStreamReader>
#include "../rss_article.h"
namespace namespace
{ {
const char shortDay[][4] = { const char shortDay[][4] = {
@ -280,36 +282,41 @@ void Parser::parseRssArticle(QXmlStreamReader &xml)
while (!xml.atEnd()) { while (!xml.atEnd()) {
xml.readNext(); xml.readNext();
const QString name(xml.name().toString());
if(xml.isEndElement() && xml.name() == "item") if (xml.isEndElement() && (name == QLatin1String("item")))
break; break;
if (xml.isStartElement()) { if (xml.isStartElement()) {
if (xml.name() == "title") { const QString text(xml.readElementText().trimmed());
article["title"] = xml.readElementText().trimmed();
if (name == QLatin1String("title")) {
article[Article::KeyTitle] = text;
} }
else if (xml.name() == "enclosure") { else if (name == QLatin1String("enclosure")) {
if (xml.attributes().value("type") == "application/x-bittorrent") if (xml.attributes().value("type") == QLatin1String("application/x-bittorrent"))
article["torrent_url"] = xml.attributes().value("url").toString(); article[Article::KeyTorrentURL] = xml.attributes().value(QLatin1String("url")).toString();
} }
else if (xml.name() == "link") { else if (name == QLatin1String("link")) {
QString link = xml.readElementText().trimmed(); if (text.startsWith(QLatin1String("magnet:"), Qt::CaseInsensitive))
if (link.startsWith("magnet:", Qt::CaseInsensitive)) article[Article::KeyTorrentURL] = text; // magnet link instead of a news URL
article["torrent_url"] = link; // magnet link instead of a news URL
else else
article["news_link"] = link; article[Article::KeyLink] = text;
} }
else if (xml.name() == "description") { else if (name == QLatin1String("description")) {
article["description"] = xml.readElementText().trimmed(); article[Article::KeyDescription] = text;
} }
else if (xml.name() == "pubDate") { else if (name == QLatin1String("pubDate")) {
article["date"] = parseDate(xml.readElementText().trimmed()); article[Article::KeyDate] = parseDate(text);
} }
else if (xml.name() == "author") { else if (name == QLatin1String("author")) {
article["author"] = xml.readElementText().trimmed(); article[Article::KeyAuthor] = text;
} }
else if (xml.name() == "guid") { else if (name == QLatin1String("guid")) {
article["id"] = xml.readElementText().trimmed(); article[Article::KeyId] = text;
}
else {
article[name] = text;
} }
} }
} }
@ -353,33 +360,36 @@ void Parser::parseAtomArticle(QXmlStreamReader &xml)
while (!xml.atEnd()) { while (!xml.atEnd()) {
xml.readNext(); xml.readNext();
const QString name(xml.name().toString());
if (xml.isEndElement() && (xml.name() == "entry")) if (xml.isEndElement() && (name == QLatin1String("entry")))
break; break;
if (xml.isStartElement()) { if (xml.isStartElement()) {
if (xml.name() == "title") { const QString text(xml.readElementText().trimmed());
article["title"] = xml.readElementText().trimmed();
if (name == QLatin1String("title")) {
article[Article::KeyTitle] = text;
} }
else if (xml.name() == "link") { else if (name == QLatin1String("link")) {
QString link = (xml.attributes().isEmpty() QString link = (xml.attributes().isEmpty()
? xml.readElementText().trimmed() ? text
: xml.attributes().value("href").toString()); : xml.attributes().value(QLatin1String("href")).toString());
if (link.startsWith("magnet:", Qt::CaseInsensitive)) if (link.startsWith(QLatin1String("magnet:"), Qt::CaseInsensitive))
article["torrent_url"] = link; // magnet link instead of a news URL article[Article::KeyTorrentURL] = link; // magnet link instead of a news URL
else else
// Atom feeds can have relative links, work around this and // Atom feeds can have relative links, work around this and
// take the stress of figuring article full URI from UI // take the stress of figuring article full URI from UI
// Assemble full URI // Assemble full URI
article["news_link"] = (m_baseUrl.isEmpty() ? link : m_baseUrl + link); article[Article::KeyLink] = (m_baseUrl.isEmpty() ? link : m_baseUrl + link);
} }
else if ((xml.name() == "summary") || (xml.name() == "content")){ else if ((name == QLatin1String("summary")) || (name == QLatin1String("content"))){
if (doubleContent) { // Duplicate content -> ignore if (doubleContent) { // Duplicate content -> ignore
xml.readNext(); xml.readNext();
while ((xml.name() != "summary") && (xml.name() != "content")) while ((xml.name() != QLatin1String("summary")) && (xml.name() != QLatin1String("content")))
xml.readNext(); xml.readNext();
continue; continue;
@ -389,25 +399,28 @@ void Parser::parseAtomArticle(QXmlStreamReader &xml)
// Actually works great for non-broken content too // Actually works great for non-broken content too
QString feedText = xml.readElementText(QXmlStreamReader::IncludeChildElements); QString feedText = xml.readElementText(QXmlStreamReader::IncludeChildElements);
if (!feedText.isEmpty()) if (!feedText.isEmpty())
article["description"] = feedText.trimmed(); article[Article::KeyDescription] = feedText.trimmed();
doubleContent = true; doubleContent = true;
} }
else if (xml.name() == "updated") { else if (name == QLatin1String("updated")) {
// ATOM uses standard compliant date, don't do fancy stuff // ATOM uses standard compliant date, don't do fancy stuff
QDateTime articleDate = QDateTime::fromString(xml.readElementText().trimmed(), Qt::ISODate); QDateTime articleDate = QDateTime::fromString(text, Qt::ISODate);
article["date"] = (articleDate.isValid() ? articleDate : QDateTime::currentDateTime()); article[Article::KeyDate] = (articleDate.isValid() ? articleDate : QDateTime::currentDateTime());
} }
else if (xml.name() == "author") { else if (name == QLatin1String("author")) {
xml.readNext(); xml.readNext();
while (xml.name() != "author") { while (xml.name() != QLatin1String("author")) {
if (xml.name() == "name") if (xml.name() == QLatin1String("name"))
article["author"] = xml.readElementText().trimmed(); article[Article::KeyAuthor] = xml.readElementText().trimmed();
xml.readNext(); xml.readNext();
} }
} }
else if (xml.name() == "id") { else if (name == QLatin1String("id")) {
article["id"] = xml.readElementText().trimmed(); article[Article::KeyId] = text;
}
else {
article[name] = text;
} }
} }
} }

121
src/base/rss/rss_article.cpp

@ -30,38 +30,53 @@
#include "rss_article.h" #include "rss_article.h"
#include <stdexcept>
#include <QJsonObject> #include <QJsonObject>
#include <QVariant> #include <QVariant>
#include "rss_feed.h" #include "rss_feed.h"
const QString Str_Id(QStringLiteral("id"));
const QString Str_Date(QStringLiteral("date"));
const QString Str_Title(QStringLiteral("title"));
const QString Str_Author(QStringLiteral("author"));
const QString Str_Description(QStringLiteral("description"));
const QString Str_TorrentURL(QStringLiteral("torrentURL"));
const QString Str_Torrent_Url(QStringLiteral("torrent_url"));
const QString Str_Link(QStringLiteral("link"));
const QString Str_News_Link(QStringLiteral("news_link"));
const QString Str_IsRead(QStringLiteral("isRead"));
const QString Str_Read(QStringLiteral("read"));
using namespace RSS; using namespace RSS;
Article::Article(Feed *feed, QString guid, QDateTime date, QString title, QString author const QString Article::KeyId(QStringLiteral("id"));
, QString description, QString torrentUrl, QString link, bool isRead) const QString Article::KeyDate(QStringLiteral("date"));
const QString Article::KeyTitle(QStringLiteral("title"));
const QString Article::KeyAuthor(QStringLiteral("author"));
const QString Article::KeyDescription(QStringLiteral("description"));
const QString Article::KeyTorrentURL(QStringLiteral("torrentURL"));
const QString Article::KeyLink(QStringLiteral("link"));
const QString Article::KeyIsRead(QStringLiteral("isRead"));
Article::Article(Feed *feed, const QVariantHash &varHash)
: QObject(feed) : QObject(feed)
, m_feed(feed) , m_feed(feed)
, m_guid(guid) , m_guid(varHash.value(KeyId).toString())
, m_date(date) , m_date(varHash.value(KeyDate).toDateTime())
, m_title(title) , m_title(varHash.value(KeyTitle).toString())
, m_author(author) , m_author(varHash.value(KeyAuthor).toString())
, m_description(description) , m_description(varHash.value(KeyDescription).toString())
, m_torrentURL(torrentUrl) , m_torrentURL(varHash.value(KeyTorrentURL).toString())
, m_link(link) , m_link(varHash.value(KeyLink).toString())
, m_isRead(isRead) , m_isRead(varHash.value(KeyIsRead, false).toBool())
, m_data(varHash)
{ {
// If item does not have a guid, fall back to some other identifier
if (m_guid.isEmpty())
m_guid = varHash.value(KeyTorrentURL).toString();
if (m_guid.isEmpty())
m_guid = varHash.value(KeyTitle).toString();
if (m_guid.isEmpty())
throw std::runtime_error("Bad RSS Article data");
m_data[KeyId] = m_guid;
}
Article::Article(Feed *feed, const QJsonObject &jsonObj)
: Article(feed, jsonObj.toVariantHash())
{
// A JSON object stores DateTime as a string, so we need to convert it
m_date = QDateTime::fromString(jsonObj.value(KeyDate).toString(), Qt::RFC2822Date);
m_data[KeyDate] = m_date;
} }
QString Article::guid() const QString Article::guid() const
@ -104,26 +119,27 @@ bool Article::isRead() const
return m_isRead; return m_isRead;
} }
// Returns, by value, the hash of article fields held in m_data.
// markAsRead() updates the KeyIsRead entry of this hash alongside m_isRead.
QVariantHash Article::data() const
{
return m_data;
}
void Article::markAsRead() void Article::markAsRead()
{ {
if (!m_isRead) { if (!m_isRead) {
m_isRead = true; m_isRead = true;
m_data[KeyIsRead] = m_isRead;
emit read(this); emit read(this);
} }
} }
QJsonObject Article::toJsonObject() const QJsonObject Article::toJsonObject() const
{ {
return { auto jsonObj = QJsonObject::fromVariantHash(m_data);
{Str_Id, m_guid}, // JSON object doesn't support DateTime so we need to convert it
{Str_Date, m_date.toString(Qt::RFC2822Date)}, jsonObj[KeyDate] = m_date.toString(Qt::RFC2822Date);
{Str_Title, m_title},
{Str_Author, m_author}, return jsonObj;
{Str_Description, m_description},
{Str_TorrentURL, m_torrentURL},
{Str_Link, m_link},
{Str_IsRead, m_isRead}
};
} }
bool Article::articleDateRecentThan(Article *article, const QDateTime &date) bool Article::articleDateRecentThan(Article *article, const QDateTime &date)
@ -131,47 +147,6 @@ bool Article::articleDateRecentThan(Article *article, const QDateTime &date)
return article->date() > date; return article->date() > date;
} }
// Builds an Article from its stored JSON representation.
//
// feed    - feed handed to the Article constructor.
// jsonObj - JSON object holding the article fields.
//
// Returns a newly allocated Article, or nullptr when jsonObj provides no
// usable identifier (neither an id, a torrent URL nor a title).
Article *Article::fromJsonObject(Feed *feed, const QJsonObject &jsonObj)
{
    QString guid = jsonObj.value(Str_Id).toString();
    // If item does not have a guid, fall back to some other identifier.
    // toJsonObject() writes the torrent URL under Str_TorrentURL, so that key
    // has to be consulted first; the legacy Str_Torrent_Url lookup is kept so
    // that data carrying only the old key still resolves the same way.
    if (guid.isEmpty())
        guid = jsonObj.value(Str_TorrentURL).toString();
    if (guid.isEmpty())
        guid = jsonObj.value(Str_Torrent_Url).toString();
    if (guid.isEmpty())
        guid = jsonObj.value(Str_Title).toString();
    if (guid.isEmpty())
        return nullptr;

    return new Article(
                feed, guid
                // JSON has no date type: the date is stored as an RFC 2822 string
                , QDateTime::fromString(jsonObj.value(Str_Date).toString(), Qt::RFC2822Date)
                , jsonObj.value(Str_Title).toString()
                , jsonObj.value(Str_Author).toString()
                , jsonObj.value(Str_Description).toString()
                , jsonObj.value(Str_TorrentURL).toString()
                , jsonObj.value(Str_Link).toString()
                , jsonObj.value(Str_IsRead).toBool(false));
}
// Builds an Article from a variant hash that uses the legacy key names
// (Str_Torrent_Url, Str_News_Link, Str_Read).
//
// Returns a newly allocated Article, or nullptr when the hash yields no
// identifier at all (no id, no torrent URL and no title).
Article *Article::fromVariantHash(Feed *feed, const QVariantHash &varHash)
{
    // Small lookup helper: string value stored under `key` (empty if absent).
    const auto textOf = [&varHash](const QString &key) -> QString
    {
        return varHash.value(key).toString();
    };

    // Prefer the explicit id; otherwise fall back to the torrent URL and
    // finally to the title.
    QString articleId = textOf(Str_Id);
    if (articleId.isEmpty()) {
        articleId = textOf(Str_Torrent_Url);
        if (articleId.isEmpty())
            articleId = textOf(Str_Title);
    }
    if (articleId.isEmpty())
        return nullptr;

    const QDateTime published = varHash.value(Str_Date).toDateTime();
    const bool alreadyRead = varHash.value(Str_Read, false).toBool();

    return new Article(feed, articleId, published
                       , textOf(Str_Title)
                       , textOf(Str_Author)
                       , textOf(Str_Description)
                       , textOf(Str_Torrent_Url)
                       , textOf(Str_News_Link)
                       , alreadyRead);
}
Feed *Article::feed() const Feed *Article::feed() const
{ {
return m_feed; return m_feed;

18
src/base/rss/rss_article.h

@ -33,6 +33,7 @@
#include <QDateTime> #include <QDateTime>
#include <QObject> #include <QObject>
#include <QString> #include <QString>
#include <QVariantHash>
namespace RSS namespace RSS
{ {
@ -45,12 +46,19 @@ namespace RSS
friend class Feed; friend class Feed;
Article(Feed *feed, QString guid, QDateTime date, QString title, QString author Article(Feed *feed, const QVariantHash &varHash);
, QString description, QString torrentUrl, QString link, bool isRead = false); Article(Feed *feed, const QJsonObject &jsonObj);
static Article *fromJsonObject(Feed *feed, const QJsonObject &jsonObj);
static Article *fromVariantHash(Feed *feed, const QVariantHash &varHash);
public: public:
static const QString KeyId;
static const QString KeyDate;
static const QString KeyTitle;
static const QString KeyAuthor;
static const QString KeyDescription;
static const QString KeyTorrentURL;
static const QString KeyLink;
static const QString KeyIsRead;
Feed *feed() const; Feed *feed() const;
QString guid() const; QString guid() const;
QDateTime date() const; QDateTime date() const;
@ -60,6 +68,7 @@ namespace RSS
QString torrentUrl() const; QString torrentUrl() const;
QString link() const; QString link() const;
bool isRead() const; bool isRead() const;
QVariantHash data() const;
void markAsRead(); void markAsRead();
@ -80,5 +89,6 @@ namespace RSS
QString m_torrentURL; QString m_torrentURL;
QString m_link; QString m_link;
bool m_isRead = false; bool m_isRead = false;
QVariantHash m_data;
}; };
} }

33
src/base/rss/rss_autodownloader.cpp

@ -36,6 +36,7 @@
#include <QSaveFile> #include <QSaveFile>
#include <QThread> #include <QThread>
#include <QTimer> #include <QTimer>
#include <QVariant>
#include "../bittorrent/magneturi.h" #include "../bittorrent/magneturi.h"
#include "../bittorrent/session.h" #include "../bittorrent/session.h"
@ -54,10 +55,7 @@
struct ProcessingJob struct ProcessingJob
{ {
QString feedURL; QString feedURL;
QString articleGUID; QVariantHash articleData;
QString articleTitle;
QDateTime articleDate;
QString torrentURL;
}; };
const QString ConfFolderName(QStringLiteral("rss")); const QString ConfFolderName(QStringLiteral("rss"));
@ -191,8 +189,8 @@ void AutoDownloader::handleTorrentDownloadFinished(const QString &url)
auto job = m_waitingJobs.take(url); auto job = m_waitingJobs.take(url);
if (!job) return; if (!job) return;
if (auto feed = Session::instance()->feedByURL(job->feedURL)) if (Feed *feed = Session::instance()->feedByURL(job->feedURL))
if (auto article = feed->articleByGUID(job->articleGUID)) if (Article *article = feed->articleByGUID(job->articleData.value(Article::KeyId).toString()))
article->markAsRead(); article->markAsRead();
} }
@ -220,10 +218,7 @@ void AutoDownloader::addJobForArticle(Article *article)
QSharedPointer<ProcessingJob> job(new ProcessingJob); QSharedPointer<ProcessingJob> job(new ProcessingJob);
job->feedURL = article->feed()->url(); job->feedURL = article->feed()->url();
job->articleGUID = article->guid(); job->articleData = article->data();
job->articleTitle = article->title();
job->articleDate = article->date();
job->torrentURL = torrentURL;
m_processingQueue.append(job); m_processingQueue.append(job);
if (!m_processingTimer->isActive()) if (!m_processingTimer->isActive())
m_processingTimer->start(); m_processingTimer->start();
@ -234,17 +229,18 @@ void AutoDownloader::processJob(const QSharedPointer<ProcessingJob> &job)
for (AutoDownloadRule &rule: m_rules) { for (AutoDownloadRule &rule: m_rules) {
if (!rule.isEnabled()) continue; if (!rule.isEnabled()) continue;
if (!rule.feedURLs().contains(job->feedURL)) continue; if (!rule.feedURLs().contains(job->feedURL)) continue;
if (!rule.matches(job->articleTitle)) continue; if (!rule.matches(job->articleData.value(Article::KeyTitle).toString())) continue;
auto articleDate = job->articleData.value(Article::KeyDate).toDateTime();
// if rule is in ignoring state do nothing with matched torrent // if rule is in ignoring state do nothing with matched torrent
if (rule.ignoreDays() > 0) { if (rule.ignoreDays() > 0) {
if (rule.lastMatch().isValid()) { if (rule.lastMatch().isValid()) {
if (job->articleDate < rule.lastMatch().addDays(rule.ignoreDays())) if (articleDate < rule.lastMatch().addDays(rule.ignoreDays()))
return; return;
} }
} }
rule.setLastMatch(job->articleDate); rule.setLastMatch(articleDate);
m_dirty = true; m_dirty = true;
storeDeferred(); storeDeferred();
@ -252,18 +248,19 @@ void AutoDownloader::processJob(const QSharedPointer<ProcessingJob> &job)
params.savePath = rule.savePath(); params.savePath = rule.savePath();
params.category = rule.assignedCategory(); params.category = rule.assignedCategory();
params.addPaused = rule.addPaused(); params.addPaused = rule.addPaused();
BitTorrent::Session::instance()->addTorrent(job->torrentURL, params); auto torrentURL = job->articleData.value(Article::KeyTorrentURL).toString();
BitTorrent::Session::instance()->addTorrent(torrentURL, params);
if (BitTorrent::MagnetUri(job->torrentURL).isValid()) { if (BitTorrent::MagnetUri(torrentURL).isValid()) {
if (auto feed = Session::instance()->feedByURL(job->feedURL)) { if (Feed *feed = Session::instance()->feedByURL(job->feedURL)) {
if (auto article = feed->articleByGUID(job->articleGUID)) if (Article *article = feed->articleByGUID(job->articleData.value(Article::KeyId).toString()))
article->markAsRead(); article->markAsRead();
} }
} }
else { else {
// waiting for torrent file downloading // waiting for torrent file downloading
// normalize URL string via QUrl since DownloadManager does it // normalize URL string via QUrl since DownloadManager does it
m_waitingJobs.insert(QUrl(job->torrentURL).toString(), job); m_waitingJobs.insert(QUrl(torrentURL).toString(), job);
} }
return; return;

28
src/base/rss/rss_feed.cpp

@ -207,13 +207,14 @@ void Feed::handleParsingFinished(const RSS::Private::ParsingResult &result)
m_lastBuildDate = result.lastBuildDate; m_lastBuildDate = result.lastBuildDate;
foreach (const QVariantHash &varHash, result.articles) { foreach (const QVariantHash &varHash, result.articles) {
auto article = Article::fromVariantHash(this, varHash); try {
if (article) { auto article = new Article(this, varHash);
if (!addArticle(article)) if (!addArticle(article))
delete article; delete article;
else else
m_dirty = true; m_dirty = true;
} }
catch (const std::runtime_error&) {}
} }
store(); store();
@ -272,9 +273,12 @@ void Feed::loadArticles(const QByteArray &data)
continue; continue;
} }
auto article = Article::fromJsonObject(this, jsonVal.toObject()); try {
if (article && !addArticle(article)) auto article = new Article(this, jsonVal.toObject());
delete article; if (!addArticle(article))
delete article;
}
catch (const std::runtime_error&) {}
} }
} }
@ -284,9 +288,17 @@ void Feed::loadArticlesLegacy()
QVariantHash allOldItems = qBTRSSFeeds->value("old_items").toHash(); QVariantHash allOldItems = qBTRSSFeeds->value("old_items").toHash();
foreach (const QVariant &var, allOldItems.value(m_url).toList()) { foreach (const QVariant &var, allOldItems.value(m_url).toList()) {
auto article = Article::fromVariantHash(this, var.toHash()); auto hash = var.toHash();
if (article && !addArticle(article)) // update legacy keys
delete article; hash[Article::KeyLink] = hash.take(QLatin1String("news_link"));
hash[Article::KeyTorrentURL] = hash.take(QLatin1String("torrent_url"));
hash[Article::KeyIsRead] = hash.take(QLatin1String("read"));
try {
auto article = new Article(this, hash);
if (!addArticle(article))
delete article;
}
catch (const std::runtime_error&) {}
} }
} }

Loading…
Cancel
Save