Browse Source

Merge pull request #6864 from glassez/rss

Improve RSS Article handling
adaptive-webui-19844
Vladimir Golovnev 7 years ago committed by GitHub
parent
commit
c503583046
  1. 93
      src/base/rss/private/rss_parser.cpp
  2. 121
      src/base/rss/rss_article.cpp
  3. 18
      src/base/rss/rss_article.h
  4. 33
      src/base/rss/rss_autodownloader.cpp
  5. 24
      src/base/rss/rss_feed.cpp

93
src/base/rss/private/rss_parser.cpp

@ -39,6 +39,8 @@ @@ -39,6 +39,8 @@
#include <QVariant>
#include <QXmlStreamReader>
#include "../rss_article.h"
namespace
{
const char shortDay[][4] = {
@ -280,36 +282,41 @@ void Parser::parseRssArticle(QXmlStreamReader &xml) @@ -280,36 +282,41 @@ void Parser::parseRssArticle(QXmlStreamReader &xml)
while (!xml.atEnd()) {
xml.readNext();
const QString name(xml.name().toString());
if(xml.isEndElement() && xml.name() == "item")
if (xml.isEndElement() && (name == QLatin1String("item")))
break;
if (xml.isStartElement()) {
if (xml.name() == "title") {
article["title"] = xml.readElementText().trimmed();
const QString text(xml.readElementText().trimmed());
if (name == QLatin1String("title")) {
article[Article::KeyTitle] = text;
}
else if (xml.name() == "enclosure") {
if (xml.attributes().value("type") == "application/x-bittorrent")
article["torrent_url"] = xml.attributes().value("url").toString();
else if (name == QLatin1String("enclosure")) {
if (xml.attributes().value("type") == QLatin1String("application/x-bittorrent"))
article[Article::KeyTorrentURL] = xml.attributes().value(QLatin1String("url")).toString();
}
else if (xml.name() == "link") {
QString link = xml.readElementText().trimmed();
if (link.startsWith("magnet:", Qt::CaseInsensitive))
article["torrent_url"] = link; // magnet link instead of a news URL
else if (name == QLatin1String("link")) {
if (text.startsWith(QLatin1String("magnet:"), Qt::CaseInsensitive))
article[Article::KeyTorrentURL] = text; // magnet link instead of a news URL
else
article["news_link"] = link;
article[Article::KeyLink] = text;
}
else if (xml.name() == "description") {
article["description"] = xml.readElementText().trimmed();
else if (name == QLatin1String("description")) {
article[Article::KeyDescription] = text;
}
else if (xml.name() == "pubDate") {
article["date"] = parseDate(xml.readElementText().trimmed());
else if (name == QLatin1String("pubDate")) {
article[Article::KeyDate] = parseDate(text);
}
else if (xml.name() == "author") {
article["author"] = xml.readElementText().trimmed();
else if (name == QLatin1String("author")) {
article[Article::KeyAuthor] = text;
}
else if (xml.name() == "guid") {
article["id"] = xml.readElementText().trimmed();
else if (name == QLatin1String("guid")) {
article[Article::KeyId] = text;
}
else {
article[name] = text;
}
}
}
@ -353,33 +360,36 @@ void Parser::parseAtomArticle(QXmlStreamReader &xml) @@ -353,33 +360,36 @@ void Parser::parseAtomArticle(QXmlStreamReader &xml)
while (!xml.atEnd()) {
xml.readNext();
const QString name(xml.name().toString());
if (xml.isEndElement() && (xml.name() == "entry"))
if (xml.isEndElement() && (name == QLatin1String("entry")))
break;
if (xml.isStartElement()) {
if (xml.name() == "title") {
article["title"] = xml.readElementText().trimmed();
const QString text(xml.readElementText().trimmed());
if (name == QLatin1String("title")) {
article[Article::KeyTitle] = text;
}
else if (xml.name() == "link") {
else if (name == QLatin1String("link")) {
QString link = (xml.attributes().isEmpty()
? xml.readElementText().trimmed()
: xml.attributes().value("href").toString());
? text
: xml.attributes().value(QLatin1String("href")).toString());
if (link.startsWith("magnet:", Qt::CaseInsensitive))
article["torrent_url"] = link; // magnet link instead of a news URL
if (link.startsWith(QLatin1String("magnet:"), Qt::CaseInsensitive))
article[Article::KeyTorrentURL] = link; // magnet link instead of a news URL
else
// Atom feeds can have relative links, work around this and
// take the stress of figuring article full URI from UI
// Assemble full URI
article["news_link"] = (m_baseUrl.isEmpty() ? link : m_baseUrl + link);
article[Article::KeyLink] = (m_baseUrl.isEmpty() ? link : m_baseUrl + link);
}
else if ((xml.name() == "summary") || (xml.name() == "content")){
else if ((name == QLatin1String("summary")) || (name == QLatin1String("content"))){
if (doubleContent) { // Duplicate content -> ignore
xml.readNext();
while ((xml.name() != "summary") && (xml.name() != "content"))
while ((xml.name() != QLatin1String("summary")) && (xml.name() != QLatin1String("content")))
xml.readNext();
continue;
@ -389,25 +399,28 @@ void Parser::parseAtomArticle(QXmlStreamReader &xml) @@ -389,25 +399,28 @@ void Parser::parseAtomArticle(QXmlStreamReader &xml)
// Actually works great for non-broken content too
QString feedText = xml.readElementText(QXmlStreamReader::IncludeChildElements);
if (!feedText.isEmpty())
article["description"] = feedText.trimmed();
article[Article::KeyDescription] = feedText.trimmed();
doubleContent = true;
}
else if (xml.name() == "updated") {
else if (name == QLatin1String("updated")) {
// ATOM uses standard compliant date, don't do fancy stuff
QDateTime articleDate = QDateTime::fromString(xml.readElementText().trimmed(), Qt::ISODate);
article["date"] = (articleDate.isValid() ? articleDate : QDateTime::currentDateTime());
QDateTime articleDate = QDateTime::fromString(text, Qt::ISODate);
article[Article::KeyDate] = (articleDate.isValid() ? articleDate : QDateTime::currentDateTime());
}
else if (xml.name() == "author") {
else if (name == QLatin1String("author")) {
xml.readNext();
while (xml.name() != "author") {
if (xml.name() == "name")
article["author"] = xml.readElementText().trimmed();
while (xml.name() != QLatin1String("author")) {
if (xml.name() == QLatin1String("name"))
article[Article::KeyAuthor] = xml.readElementText().trimmed();
xml.readNext();
}
}
else if (xml.name() == "id") {
article["id"] = xml.readElementText().trimmed();
else if (name == QLatin1String("id")) {
article[Article::KeyId] = text;
}
else {
article[name] = text;
}
}
}

121
src/base/rss/rss_article.cpp

@ -30,38 +30,53 @@ @@ -30,38 +30,53 @@
#include "rss_article.h"
#include <stdexcept>
#include <QJsonObject>
#include <QVariant>
#include "rss_feed.h"
const QString Str_Id(QStringLiteral("id"));
const QString Str_Date(QStringLiteral("date"));
const QString Str_Title(QStringLiteral("title"));
const QString Str_Author(QStringLiteral("author"));
const QString Str_Description(QStringLiteral("description"));
const QString Str_TorrentURL(QStringLiteral("torrentURL"));
const QString Str_Torrent_Url(QStringLiteral("torrent_url"));
const QString Str_Link(QStringLiteral("link"));
const QString Str_News_Link(QStringLiteral("news_link"));
const QString Str_IsRead(QStringLiteral("isRead"));
const QString Str_Read(QStringLiteral("read"));
using namespace RSS;
Article::Article(Feed *feed, QString guid, QDateTime date, QString title, QString author
, QString description, QString torrentUrl, QString link, bool isRead)
const QString Article::KeyId(QStringLiteral("id"));
const QString Article::KeyDate(QStringLiteral("date"));
const QString Article::KeyTitle(QStringLiteral("title"));
const QString Article::KeyAuthor(QStringLiteral("author"));
const QString Article::KeyDescription(QStringLiteral("description"));
const QString Article::KeyTorrentURL(QStringLiteral("torrentURL"));
const QString Article::KeyLink(QStringLiteral("link"));
const QString Article::KeyIsRead(QStringLiteral("isRead"));
Article::Article(Feed *feed, const QVariantHash &varHash)
: QObject(feed)
, m_feed(feed)
, m_guid(guid)
, m_date(date)
, m_title(title)
, m_author(author)
, m_description(description)
, m_torrentURL(torrentUrl)
, m_link(link)
, m_isRead(isRead)
, m_guid(varHash.value(KeyId).toString())
, m_date(varHash.value(KeyDate).toDateTime())
, m_title(varHash.value(KeyTitle).toString())
, m_author(varHash.value(KeyAuthor).toString())
, m_description(varHash.value(KeyDescription).toString())
, m_torrentURL(varHash.value(KeyTorrentURL).toString())
, m_link(varHash.value(KeyLink).toString())
, m_isRead(varHash.value(KeyIsRead, false).toBool())
, m_data(varHash)
{
// If item does not have a guid, fall back to some other identifier
if (m_guid.isEmpty())
m_guid = varHash.value(KeyTorrentURL).toString();
if (m_guid.isEmpty())
m_guid = varHash.value(KeyTitle).toString();
if (m_guid.isEmpty())
throw std::runtime_error("Bad RSS Article data");
m_data[KeyId] = m_guid;
}
// Constructs an article from its JSON representation by delegating to the
// QVariantHash constructor and then repairing the date field.
Article::Article(Feed *feed, const QJsonObject &jsonObj)
    : Article(feed, jsonObj.toVariantHash())
{
    // The JSON object stores the date as an RFC 2822 string, so parse it here
    // and overwrite both the member and the entry kept in the data hash.
    const QString rawDate = jsonObj.value(KeyDate).toString();
    m_date = QDateTime::fromString(rawDate, Qt::RFC2822Date);
    m_data[KeyDate] = m_date;
}
QString Article::guid() const
@ -104,26 +119,27 @@ bool Article::isRead() const @@ -104,26 +119,27 @@ bool Article::isRead() const
return m_isRead;
}
// Returns a copy of the key/value hash held in m_data for this article.
QVariantHash Article::data() const
{
return m_data;
}
// Marks the article as read, mirrors the flag into the data hash and emits
// read(). Does nothing when the article is already marked as read.
void Article::markAsRead()
{
    if (m_isRead)
        return;

    m_isRead = true;
    m_data[KeyIsRead] = true;
    emit read(this);
}
QJsonObject Article::toJsonObject() const
{
return {
{Str_Id, m_guid},
{Str_Date, m_date.toString(Qt::RFC2822Date)},
{Str_Title, m_title},
{Str_Author, m_author},
{Str_Description, m_description},
{Str_TorrentURL, m_torrentURL},
{Str_Link, m_link},
{Str_IsRead, m_isRead}
};
auto jsonObj = QJsonObject::fromVariantHash(m_data);
// JSON object doesn't support DateTime so we need to convert it
jsonObj[KeyDate] = m_date.toString(Qt::RFC2822Date);
return jsonObj;
}
bool Article::articleDateRecentThan(Article *article, const QDateTime &date)
@ -131,47 +147,6 @@ bool Article::articleDateRecentThan(Article *article, const QDateTime &date) @@ -131,47 +147,6 @@ bool Article::articleDateRecentThan(Article *article, const QDateTime &date)
return article->date() > date;
}
// Creates an Article parented to `feed` from a JSON object.
// Returns nullptr when no identifier (id, torrent URL or title) is available.
Article *Article::fromJsonObject(Feed *feed, const QJsonObject &jsonObj)
{
    QString guid = jsonObj.value(Str_Id).toString();
    // If item does not have a guid, fall back to some other identifier.
    // The torrent URL is read below (and written by toJsonObject()) under
    // Str_TorrentURL, so that key has to be consulted here as well; looking
    // only at Str_Torrent_Url would miss data written by toJsonObject().
    if (guid.isEmpty())
        guid = jsonObj.value(Str_TorrentURL).toString();
    // Previously used key, kept so existing behaviour is not lost
    if (guid.isEmpty())
        guid = jsonObj.value(Str_Torrent_Url).toString();
    if (guid.isEmpty())
        guid = jsonObj.value(Str_Title).toString();
    if (guid.isEmpty())
        return nullptr;

    return new Article(
        feed, guid
        // the date is stored as an RFC 2822 formatted string
        , QDateTime::fromString(jsonObj.value(Str_Date).toString(), Qt::RFC2822Date)
        , jsonObj.value(Str_Title).toString()
        , jsonObj.value(Str_Author).toString()
        , jsonObj.value(Str_Description).toString()
        , jsonObj.value(Str_TorrentURL).toString()
        , jsonObj.value(Str_Link).toString()
        , jsonObj.value(Str_IsRead).toBool(false));
}
// Creates an Article parented to `feed` from a variant hash.
// Returns nullptr when the id, torrent URL and title entries are all empty.
Article *Article::fromVariantHash(Feed *feed, const QVariantHash &varHash)
{
    // Use the first non-empty candidate as identifier: id, torrent URL, title
    QString articleId = varHash.value(Str_Id).toString();
    if (articleId.isEmpty()) {
        articleId = varHash.value(Str_Torrent_Url).toString();
        if (articleId.isEmpty())
            articleId = varHash.value(Str_Title).toString();
    }
    if (articleId.isEmpty())
        return nullptr;

    const QDateTime date = varHash.value(Str_Date).toDateTime();
    const QString title = varHash.value(Str_Title).toString();
    const QString author = varHash.value(Str_Author).toString();
    const QString description = varHash.value(Str_Description).toString();
    const QString torrentUrl = varHash.value(Str_Torrent_Url).toString();
    const QString newsLink = varHash.value(Str_News_Link).toString();
    const bool alreadyRead = varHash.value(Str_Read, false).toBool();

    return new Article(feed, articleId, date, title, author
                       , description, torrentUrl, newsLink, alreadyRead);
}
Feed *Article::feed() const
{
return m_feed;

18
src/base/rss/rss_article.h

@ -33,6 +33,7 @@ @@ -33,6 +33,7 @@
#include <QDateTime>
#include <QObject>
#include <QString>
#include <QVariantHash>
namespace RSS
{
@ -45,12 +46,19 @@ namespace RSS @@ -45,12 +46,19 @@ namespace RSS
friend class Feed;
Article(Feed *feed, QString guid, QDateTime date, QString title, QString author
, QString description, QString torrentUrl, QString link, bool isRead = false);
static Article *fromJsonObject(Feed *feed, const QJsonObject &jsonObj);
static Article *fromVariantHash(Feed *feed, const QVariantHash &varHash);
Article(Feed *feed, const QVariantHash &varHash);
Article(Feed *feed, const QJsonObject &jsonObj);
public:
static const QString KeyId;
static const QString KeyDate;
static const QString KeyTitle;
static const QString KeyAuthor;
static const QString KeyDescription;
static const QString KeyTorrentURL;
static const QString KeyLink;
static const QString KeyIsRead;
Feed *feed() const;
QString guid() const;
QDateTime date() const;
@ -60,6 +68,7 @@ namespace RSS @@ -60,6 +68,7 @@ namespace RSS
QString torrentUrl() const;
QString link() const;
bool isRead() const;
QVariantHash data() const;
void markAsRead();
@ -80,5 +89,6 @@ namespace RSS @@ -80,5 +89,6 @@ namespace RSS
QString m_torrentURL;
QString m_link;
bool m_isRead = false;
QVariantHash m_data;
};
}

33
src/base/rss/rss_autodownloader.cpp

@ -36,6 +36,7 @@ @@ -36,6 +36,7 @@
#include <QSaveFile>
#include <QThread>
#include <QTimer>
#include <QVariant>
#include "../bittorrent/magneturi.h"
#include "../bittorrent/session.h"
@ -54,10 +55,7 @@ @@ -54,10 +55,7 @@
struct ProcessingJob
{
QString feedURL;
QString articleGUID;
QString articleTitle;
QDateTime articleDate;
QString torrentURL;
QVariantHash articleData;
};
const QString ConfFolderName(QStringLiteral("rss"));
@ -191,8 +189,8 @@ void AutoDownloader::handleTorrentDownloadFinished(const QString &url) @@ -191,8 +189,8 @@ void AutoDownloader::handleTorrentDownloadFinished(const QString &url)
auto job = m_waitingJobs.take(url);
if (!job) return;
if (auto feed = Session::instance()->feedByURL(job->feedURL))
if (auto article = feed->articleByGUID(job->articleGUID))
if (Feed *feed = Session::instance()->feedByURL(job->feedURL))
if (Article *article = feed->articleByGUID(job->articleData.value(Article::KeyId).toString()))
article->markAsRead();
}
@ -220,10 +218,7 @@ void AutoDownloader::addJobForArticle(Article *article) @@ -220,10 +218,7 @@ void AutoDownloader::addJobForArticle(Article *article)
QSharedPointer<ProcessingJob> job(new ProcessingJob);
job->feedURL = article->feed()->url();
job->articleGUID = article->guid();
job->articleTitle = article->title();
job->articleDate = article->date();
job->torrentURL = torrentURL;
job->articleData = article->data();
m_processingQueue.append(job);
if (!m_processingTimer->isActive())
m_processingTimer->start();
@ -234,17 +229,18 @@ void AutoDownloader::processJob(const QSharedPointer<ProcessingJob> &job) @@ -234,17 +229,18 @@ void AutoDownloader::processJob(const QSharedPointer<ProcessingJob> &job)
for (AutoDownloadRule &rule: m_rules) {
if (!rule.isEnabled()) continue;
if (!rule.feedURLs().contains(job->feedURL)) continue;
if (!rule.matches(job->articleTitle)) continue;
if (!rule.matches(job->articleData.value(Article::KeyTitle).toString())) continue;
auto articleDate = job->articleData.value(Article::KeyDate).toDateTime();
// if rule is in ignoring state do nothing with matched torrent
if (rule.ignoreDays() > 0) {
if (rule.lastMatch().isValid()) {
if (job->articleDate < rule.lastMatch().addDays(rule.ignoreDays()))
if (articleDate < rule.lastMatch().addDays(rule.ignoreDays()))
return;
}
}
rule.setLastMatch(job->articleDate);
rule.setLastMatch(articleDate);
m_dirty = true;
storeDeferred();
@ -252,18 +248,19 @@ void AutoDownloader::processJob(const QSharedPointer<ProcessingJob> &job) @@ -252,18 +248,19 @@ void AutoDownloader::processJob(const QSharedPointer<ProcessingJob> &job)
params.savePath = rule.savePath();
params.category = rule.assignedCategory();
params.addPaused = rule.addPaused();
BitTorrent::Session::instance()->addTorrent(job->torrentURL, params);
auto torrentURL = job->articleData.value(Article::KeyTorrentURL).toString();
BitTorrent::Session::instance()->addTorrent(torrentURL, params);
if (BitTorrent::MagnetUri(job->torrentURL).isValid()) {
if (auto feed = Session::instance()->feedByURL(job->feedURL)) {
if (auto article = feed->articleByGUID(job->articleGUID))
if (BitTorrent::MagnetUri(torrentURL).isValid()) {
if (Feed *feed = Session::instance()->feedByURL(job->feedURL)) {
if (Article *article = feed->articleByGUID(job->articleData.value(Article::KeyId).toString()))
article->markAsRead();
}
}
else {
// waiting for torrent file downloading
// normalize URL string via QUrl since DownloadManager does it
m_waitingJobs.insert(QUrl(job->torrentURL).toString(), job);
m_waitingJobs.insert(QUrl(torrentURL).toString(), job);
}
return;

24
src/base/rss/rss_feed.cpp

@ -207,13 +207,14 @@ void Feed::handleParsingFinished(const RSS::Private::ParsingResult &result) @@ -207,13 +207,14 @@ void Feed::handleParsingFinished(const RSS::Private::ParsingResult &result)
m_lastBuildDate = result.lastBuildDate;
foreach (const QVariantHash &varHash, result.articles) {
auto article = Article::fromVariantHash(this, varHash);
if (article) {
try {
auto article = new Article(this, varHash);
if (!addArticle(article))
delete article;
else
m_dirty = true;
}
catch (const std::runtime_error&) {}
}
store();
@ -272,10 +273,13 @@ void Feed::loadArticles(const QByteArray &data) @@ -272,10 +273,13 @@ void Feed::loadArticles(const QByteArray &data)
continue;
}
auto article = Article::fromJsonObject(this, jsonVal.toObject());
if (article && !addArticle(article))
try {
auto article = new Article(this, jsonVal.toObject());
if (!addArticle(article))
delete article;
}
catch (const std::runtime_error&) {}
}
}
void Feed::loadArticlesLegacy()
@ -284,10 +288,18 @@ void Feed::loadArticlesLegacy() @@ -284,10 +288,18 @@ void Feed::loadArticlesLegacy()
QVariantHash allOldItems = qBTRSSFeeds->value("old_items").toHash();
foreach (const QVariant &var, allOldItems.value(m_url).toList()) {
auto article = Article::fromVariantHash(this, var.toHash());
if (article && !addArticle(article))
auto hash = var.toHash();
// update legacy keys
hash[Article::KeyLink] = hash.take(QLatin1String("news_link"));
hash[Article::KeyTorrentURL] = hash.take(QLatin1String("torrent_url"));
hash[Article::KeyIsRead] = hash.take(QLatin1String("read"));
try {
auto article = new Article(this, hash);
if (!addArticle(article))
delete article;
}
catch (const std::runtime_error&) {}
}
}
void Feed::store()

Loading…
Cancel
Save