Browse Source

Improve RSS Feed updating

Don't process "out-of-limit" articles.
Closes #9833.
adaptive-webui-19844
Vladimir Golovnev (Glassez) 6 years ago
parent
commit
64d7cf4794
No known key found for this signature in database
GPG Key ID: 52A2C7DEE2DFA6F7
  1. 10
      src/base/rss/private/rss_parser.cpp
  2. 18
      src/base/rss/rss_article.cpp
  3. 127
      src/base/rss/rss_feed.cpp
  4. 1
      src/base/rss/rss_feed.h

10
src/base/rss/private/rss_parser.cpp

@ -582,16 +582,6 @@ void Parser::parse_impl(const QByteArray &feedData) @@ -582,16 +582,6 @@ void Parser::parse_impl(const QByteArray &feedData)
.arg(xml.errorString()).arg(xml.lineNumber())
.arg(xml.columnNumber()).arg(xml.characterOffset());
}
else {
// Sort article list chronologically
// NOTE: We don't need to sort it here if articles are always
// sorted in fetched XML in reverse chronological order
std::sort(m_result.articles.begin(), m_result.articles.end()
, [](const QVariantHash &a1, const QVariantHash &a2)
{
return a1["date"].toDateTime() < a2["date"].toDateTime();
});
}
emit finished(m_result);
m_result.articles.clear(); // clear articles only

18
src/base/rss/rss_article.cpp

@ -30,7 +30,6 @@ @@ -30,7 +30,6 @@
#include "rss_article.h"
#include <stdexcept>
#include <QJsonObject>
#include <QVariant>
@ -73,23 +72,6 @@ Article::Article(Feed *feed, const QVariantHash &varHash) @@ -73,23 +72,6 @@ Article::Article(Feed *feed, const QVariantHash &varHash)
, m_isRead(varHash.value(KeyIsRead, false).toBool())
, m_data(varHash)
{
if (!m_date.isValid())
throw std::runtime_error("Bad RSS Article data");
// If item does not have a guid, fall back to some other identifier
if (m_guid.isEmpty())
m_guid = varHash.value(KeyTorrentURL).toString();
if (m_guid.isEmpty())
m_guid = varHash.value(KeyTitle).toString();
if (m_guid.isEmpty())
throw std::runtime_error("Bad RSS Article data");
m_data[KeyId] = m_guid;
if (m_torrentURL.isEmpty()) {
m_torrentURL = m_link;
m_data[KeyTorrentURL] = m_torrentURL;
}
}
Article::Article(Feed *feed, const QJsonObject &jsonObj)

127
src/base/rss/rss_feed.cpp

@ -30,6 +30,9 @@ @@ -30,6 +30,9 @@
#include "rss_feed.h"
#include <algorithm>
#include <vector>
#include <QDebug>
#include <QDir>
#include <QJsonArray>
@ -215,47 +218,30 @@ void Feed::handleParsingFinished(const RSS::Private::ParsingResult &result) @@ -215,47 +218,30 @@ void Feed::handleParsingFinished(const RSS::Private::ParsingResult &result)
{
m_hasError = !result.error.isEmpty();
// For some reason, the RSS feed may contain malformed XML data and it may not be
// successfully parsed by the XML parser. We are still trying to load as many articles
// as possible until we encounter corrupted data. So we can have some articles here
// even in case of parsing error.
if (!m_hasError || !result.articles.isEmpty()) {
if (title() != result.title) {
if (!result.title.isEmpty() && (title() != result.title)) {
m_title = result.title;
m_dirty = true;
emit titleChanged(this);
}
if (!result.lastBuildDate.isEmpty()) {
m_lastBuildDate = result.lastBuildDate;
int newArticlesCount = 0;
const QDateTime now {QDateTime::currentDateTime()};
for (QVariantHash varHash : result.articles) {
// if article has no publication date we use feed update time as a fallback
QVariant &articleDate = varHash[Article::KeyDate];
if (!articleDate.toDateTime().isValid())
articleDate = now;
try {
auto article = new Article(this, varHash);
if (addArticle(article))
++newArticlesCount;
else
delete article;
}
catch (const std::runtime_error&) {}
m_dirty = true;
}
m_dirty = (newArticlesCount > 0);
// For some reason, the RSS feed may contain malformed XML data and it may not be
// successfully parsed by the XML parser. We are still trying to load as many articles
// as possible until we encounter corrupted data. So we can have some articles here
// even in case of parsing error.
const int newArticlesCount = updateArticles(result.articles);
store();
LogMsg(tr("RSS feed at '%1' updated. Added %2 new articles.")
.arg(m_url, QString::number(newArticlesCount)));
}
if (m_hasError) {
LogMsg(tr("Failed to parse RSS feed at '%1'. Reason: %2").arg(m_url, result.error)
, Log::WARNING);
}
LogMsg(tr("RSS feed at '%1' updated. Added %2 new articles.")
.arg(url(), QString::number(newArticlesCount)));
m_isLoading = false;
emit stateChanged(this);
@ -358,9 +344,7 @@ void Feed::storeDeferred() @@ -358,9 +344,7 @@ void Feed::storeDeferred()
bool Feed::addArticle(Article *article)
{
Q_ASSERT(article);
if (m_articles.contains(article->guid()))
return false;
Q_ASSERT(!m_articles.contains(article->guid()));
// Insertion sort
const int maxArticles = m_session->maxArticlesPerFeed();
@ -375,6 +359,8 @@ bool Feed::addArticle(Article *article) @@ -375,6 +359,8 @@ bool Feed::addArticle(Article *article)
increaseUnreadCount();
connect(article, &Article::read, this, &Feed::handleArticleRead);
}
m_dirty = true;
emit newArticle(article);
if (m_articlesByDate.size() > maxArticles)
@ -424,6 +410,85 @@ void Feed::downloadIcon() @@ -424,6 +410,85 @@ void Feed::downloadIcon()
, this, &Feed::handleIconDownloadFinished);
}
int Feed::updateArticles(const QList<QVariantHash> &loadedArticles)
{
if (loadedArticles.empty())
return 0;
QDateTime dummyPubDate {QDateTime::currentDateTime()};
QVector<QVariantHash> newArticles;
newArticles.reserve(loadedArticles.size());
for (QVariantHash article : loadedArticles) {
QVariant &torrentURL = article[Article::KeyTorrentURL];
if (torrentURL.toString().isEmpty())
torrentURL = article[Article::KeyLink];
// If item does not have an ID, fall back to some other identifier.
QVariant &localId = article[Article::KeyId];
if (localId.toString().isEmpty())
localId = article.value(Article::KeyTorrentURL);
if (localId.toString().isEmpty())
localId = article.value(Article::KeyTitle);
if (localId.toString().isEmpty())
continue;
// If article has no publication date we use feed update time as a fallback.
// To prevent processing of "out-of-limit" articles we must not assign dates
// that are earlier than the dates of existing articles.
const Article *existingArticle = articleByGUID(localId.toString());
if (existingArticle) {
dummyPubDate = existingArticle->date().addMSecs(-1);
continue;
}
QVariant &articleDate = article[Article::KeyDate];
if (!articleDate.toDateTime().isValid())
articleDate = dummyPubDate;
newArticles.append(article);
}
if (newArticles.empty())
return 0;
using ArticleSortAdaptor = QPair<QDateTime, const QVariantHash *>;
std::vector<ArticleSortAdaptor> sortData;
const QList<Article *> existingArticles = articles();
sortData.reserve(existingArticles.size() + newArticles.size());
std::transform(existingArticles.begin(), existingArticles.end(), std::back_inserter(sortData)
, [](const Article *article)
{
return qMakePair(article->date(), nullptr);
});
std::transform(newArticles.begin(), newArticles.end(), std::back_inserter(sortData)
, [](const QVariantHash &article)
{
return qMakePair(article[Article::KeyDate].toDateTime(), &article);
});
// Sort article list in reverse chronological order
std::sort(sortData.begin(), sortData.end()
, [](const ArticleSortAdaptor &a1, const ArticleSortAdaptor &a2)
{
return (a1.first > a2.first);
});
if (sortData.size() > m_session->maxArticlesPerFeed())
sortData.resize(m_session->maxArticlesPerFeed());
int newArticlesCount = 0;
std::for_each(sortData.crbegin(), sortData.crend(), [this, &newArticlesCount](const ArticleSortAdaptor &a)
{
if (a.second) {
addArticle(new Article {this, *a.second});
++newArticlesCount;
}
});
return newArticlesCount;
}
QString Feed::iconPath() const
{
return m_iconPath;

1
src/base/rss/rss_feed.h

@ -104,6 +104,7 @@ namespace RSS @@ -104,6 +104,7 @@ namespace RSS
void increaseUnreadCount();
void decreaseUnreadCount();
void downloadIcon();
int updateArticles(const QList<QVariantHash> &loadedArticles);
Session *m_session;
Private::Parser *m_parser;

Loading…
Cancel
Save