Merge pull request #9844 from glassez/fix-rss

Improve RSS Feed updating. Closes #9833
6 years ago · 4f565d9f09
4 changed files with 101 additions and 63 deletions
--- a/src/base/rss/private/rss_parser.cpp
+++ b/src/base/rss/private/rss_parser.cpp
@@ -582,16 +582,6 @@ void Parser::parse_impl(const QByteArray &feedData)
                .arg(xml.errorString()).arg(xml.lineNumber())
                .arg(xml.columnNumber()).arg(xml.characterOffset());
    }
-    else {
-        // Sort article list chronologically
-        // NOTE: We don't need to sort it here if articles are always
-        // sorted in fetched XML in reverse chronological order
-        std::sort(m_result.articles.begin(), m_result.articles.end()
-                  , [](const QVariantHash &a1, const QVariantHash &a2)
-        {
-            return a1["date"].toDateTime() < a2["date"].toDateTime();
-        });
-    }

    emit finished(m_result);
    m_result.articles.clear(); // clear articles only
--- a/src/base/rss/rss_article.cpp
+++ b/src/base/rss/rss_article.cpp
@@ -30,7 +30,6 @@

 #include "rss_article.h"

-#include <stdexcept>
 #include <QJsonObject>
 #include <QVariant>

@@ -73,23 +72,6 @@ Article::Article(Feed *feed, const QVariantHash &varHash)
    , m_isRead(varHash.value(KeyIsRead, false).toBool())
    , m_data(varHash)
 {
-    if (!m_date.isValid())
-        throw std::runtime_error("Bad RSS Article data");
-
-    // If item does not have a guid, fall back to some other identifier
-    if (m_guid.isEmpty())
-        m_guid = varHash.value(KeyTorrentURL).toString();
-    if (m_guid.isEmpty())
-        m_guid = varHash.value(KeyTitle).toString();
-    if (m_guid.isEmpty())
-        throw std::runtime_error("Bad RSS Article data");
-
-    m_data[KeyId] = m_guid;
-
-    if (m_torrentURL.isEmpty()) {
-        m_torrentURL = m_link;
-        m_data[KeyTorrentURL] = m_torrentURL;
-    }
 }

 Article::Article(Feed *feed, const QJsonObject &jsonObj)
--- a/src/base/rss/rss_feed.cpp
+++ b/src/base/rss/rss_feed.cpp
@@ -30,6 +30,9 @@

 #include "rss_feed.h"

+#include <algorithm>
+#include <vector>
+
 #include <QDebug>
 #include <QDir>
 #include <QJsonArray>
@@ -215,47 +218,30 @@ void Feed::handleParsingFinished(const RSS::Private::ParsingResult &result)
 {
    m_hasError = !result.error.isEmpty();

-    // For some reason, the RSS feed may contain malformed XML data and it may not be
-    // successfully parsed by the XML parser. We are still trying to load as many articles
-    // as possible until we encounter corrupted data. So we can have some articles here
-    // even in case of parsing error.
-    if (!m_hasError || !result.articles.isEmpty()) {
-        if (title() != result.title) {
+    if (!result.title.isEmpty() && (title() != result.title)) {
        m_title = result.title;
+        m_dirty = true;
        emit titleChanged(this);
    }

+    if (!result.lastBuildDate.isEmpty()) {
        m_lastBuildDate = result.lastBuildDate;
-
-        int newArticlesCount = 0;
-        const QDateTime now {QDateTime::currentDateTime()};
-        for (QVariantHash varHash : result.articles) {
-            // if article has no publication date we use feed update time as a fallback
-            QVariant &articleDate = varHash[Article::KeyDate];
-            if (!articleDate.toDateTime().isValid())
-                articleDate = now;
-
-            try {
-                auto article = new Article(this, varHash);
-                if (addArticle(article))
-                    ++newArticlesCount;
-                else
-                    delete article;
-            }
-            catch (const std::runtime_error&) {}
+        m_dirty = true;
    }

-        m_dirty = (newArticlesCount > 0);
+    // For some reason, the RSS feed may contain malformed XML data and it may not be
+    // successfully parsed by the XML parser. We are still trying to load as many articles
+    // as possible until we encounter corrupted data. So we can have some articles here
+    // even in case of parsing error.
+    const int newArticlesCount = updateArticles(result.articles);
    store();

-        LogMsg(tr("RSS feed at '%1' updated. Added %2 new articles.")
-               .arg(m_url, QString::number(newArticlesCount)));
-    }
-
    if (m_hasError) {
        LogMsg(tr("Failed to parse RSS feed at '%1'. Reason: %2").arg(m_url, result.error)
               , Log::WARNING);
    }
+    LogMsg(tr("RSS feed at '%1' updated. Added %2 new articles.")
+           .arg(url(), QString::number(newArticlesCount)));

    m_isLoading = false;
    emit stateChanged(this);
@@ -358,9 +344,7 @@ void Feed::storeDeferred()
 bool Feed::addArticle(Article *article)
 {
    Q_ASSERT(article);
-
-    if (m_articles.contains(article->guid()))
-        return false;
+    Q_ASSERT(!m_articles.contains(article->guid()));

    // Insertion sort
    const int maxArticles = m_session->maxArticlesPerFeed();
@@ -375,6 +359,8 @@ bool Feed::addArticle(Article *article)
        increaseUnreadCount();
        connect(article, &Article::read, this, &Feed::handleArticleRead);
    }
+
+    m_dirty = true;
    emit newArticle(article);

    if (m_articlesByDate.size() > maxArticles)
@@ -424,6 +410,85 @@ void Feed::downloadIcon()
            , this, &Feed::handleIconDownloadFinished);
 }

+// Merges freshly parsed articles into this feed and returns how many were added.
+//
+// Every loaded item is normalised first: the torrent URL falls back to the link and
+// the ID falls back to the torrent URL, then to the title. Items without any usable
+// ID, items already known to the feed and items repeating an ID seen earlier in the
+// same batch are skipped. The remaining items are ranked by date together with the
+// existing articles, and only those that fit into the limit reported by
+// m_session->maxArticlesPerFeed() are turned into Article objects (oldest first).
+int Feed::updateArticles(const QList<QVariantHash> &loadedArticles)
+{
+    if (loadedArticles.empty())
+        return 0;
+
+    QDateTime dummyPubDate {QDateTime::currentDateTime()};
+    QVector<QVariantHash> newArticles;
+    newArticles.reserve(loadedArticles.size());
+    for (QVariantHash article : loadedArticles) {
+        // Values are read by copy rather than through a reference returned by
+        // operator[]: a later operator[] call may insert a key, and a reference
+        // into the hash is not guaranteed to survive an insertion.
+        if (article.value(Article::KeyTorrentURL).toString().isEmpty())
+            article[Article::KeyTorrentURL] = article.value(Article::KeyLink);
+
+        // If item does not have an ID, fall back to some other identifier.
+        QVariant localId = article.value(Article::KeyId);
+        if (localId.toString().isEmpty())
+            localId = article.value(Article::KeyTorrentURL);
+        if (localId.toString().isEmpty())
+            localId = article.value(Article::KeyTitle);
+
+        const QString id = localId.toString();
+        if (id.isEmpty())
+            continue;
+        article[Article::KeyId] = localId;
+
+        // If article has no publication date we use feed update time as a fallback.
+        // Once an already known article is met, the fallback becomes 1 ms before that
+        // article's date, so undated items following it in the list are ranked
+        // right behind it instead of being treated as brand new.
+        const Article *existingArticle = articleByGUID(id);
+        if (existingArticle) {
+            dummyPubDate = existingArticle->date().addMSecs(-1);
+            continue;
+        }
+
+        // addArticle() only asserts that the ID is not registered yet, so an ID that
+        // is repeated inside this batch has to be filtered out here. The first
+        // occurrence wins.
+        const bool isDuplicate = std::any_of(newArticles.cbegin(), newArticles.cend()
+                                             , [&id](const QVariantHash &queued)
+        {
+            return (queued.value(Article::KeyId).toString() == id);
+        });
+        if (isDuplicate)
+            continue;
+
+        if (!article.value(Article::KeyDate).toDateTime().isValid())
+            article[Article::KeyDate] = dummyPubDate;
+
+        newArticles.append(article);
+    }
+
+    if (newArticles.empty())
+        return 0;
+
+    // Pair of (publication date, pointer to new article data). The pointer is null
+    // for articles the feed already holds; they take part in the ranking only.
+    using ArticleSortAdaptor = QPair<QDateTime, const QVariantHash *>;
+    std::vector<ArticleSortAdaptor> sortData;
+    const QList<Article *> existingArticles = articles();
+    sortData.reserve(existingArticles.size() + newArticles.size());
+    for (const Article *article : existingArticles)
+        sortData.push_back(ArticleSortAdaptor(article->date(), nullptr));
+
+    // Iterate through a const reference so that the stored element addresses refer
+    // to the container's current storage; newArticles is not modified afterwards.
+    const QVector<QVariantHash> &queuedArticles = newArticles;
+    for (const QVariantHash &article : queuedArticles)
+        sortData.push_back(ArticleSortAdaptor(article.value(Article::KeyDate).toDateTime(), &article));
+
+    // Sort article list in reverse chronological order
+    std::sort(sortData.begin(), sortData.end()
+              , [](const ArticleSortAdaptor &a1, const ArticleSortAdaptor &a2)
+    {
+        return (a1.first > a2.first);
+    });
+
+    // Keep only the newest entries that fit into the per-feed limit. The limit is an
+    // int while the vector size is unsigned, hence the explicit, guarded conversion.
+    using SizeType = std::vector<ArticleSortAdaptor>::size_type;
+    const int maxArticles = m_session->maxArticlesPerFeed();
+    if ((maxArticles >= 0) && (sortData.size() > static_cast<SizeType>(maxArticles)))
+        sortData.resize(static_cast<SizeType>(maxArticles));
+
+    // Walk from the oldest to the newest survivor and create the new articles.
+    int newArticlesCount = 0;
+    std::for_each(sortData.crbegin(), sortData.crend(), [this, &newArticlesCount](const ArticleSortAdaptor &a)
+    {
+        if (a.second) {
+            addArticle(new Article {this, *a.second});
+            ++newArticlesCount;
+        }
+    });
+
+    return newArticlesCount;
+}
+
+
 QString Feed::iconPath() const
 {
    return m_iconPath;
--- a/src/base/rss/rss_feed.h
+++ b/src/base/rss/rss_feed.h
@@ -104,6 +104,7 @@ namespace RSS
        void increaseUnreadCount();
        void decreaseUnreadCount();
        void downloadIcon();
+        int updateArticles(const QList<QVariantHash> &loadedArticles);

        Session *m_session;
        Private::Parser *m_parser;