From d710bbd9eff2b75d6a0b994a7ee41a6c19065b90 Mon Sep 17 00:00:00 2001 From: "Vladimir Golovnev (Glassez)" Date: Sun, 2 Jun 2019 13:05:50 +0300 Subject: [PATCH 1/2] Perform more RSS parsing in working thread --- src/base/rss/private/rss_parser.cpp | 21 +++++++++++++++++++-- src/base/rss/private/rss_parser.h | 1 + src/base/rss/rss_feed.cpp | 16 +--------------- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/src/base/rss/private/rss_parser.cpp b/src/base/rss/private/rss_parser.cpp index 54bfdc125..ecfb99812 100644 --- a/src/base/rss/private/rss_parser.cpp +++ b/src/base/rss/private/rss_parser.cpp @@ -635,7 +635,7 @@ void Parser::parseRssArticle(QXmlStreamReader &xml) if (article[Article::KeyTorrentURL].toString().isEmpty()) article[Article::KeyTorrentURL] = altTorrentUrl; - m_result.articles.prepend(article); + addArticle(article); } void Parser::parseRSSChannel(QXmlStreamReader &xml) @@ -730,7 +730,7 @@ void Parser::parseAtomArticle(QXmlStreamReader &xml) } } - m_result.articles.prepend(article); + addArticle(article); } void Parser::parseAtomChannel(QXmlStreamReader &xml) @@ -760,3 +760,20 @@ void Parser::parseAtomChannel(QXmlStreamReader &xml) } } } + +void Parser::addArticle(QVariantHash article) +{ + QVariant &torrentURL = article[Article::KeyTorrentURL]; + if (torrentURL.toString().isEmpty()) + torrentURL = article[Article::KeyLink]; + + // If item does not have an ID, fall back to some other identifier. 
+ QVariant &localId = article[Article::KeyId]; + if (localId.toString().isEmpty()) + localId = article.value(Article::KeyTorrentURL); + if (localId.toString().isEmpty()) + localId = article.value(Article::KeyTitle); + + if (!localId.toString().isEmpty()) + m_result.articles.prepend(article); +} diff --git a/src/base/rss/private/rss_parser.h b/src/base/rss/private/rss_parser.h index aeab57e3f..01795af8c 100644 --- a/src/base/rss/private/rss_parser.h +++ b/src/base/rss/private/rss_parser.h @@ -65,6 +65,7 @@ namespace RSS void parseRSSChannel(QXmlStreamReader &xml); void parseAtomArticle(QXmlStreamReader &xml); void parseAtomChannel(QXmlStreamReader &xml); + void addArticle(QVariantHash article); QString m_baseUrl; ParsingResult m_result; diff --git a/src/base/rss/rss_feed.cpp b/src/base/rss/rss_feed.cpp index 0680aa954..90bf70310 100644 --- a/src/base/rss/rss_feed.cpp +++ b/src/base/rss/rss_feed.cpp @@ -411,24 +411,10 @@ int Feed::updateArticles(const QList<QVariantHash> &loadedArticles) QVector<QVariantHash> newArticles; newArticles.reserve(loadedArticles.size()); for (QVariantHash article : loadedArticles) { - QVariant &torrentURL = article[Article::KeyTorrentURL]; - if (torrentURL.toString().isEmpty()) - torrentURL = article[Article::KeyLink]; - - // If item does not have an ID, fall back to some other identifier. - QVariant &localId = article[Article::KeyId]; - if (localId.toString().isEmpty()) - localId = article.value(Article::KeyTorrentURL); - if (localId.toString().isEmpty()) - localId = article.value(Article::KeyTitle); - - if (localId.toString().isEmpty()) - continue; - // If article has no publication date we use feed update time as a fallback. // To prevent processing of "out-of-limit" articles we must not assign dates // that are earlier than the dates of existing articles. 
- const Article *existingArticle = articleByGUID(localId.toString()); + const Article *existingArticle = articleByGUID(article[Article::KeyId].toString()); if (existingArticle) { dummyPubDate = existingArticle->date().addMSecs(-1); continue; From 68ee071331faa469534fd41662a569a18eb63a88 Mon Sep 17 00:00:00 2001 From: "Vladimir Golovnev (Glassez)" Date: Sun, 2 Jun 2019 13:30:00 +0300 Subject: [PATCH 2/2] Ignore RSS articles with non-unique identifiers --- src/base/rss/private/rss_parser.cpp | 19 +++++++++++++++++-- src/base/rss/private/rss_parser.h | 2 ++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/base/rss/private/rss_parser.cpp b/src/base/rss/private/rss_parser.cpp index ecfb99812..d16b20f26 100644 --- a/src/base/rss/private/rss_parser.cpp +++ b/src/base/rss/private/rss_parser.cpp @@ -583,6 +583,7 @@ void Parser::parse_impl(const QByteArray &feedData) emit finished(m_result); m_result.articles.clear(); // clear articles only + m_articleIDs.clear(); } void Parser::parseRssArticle(QXmlStreamReader &xml) @@ -774,6 +775,20 @@ void Parser::addArticle(QVariantHash article) if (localId.toString().isEmpty()) localId = article.value(Article::KeyTitle); - if (!localId.toString().isEmpty()) - m_result.articles.prepend(article); + if (localId.toString().isEmpty()) { + // The article could not be uniquely identified + // since it has no appropriate data. + // Just ignore it. + return; + } + + if (m_articleIDs.contains(localId.toString())) { + // The article could not be uniquely identified + // since the Feed has duplicate identifiers. + // Just ignore it. 
+ return; + } + + m_articleIDs.insert(localId.toString()); + m_result.articles.prepend(article); } diff --git a/src/base/rss/private/rss_parser.h b/src/base/rss/private/rss_parser.h index 01795af8c..3d0ed4e72 100644 --- a/src/base/rss/private/rss_parser.h +++ b/src/base/rss/private/rss_parser.h @@ -31,6 +31,7 @@ #include <QList> #include <QObject> +#include <QSet> #include <QString> #include <QVariantHash> @@ -69,6 +70,7 @@ namespace RSS QString m_baseUrl; ParsingResult m_result; + QSet<QString> m_articleIDs; }; } }