Browse Source

Merge pull request #10742 from glassez/fix-rss

Ignore RSS articles with non-unique identifiers
adaptive-webui-19844
Vladimir Golovnev 5 years ago committed by GitHub
parent
commit
ecb4a76db7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 36
      src/base/rss/private/rss_parser.cpp
  2. 3
      src/base/rss/private/rss_parser.h
  3. 16
      src/base/rss/rss_feed.cpp

36
src/base/rss/private/rss_parser.cpp

@ -583,6 +583,7 @@ void Parser::parse_impl(const QByteArray &feedData)
emit finished(m_result); emit finished(m_result);
m_result.articles.clear(); // clear articles only m_result.articles.clear(); // clear articles only
m_articleIDs.clear();
} }
void Parser::parseRssArticle(QXmlStreamReader &xml) void Parser::parseRssArticle(QXmlStreamReader &xml)
@ -635,7 +636,7 @@ void Parser::parseRssArticle(QXmlStreamReader &xml)
if (article[Article::KeyTorrentURL].toString().isEmpty()) if (article[Article::KeyTorrentURL].toString().isEmpty())
article[Article::KeyTorrentURL] = altTorrentUrl; article[Article::KeyTorrentURL] = altTorrentUrl;
m_result.articles.prepend(article); addArticle(article);
} }
void Parser::parseRSSChannel(QXmlStreamReader &xml) void Parser::parseRSSChannel(QXmlStreamReader &xml)
@ -730,7 +731,7 @@ void Parser::parseAtomArticle(QXmlStreamReader &xml)
} }
} }
m_result.articles.prepend(article); addArticle(article);
} }
void Parser::parseAtomChannel(QXmlStreamReader &xml) void Parser::parseAtomChannel(QXmlStreamReader &xml)
@ -760,3 +761,34 @@ void Parser::parseAtomChannel(QXmlStreamReader &xml)
} }
} }
} }
// Normalizes the torrent URL and the identifier of a parsed article and
// prepends it to m_result.articles. Articles that end up without any
// identifier, or whose identifier was already recorded in m_articleIDs,
// are silently dropped.
//
// article: taken by value on purpose, the local copy is amended before
//          being stored.
void Parser::addArticle(QVariantHash article)
{
    // Right-hand sides below use the const lookup value() instead of
    // operator[]: the non-const operator[] inserts a missing key, and such
    // an insertion may invalidate the reference being assigned through.
    QVariant &torrentURL = article[Article::KeyTorrentURL];
    if (torrentURL.toString().isEmpty())
        torrentURL = article.value(Article::KeyLink);

    // If item does not have an ID, fall back to some other identifier.
    QVariant &localId = article[Article::KeyId];
    if (localId.toString().isEmpty())
        localId = article.value(Article::KeyTorrentURL);
    if (localId.toString().isEmpty())
        localId = article.value(Article::KeyTitle);

    // Convert once; the same string is used for every check below.
    const QString articleId = localId.toString();

    if (articleId.isEmpty()) {
        // The article could not be uniquely identified
        // since it has no appropriate data.
        // Just ignore it.
        return;
    }

    if (m_articleIDs.contains(articleId)) {
        // The article could not be uniquely identified
        // since the Feed has duplicate identifiers.
        // Just ignore it.
        return;
    }

    m_articleIDs.insert(articleId);
    m_result.articles.prepend(article);
}

3
src/base/rss/private/rss_parser.h

@ -31,6 +31,7 @@
#include <QList> #include <QList>
#include <QObject> #include <QObject>
#include <QSet>
#include <QString> #include <QString>
#include <QVariantHash> #include <QVariantHash>
@ -65,9 +66,11 @@ namespace RSS
void parseRSSChannel(QXmlStreamReader &xml); void parseRSSChannel(QXmlStreamReader &xml);
void parseAtomArticle(QXmlStreamReader &xml); void parseAtomArticle(QXmlStreamReader &xml);
void parseAtomChannel(QXmlStreamReader &xml); void parseAtomChannel(QXmlStreamReader &xml);
void addArticle(QVariantHash article);
QString m_baseUrl; QString m_baseUrl;
ParsingResult m_result; ParsingResult m_result;
QSet<QString> m_articleIDs;
}; };
} }
} }

16
src/base/rss/rss_feed.cpp

@ -411,24 +411,10 @@ int Feed::updateArticles(const QList<QVariantHash> &loadedArticles)
QVector<QVariantHash> newArticles; QVector<QVariantHash> newArticles;
newArticles.reserve(loadedArticles.size()); newArticles.reserve(loadedArticles.size());
for (QVariantHash article : loadedArticles) { for (QVariantHash article : loadedArticles) {
QVariant &torrentURL = article[Article::KeyTorrentURL];
if (torrentURL.toString().isEmpty())
torrentURL = article[Article::KeyLink];
// If item does not have an ID, fall back to some other identifier.
QVariant &localId = article[Article::KeyId];
if (localId.toString().isEmpty())
localId = article.value(Article::KeyTorrentURL);
if (localId.toString().isEmpty())
localId = article.value(Article::KeyTitle);
if (localId.toString().isEmpty())
continue;
// If article has no publication date we use feed update time as a fallback. // If article has no publication date we use feed update time as a fallback.
// To prevent processing of "out-of-limit" articles we must not assign dates // To prevent processing of "out-of-limit" articles we must not assign dates
// that are earlier than the dates of existing articles. // that are earlier than the dates of existing articles.
const Article *existingArticle = articleByGUID(localId.toString()); const Article *existingArticle = articleByGUID(article[Article::KeyId].toString());
if (existingArticle) { if (existingArticle) {
dummyPubDate = existingArticle->date().addMSecs(-1); dummyPubDate = existingArticle->date().addMSecs(-1);
continue; continue;

Loading…
Cancel
Save