mirror of
https://github.com/d47081/qBittorrent.git
synced 2025-02-10 05:44:25 +00:00
Support Atom feeds
This commit is contained in:
parent
bc605fe09e
commit
7c1e91f256
@ -35,6 +35,7 @@
|
|||||||
#include <QRegExp>
|
#include <QRegExp>
|
||||||
#include <QStringList>
|
#include <QStringList>
|
||||||
#include <QVariant>
|
#include <QVariant>
|
||||||
|
#include <QTextDocument>
|
||||||
|
|
||||||
struct ParsingJob {
|
struct ParsingJob {
|
||||||
QString feedUrl;
|
QString feedUrl;
|
||||||
@ -236,7 +237,7 @@ void RssParser::run()
|
|||||||
if (!m_queue.empty()) {
|
if (!m_queue.empty()) {
|
||||||
ParsingJob job = m_queue.dequeue();
|
ParsingJob job = m_queue.dequeue();
|
||||||
m_mutex.unlock();
|
m_mutex.unlock();
|
||||||
parseRSS(job);
|
parseFeed(job);
|
||||||
} else {
|
} else {
|
||||||
qDebug() << Q_FUNC_INFO << "Thread is waiting.";
|
qDebug() << Q_FUNC_INFO << "Thread is waiting.";
|
||||||
m_waitCondition.wait(&m_mutex);
|
m_waitCondition.wait(&m_mutex);
|
||||||
@ -326,8 +327,129 @@ void RssParser::parseRSSChannel(QXmlStreamReader& xml, const QString& feedUrl)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parses one Atom <entry> element into a QVariantHash and emits newArticle().
//
// Called with the reader positioned on the <entry> start element; reads
// tokens until the matching </entry> end element or the end of the document.
//
// xml     - stream reader shared with the channel parser
// feedUrl - URL of the feed, forwarded unchanged in the newArticle() signal
// baseUrl - value of the feed's xml:base attribute (may be empty); it is
//           prepended to every entry link when non-empty
//
// Keys written to the article: "title", "news_link", "description", "date",
// "author" and "id". If the entry has no <id>, the link and then the title
// are used as the identifier; an entry with none of the three is dropped
// with a warning and no signal is emitted.
void RssParser::parseAtomArticle(QXmlStreamReader& xml, const QString& feedUrl, const QString& baseUrl)
{
  QVariantHash article;
  bool double_content = false;

  while(!xml.atEnd()) {
    xml.readNext();

    if(xml.isEndElement() && xml.name() == "entry")
      break;

    if (xml.isStartElement()) {
      if (xml.name() == "title") {
        // Workaround for CDATA (QString cannot parse html escapes on its own)
        QTextDocument doc;
        doc.setHtml(xml.readElementText());
        article["title"] = doc.toPlainText();
      }
      else if (xml.name() == "link") {
        QString theLink = ( xml.attributes().isEmpty() ?
                              xml.readElementText() :
                              xml.attributes().value("href").toString() );

        // Atom feeds can have relative links, work around this and
        // take the stress of figuring article full URI from UI

        // Assemble full URI
        // NOTE(review): an absolute href is still prefixed when baseUrl is
        // non-empty - confirm whether that combination occurs in practice.
        article["news_link"] = ( baseUrl.isEmpty() ?
                                   theLink :
                                   baseUrl + theLink );
      }
      else if (xml.name() == "summary" || xml.name() == "content"){
        if(double_content) { // Duplicate content -> ignore
          // skipCurrentElement() stops at the matching end element and also
          // returns on a parse error, so a truncated feed cannot trap us in
          // an endless readNext() loop.
          xml.skipCurrentElement();
          continue;
        }

        // Try to also parse broken articles, which don't use html '&' escapes
        // Actually works great for non-broken content too
        QString feedText = xml.readElementText(QXmlStreamReader::IncludeChildElements);
        if (!feedText.isEmpty())
          article["description"] = feedText;

        double_content = true;
      }
      else if (xml.name() == "updated"){
        // ATOM uses standard compliant date, don't do fancy stuff
        QDateTime articleDate = QDateTime::fromString(xml.readElementText(), Qt::ISODate);
        article["date"] = ( articleDate.isValid() ?
                              articleDate :
                              QDateTime::currentDateTime() );
      }
      else if (xml.name() == "author") {
        // Walk the direct children of <author>; readNextStartElement()
        // returns false at </author> or on a parse error, which bounds
        // the loop even for malformed input.
        while (xml.readNextStartElement()) {
          if (xml.name() == "name")
            article["author"] = xml.readElementText();
          else
            xml.skipCurrentElement();
        }
      }
      else if (xml.name() == "id")
        article["id"] = xml.readElementText();
    }
  }

  if (!article.contains("id")) {
    // Item does not have a guid, fall back to some other identifier
    const QString link = article.value("news_link").toString();
    if (!link.isEmpty())
      article["id"] = link;
    else {
      const QString title = article.value("title").toString();
      if (!title.isEmpty())
        article["id"] = title;
      else {
        qWarning() << "Item has no guid, link or title, ignoring it...";
        return;
      }
    }
  }

  emit newArticle(feedUrl, article);
}
|
||||||
|
|
||||||
|
// Parses the body of an Atom <feed> element.
//
// Must be entered with the reader on the <feed> start element (asserted).
// For each start element encountered until the end of the document:
//   <entry>   - handed to parseAtomArticle() together with the feed's
//               xml:base attribute value
//   <title>   - its text is emitted through feedTitle()
//   <updated> - a non-empty value is compared, under m_mutex, with the one
//               stored in m_lastBuildDates for this feed; if equal, parsing
//               stops immediately, otherwise the stored value is replaced
void RssParser::parseAtomChannel(QXmlStreamReader& xml, const QString& feedUrl)
{
  qDebug() << Q_FUNC_INFO << feedUrl;
  Q_ASSERT(xml.isStartElement() && xml.name() == "feed");

  const QString baseURL = xml.attributes().value("xml:base").toString();

  while (!xml.atEnd()) {
    xml.readNext();

    // Only start elements are of interest here
    if (!xml.isStartElement())
      continue;

    if (xml.name() == "entry") {
      parseAtomArticle(xml, feedUrl, baseURL);
      continue;
    }

    if (xml.name() == "title") {
      QString channelTitle = xml.readElementText();
      emit feedTitle(feedUrl, channelTitle);
      continue;
    }

    if (xml.name() != "updated")
      continue;

    const QString stamp = xml.readElementText();
    if (stamp.isEmpty())
      continue;

    // m_lastBuildDates is accessed with m_mutex held
    QMutexLocker locker(&m_mutex);
    if (m_lastBuildDates.value(feedUrl) == stamp) {
      qDebug() << "The RSS feed has not changed since last time, aborting parsing.";
      return;
    }
    m_lastBuildDates[feedUrl] = stamp;
  }
}
|
||||||
|
|
||||||
// read and create items from a rss document
|
// read and create items from a rss document
|
||||||
void RssParser::parseRSS(const ParsingJob& job)
|
void RssParser::parseFeed(const ParsingJob& job)
|
||||||
{
|
{
|
||||||
qDebug() << Q_FUNC_INFO << job.feedUrl << job.filePath;
|
qDebug() << Q_FUNC_INFO << job.feedUrl << job.filePath;
|
||||||
QFile fileRss(job.filePath);
|
QFile fileRss(job.filePath);
|
||||||
@ -352,6 +474,11 @@ void RssParser::parseRSS(const ParsingJob& job)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
else if (xml.name() == "feed") { // Atom feed
|
||||||
|
parseAtomChannel(xml, job.feedUrl);
|
||||||
|
found_channel = true;
|
||||||
|
break;
|
||||||
} else {
|
} else {
|
||||||
qDebug() << "Skip root item: " << xml.name();
|
qDebug() << "Skip root item: " << xml.name();
|
||||||
xml.skipCurrentElement();
|
xml.skipCurrentElement();
|
||||||
|
@ -61,7 +61,9 @@ protected:
|
|||||||
static QDateTime parseDate(const QString& string);
|
static QDateTime parseDate(const QString& string);
|
||||||
void parseRssArticle(QXmlStreamReader& xml, const QString& feedUrl);
|
void parseRssArticle(QXmlStreamReader& xml, const QString& feedUrl);
|
||||||
void parseRSSChannel(QXmlStreamReader& xml, const QString& feedUrl);
|
void parseRSSChannel(QXmlStreamReader& xml, const QString& feedUrl);
|
||||||
void parseRSS(const ParsingJob& job);
|
void parseAtomArticle(QXmlStreamReader& xml, const QString& feedUrl, const QString& baseUrl);
|
||||||
|
void parseAtomChannel(QXmlStreamReader& xml, const QString& feedUrl);
|
||||||
|
void parseFeed(const ParsingJob& job);
|
||||||
void reportFailure(const ParsingJob& job, const QString& error);
|
void reportFailure(const ParsingJob& job, const QString& error);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user