From 66620810442085e4282b772f8892190c9dd03d38 Mon Sep 17 00:00:00 2001 From: "Vladimir Golovnev (Glassez)" Date: Sat, 24 Oct 2015 11:13:35 +0300 Subject: [PATCH] Improve RSS parsing logic. --- src/base/rss/private/rssparser.cpp | 487 ++++++++++++----------------- src/base/rss/private/rssparser.h | 49 +-- src/base/rss/rssfeed.cpp | 38 ++- src/base/rss/rssfeed.h | 13 +- src/base/rss/rssmanager.cpp | 11 +- src/base/rss/rssmanager.h | 11 +- 6 files changed, 259 insertions(+), 350 deletions(-) diff --git a/src/base/rss/private/rssparser.cpp b/src/base/rss/private/rssparser.cpp index eecfe1af1..8dc1b0849 100644 --- a/src/base/rss/private/rssparser.cpp +++ b/src/base/rss/private/rssparser.cpp @@ -1,6 +1,7 @@ /* - * Bittorrent Client using Qt4 and libtorrent. - * Copyright (C) 2012 Christophe Dumez + * Bittorrent Client using Qt and libtorrent. + * Copyright (C) 2015 Vladimir Golovnev + * Copyright (C) 2012 Christophe Dumez * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -30,248 +31,226 @@ #include #include -#include #include #include #include #include -#include "base/utils/fs.h" #include "rssparser.h" -namespace Rss +namespace { - namespace Private + const char shortDay[][4] = { + "Mon", "Tue", "Wed", + "Thu", "Fri", "Sat", + "Sun" + }; + + const char longDay[][10] = { + "Monday", "Tuesday", "Wednesday", + "Thursday", "Friday", "Saturday", + "Sunday" + }; + + const char shortMonth[][4] = { + "Jan", "Feb", "Mar", "Apr", + "May", "Jun", "Jul", "Aug", + "Sep", "Oct", "Nov", "Dec" + }; + + // Ported to Qt from KDElibs4 + QDateTime parseDate(const QString &string) { - struct ParsingJob - { - QString feedUrl; - QByteArray feedData; - }; - } -} - -static const char shortDay[][4] = { - "Mon", "Tue", "Wed", - "Thu", "Fri", "Sat", - "Sun" -}; - -static const char longDay[][10] = { - "Monday", "Tuesday", "Wednesday", - "Thursday", "Friday", "Saturday", - "Sunday" -}; - -static const char shortMonth[][4] = { - "Jan", "Feb", "Mar", "Apr", - "May", "Jun", "Jul", "Aug", - "Sep", "Oct", "Nov", "Dec" -}; - -using namespace Rss::Private; - -Parser::Parser(QObject *parent) - : QThread(parent) - , m_running(true) -{ - start(); -} - -Parser::~Parser() -{ - m_running = false; - m_waitCondition.wakeOne(); - wait(); -} - -// Ported to Qt from KDElibs4 -QDateTime Parser::parseDate(const QString &string) -{ - const QString str = string.trimmed(); - if (str.isEmpty()) - return QDateTime::currentDateTime(); - - int nyear = 6; // indexes within string to values - int nmonth = 4; - int nday = 2; - int nwday = 1; - int nhour = 7; - int nmin = 8; - int nsec = 9; - // Also accept obsolete form "Weekday, DD-Mon-YY HH:MM:SS ±hhmm" - QRegExp rx("^(?:([A-Z][a-z]+),\\s*)?(\\d{1,2})(\\s+|-)([^-\\s]+)(\\s+|-)(\\d{2,4})\\s+(\\d\\d):(\\d\\d)(?::(\\d\\d))?\\s+(\\S+)$"); - QStringList parts; - if (!str.indexOf(rx)) { - // Check that if date has '-' separators, both separators are '-'. - parts = rx.capturedTexts(); - bool h1 = (parts[3] == QLatin1String("-")); - bool h2 = (parts[5] == QLatin1String("-")); - if (h1 != h2) - return QDateTime::currentDateTime(); - } - else { - // Check for the obsolete form "Wdy Mon DD HH:MM:SS YYYY" - rx = QRegExp("^([A-Z][a-z]+)\\s+(\\S+)\\s+(\\d\\d)\\s+(\\d\\d):(\\d\\d):(\\d\\d)\\s+(\\d\\d\\d\\d)$"); - if (str.indexOf(rx)) + const QString str = string.trimmed(); + if (str.isEmpty()) return QDateTime::currentDateTime(); - nyear = 7; - nmonth = 2; - nday = 3; - nwday = 1; - nhour = 4; - nmin = 5; - nsec = 6; - parts = rx.capturedTexts(); - } - bool ok[4]; - const int day = parts[nday].toInt(&ok[0]); - int year = parts[nyear].toInt(&ok[1]); - const int hour = parts[nhour].toInt(&ok[2]); - const int minute = parts[nmin].toInt(&ok[3]); - if (!ok[0] || !ok[1] || !ok[2] || !ok[3]) - return QDateTime::currentDateTime(); - - int second = 0; - if (!parts[nsec].isEmpty()) { - second = parts[nsec].toInt(&ok[0]); - if (!ok[0]) + int nyear = 6; // indexes within string to values + int nmonth = 4; + int nday = 2; + int nwday = 1; + int nhour = 7; + int nmin = 8; + int nsec = 9; + // Also accept obsolete form "Weekday, DD-Mon-YY HH:MM:SS ±hhmm" + QRegExp rx("^(?:([A-Z][a-z]+),\\s*)?(\\d{1,2})(\\s+|-)([^-\\s]+)(\\s+|-)(\\d{2,4})\\s+(\\d\\d):(\\d\\d)(?::(\\d\\d))?\\s+(\\S+)$"); + QStringList parts; + if (!str.indexOf(rx)) { + // Check that if date has '-' separators, both separators are '-'. + parts = rx.capturedTexts(); + bool h1 = (parts[3] == QLatin1String("-")); + bool h2 = (parts[5] == QLatin1String("-")); + if (h1 != h2) + return QDateTime::currentDateTime(); + } + else { + // Check for the obsolete form "Wdy Mon DD HH:MM:SS YYYY" + rx = QRegExp("^([A-Z][a-z]+)\\s+(\\S+)\\s+(\\d\\d)\\s+(\\d\\d):(\\d\\d):(\\d\\d)\\s+(\\d\\d\\d\\d)$"); + if (str.indexOf(rx)) + return QDateTime::currentDateTime(); + nyear = 7; + nmonth = 2; + nday = 3; + nwday = 1; + nhour = 4; + nmin = 5; + nsec = 6; + parts = rx.capturedTexts(); + } + + bool ok[4]; + const int day = parts[nday].toInt(&ok[0]); + int year = parts[nyear].toInt(&ok[1]); + const int hour = parts[nhour].toInt(&ok[2]); + const int minute = parts[nmin].toInt(&ok[3]); + if (!ok[0] || !ok[1] || !ok[2] || !ok[3]) return QDateTime::currentDateTime(); - } - bool leapSecond = (second == 60); - if (leapSecond) - second = 59; // apparently a leap second - validate below, once time zone is known - int month = 0; - for ( ; (month < 12) && (parts[nmonth] != shortMonth[month]); ++month); - int dayOfWeek = -1; - if (!parts[nwday].isEmpty()) { - // Look up the weekday name - while (++dayOfWeek < 7 && (shortDay[dayOfWeek] != parts[nwday])); - if (dayOfWeek >= 7) - for (dayOfWeek = 0; dayOfWeek < 7 && (longDay[dayOfWeek] != parts[nwday]); ++dayOfWeek); - } + int second = 0; + if (!parts[nsec].isEmpty()) { + second = parts[nsec].toInt(&ok[0]); + if (!ok[0]) + return QDateTime::currentDateTime(); + } - // if (month >= 12 || dayOfWeek >= 7 - // || (dayOfWeek < 0 && format == RFCDateDay)) - // return QDateTime; - int i = parts[nyear].size(); - if (i < 4) { - // It's an obsolete year specification with less than 4 digits - year += (i == 2 && year < 50) ? 2000 : 1900; - } + bool leapSecond = (second == 60); + if (leapSecond) + second = 59; // apparently a leap second - validate below, once time zone is known + int month = 0; + for ( ; (month < 12) && (parts[nmonth] != shortMonth[month]); ++month); + int dayOfWeek = -1; + if (!parts[nwday].isEmpty()) { + // Look up the weekday name + while (++dayOfWeek < 7 && (shortDay[dayOfWeek] != parts[nwday])); + if (dayOfWeek >= 7) + for (dayOfWeek = 0; dayOfWeek < 7 && (longDay[dayOfWeek] != parts[nwday]); ++dayOfWeek); + } - // Parse the UTC offset part - int offset = 0; // set default to '-0000' - bool negOffset = false; - if (parts.count() > 10) { - rx = QRegExp("^([+-])(\\d\\d)(\\d\\d)$"); - if (!parts[10].indexOf(rx)) { - // It's a UTC offset ±hhmm - parts = rx.capturedTexts(); - offset = parts[2].toInt(&ok[0]) * 3600; - int offsetMin = parts[3].toInt(&ok[1]); - if (!ok[0] || !ok[1] || offsetMin > 59) - return QDateTime(); - offset += offsetMin * 60; - negOffset = (parts[1] == QLatin1String("-")); - if (negOffset) - offset = -offset; + // if (month >= 12 || dayOfWeek >= 7 + // || (dayOfWeek < 0 && format == RFCDateDay)) + // return QDateTime; + int i = parts[nyear].size(); + if (i < 4) { + // It's an obsolete year specification with less than 4 digits + year += (i == 2 && year < 50) ? 2000 : 1900; } - else { - // Check for an obsolete time zone name - QByteArray zone = parts[10].toLatin1(); - if (zone.length() == 1 && isalpha(zone[0]) && toupper(zone[0]) != 'J') { - negOffset = true; // military zone: RFC 2822 treats as '-0000' + + // Parse the UTC offset part + int offset = 0; // set default to '-0000' + bool negOffset = false; + if (parts.count() > 10) { + rx = QRegExp("^([+-])(\\d\\d)(\\d\\d)$"); + if (!parts[10].indexOf(rx)) { + // It's a UTC offset ±hhmm + parts = rx.capturedTexts(); + offset = parts[2].toInt(&ok[0]) * 3600; + int offsetMin = parts[3].toInt(&ok[1]); + if (!ok[0] || !ok[1] || offsetMin > 59) + return QDateTime(); + offset += offsetMin * 60; + negOffset = (parts[1] == QLatin1String("-")); + if (negOffset) + offset = -offset; } - else if (zone != "UT" && zone != "GMT") { // treated as '+0000' - offset = (zone == "EDT") - ? -4 * 3600 - : ((zone == "EST") || (zone == "CDT")) - ? -5 * 3600 - : ((zone == "CST") || (zone == "MDT")) - ? -6 * 3600 - : (zone == "MST" || zone == "PDT") - ? -7 * 3600 - : (zone == "PST") - ? -8 * 3600 - : 0; - if (!offset) { - // Check for any other alphabetic time zone - bool nonalpha = false; - for (int i = 0, end = zone.size(); (i < end) && !nonalpha; ++i) - nonalpha = !isalpha(zone[i]); - if (nonalpha) - return QDateTime(); - // TODO: Attempt to recognize the time zone abbreviation? - negOffset = true; // unknown time zone: RFC 2822 treats as '-0000' + else { + // Check for an obsolete time zone name + QByteArray zone = parts[10].toLatin1(); + if (zone.length() == 1 && isalpha(zone[0]) && toupper(zone[0]) != 'J') { + negOffset = true; // military zone: RFC 2822 treats as '-0000' + } + else if (zone != "UT" && zone != "GMT") { // treated as '+0000' + offset = (zone == "EDT") + ? -4 * 3600 + : ((zone == "EST") || (zone == "CDT")) + ? -5 * 3600 + : ((zone == "CST") || (zone == "MDT")) + ? -6 * 3600 + : (zone == "MST" || zone == "PDT") + ? -7 * 3600 + : (zone == "PST") + ? -8 * 3600 + : 0; + if (!offset) { + // Check for any other alphabetic time zone + bool nonalpha = false; + for (int i = 0, end = zone.size(); (i < end) && !nonalpha; ++i) + nonalpha = !isalpha(zone[i]); + if (nonalpha) + return QDateTime(); + // TODO: Attempt to recognize the time zone abbreviation? + negOffset = true; // unknown time zone: RFC 2822 treats as '-0000' + } } } } - } - QDate qdate(year, month + 1, day); // convert date, and check for out-of-range - if (!qdate.isValid()) - return QDateTime::currentDateTime(); - - QTime qTime(hour, minute, second); - QDateTime result(qdate, qTime, Qt::UTC); - if (offset) - result = result.addSecs(-offset); - if (!result.isValid()) - return QDateTime::currentDateTime(); // invalid date/time - - if (leapSecond) { - // Validate a leap second time. Leap seconds are inserted after 23:59:59 UTC. - // Convert the time to UTC and check that it is 00:00:00. - if ((hour*3600 + minute*60 + 60 - offset + 86400*5) % 86400) // (max abs(offset) is 100 hours) - return QDateTime::currentDateTime(); // the time isn't the last second of the day - } + QDate qdate(year, month + 1, day); // convert date, and check for out-of-range + if (!qdate.isValid()) + return QDateTime::currentDateTime(); - return result; -} + QTime qTime(hour, minute, second); + QDateTime result(qdate, qTime, Qt::UTC); + if (offset) + result = result.addSecs(-offset); + if (!result.isValid()) + return QDateTime::currentDateTime(); // invalid date/time + + if (leapSecond) { + // Validate a leap second time. Leap seconds are inserted after 23:59:59 UTC. + // Convert the time to UTC and check that it is 00:00:00. + if ((hour*3600 + minute*60 + 60 - offset + 86400*5) % 86400) // (max abs(offset) is 100 hours) + return QDateTime::currentDateTime(); // the time isn't the last second of the day + } -void Parser::parseFeedData(const QString &feedUrl, const QByteArray &feedData) -{ - qDebug() << Q_FUNC_INFO << feedUrl; - m_mutex.lock(); - ParsingJob job = { feedUrl, feedData }; - m_queue.enqueue(job); - // Wake up thread. - if (m_queue.count() == 1) { - qDebug() << Q_FUNC_INFO << "Waking up thread"; - m_waitCondition.wakeOne(); + return result; } - m_mutex.unlock(); } -void Parser::clearFeedData(const QString &feedUrl) -{ - m_mutex.lock(); - m_lastBuildDates.remove(feedUrl); - m_mutex.unlock(); -} +using namespace Rss::Private; -void Parser::run() +// read and create items from a rss document +void Parser::parse(const QByteArray &feedData) { - while (m_running) { - m_mutex.lock(); - if (!m_queue.empty()) { - ParsingJob job = m_queue.dequeue(); - m_mutex.unlock(); - parseFeed(job); + qDebug() << Q_FUNC_INFO; + + QXmlStreamReader xml(feedData); + bool foundChannel = false; + while (xml.readNextStartElement()) { + if (xml.name() == "rss") { + // Find channels + while (xml.readNextStartElement()) { + if (xml.name() == "channel") { + parseRSSChannel(xml); + foundChannel = true; + break; + } + else { + qDebug() << "Skip rss item: " << xml.name(); + xml.skipCurrentElement(); + } + } + break; + } + else if (xml.name() == "feed") { // Atom feed + parseAtomChannel(xml); + foundChannel = true; + break; } else { - qDebug() << Q_FUNC_INFO << "Thread is waiting."; - m_waitCondition.wait(&m_mutex); - qDebug() << Q_FUNC_INFO << "Thread woke up."; - m_mutex.unlock(); + qDebug() << "Skip root item: " << xml.name(); + xml.skipCurrentElement(); } } + + if (xml.hasError()) + emit finished(xml.errorString()); + else if (!foundChannel) + emit finished(tr("Invalid RSS feed.")); + else + emit finished(QString()); } -void Parser::parseRssArticle(QXmlStreamReader &xml, const QString &feedUrl) +void Parser::parseRssArticle(QXmlStreamReader &xml) { QVariantHash article; @@ -332,12 +311,12 @@ void Parser::parseRssArticle(QXmlStreamReader &xml, const QString &feedUrl) } } - emit newArticle(feedUrl, article); + emit newArticle(article); } -void Parser::parseRSSChannel(QXmlStreamReader &xml, const QString &feedUrl) +void Parser::parseRSSChannel(QXmlStreamReader &xml) { - qDebug() << Q_FUNC_INFO << feedUrl; + qDebug() << Q_FUNC_INFO; Q_ASSERT(xml.isStartElement() && xml.name() == "channel"); while(!xml.atEnd()) { @@ -346,27 +325,26 @@ void Parser::parseRSSChannel(QXmlStreamReader &xml, const QString &feedUrl) if (xml.isStartElement()) { if (xml.name() == "title") { QString title = xml.readElementText(); - emit feedTitle(feedUrl, title); + emit feedTitle(title); } else if (xml.name() == "lastBuildDate") { QString lastBuildDate = xml.readElementText(); if (!lastBuildDate.isEmpty()) { - QMutexLocker locker(&m_mutex); - if (m_lastBuildDates.value(feedUrl, "") == lastBuildDate) { + if (m_lastBuildDate == lastBuildDate) { qDebug() << "The RSS feed has not changed since last time, aborting parsing."; return; } - m_lastBuildDates[feedUrl] = lastBuildDate; + m_lastBuildDate = lastBuildDate; } } else if (xml.name() == "item") { - parseRssArticle(xml, feedUrl); + parseRssArticle(xml); } } } } -void Parser::parseAtomArticle(QXmlStreamReader &xml, const QString &feedUrl, const QString &baseUrl) +void Parser::parseAtomArticle(QXmlStreamReader &xml) { QVariantHash article; bool doubleContent = false; @@ -392,7 +370,7 @@ void Parser::parseAtomArticle(QXmlStreamReader &xml, const QString &feedUrl, con // Atom feeds can have relative links, work around this and // take the stress of figuring article full URI from UI // Assemble full URI - article["news_link"] = ( baseUrl.isEmpty() ? link : baseUrl + link ); + article["news_link"] = ( m_baseUrl.isEmpty() ? link : m_baseUrl + link ); } else if ((xml.name() == "summary") || (xml.name() == "content")){ @@ -453,15 +431,15 @@ void Parser::parseAtomArticle(QXmlStreamReader &xml, const QString &feedUrl, con } } - emit newArticle(feedUrl, article); + emit newArticle(article); } -void Parser::parseAtomChannel(QXmlStreamReader &xml, const QString &feedUrl) +void Parser::parseAtomChannel(QXmlStreamReader &xml) { - qDebug() << Q_FUNC_INFO << feedUrl; + qDebug() << Q_FUNC_INFO; Q_ASSERT(xml.isStartElement() && xml.name() == "feed"); - QString baseURL = xml.attributes().value("xml:base").toString(); + m_baseUrl = xml.attributes().value("xml:base").toString(); while (!xml.atEnd()) { xml.readNext(); @@ -469,74 +447,21 @@ void Parser::parseAtomChannel(QXmlStreamReader &xml, const QString &feedUrl) if (xml.isStartElement()) { if (xml.name() == "title") { QString title = xml.readElementText(); - emit feedTitle(feedUrl, title); + emit feedTitle(title); } else if (xml.name() == "updated") { QString lastBuildDate = xml.readElementText(); if (!lastBuildDate.isEmpty()) { - QMutexLocker locker(&m_mutex); - if (m_lastBuildDates.value(feedUrl) == lastBuildDate) { + if (m_lastBuildDate == lastBuildDate) { qDebug() << "The RSS feed has not changed since last time, aborting parsing."; return; } - m_lastBuildDates[feedUrl] = lastBuildDate; + m_lastBuildDate = lastBuildDate; } } else if (xml.name() == "entry") { - parseAtomArticle(xml, feedUrl, baseURL); + parseAtomArticle(xml); } } } } - -// read and create items from a rss document -void Parser::parseFeed(const ParsingJob &job) -{ - qDebug() << Q_FUNC_INFO << job.feedUrl; - - QXmlStreamReader xml(job.feedData); - bool foundChannel = false; - while (xml.readNextStartElement()) { - if (xml.name() == "rss") { - // Find channels - while (xml.readNextStartElement()) { - if (xml.name() == "channel") { - parseRSSChannel(xml, job.feedUrl); - foundChannel = true; - break; - } - else { - qDebug() << "Skip rss item: " << xml.name(); - xml.skipCurrentElement(); - } - } - break; - } - else if (xml.name() == "feed") { // Atom feed - parseAtomChannel(xml, job.feedUrl); - foundChannel = true; - break; - } - else { - qDebug() << "Skip root item: " << xml.name(); - xml.skipCurrentElement(); - } - } - - if (xml.hasError()) { - reportFailure(job, xml.errorString()); - return; - } - - if (!foundChannel) { - reportFailure(job, tr("Invalid RSS feed at '%1'.").arg(job.feedUrl)); - return; - } - - emit feedParsingFinished(job.feedUrl, QString()); -} - -void Parser::reportFailure(const ParsingJob &job, const QString &error) -{ - emit feedParsingFinished(job.feedUrl, error); -} diff --git a/src/base/rss/private/rssparser.h b/src/base/rss/private/rssparser.h index 89893feaa..afbf1df03 100644 --- a/src/base/rss/private/rssparser.h +++ b/src/base/rss/private/rssparser.h @@ -1,6 +1,7 @@ /* * Bittorrent Client using Qt and libtorrent. - * Copyright (C) 2012 Christophe Dumez + * Copyright (C) 2015 Vladimir Golovnev + * Copyright (C) 2012 Christophe Dumez * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -31,12 +32,9 @@ #ifndef RSSPARSER_H #define RSSPARSER_H -#include -#include -#include -#include +#include +#include #include -#include class QXmlStreamReader; @@ -44,41 +42,26 @@ namespace Rss { namespace Private { - struct ParsingJob; - - class Parser: public QThread + class Parser: public QObject { Q_OBJECT - public: - explicit Parser(QObject *parent = 0); - ~Parser(); - - void parseFeedData(const QString &feedUrl, const QByteArray &feedData); - void clearFeedData(const QString &feedUrl); + public slots: + void parse(const QByteArray &feedData); signals: - void newArticle(const QString &feedUrl, const QVariantHash &rssArticle); - void feedTitle(const QString &feedUrl, const QString &title); - void feedParsingFinished(const QString &feedUrl, const QString &error); + void newArticle(const QVariantHash &rssArticle); + void feedTitle(const QString &title); + void finished(const QString &error); private: - void run() override; - - static QDateTime parseDate(const QString &string); - - void parseRssArticle(QXmlStreamReader &xml, const QString &feedUrl); - void parseRSSChannel(QXmlStreamReader &xml, const QString &feedUrl); - void parseAtomArticle(QXmlStreamReader &xml, const QString &feedUrl, const QString &baseUrl); - void parseAtomChannel(QXmlStreamReader &xml, const QString &feedUrl); - void parseFeed(const ParsingJob &job); - void reportFailure(const ParsingJob &job, const QString &error); + void parseRssArticle(QXmlStreamReader &xml); + void parseRSSChannel(QXmlStreamReader &xml); + void parseAtomArticle(QXmlStreamReader &xml); + void parseAtomChannel(QXmlStreamReader &xml); - bool m_running; - QMutex m_mutex; - QQueue m_queue; - QWaitCondition m_waitCondition; - QHash m_lastBuildDates; // Optimization + QString m_lastBuildDate; // Optimization + QString m_baseUrl; }; } } diff --git a/src/base/rss/rssfeed.cpp b/src/base/rss/rssfeed.cpp index 76acfaf4b..066964045 100644 --- a/src/base/rss/rssfeed.cpp +++ b/src/base/rss/rssfeed.cpp @@ -1,5 +1,6 @@ /* * Bittorrent Client using Qt and libtorrent. + * Copyright (C) 2015 Vladimir Golovnev * Copyright (C) 2010 Christophe Dumez * Copyright (C) 2010 Arnaud Demaiziere * @@ -67,11 +68,13 @@ Feed::Feed(const QString &url, Manager *manager) , m_loading(false) { qDebug() << Q_FUNC_INFO << m_url; + m_parser = new Private::Parser; + m_parser->moveToThread(m_manager->workingThread()); + connect(this, SIGNAL(destroyed()), m_parser, SLOT(deleteLater())); // Listen for new RSS downloads - Private::Parser *const parser = m_manager->rssParser(); - connect(parser, SIGNAL(feedTitle(QString,QString)), SLOT(handleFeedTitle(QString,QString))); - connect(parser, SIGNAL(newArticle(QString,QVariantHash)), SLOT(handleNewArticle(QString,QVariantHash))); - connect(parser, SIGNAL(feedParsingFinished(QString,QString)), SLOT(handleParsingFinished(QString,QString))); + connect(m_parser, SIGNAL(feedTitle(QString)), SLOT(handleFeedTitle(QString))); + connect(m_parser, SIGNAL(newArticle(QVariantHash)), SLOT(handleNewArticle(QVariantHash))); + connect(m_parser, SIGNAL(finished(QString)), SLOT(handleParsingFinished(QString))); // Download the RSS Feed icon Net::DownloadHandler *handler = Net::DownloadManager::instance()->downloadUrl(iconUrl(), true); @@ -87,7 +90,6 @@ Feed::~Feed() { if (!m_icon.startsWith(":/") && QFile::exists(m_icon)) Utils::Fs::forceRemove(m_icon); - m_manager->rssParser()->clearFeedData(m_url); } void Feed::saveItemsToDisk() @@ -320,7 +322,6 @@ QString Feed::iconUrl() const void Feed::handleIconDownloadFinished(const QString &url, const QString &filePath) { Q_UNUSED(url); - m_icon = filePath; qDebug() << Q_FUNC_INFO << "icon path:" << m_icon; m_manager->forwardFeedIconChanged(m_url, m_icon); @@ -328,30 +329,31 @@ void Feed::handleIconDownloadFinished(const QString &url, const QString &filePat void Feed::handleRssDownloadFinished(const QString &url, const QByteArray &data) { - qDebug() << Q_FUNC_INFO << "Successfully downloaded RSS feed at" << url; + Q_UNUSED(url); + qDebug() << Q_FUNC_INFO << "Successfully downloaded RSS feed at" << m_url; // Parse the download RSS - m_manager->rssParser()->parseFeedData(m_url, data); + QMetaObject::invokeMethod(m_parser, "parse", Qt::QueuedConnection, Q_ARG(QByteArray, data)); } void Feed::handleRssDownloadFailed(const QString &url, const QString &error) { + Q_UNUSED(url); m_inErrorState = true; m_loading = false; m_manager->forwardFeedInfosChanged(m_url, displayName(), m_unreadCount); - qWarning() << "Failed to download RSS feed at" << url; + qWarning() << "Failed to download RSS feed at" << m_url; qWarning() << "Reason:" << error; } -void Feed::handleFeedTitle(const QString &feedUrl, const QString &title) +void Feed::handleFeedTitle(const QString &title) { - if (feedUrl != m_url) return; if (m_title == title) return; m_title = title; // Notify that we now have something better than a URL to display if (m_alias.isEmpty()) - m_manager->forwardFeedInfosChanged(feedUrl, title, m_unreadCount); + m_manager->forwardFeedInfosChanged(m_url, title, m_unreadCount); } void Feed::downloadArticleTorrentIfMatching(const ArticlePtr &article) @@ -406,13 +408,11 @@ void Feed::recheckRssItemsForDownload() } } -void Feed::handleNewArticle(const QString &feedUrl, const QVariantHash &articleData) +void Feed::handleNewArticle(const QVariantHash &articleData) { - if (feedUrl != m_url) return; - ArticlePtr article = Article::fromHash(this, articleData); if (article.isNull()) { - qDebug() << "Article hash corrupted or guid is uncomputable; feed url: " << feedUrl; + qDebug() << "Article hash corrupted or guid is uncomputable; feed url: " << m_url; return; } Q_ASSERT(article); @@ -424,12 +424,10 @@ void Feed::handleNewArticle(const QString &feedUrl, const QVariantHash &articleD //m_manager->forwardFeedContentChanged(m_url); } -void Feed::handleParsingFinished(const QString &feedUrl, const QString &error) +void Feed::handleParsingFinished(const QString &error) { - if (feedUrl != m_url) return; - if (!error.isEmpty()) { - qWarning() << "Failed to parse RSS feed at" << feedUrl; + qWarning() << "Failed to parse RSS feed at" << m_url; qWarning() << "Reason:" << error; } diff --git a/src/base/rss/rssfeed.h b/src/base/rss/rssfeed.h index 5198234bd..b457a04b4 100644 --- a/src/base/rss/rssfeed.h +++ b/src/base/rss/rssfeed.h @@ -1,5 +1,6 @@ /* * Bittorrent Client using Qt and libtorrent. + * Copyright (C) 2015 Vladimir Golovnev * Copyright (C) 2010 Christophe Dumez * Copyright (C) 2010 Arnaud Demaiziere * @@ -51,6 +52,11 @@ namespace Rss typedef QSharedPointer FeedPtr; typedef QList FeedList; + namespace Private + { + class Parser; + } + bool articleDateRecentThan(const ArticlePtr &left, const ArticlePtr &right); class Feed: public QObject, public File @@ -86,9 +92,9 @@ namespace Rss void handleIconDownloadFinished(const QString &url, const QString &filePath); void handleRssDownloadFinished(const QString &url, const QByteArray &data); void handleRssDownloadFailed(const QString &url, const QString &error); - void handleFeedTitle(const QString &feedUrl, const QString &title); - void handleNewArticle(const QString &feedUrl, const QVariantHash &article); - void handleParsingFinished(const QString &feedUrl, const QString &error); + void handleFeedTitle(const QString &title); + void handleNewArticle(const QVariantHash &article); + void handleParsingFinished(const QString &error); void handleArticleRead(); private: @@ -99,6 +105,7 @@ namespace Rss private: Manager *m_manager; + Private::Parser *m_parser; ArticleHash m_articles; ArticleList m_articlesByDate; // Articles sorted by date (more recent first) QString m_title; diff --git a/src/base/rss/rssmanager.cpp b/src/base/rss/rssmanager.cpp index a7bd087d1..8fc936d6e 100644 --- a/src/base/rss/rssmanager.cpp +++ b/src/base/rss/rssmanager.cpp @@ -33,7 +33,6 @@ #include "base/logger.h" #include "base/preferences.h" -#include "private/rssparser.h" #include "rssfolder.h" #include "rssfeed.h" #include "rssarticle.h" @@ -48,9 +47,10 @@ using namespace Rss::Private; Manager::Manager(QObject *parent) : QObject(parent) , m_downloadRules(new DownloadRuleList) - , m_rssParser(new Parser(this)) , m_rootFolder(new Folder) + , m_workingThread(new QThread(this)) { + m_workingThread->start(); connect(&m_refreshTimer, SIGNAL(timeout()), SLOT(refresh())); m_refreshInterval = Preferences::instance()->getRSSRefreshInterval(); m_refreshTimer.start(m_refreshInterval * MSECS_PER_MIN); @@ -59,8 +59,9 @@ Manager::Manager(QObject *parent) Manager::~Manager() { qDebug("Deleting RSSManager..."); + m_workingThread->quit(); + m_workingThread->wait(); delete m_downloadRules; - delete m_rssParser; m_rootFolder->saveItemsToDisk(); saveStreamList(); m_rootFolder.clear(); @@ -178,9 +179,9 @@ FolderPtr Manager::rootFolder() const return m_rootFolder; } -Parser *Manager::rssParser() const +QThread *Manager::workingThread() const { - return m_rssParser; + return m_workingThread; } void Manager::refresh() diff --git a/src/base/rss/rssmanager.h b/src/base/rss/rssmanager.h index 2321ce5a2..3c7b81738 100644 --- a/src/base/rss/rssmanager.h +++ b/src/base/rss/rssmanager.h @@ -35,6 +35,7 @@ #include #include #include +#include namespace Rss { @@ -48,11 +49,6 @@ namespace Rss typedef QSharedPointer FolderPtr; typedef QSharedPointer FeedPtr; - namespace Private - { - class Parser; - } - typedef QSharedPointer ManagerPtr; class Manager: public QObject @@ -65,8 +61,7 @@ namespace Rss DownloadRuleList *downloadRules() const; FolderPtr rootFolder() const; - - Private::Parser *rssParser() const; + QThread *workingThread() const; public slots: void refresh(); @@ -87,8 +82,8 @@ namespace Rss QTimer m_refreshTimer; uint m_refreshInterval; DownloadRuleList *m_downloadRules; - Private::Parser *m_rssParser; FolderPtr m_rootFolder; + QThread *m_workingThread; }; }