diff --git a/src/rss/rss.pri b/src/rss/rss.pri index cd41ac849..c1015aae8 100644 --- a/src/rss/rss.pri +++ b/src/rss/rss.pri @@ -12,7 +12,8 @@ HEADERS += $$PWD/rss_imp.h \ $$PWD/rsssettings.h \ $$PWD/rssdownloadrule.h \ $$PWD/rssdownloadrulelist.h \ - $$PWD/cookiesdlg.h + $$PWD/cookiesdlg.h \ + $$PWD/rssparser.h SOURCES += $$PWD/rss_imp.cpp \ $$PWD/rsssettingsdlg.cpp \ @@ -25,7 +26,8 @@ SOURCES += $$PWD/rss_imp.cpp \ $$PWD/rssdownloadrule.cpp \ $$PWD/rssdownloadrulelist.cpp \ $$PWD/cookiesdlg.cpp \ - rss/rssfile.cpp + $$PWD/rssfile.cpp \ + $$PWD/rssparser.cpp FORMS += $$PWD/rss.ui \ $$PWD/rsssettingsdlg.ui \ diff --git a/src/rss/rss_imp.cpp b/src/rss/rss_imp.cpp index 5251c4712..a1c9d161c 100644 --- a/src/rss/rss_imp.cpp +++ b/src/rss/rss_imp.cpp @@ -402,17 +402,17 @@ void RSSImp::refreshSelectedItems() { // Update icons if (item == m_feedList->stickyUnreadItem()) { foreach (QTreeWidgetItem *feed, m_feedList->getAllFeedItems()) { - feed->setData(0,Qt::DecorationRole, QVariant(QIcon(":/Icons/loading.png"))); + feed->setData(0, Qt::DecorationRole, QVariant(QIcon(":/Icons/loading.png"))); } file->refresh(); break; } else { if (qSharedPointerDynamicCast(file)) { - item->setData(0,Qt::DecorationRole, QVariant(QIcon(":/Icons/loading.png"))); + item->setData(0, Qt::DecorationRole, QVariant(QIcon(":/Icons/loading.png"))); } else if (qSharedPointerDynamicCast(file)) { // Update feeds in the folder foreach (QTreeWidgetItem *feed, m_feedList->getAllFeedItems(item)) { - feed->setData(0,Qt::DecorationRole, QVariant(QIcon(":/Icons/loading.png"))); + feed->setData(0, Qt::DecorationRole, QVariant(QIcon(":/Icons/loading.png"))); } } } diff --git a/src/rss/rssarticle.cpp b/src/rss/rssarticle.cpp index 50933d8f2..14242e060 100644 --- a/src/rss/rssarticle.cpp +++ b/src/rss/rssarticle.cpp @@ -28,168 +28,12 @@ * Contact: chris@qbittorrent.org, arnaud@qbittorrent.org */ -#include #include -#include #include - #include #include "rssarticle.h" -static const char shortDay[][4] = { - "Mon", "Tue", "Wed", - "Thu", "Fri", "Sat", - "Sun" -}; -static const char longDay[][10] = { - "Monday", "Tuesday", "Wednesday", - "Thursday", "Friday", "Saturday", - "Sunday" -}; -static const char shortMonth[][4] = { - "Jan", "Feb", "Mar", "Apr", - "May", "Jun", "Jul", "Aug", - "Sep", "Oct", "Nov", "Dec" -}; -static const char longMonth[][10] = { - "January", "February", "March", - "April", "May", "June", - "July", "August", "September", - "October", "November", "December" -}; - -// Ported to Qt4 from KDElibs4 -QDateTime RssArticle::parseDate(const QString &string) { - const QString str = string.trimmed(); - if (str.isEmpty()) - return QDateTime::currentDateTime(); - - int nyear = 6; // indexes within string to values - int nmonth = 4; - int nday = 2; - int nwday = 1; - int nhour = 7; - int nmin = 8; - int nsec = 9; - // Also accept obsolete form "Weekday, DD-Mon-YY HH:MM:SS ±hhmm" - QRegExp rx("^(?:([A-Z][a-z]+),\\s*)?(\\d{1,2})(\\s+|-)([^-\\s]+)(\\s+|-)(\\d{2,4})\\s+(\\d\\d):(\\d\\d)(?::(\\d\\d))?\\s+(\\S+)$"); - QStringList parts; - if (!str.indexOf(rx)) { - // Check that if date has '-' separators, both separators are '-'. - parts = rx.capturedTexts(); - bool h1 = (parts[3] == QLatin1String("-")); - bool h2 = (parts[5] == QLatin1String("-")); - if (h1 != h2) - return QDateTime::currentDateTime(); - } else { - // Check for the obsolete form "Wdy Mon DD HH:MM:SS YYYY" - rx = QRegExp("^([A-Z][a-z]+)\\s+(\\S+)\\s+(\\d\\d)\\s+(\\d\\d):(\\d\\d):(\\d\\d)\\s+(\\d\\d\\d\\d)$"); - if (str.indexOf(rx)) - return QDateTime::currentDateTime(); - nyear = 7; - nmonth = 2; - nday = 3; - nwday = 1; - nhour = 4; - nmin = 5; - nsec = 6; - parts = rx.capturedTexts(); - } - bool ok[4]; - const int day = parts[nday].toInt(&ok[0]); - int year = parts[nyear].toInt(&ok[1]); - const int hour = parts[nhour].toInt(&ok[2]); - const int minute = parts[nmin].toInt(&ok[3]); - if (!ok[0] || !ok[1] || !ok[2] || !ok[3]) - return QDateTime::currentDateTime(); - int second = 0; - if (!parts[nsec].isEmpty()) { - second = parts[nsec].toInt(&ok[0]); - if (!ok[0]) - return QDateTime::currentDateTime(); - } - bool leapSecond = (second == 60); - if (leapSecond) - second = 59; // apparently a leap second - validate below, once time zone is known - int month = 0; - for ( ; month < 12 && parts[nmonth] != shortMonth[month]; ++month) ; - int dayOfWeek = -1; - if (!parts[nwday].isEmpty()) { - // Look up the weekday name - while (++dayOfWeek < 7 && shortDay[dayOfWeek] != parts[nwday]) ; - if (dayOfWeek >= 7) - for (dayOfWeek = 0; dayOfWeek < 7 && longDay[dayOfWeek] != parts[nwday]; ++dayOfWeek) ; - } - // if (month >= 12 || dayOfWeek >= 7 - // || (dayOfWeek < 0 && format == RFCDateDay)) - // return QDateTime; - int i = parts[nyear].size(); - if (i < 4) { - // It's an obsolete year specification with less than 4 digits - year += (i == 2 && year < 50) ? 2000: 1900; - } - - // Parse the UTC offset part - int offset = 0; // set default to '-0000' - bool negOffset = false; - if (parts.count() > 10) { - rx = QRegExp("^([+-])(\\d\\d)(\\d\\d)$"); - if (!parts[10].indexOf(rx)) { - // It's a UTC offset ±hhmm - parts = rx.capturedTexts(); - offset = parts[2].toInt(&ok[0]) * 3600; - int offsetMin = parts[3].toInt(&ok[1]); - if (!ok[0] || !ok[1] || offsetMin > 59) - return QDateTime(); - offset += offsetMin * 60; - negOffset = (parts[1] == QLatin1String("-")); - if (negOffset) - offset = -offset; - } else { - // Check for an obsolete time zone name - QByteArray zone = parts[10].toLatin1(); - if (zone.length() == 1 && isalpha(zone[0]) && toupper(zone[0]) != 'J') - negOffset = true; // military zone: RFC 2822 treats as '-0000' - else if (zone != "UT" && zone != "GMT") { // treated as '+0000' - offset = (zone == "EDT") ? -4*3600 - : (zone == "EST" || zone == "CDT") ? -5*3600 - : (zone == "CST" || zone == "MDT") ? -6*3600 - : (zone == "MST" || zone == "PDT") ? -7*3600 - : (zone == "PST") ? -8*3600 - : 0; - if (!offset) { - // Check for any other alphabetic time zone - bool nonalpha = false; - for (int i = 0, end = zone.size(); i < end && !nonalpha; ++i) - nonalpha = !isalpha(zone[i]); - if (nonalpha) - return QDateTime(); - // TODO: Attempt to recognize the time zone abbreviation? - negOffset = true; // unknown time zone: RFC 2822 treats as '-0000' - } - } - } - } - QDate qdate(year, month+1, day); // convert date, and check for out-of-range - if (!qdate.isValid()) - return QDateTime::currentDateTime(); - QDateTime result(qdate, QTime(hour, minute, second)); - if (!result.isValid() - || (dayOfWeek >= 0 && result.date().dayOfWeek() != dayOfWeek+1)) - return QDateTime::currentDateTime(); // invalid date/time, or weekday doesn't correspond with date - if (!offset) { - result.setTimeSpec(Qt::UTC); - } - if (leapSecond) { - // Validate a leap second time. Leap seconds are inserted after 23:59:59 UTC. - // Convert the time to UTC and check that it is 00:00:00. - if ((hour*3600 + minute*60 + 60 - offset + 86400*5) % 86400) // (max abs(offset) is 100 hours) - return QDateTime::currentDateTime(); // the time isn't the last second of the day - } - return result; -} - // public constructor RssArticle::RssArticle(RssFeed* parent, const QString &guid): m_parent(parent), m_guid(guid), m_read(false) {} @@ -211,65 +55,6 @@ QVariantHash RssArticle::toHash() const { return item; } -RssArticlePtr xmlToRssArticle(RssFeed* parent, QXmlStreamReader& xml) -{ - QString guid; - QString title; - QString torrentUrl; - QString link; - QString description; - QDateTime date; - QString author; - - while(!xml.atEnd()) { - xml.readNext(); - - if(xml.isEndElement() && xml.name() == "item") - break; - - if (xml.isStartElement()) { - if (xml.name() == "title") - title = xml.readElementText(); - else if (xml.name() == "enclosure") { - if (xml.attributes().value("type") == "application/x-bittorrent") - torrentUrl = xml.attributes().value("url").toString(); - } - else if (xml.name() == "link") - link = xml.readElementText(); - else if (xml.name() == "description") - description = xml.readElementText(); - else if (xml.name() == "pubDate") - date = RssArticle::parseDate(xml.readElementText()); - else if (xml.name() == "author") - author = xml.readElementText(); - else if (xml.name() == "guid") - guid = xml.readElementText(); - } - } - - if (guid.isEmpty()) { - // Item does not have a guid, fall back to some other identifier - if (!link.isEmpty()) - guid = link; - else if (!title.isEmpty()) - guid = title; - else { - qWarning() << "Item has no guid, link or title, ignoring it..."; - return RssArticlePtr(); - } - } - - RssArticlePtr art(new RssArticle(parent, guid)); - art->m_title = title; - art->m_torrentUrl = torrentUrl; - art->m_link = link; - art->m_description = description; - art->m_date = date; - art->m_author = author; - - return art; -} - RssArticlePtr hashToRssArticle(RssFeed* parent, const QVariantHash &h) { const QString guid = h.value("id").toString(); if (guid.isEmpty()) return RssArticlePtr(); @@ -281,7 +66,7 @@ RssArticlePtr hashToRssArticle(RssFeed* parent, const QVariantHash &h) { art->m_description = h.value("description").toString(); art->m_date = h.value("date").toDateTime(); art->m_author = h.value("author").toString(); - art->m_read = h.value("read").toBool(); + art->m_read = h.value("read", false).toBool(); return art; } @@ -320,7 +105,7 @@ void RssArticle::markAsRead() { m_read = true; } -QString RssArticle::guid() const +const QString& RssArticle::guid() const { return m_guid; } diff --git a/src/rss/rssarticle.h b/src/rss/rssarticle.h index 806026243..666fa02fa 100644 --- a/src/rss/rssarticle.h +++ b/src/rss/rssarticle.h @@ -48,7 +48,7 @@ public: RssArticle(RssFeed* parent, const QString &guid); // Accessors bool hasAttachment() const; - QString guid() const; + const QString& guid() const; RssFeed* parent() const; QString title() const; QString author() const; @@ -62,12 +62,8 @@ public: // Serialization QVariantHash toHash() const; - friend RssArticlePtr xmlToRssArticle(RssFeed* parent, QXmlStreamReader& xml); friend RssArticlePtr hashToRssArticle(RssFeed* parent, const QVariantHash &hash); -private: - static QDateTime parseDate(const QString &string); - private: RssFeed* m_parent; QString m_guid; diff --git a/src/rss/rssfeed.cpp b/src/rss/rssfeed.cpp index 1064be30d..fa133aee2 100644 --- a/src/rss/rssfeed.cpp +++ b/src/rss/rssfeed.cpp @@ -35,6 +35,7 @@ #include "rssfolder.h" #include "rsssettings.h" #include "rssarticle.h" +#include "rssparser.h" #include "misc.h" #include "rssdownloadrulelist.h" #include "downloadthread.h" @@ -42,12 +43,16 @@ RssFeed::RssFeed(RssManager* manager, RssFolder* parent, const QString &url): m_manager(manager), m_parent(parent), m_icon(":/Icons/oxygen/application-rss+xml.png"), - m_refreshed(false), m_downloadFailure(false), m_loading(false) { + m_refreshed(false), m_inErrorState(false), m_loading(false) { qDebug() << Q_FUNC_INFO << url; m_url = QUrl::fromEncoded(url.toUtf8()).toString(); // Listen for new RSS downloads connect(manager->rssDownloader(), SIGNAL(downloadFinished(QString,QString)), SLOT(handleFinishedDownload(QString,QString))); connect(manager->rssDownloader(), SIGNAL(downloadFailure(QString,QString)), SLOT(handleDownloadFailure(QString,QString))); + connect(manager->rssParser(), SIGNAL(feedTitle(QString,QString)), SLOT(handleFeedTitle(QString,QString))); + connect(manager->rssParser(), SIGNAL(newArticle(QString,QVariantHash)), SLOT(handleNewArticle(QString,QVariantHash))); + connect(manager->rssParser(), SIGNAL(feedParsingFinished(QString,QString)), SLOT(handleFeedParsingFinished(QString,QString))); + // Download the RSS Feed icon m_iconUrl = iconUrl(); manager->rssDownloader()->downloadUrl(m_iconUrl); @@ -155,7 +160,7 @@ QString RssFeed::url() const { } QString RssFeed::icon() const { - if (m_downloadFailure) + if (m_inErrorState) return ":/Icons/oxygen/unavailable.png"; return m_icon; } @@ -220,117 +225,6 @@ QString RssFeed::iconUrl() const { return QString("http://")+QUrl(m_url).host()+QString("/favicon.ico"); } -void RssFeed::parseRSSChannel(QXmlStreamReader& xml) -{ - qDebug() << Q_FUNC_INFO; - Q_ASSERT(xml.isStartElement() && xml.name() == "channel"); - - while(!xml.atEnd()) { - xml.readNext(); - - if (xml.isStartElement()) { - if (xml.name() == "title") { - m_title = xml.readElementText(); - if (m_alias == url()) - rename(m_title); - } - else if (xml.name() == "image") { - QString icon_path = xml.attributes().value("url").toString(); - if (!icon_path.isEmpty()) { - m_iconUrl = icon_path; - m_manager->rssDownloader()->downloadUrl(m_iconUrl); - } - } - else if (xml.name() == "item") { - RssArticlePtr article = xmlToRssArticle(this, xml); - qDebug() << "Found RSS Item, valid: " << (article ? "True" : "False"); - if (article) { - QString guid = article->guid(); - if (m_articles.contains(guid) && m_articles[guid]->isRead()) - article->markAsRead(); - m_articles[guid] = article; - } - } - } - } -} - -// read and create items from a rss document -bool RssFeed::parseRSS(QIODevice* device) -{ - qDebug("Parsing RSS file..."); - QXmlStreamReader xml(device); - - bool found_channel = false; - while (xml.readNextStartElement()) { - if (xml.name() == "rss") { - // Find channels - while (xml.readNextStartElement()) { - if (xml.name() == "channel") { - parseRSSChannel(xml); - found_channel = true; - break; - } else { - qDebug() << "Skip rss item: " << xml.name(); - xml.skipCurrentElement(); - } - } - break; - } else { - qDebug() << "Skip root item: " << xml.name(); - xml.skipCurrentElement(); - } - } - - if (xml.hasError()) { - qWarning() << "Error parsing RSS document: " << xml.errorString(); - } - - if (!found_channel) { - qWarning() << m_url << " is not a valid RSS feed"; - return false; - } - - // Make sure we limit the number of articles - removeOldArticles(); - - // RSS Feed Downloader - if (RssSettings().isRssDownloadingEnabled()) - downloadMatchingArticleTorrents(); - - // Save items to disk (for safety) - saveItemsToDisk(); - - return true; -} - -void RssFeed::downloadMatchingArticleTorrents() { - Q_ASSERT(RssSettings().isRssDownloadingEnabled()); - RssDownloadRuleList *download_rules = m_manager->downloadRules(); - - RssArticleHash::ConstIterator it = m_articles.begin(); - RssArticleHash::ConstIterator itend = m_articles.end(); - for ( ; it != itend; ++it) { - RssArticlePtr article = it.value(); - // Skip read articles - if (article->isRead()) - continue; - // Check if the item should be automatically downloaded - RssDownloadRulePtr matching_rule = download_rules->findMatchingRule(m_url, article->title()); - if (matching_rule) { - // Torrent was downloaded, consider article as read - article->markAsRead(); - // Download the torrent - QString torrent_url = article->hasAttachment() ? article->torrentUrl() : article->link(); - QBtSession::instance()->addConsoleMessage(tr("Automatically downloading %1 torrent from %2 RSS feed...").arg(article->title()).arg(displayName())); - if (torrent_url.startsWith("magnet:", Qt::CaseInsensitive)) - QBtSession::instance()->addMagnetSkipAddDlg(torrent_url, matching_rule->savePath(), matching_rule->label()); - else - QBtSession::instance()->downloadUrlAndSkipDialog(torrent_url, matching_rule->savePath(), matching_rule->label()); - } - } -} - void RssFeed::removeOldArticles() { const uint max_articles = RssSettings().getRSSMaxArticlesPerFeed(); const uint nb_articles = m_articles.size(); @@ -344,37 +238,12 @@ void RssFeed::removeOldArticles() { } } -// existing and opening test after download -bool RssFeed::parseXmlFile(const QString &file_path) { - qDebug("openRss() called"); - QFile fileRss(file_path); - if (!fileRss.open(QIODevice::ReadOnly | QIODevice::Text)) { - qDebug("openRss error: open failed, no file or locked, %s", qPrintable(file_path)); - if (QFile::exists(file_path)) - fileRss.remove(); - return false; - } - - // start reading the xml - bool ret = parseRSS(&fileRss); - fileRss.close(); - if (QFile::exists(file_path)) - fileRss.remove(); - return ret; -} - // read and store the downloaded rss' informations void RssFeed::handleFinishedDownload(const QString& url, const QString &file_path) { if (url == m_url) { qDebug() << Q_FUNC_INFO << "Successfully downloaded RSS feed at" << url; - m_downloadFailure = false; - m_loading = false; // Parse the download RSS - if (parseXmlFile(file_path)) { - m_refreshed = true; - m_manager->forwardFeedInfosChanged(m_url, displayName(), unreadCount()); // XXX: Ugly - qDebug() << Q_FUNC_INFO << "Feed parsed successfully"; - } + m_manager->rssParser()->parseRssFile(m_url, file_path); } else if (url == m_iconUrl) { m_icon = file_path; @@ -385,9 +254,66 @@ void RssFeed::handleFinishedDownload(const QString& url, const QString &file_pat void RssFeed::handleDownloadFailure(const QString &url, const QString& error) { if (url != m_url) return; - m_downloadFailure = true; + m_inErrorState = true; m_loading = false; m_manager->forwardFeedInfosChanged(m_url, displayName(), unreadCount()); // XXX: Ugly qWarning() << "Failed to download RSS feed at" << url; qWarning() << "Reason:" << error; } + +void RssFeed::handleFeedTitle(const QString& feedUrl, const QString& title) +{ + if (feedUrl != m_url) + return; + + rename(title); +} + +void RssFeed::handleNewArticle(const QString& feedUrl, const QVariantHash& articleData) +{ + if (feedUrl != m_url) + return; + m_refreshed = true; + + const QString guid = articleData["id"].toString(); + if (m_articles.contains(guid)) + return; + + RssArticlePtr article = hashToRssArticle(this, articleData); + Q_ASSERT(article); + m_articles[guid] = article; + + // Download torrent if necessary. + if (RssSettings().isRssDownloadingEnabled()) { + RssDownloadRulePtr matching_rule = m_manager->downloadRules()->findMatchingRule(m_url, article->title()); + if (matching_rule) { + // Torrent was downloaded, consider article as read + article->markAsRead(); + // Download the torrent + QString torrent_url = article->hasAttachment() ? article->torrentUrl() : article->link(); + QBtSession::instance()->addConsoleMessage(tr("Automatically downloading %1 torrent from %2 RSS feed...").arg(article->title()).arg(displayName())); + if (torrent_url.startsWith("magnet:", Qt::CaseInsensitive)) + QBtSession::instance()->addMagnetSkipAddDlg(torrent_url, matching_rule->savePath(), matching_rule->label()); + else + QBtSession::instance()->downloadUrlAndSkipDialog(torrent_url, matching_rule->savePath(), matching_rule->label()); + } + } + // FIXME: We should forward the information here but this would seriously decrease + // performance with current design. + //m_manager->forwardFeedInfosChanged(m_url, displayName(), unreadCount()); // XXX: Ugly +} + +void RssFeed::handleFeedParsingFinished(const QString& feedUrl, const QString& error) +{ + if (feedUrl != m_url) + return; + + // Make sure we limit the number of articles + removeOldArticles(); + + m_loading = false; + m_inErrorState = !error.isEmpty(); + m_manager->forwardFeedInfosChanged(m_url, displayName(), unreadCount()); // XXX: Ugly + + saveItemsToDisk(); +} diff --git a/src/rss/rssfeed.h b/src/rss/rssfeed.h index 5a96cb2bd..eeb7fd522 100644 --- a/src/rss/rssfeed.h +++ b/src/rss/rssfeed.h @@ -33,6 +33,7 @@ #include #include +#include #include #include "rssfile.h" @@ -76,13 +77,12 @@ public: private slots: void handleFinishedDownload(const QString& url, const QString &file_path); void handleDownloadFailure(const QString &url, const QString& error); + void handleFeedTitle(const QString& feedUrl, const QString& title); + void handleNewArticle(const QString& feedUrl, const QVariantHash& article); + void handleFeedParsingFinished(const QString& feedUrl, const QString& error); private: - bool parseRSS(QIODevice* device); - void parseRSSChannel(QXmlStreamReader& xml); void removeOldArticles(); - bool parseXmlFile(const QString &file_path); - void downloadMatchingArticleTorrents(); QString iconUrl() const; void loadItemsFromDisk(); @@ -97,7 +97,7 @@ private: QString m_iconUrl; bool m_read; bool m_refreshed; - bool m_downloadFailure; + bool m_inErrorState; bool m_loading; }; diff --git a/src/rss/rssmanager.cpp b/src/rss/rssmanager.cpp index dbff5b857..d1c76f6a6 100644 --- a/src/rss/rssmanager.cpp +++ b/src/rss/rssmanager.cpp @@ -35,10 +35,13 @@ #include "rssfeed.h" #include "rssarticle.h" #include "rssdownloadrulelist.h" +#include "rssparser.h" #include "downloadthread.h" RssManager::RssManager(): - m_rssDownloader(new DownloadThread(this)), m_downloadRules(new RssDownloadRuleList) + m_rssDownloader(new DownloadThread(this)), + m_downloadRules(new RssDownloadRuleList), + m_rssParser(new RssParser(this)) { connect(&m_refreshTimer, SIGNAL(timeout()), this, SLOT(refresh())); m_refreshInterval = RssSettings().getRSSRefreshInterval(); @@ -48,16 +51,22 @@ RssManager::RssManager(): RssManager::~RssManager() { qDebug("Deleting RSSManager..."); delete m_downloadRules; + delete m_rssParser; saveItemsToDisk(); saveStreamList(); qDebug("RSSManager deleted"); } -DownloadThread *RssManager::rssDownloader() const +DownloadThread* RssManager::rssDownloader() const { return m_rssDownloader; } +RssParser* RssManager::rssParser() const +{ + return m_rssParser; +} + void RssManager::updateRefreshInterval(uint val) { if (m_refreshInterval != val) { m_refreshInterval = val; diff --git a/src/rss/rssmanager.h b/src/rss/rssmanager.h index 64e2b33ce..1de71f578 100644 --- a/src/rss/rssmanager.h +++ b/src/rss/rssmanager.h @@ -38,6 +38,7 @@ class DownloadThread; class RssDownloadRuleList; +class RssParser; class RssManager; typedef QSharedPointer RssManagerPtr; @@ -50,6 +51,7 @@ public: virtual ~RssManager(); DownloadThread* rssDownloader() const; + RssParser* rssParser() const; static void sortArticleListByDateDesc(RssArticleList& news_list); RssDownloadRuleList* downloadRules() const; @@ -71,6 +73,7 @@ private: uint m_refreshInterval; DownloadThread *m_rssDownloader; RssDownloadRuleList *m_downloadRules; + RssParser* m_rssParser; }; #endif // RSSMANAGER_H diff --git a/src/rss/rssparser.cpp b/src/rss/rssparser.cpp new file mode 100644 index 000000000..c0ad973b0 --- /dev/null +++ b/src/rss/rssparser.cpp @@ -0,0 +1,361 @@ +/* + * Bittorrent Client using Qt4 and libtorrent. + * Copyright (C) 2012 Christophe Dumez + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * In addition, as a special exception, the copyright holders give permission to + * link this program with the OpenSSL project's "OpenSSL" library (or with + * modified versions of it that use the same license as the "OpenSSL" library), + * and distribute the linked executables. You must obey the GNU General Public + * License in all respects for all of the code used other than "OpenSSL". If you + * modify file(s), you may extend this exception to your version of the file(s), + * but you are not obligated to do so. If you do not wish to do so, delete this + * exception statement from your version. + * + * Contact : chris@qbittorrent.org + */ + +#include "rssparser.h" +#include "downloadthread.h" +#include +#include +#include +#include +#include + +struct ParsingJob { + QString feedUrl; + QString filePath; +}; + +static const char shortDay[][4] = { + "Mon", "Tue", "Wed", + "Thu", "Fri", "Sat", + "Sun" +}; +static const char longDay[][10] = { + "Monday", "Tuesday", "Wednesday", + "Thursday", "Friday", "Saturday", + "Sunday" +}; +static const char shortMonth[][4] = { + "Jan", "Feb", "Mar", "Apr", + "May", "Jun", "Jul", "Aug", + "Sep", "Oct", "Nov", "Dec" +}; +static const char longMonth[][10] = { + "January", "February", "March", + "April", "May", "June", + "July", "August", "September", + "October", "November", "December" +}; + +// Ported to Qt4 from KDElibs4 +QDateTime RssParser::parseDate(const QString &string) { + const QString str = string.trimmed(); + if (str.isEmpty()) + return QDateTime::currentDateTime(); + + int nyear = 6; // indexes within string to values + int nmonth = 4; + int nday = 2; + int nwday = 1; + int nhour = 7; + int nmin = 8; + int nsec = 9; + // Also accept obsolete form "Weekday, DD-Mon-YY HH:MM:SS ±hhmm" + QRegExp rx("^(?:([A-Z][a-z]+),\\s*)?(\\d{1,2})(\\s+|-)([^-\\s]+)(\\s+|-)(\\d{2,4})\\s+(\\d\\d):(\\d\\d)(?::(\\d\\d))?\\s+(\\S+)$"); + QStringList parts; + if (!str.indexOf(rx)) { + // Check that if date has '-' separators, both separators are '-'. + parts = rx.capturedTexts(); + bool h1 = (parts[3] == QLatin1String("-")); + bool h2 = (parts[5] == QLatin1String("-")); + if (h1 != h2) + return QDateTime::currentDateTime(); + } else { + // Check for the obsolete form "Wdy Mon DD HH:MM:SS YYYY" + rx = QRegExp("^([A-Z][a-z]+)\\s+(\\S+)\\s+(\\d\\d)\\s+(\\d\\d):(\\d\\d):(\\d\\d)\\s+(\\d\\d\\d\\d)$"); + if (str.indexOf(rx)) + return QDateTime::currentDateTime(); + nyear = 7; + nmonth = 2; + nday = 3; + nwday = 1; + nhour = 4; + nmin = 5; + nsec = 6; + parts = rx.capturedTexts(); + } + bool ok[4]; + const int day = parts[nday].toInt(&ok[0]); + int year = parts[nyear].toInt(&ok[1]); + const int hour = parts[nhour].toInt(&ok[2]); + const int minute = parts[nmin].toInt(&ok[3]); + if (!ok[0] || !ok[1] || !ok[2] || !ok[3]) + return QDateTime::currentDateTime(); + int second = 0; + if (!parts[nsec].isEmpty()) { + second = parts[nsec].toInt(&ok[0]); + if (!ok[0]) + return QDateTime::currentDateTime(); + } + bool leapSecond = (second == 60); + if (leapSecond) + second = 59; // apparently a leap second - validate below, once time zone is known + int month = 0; + for ( ; month < 12 && parts[nmonth] != shortMonth[month]; ++month) ; + int dayOfWeek = -1; + if (!parts[nwday].isEmpty()) { + // Look up the weekday name + while (++dayOfWeek < 7 && shortDay[dayOfWeek] != parts[nwday]) ; + if (dayOfWeek >= 7) + for (dayOfWeek = 0; dayOfWeek < 7 && longDay[dayOfWeek] != parts[nwday]; ++dayOfWeek) ; + } + // if (month >= 12 || dayOfWeek >= 7 + // || (dayOfWeek < 0 && format == RFCDateDay)) + // return QDateTime; + int i = parts[nyear].size(); + if (i < 4) { + // It's an obsolete year specification with less than 4 digits + year += (i == 2 && year < 50) ? 2000: 1900; + } + + // Parse the UTC offset part + int offset = 0; // set default to '-0000' + bool negOffset = false; + if (parts.count() > 10) { + rx = QRegExp("^([+-])(\\d\\d)(\\d\\d)$"); + if (!parts[10].indexOf(rx)) { + // It's a UTC offset ±hhmm + parts = rx.capturedTexts(); + offset = parts[2].toInt(&ok[0]) * 3600; + int offsetMin = parts[3].toInt(&ok[1]); + if (!ok[0] || !ok[1] || offsetMin > 59) + return QDateTime(); + offset += offsetMin * 60; + negOffset = (parts[1] == QLatin1String("-")); + if (negOffset) + offset = -offset; + } else { + // Check for an obsolete time zone name + QByteArray zone = parts[10].toLatin1(); + if (zone.length() == 1 && isalpha(zone[0]) && toupper(zone[0]) != 'J') + negOffset = true; // military zone: RFC 2822 treats as '-0000' + else if (zone != "UT" && zone != "GMT") { // treated as '+0000' + offset = (zone == "EDT") ? -4*3600 + : (zone == "EST" || zone == "CDT") ? -5*3600 + : (zone == "CST" || zone == "MDT") ? -6*3600 + : (zone == "MST" || zone == "PDT") ? -7*3600 + : (zone == "PST") ? -8*3600 + : 0; + if (!offset) { + // Check for any other alphabetic time zone + bool nonalpha = false; + for (int i = 0, end = zone.size(); i < end && !nonalpha; ++i) + nonalpha = !isalpha(zone[i]); + if (nonalpha) + return QDateTime(); + // TODO: Attempt to recognize the time zone abbreviation? + negOffset = true; // unknown time zone: RFC 2822 treats as '-0000' + } + } + } + } + QDate qdate(year, month+1, day); // convert date, and check for out-of-range + if (!qdate.isValid()) + return QDateTime::currentDateTime(); + QDateTime result(qdate, QTime(hour, minute, second)); + if (!result.isValid() + || (dayOfWeek >= 0 && result.date().dayOfWeek() != dayOfWeek+1)) + return QDateTime::currentDateTime(); // invalid date/time, or weekday doesn't correspond with date + if (!offset) { + result.setTimeSpec(Qt::UTC); + } + if (leapSecond) { + // Validate a leap second time. Leap seconds are inserted after 23:59:59 UTC. + // Convert the time to UTC and check that it is 00:00:00. + if ((hour*3600 + minute*60 + 60 - offset + 86400*5) % 86400) // (max abs(offset) is 100 hours) + return QDateTime::currentDateTime(); // the time isn't the last second of the day + } + return result; +} + +RssParser::RssParser(QObject *parent) : + QThread(parent), m_running(true) +{ + start(); +} + +RssParser::~RssParser() +{ + m_running = false; + m_waitCondition.wakeOne(); + wait(); +} + +void RssParser::parseRssFile(const QString& feedUrl, const QString& filePath) +{ + qDebug() << Q_FUNC_INFO << feedUrl << filePath; + m_mutex.lock(); + ParsingJob job = {feedUrl, filePath}; + m_queue.enqueue(job); + // Wake up thread. + if (m_queue.count() == 1) { + qDebug() << Q_FUNC_INFO << "Waking up thread"; + m_waitCondition.wakeOne(); + } + m_mutex.unlock(); +} + +void RssParser::run() +{ + while (m_running) { + m_mutex.lock(); + if (!m_queue.empty()) { + ParsingJob job = m_queue.dequeue(); + m_mutex.unlock(); + parseRSS(job); + } else { + qDebug() << Q_FUNC_INFO << "Thread is waiting."; + m_waitCondition.wait(&m_mutex); + qDebug() << Q_FUNC_INFO << "Thread woke up."; + m_mutex.unlock(); + } + } +} + +void RssParser::parseRssArticle(QXmlStreamReader& xml, const QString& feedUrl) +{ + QVariantHash article; + + while(!xml.atEnd()) { + xml.readNext(); + + if(xml.isEndElement() && xml.name() == "item") + break; + + if (xml.isStartElement()) { + if (xml.name() == "title") + article["title"] = xml.readElementText(); + else if (xml.name() == "enclosure") { + if (xml.attributes().value("type") == "application/x-bittorrent") + article["torrent_url"] = xml.attributes().value("url").toString(); + } + else if (xml.name() == "link") + article["news_link"] = xml.readElementText(); + else if (xml.name() == "description") + article["description"] = xml.readElementText(); + else if (xml.name() == "pubDate") + article["date"] = parseDate(xml.readElementText()); + else if (xml.name() == "author") + article["author"] = xml.readElementText(); + else if (xml.name() == "guid") + article["id"] = xml.readElementText(); + } + } + + if (!article.contains("id")) { + // Item does not have a guid, fall back to some other identifier + const QString link = article.value("news_link").toString(); + if (!link.isEmpty()) + article["id"] = link; + else { + const QString title = article.value("title").toString(); + if (!title.isEmpty()) + article["id"] = title; + else { + qWarning() << "Item has no guid, link or title, ignoring it..."; + return; + } + } + } + + emit newArticle(feedUrl, article); +} + +void RssParser::parseRSSChannel(QXmlStreamReader& xml, const QString& feedUrl) +{ + qDebug() << Q_FUNC_INFO << feedUrl; + Q_ASSERT(xml.isStartElement() && xml.name() == "channel"); + + while(!xml.atEnd()) { + xml.readNext(); + + if (xml.isStartElement()) { + if (xml.name() == "title") { + QString title = xml.readElementText(); + emit feedTitle(feedUrl, title); + } + else if (xml.name() == "item") { + parseRssArticle(xml, feedUrl); + } + } + } +} + +// read and create items from a rss document +void RssParser::parseRSS(const ParsingJob& job) +{ + qDebug() << Q_FUNC_INFO << job.feedUrl << job.filePath; + QFile fileRss(job.filePath); + if (!fileRss.open(QIODevice::ReadOnly | QIODevice::Text)) { + reportFailure(job, tr("Failed to open downloaded RSS file.")); + return; + } + QXmlStreamReader xml(&fileRss); + + bool found_channel = false; + while (xml.readNextStartElement()) { + if (xml.name() == "rss") { + // Find channels + while (xml.readNextStartElement()) { + if (xml.name() == "channel") { + parseRSSChannel(xml, job.feedUrl); + found_channel = true; + break; + } else { + qDebug() << "Skip rss item: " << xml.name(); + xml.skipCurrentElement(); + } + } + break; + } else { + qDebug() << "Skip root item: " << xml.name(); + xml.skipCurrentElement(); + } + } + + if (xml.hasError()) { + reportFailure(job, xml.errorString()); + return; + } + + if (!found_channel) { + reportFailure(job, tr("Invalid RSS feed at %1.").arg(job.feedUrl)); + return; + } + + // Clean up + QFile::remove(job.filePath); + emit feedParsingFinished(job.feedUrl, QString()); +} + +void RssParser::reportFailure(const ParsingJob& job, const QString& error) +{ + QFile::remove(job.filePath); + emit feedParsingFinished(job.feedUrl, error); +} diff --git a/src/rss/rssparser.h b/src/rss/rssparser.h new file mode 100644 index 000000000..0289e47d3 --- /dev/null +++ b/src/rss/rssparser.h @@ -0,0 +1,73 @@ +/* + * Bittorrent Client using Qt4 and libtorrent. + * Copyright (C) 2012 Christophe Dumez + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * In addition, as a special exception, the copyright holders give permission to + * link this program with the OpenSSL project's "OpenSSL" library (or with + * modified versions of it that use the same license as the "OpenSSL" library), + * and distribute the linked executables. You must obey the GNU General Public + * License in all respects for all of the code used other than "OpenSSL". If you + * modify file(s), you may extend this exception to your version of the file(s), + * but you are not obligated to do so. If you do not wish to do so, delete this + * exception statement from your version. + * + * Contact : chris@qbittorrent.org + */ + +#ifndef RSSPARSER_H +#define RSSPARSER_H + +#include "rssarticle.h" +#include +#include +#include +#include + +struct ParsingJob; + +class RssParser : public QThread +{ + Q_OBJECT + +public: + explicit RssParser(QObject *parent = 0); + virtual ~RssParser(); + +signals: + void newArticle(const QString& feedUrl, const QVariantHash& rssArticle); + void feedTitle(const QString& feedUrl, const QString& title); + void feedParsingFinished(const QString& feedUrl, const QString& error); + +public slots: + void parseRssFile(const QString& feedUrl, const QString& filePath); + +protected: + virtual void run(); + static QDateTime parseDate(const QString& string); + void parseRssArticle(QXmlStreamReader& xml, const QString& feedUrl); + void parseRSSChannel(QXmlStreamReader& xml, const QString& feedUrl); + void parseRSS(const ParsingJob& job); + void reportFailure(const ParsingJob& job, const QString& error); + +private: + bool m_running; + QMutex m_mutex; + QQueue m_queue; + QWaitCondition m_waitCondition; +}; + +#endif // RSSPARSER_H