@ -37,7 +37,8 @@
@@ -37,7 +37,8 @@
# include "base/utils/fs.h"
# include "rssparser.h"
struct ParsingJob {
struct ParsingJob
{
QString feedUrl ;
QString filePath ;
} ;
@ -47,19 +48,22 @@ static const char shortDay[][4] = {
@@ -47,19 +48,22 @@ static const char shortDay[][4] = {
" Thu " , " Fri " , " Sat " ,
" Sun "
} ;
// Full English weekday names, Monday first. parseDate() falls back to this
// table when the weekday token does not match any abbreviated name.
// Each row holds 10 bytes: "Wednesday" (9 characters) plus the terminating NUL.
static const char longDay[][10] = {
    "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday",
    "Sunday"
};
// Three-letter English month abbreviations, January first. parseDate() scans
// this table to turn the month token into a zero-based index (it adds 1 when
// building the QDate).
// Each row holds 4 bytes: three letters plus the terminating NUL.
static const char shortMonth[][4] = {
    "Jan", "Feb", "Mar", "Apr",
    "May", "Jun", "Jul", "Aug",
    "Sep", "Oct", "Nov", "Dec"
};
// Ported to Qt4 from KDElibs4
QDateTime RssParser : : parseDate ( const QString & string ) {
// Ported to Qt from KDElibs4
QDateTime RssParser : : parseDate ( const QString & string )
{
const QString str = string . trimmed ( ) ;
if ( str . isEmpty ( ) )
return QDateTime : : currentDateTime ( ) ;
@ -81,7 +85,8 @@ QDateTime RssParser::parseDate(const QString &string) {
@@ -81,7 +85,8 @@ QDateTime RssParser::parseDate(const QString &string) {
bool h2 = ( parts [ 5 ] = = QLatin1String ( " - " ) ) ;
if ( h1 ! = h2 )
return QDateTime : : currentDateTime ( ) ;
} else {
}
else {
// Check for the obsolete form "Wdy Mon DD HH:MM:SS YYYY"
rx = QRegExp ( " ^([A-Z][a-z]+) \\ s+( \\ S+) \\ s+( \\ d \\ d) \\ s+( \\ d \\ d):( \\ d \\ d):( \\ d \\ d) \\ s+( \\ d \\ d \\ d \\ d)$ " ) ;
if ( str . indexOf ( rx ) )
@ -95,6 +100,7 @@ QDateTime RssParser::parseDate(const QString &string) {
@@ -95,6 +100,7 @@ QDateTime RssParser::parseDate(const QString &string) {
nsec = 6 ;
parts = rx . capturedTexts ( ) ;
}
bool ok [ 4 ] ;
const int day = parts [ nday ] . toInt ( & ok [ 0 ] ) ;
int year = parts [ nyear ] . toInt ( & ok [ 1 ] ) ;
@ -102,31 +108,34 @@ QDateTime RssParser::parseDate(const QString &string) {
@@ -102,31 +108,34 @@ QDateTime RssParser::parseDate(const QString &string) {
const int minute = parts [ nmin ] . toInt ( & ok [ 3 ] ) ;
if ( ! ok [ 0 ] | | ! ok [ 1 ] | | ! ok [ 2 ] | | ! ok [ 3 ] )
return QDateTime : : currentDateTime ( ) ;
int second = 0 ;
if ( ! parts [ nsec ] . isEmpty ( ) ) {
second = parts [ nsec ] . toInt ( & ok [ 0 ] ) ;
if ( ! ok [ 0 ] )
return QDateTime : : currentDateTime ( ) ;
}
bool leapSecond = ( second = = 60 ) ;
if ( leapSecond )
second = 59 ; // apparently a leap second - validate below, once time zone is known
int month = 0 ;
for ( ; month < 12 & & parts [ nmonth ] ! = shortMonth [ month ] ; + + month ) ;
for ( ; ( month < 12 ) & & ( parts [ nmonth ] ! = shortMonth [ month ] ) ; + + month ) ;
int dayOfWeek = - 1 ;
if ( ! parts [ nwday ] . isEmpty ( ) ) {
// Look up the weekday name
while ( + + dayOfWeek < 7 & & shortDay [ dayOfWeek ] ! = parts [ nwday ] ) ;
while ( + + dayOfWeek < 7 & & ( shortDay [ dayOfWeek ] ! = parts [ nwday ] ) ) ;
if ( dayOfWeek > = 7 )
for ( dayOfWeek = 0 ; dayOfWeek < 7 & & longDay [ dayOfWeek ] ! = parts [ nwday ] ; + + dayOfWeek ) ;
for ( dayOfWeek = 0 ; dayOfWeek < 7 & & ( longDay [ dayOfWeek ] ! = parts [ nwday ] ) ; + + dayOfWeek ) ;
}
// if (month >= 12 || dayOfWeek >= 7
// || (dayOfWeek < 0 && format == RFCDateDay))
// return QDateTime;
int i = parts [ nyear ] . size ( ) ;
if ( i < 4 ) {
// It's an obsolete year specification with less than 4 digits
year + = ( i = = 2 & & year < 50 ) ? 2000 : 1900 ;
year + = ( i = = 2 & & year < 50 ) ? 2000 : 1900 ;
}
// Parse the UTC offset part
@ -145,22 +154,29 @@ QDateTime RssParser::parseDate(const QString &string) {
@@ -145,22 +154,29 @@ QDateTime RssParser::parseDate(const QString &string) {
negOffset = ( parts [ 1 ] = = QLatin1String ( " - " ) ) ;
if ( negOffset )
offset = - offset ;
} else {
}
else {
// Check for an obsolete time zone name
QByteArray zone = parts [ 10 ] . toLatin1 ( ) ;
if ( zone . length ( ) = = 1 & & isalpha ( zone [ 0 ] ) & & toupper ( zone [ 0 ] ) ! = ' J ' )
if ( zone . length ( ) = = 1 & & isalpha ( zone [ 0 ] ) & & toupper ( zone [ 0 ] ) ! = ' J ' ) {
negOffset = true ; // military zone: RFC 2822 treats as '-0000'
}
else if ( zone ! = " UT " & & zone ! = " GMT " ) { // treated as '+0000'
offset = ( zone = = " EDT " ) ? - 4 * 3600
: ( zone = = " EST " | | zone = = " CDT " ) ? - 5 * 3600
: ( zone = = " CST " | | zone = = " MDT " ) ? - 6 * 3600
: ( zone = = " MST " | | zone = = " PDT " ) ? - 7 * 3600
: ( zone = = " PST " ) ? - 8 * 3600
offset = ( zone = = " EDT " )
? - 4 * 3600
: ( ( zone = = " EST " ) | | ( zone = = " CDT " ) )
? - 5 * 3600
: ( ( zone = = " CST " ) | | ( zone = = " MDT " ) )
? - 6 * 3600
: ( zone = = " MST " | | zone = = " PDT " )
? - 7 * 3600
: ( zone = = " PST " )
? - 8 * 3600
: 0 ;
if ( ! offset ) {
// Check for any other alphabetic time zone
bool nonalpha = false ;
for ( int i = 0 , end = zone . size ( ) ; i < end & & ! nonalpha ; + + i )
for ( int i = 0 , end = zone . size ( ) ; ( i < end ) & & ! nonalpha ; + + i )
nonalpha = ! isalpha ( zone [ i ] ) ;
if ( nonalpha )
return QDateTime ( ) ;
@ -170,11 +186,12 @@ QDateTime RssParser::parseDate(const QString &string) {
@@ -170,11 +186,12 @@ QDateTime RssParser::parseDate(const QString &string) {
}
}
}
QDate qdate ( year , month + 1 , day ) ; // convert date, and check for out-of-range
QDate qdate ( year , month + 1 , day ) ; // convert date, and check for out-of-range
if ( ! qdate . isValid ( ) )
return QDateTime : : currentDateTime ( ) ;
QTime qTime ( hour , minute , second ) ;
QTime qTime ( hour , minute , second ) ;
QDateTime result ( qdate , qTime , Qt : : UTC ) ;
if ( offset )
result = result . addSecs ( - offset ) ;
@ -187,11 +204,13 @@ QDateTime RssParser::parseDate(const QString &string) {
@@ -187,11 +204,13 @@ QDateTime RssParser::parseDate(const QString &string) {
if ( ( hour * 3600 + minute * 60 + 60 - offset + 86400 * 5 ) % 86400 ) // (max abs(offset) is 100 hours)
return QDateTime : : currentDateTime ( ) ; // the time isn't the last second of the day
}
return result ;
}
// Constructs the parser with the given parent, marks it as running and
// calls start() right away, so no separate start call is needed by the owner.
RssParser::RssParser(QObject *parent)
    : QThread(parent)
    , m_running(true)
{
    start();
}
@ -203,7 +222,7 @@ RssParser::~RssParser()
@@ -203,7 +222,7 @@ RssParser::~RssParser()
wait ( ) ;
}
void RssParser : : parseRssFile ( const QString & feedUrl , const QString & filePath )
void RssParser : : parseRssFile ( const QString & feedUrl , const QString & filePath )
{
qDebug ( ) < < Q_FUNC_INFO < < feedUrl < < filePath ;
m_mutex . lock ( ) ;
@ -232,7 +251,8 @@ void RssParser::run()
@@ -232,7 +251,8 @@ void RssParser::run()
ParsingJob job = m_queue . dequeue ( ) ;
m_mutex . unlock ( ) ;
parseFeed ( job ) ;
} else {
}
else {
qDebug ( ) < < Q_FUNC_INFO < < " Thread is waiting. " ;
m_waitCondition . wait ( & m_mutex ) ;
qDebug ( ) < < Q_FUNC_INFO < < " Thread woke up. " ;
@ -241,7 +261,7 @@ void RssParser::run()
@@ -241,7 +261,7 @@ void RssParser::run()
}
}
void RssParser : : parseRssArticle ( QXmlStreamReader & xml , const QString & feedUrl )
void RssParser : : parseRssArticle ( QXmlStreamReader & xml , const QString & feedUrl )
{
QVariantHash article ;
@ -252,8 +272,9 @@ void RssParser::parseRssArticle(QXmlStreamReader& xml, const QString& feedUrl)
@@ -252,8 +272,9 @@ void RssParser::parseRssArticle(QXmlStreamReader& xml, const QString& feedUrl)
break ;
if ( xml . isStartElement ( ) ) {
if ( xml . name ( ) = = " title " )
if ( xml . name ( ) = = " title " ) {
article [ " title " ] = xml . readElementText ( ) . trimmed ( ) ;
}
else if ( xml . name ( ) = = " enclosure " ) {
if ( xml . attributes ( ) . value ( " type " ) = = " application/x-bittorrent " )
article [ " torrent_url " ] = xml . attributes ( ) . value ( " url " ) . toString ( ) ;
@ -265,16 +286,20 @@ void RssParser::parseRssArticle(QXmlStreamReader& xml, const QString& feedUrl)
@@ -265,16 +286,20 @@ void RssParser::parseRssArticle(QXmlStreamReader& xml, const QString& feedUrl)
else
article [ " news_link " ] = link ;
}
else if ( xml . name ( ) = = " description " )
else if ( xml . name ( ) = = " description " ) {
article [ " description " ] = xml . readElementText ( ) . trimmed ( ) ;
else if ( xml . name ( ) = = " pubDate " )
}
else if ( xml . name ( ) = = " pubDate " ) {
article [ " date " ] = parseDate ( xml . readElementText ( ) . trimmed ( ) ) ;
else if ( xml . name ( ) = = " author " )
}
else if ( xml . name ( ) = = " author " ) {
article [ " author " ] = xml . readElementText ( ) . trimmed ( ) ;
else if ( xml . name ( ) = = " guid " )
}
else if ( xml . name ( ) = = " guid " ) {
article [ " id " ] = xml . readElementText ( ) . trimmed ( ) ;
}
}
}
if ( ! article . contains ( " torrent_url " ) & & article . contains ( " news_link " ) )
article [ " torrent_url " ] = article [ " news_link " ] ;
@ -282,12 +307,14 @@ void RssParser::parseRssArticle(QXmlStreamReader& xml, const QString& feedUrl)
@@ -282,12 +307,14 @@ void RssParser::parseRssArticle(QXmlStreamReader& xml, const QString& feedUrl)
if ( ! article . contains ( " id " ) ) {
// Item does not have a guid, fall back to some other identifier
const QString link = article . value ( " news_link " ) . toString ( ) ;
if ( ! link . isEmpty ( ) )
if ( ! link . isEmpty ( ) ) {
article [ " id " ] = link ;
}
else {
const QString title = article . value ( " title " ) . toString ( ) ;
if ( ! title . isEmpty ( ) )
if ( ! title . isEmpty ( ) ) {
article [ " id " ] = title ;
}
else {
qWarning ( ) < < " Item has no guid, link or title, ignoring it... " ;
return ;
@ -298,7 +325,7 @@ void RssParser::parseRssArticle(QXmlStreamReader& xml, const QString& feedUrl)
@@ -298,7 +325,7 @@ void RssParser::parseRssArticle(QXmlStreamReader& xml, const QString& feedUrl)
emit newArticle ( feedUrl , article ) ;
}
void RssParser : : parseRSSChannel ( QXmlStreamReader & xml , const QString & feedUrl )
void RssParser : : parseRSSChannel ( QXmlStreamReader & xml , const QString & feedUrl )
{
qDebug ( ) < < Q_FUNC_INFO < < feedUrl ;
Q_ASSERT ( xml . isStartElement ( ) & & xml . name ( ) = = " channel " ) ;
@ -329,15 +356,15 @@ void RssParser::parseRSSChannel(QXmlStreamReader& xml, const QString& feedUrl)
@@ -329,15 +356,15 @@ void RssParser::parseRSSChannel(QXmlStreamReader& xml, const QString& feedUrl)
}
}
void RssParser : : parseAtomArticle ( QXmlStreamReader & xml , const QString & feedUrl , const QString & baseUrl )
void RssParser : : parseAtomArticle ( QXmlStreamReader & xml , const QString & feedUrl , const QString & baseUrl )
{
QVariantHash article ;
bool double_c ontent = false ;
bool doubleC ontent = false ;
while ( ! xml . atEnd ( ) ) {
xml . readNext ( ) ;
if ( xml . isEndElement ( ) & & xml . name ( ) = = " entry " )
if ( xml . isEndElement ( ) & & ( xml . name ( ) = = " entry " ) )
break ;
if ( xml . isStartElement ( ) ) {
@ -358,11 +385,11 @@ void RssParser::parseAtomArticle(QXmlStreamReader& xml, const QString& feedUrl,
@@ -358,11 +385,11 @@ void RssParser::parseAtomArticle(QXmlStreamReader& xml, const QString& feedUrl,
article [ " news_link " ] = ( baseUrl . isEmpty ( ) ? link : baseUrl + link ) ;
}
else if ( xml . name ( ) = = " summary " | | xml . name ( ) = = " content " ) {
if ( double_c ontent) { // Duplicate content -> ignore
else if ( ( xml . name ( ) = = " summary " ) | | ( xml . name ( ) = = " content " ) ) {
if ( doubleC ontent ) { // Duplicate content -> ignore
xml . readNext ( ) ;
while ( xml . name ( ) ! = " summary " & & xml . name ( ) ! = " content " )
while ( ( xml . name ( ) ! = " summary " ) & & ( xml . name ( ) ! = " content " ) )
xml . readNext ( ) ;
continue ;
@ -374,14 +401,12 @@ void RssParser::parseAtomArticle(QXmlStreamReader& xml, const QString& feedUrl,
@@ -374,14 +401,12 @@ void RssParser::parseAtomArticle(QXmlStreamReader& xml, const QString& feedUrl,
if ( ! feedText . isEmpty ( ) )
article [ " description " ] = feedText . trimmed ( ) ;
double_c ontent = true ;
doubleC ontent = true ;
}
else if ( xml . name ( ) = = " updated " ) {
else if ( xml . name ( ) = = " updated " ) {
// ATOM uses standard compliant date, don't do fancy stuff
QDateTime articleDate = QDateTime : : fromString ( xml . readElementText ( ) . trimmed ( ) , Qt : : ISODate ) ;
article [ " date " ] = ( articleDate . isValid ( ) ?
articleDate :
QDateTime : : currentDateTime ( ) ) ;
article [ " date " ] = ( articleDate . isValid ( ) ? articleDate : QDateTime : : currentDateTime ( ) ) ;
}
else if ( xml . name ( ) = = " author " ) {
xml . readNext ( ) ;
@ -391,10 +416,11 @@ void RssParser::parseAtomArticle(QXmlStreamReader& xml, const QString& feedUrl,
@@ -391,10 +416,11 @@ void RssParser::parseAtomArticle(QXmlStreamReader& xml, const QString& feedUrl,
xml . readNext ( ) ;
}
}
else if ( xml . name ( ) = = " id " )
else if ( xml . name ( ) = = " id " ) {
article [ " id " ] = xml . readElementText ( ) . trimmed ( ) ;
}
}
}
if ( ! article . contains ( " torrent_url " ) & & article . contains ( " news_link " ) )
article [ " torrent_url " ] = article [ " news_link " ] ;
@ -402,12 +428,14 @@ void RssParser::parseAtomArticle(QXmlStreamReader& xml, const QString& feedUrl,
@@ -402,12 +428,14 @@ void RssParser::parseAtomArticle(QXmlStreamReader& xml, const QString& feedUrl,
if ( ! article . contains ( " id " ) ) {
// Item does not have a guid, fall back to some other identifier
const QString link = article . value ( " news_link " ) . toString ( ) ;
if ( ! link . isEmpty ( ) )
if ( ! link . isEmpty ( ) ) {
article [ " id " ] = link ;
}
else {
const QString title = article . value ( " title " ) . toString ( ) ;
if ( ! title . isEmpty ( ) )
if ( ! title . isEmpty ( ) ) {
article [ " id " ] = title ;
}
else {
qWarning ( ) < < " Item has no guid, link or title, ignoring it... " ;
return ;
@ -418,14 +446,14 @@ void RssParser::parseAtomArticle(QXmlStreamReader& xml, const QString& feedUrl,
@@ -418,14 +446,14 @@ void RssParser::parseAtomArticle(QXmlStreamReader& xml, const QString& feedUrl,
emit newArticle ( feedUrl , article ) ;
}
void RssParser : : parseAtomChannel ( QXmlStreamReader & xml , const QString & feedUrl )
void RssParser : : parseAtomChannel ( QXmlStreamReader & xml , const QString & feedUrl )
{
qDebug ( ) < < Q_FUNC_INFO < < feedUrl ;
Q_ASSERT ( xml . isStartElement ( ) & & xml . name ( ) = = " feed " ) ;
QString baseURL = xml . attributes ( ) . value ( " xml:base " ) . toString ( ) ;
while ( ! xml . atEnd ( ) ) {
while ( ! xml . atEnd ( ) ) {
xml . readNext ( ) ;
if ( xml . isStartElement ( ) ) {
@ -452,7 +480,7 @@ void RssParser::parseAtomChannel(QXmlStreamReader& xml, const QString& feedUrl)
@@ -452,7 +480,7 @@ void RssParser::parseAtomChannel(QXmlStreamReader& xml, const QString& feedUrl)
}
// read and create items from a rss document
void RssParser : : parseFeed ( const ParsingJob & job )
void RssParser : : parseFeed ( const ParsingJob & job )
{
qDebug ( ) < < Q_FUNC_INFO < < job . feedUrl < < job . filePath ;
QFile fileRss ( job . filePath ) ;
@ -460,18 +488,19 @@ void RssParser::parseFeed(const ParsingJob& job)
@@ -460,18 +488,19 @@ void RssParser::parseFeed(const ParsingJob& job)
reportFailure ( job , tr ( " Failed to open downloaded RSS file. " ) ) ;
return ;
}
QXmlStreamReader xml ( & fileRss ) ;
bool found_channel = false ;
QXmlStreamReader xml ( & fileRss ) ;
bool foundChannel = false ;
while ( xml . readNextStartElement ( ) ) {
if ( xml . name ( ) = = " rss " ) {
// Find channels
while ( xml . readNextStartElement ( ) ) {
if ( xml . name ( ) = = " channel " ) {
parseRSSChannel ( xml , job . feedUrl ) ;
found_c hannel = true ;
foundC hannel = true ;
break ;
} else {
}
else {
qDebug ( ) < < " Skip rss item: " < < xml . name ( ) ;
xml . skipCurrentElement ( ) ;
}
@ -480,9 +509,10 @@ void RssParser::parseFeed(const ParsingJob& job)
@@ -480,9 +509,10 @@ void RssParser::parseFeed(const ParsingJob& job)
}
else if ( xml . name ( ) = = " feed " ) { // Atom feed
parseAtomChannel ( xml , job . feedUrl ) ;
found_c hannel = true ;
foundC hannel = true ;
break ;
} else {
}
else {
qDebug ( ) < < " Skip root item: " < < xml . name ( ) ;
xml . skipCurrentElement ( ) ;
}
@ -493,7 +523,7 @@ void RssParser::parseFeed(const ParsingJob& job)
@@ -493,7 +523,7 @@ void RssParser::parseFeed(const ParsingJob& job)
return ;
}
if ( ! found_c hannel ) {
if ( ! foundC hannel ) {
reportFailure ( job , tr ( " Invalid RSS feed at '%1'. " ) . arg ( job . feedUrl ) ) ;
return ;
}
@ -504,7 +534,7 @@ void RssParser::parseFeed(const ParsingJob& job)
@@ -504,7 +534,7 @@ void RssParser::parseFeed(const ParsingJob& job)
Utils : : Fs : : forceRemove ( job . filePath ) ;
}
void RssParser : : reportFailure ( const ParsingJob & job , const QString & error )
void RssParser : : reportFailure ( const ParsingJob & job , const QString & error )
{
emit feedParsingFinished ( job . feedUrl , error ) ;
Utils : : Fs : : forceRemove ( job . filePath ) ;