@ -37,475 +37,505 @@
@@ -37,475 +37,505 @@
# include "base/utils/fs.h"
# include "rssparser.h"
struct ParsingJob {
QString feedUrl ;
QString filePath ;
struct ParsingJob
{
QString feedUrl ;
QString filePath ;
} ;
static const char shortDay [ ] [ 4 ] = {
" Mon " , " Tue " , " Wed " ,
" Thu " , " Fri " , " Sat " ,
" Sun "
" Mon " , " Tue " , " Wed " ,
" Thu " , " Fri " , " Sat " ,
" Sun "
} ;
static const char longDay [ ] [ 10 ] = {
" Monday " , " Tuesday " , " Wednesday " ,
" Thursday " , " Friday " , " Saturday " ,
" Sunday "
" Monday " , " Tuesday " , " Wednesday " ,
" Thursday " , " Friday " , " Saturday " ,
" Sunday "
} ;
static const char shortMonth [ ] [ 4 ] = {
" Jan " , " Feb " , " Mar " , " Apr " ,
" May " , " Jun " , " Jul " , " Aug " ,
" Sep " , " Oct " , " Nov " , " Dec "
" Jan " , " Feb " , " Mar " , " Apr " ,
" May " , " Jun " , " Jul " , " Aug " ,
" Sep " , " Oct " , " Nov " , " Dec "
} ;
// Ported to Qt4 from KDElibs4
QDateTime RssParser : : parseDate ( const QString & string ) {
const QString str = string . trimmed ( ) ;
if ( str . isEmpty ( ) )
return QDateTime : : currentDateTime ( ) ;
int nyear = 6 ; // indexes within string to values
int nmonth = 4 ;
int nday = 2 ;
int nwday = 1 ;
int nhour = 7 ;
int nmin = 8 ;
int nsec = 9 ;
// Also accept obsolete form "Weekday, DD-Mon-YY HH:MM:SS ±hhmm"
QRegExp rx ( " ^(?:([A-Z][a-z]+) , \ \ s * ) ? ( \ \ d { 1 , 2 } ) ( \ \ s + | - ) ( [ ^ - \ \ s ] + ) ( \ \ s + | - ) ( \ \ d { 2 , 4 } ) \ \ s + ( \ \ d \ \ d ) : ( \ \ d \ \ d ) ( ? : : ( \ \ d \ \ d ) ) ? \ \ s + ( \ \ S + ) $ " );
QStringList parts ;
if ( ! str . indexOf ( rx ) ) {
// Check that if date has '-' separators, both separators are '-'.
parts = rx . capturedTexts ( ) ;
bool h1 = ( parts [ 3 ] = = QLatin1String ( " - " ) ) ;
bool h2 = ( parts [ 5 ] = = QLatin1String ( " - " ) ) ;
if ( h1 ! = h2 )
return QDateTime : : currentDateTime ( ) ;
} else {
// Check for the obsolete form "Wdy Mon DD HH:MM:SS YYYY"
rx = QRegExp ( " ^([A-Z][a-z]+) \\ s+( \\ S+) \\ s+( \\ d \\ d) \\ s+( \\ d \\ d):( \\ d \\ d):( \\ d \\ d) \\ s+( \\ d \\ d \\ d \\ d)$ " ) ;
if ( str . indexOf ( rx ) )
return QDateTime : : currentDateTime ( ) ;
nyear = 7 ;
nmonth = 2 ;
nday = 3 ;
nwday = 1 ;
nhour = 4 ;
nmin = 5 ;
nsec = 6 ;
parts = rx . capturedTexts ( ) ;
}
bool ok [ 4 ] ;
const int day = parts [ nday ] . toInt ( & ok [ 0 ] ) ;
int year = parts [ nyear ] . toInt ( & ok [ 1 ] ) ;
const int hour = parts [ nhour ] . toInt ( & ok [ 2 ] ) ;
const int minute = parts [ nmin ] . toInt ( & ok [ 3 ] ) ;
if ( ! ok [ 0 ] | | ! ok [ 1 ] | | ! ok [ 2 ] | | ! ok [ 3 ] )
return QDateTime : : currentDateTime ( ) ;
int second = 0 ;
if ( ! parts [ nsec ] . isEmpty ( ) ) {
second = parts [ nsec ] . toInt ( & ok [ 0 ] ) ;
if ( ! ok [ 0 ] )
return QDateTime : : currentDateTime ( ) ;
}
bool leapSecond = ( second = = 60 ) ;
if ( leapSecond )
second = 59 ; // apparently a leap second - validate below, once time zone is known
int month = 0 ;
for ( ; month < 12 & & parts [ nmonth ] ! = shortMonth [ month ] ; + + month ) ;
int dayOfWeek = - 1 ;
if ( ! parts [ nwday ] . isEmpty ( ) ) {
// Look up the weekday name
while ( + + dayOfWeek < 7 & & shortDay [ dayOfWeek ] ! = parts [ nwday ] ) ;
if ( dayOfWeek > = 7 )
for ( dayOfWeek = 0 ; dayOfWeek < 7 & & longDay [ dayOfWeek ] ! = parts [ nwday ] ; + + dayOfWeek ) ;
}
// if (month >= 12 || dayOfWeek >= 7
// || (dayOfWeek < 0 && format == RFCDateDay))
// return QDateTime;
int i = parts [ nyear ] . size ( ) ;
if ( i < 4 ) {
// It's an obsolete year specification with less than 4 digits
year + = ( i = = 2 & & year < 50 ) ? 2000 : 1900 ;
}
// Parse the UTC offset part
int offset = 0 ; // set default to '-0000'
bool negOffset = false ;
if ( parts . count ( ) > 10 ) {
rx = QRegExp ( " ^([+-])( \\ d \\ d)( \\ d \\ d)$ " ) ;
if ( ! parts [ 10 ] . indexOf ( rx ) ) {
// It's a UTC offset ±hhmm
parts = rx . capturedTexts ( ) ;
offset = parts [ 2 ] . toInt ( & ok [ 0 ] ) * 3600 ;
int offsetMin = parts [ 3 ] . toInt ( & ok [ 1 ] ) ;
if ( ! ok [ 0 ] | | ! ok [ 1 ] | | offsetMin > 59 )
return QDateTime ( ) ;
offset + = offsetMin * 60 ;
negOffset = ( parts [ 1 ] = = QLatin1String ( " - " ) ) ;
if ( negOffset )
offset = - offset ;
} else {
// Check for an obsolete time zone name
QByteArray zone = parts [ 10 ] . toLatin1 ( ) ;
if ( zone . length ( ) = = 1 & & isalpha ( zone [ 0 ] ) & & toupper ( zone [ 0 ] ) ! = ' J ' )
negOffset = true ; // military zone: RFC 2822 treats as '-0000'
else if ( zone ! = " UT " & & zone ! = " GMT " ) { // treated as '+0000'
offset = ( zone = = " EDT " ) ? - 4 * 3600
: ( zone = = " EST " | | zone = = " CDT " ) ? - 5 * 3600
: ( zone = = " CST " | | zone = = " MDT " ) ? - 6 * 3600
: ( zone = = " MST " | | zone = = " PDT " ) ? - 7 * 3600
: ( zone = = " PST " ) ? - 8 * 3600
: 0 ;
if ( ! offset ) {
// Check for any other alphabetic time zone
bool nonalpha = false ;
for ( int i = 0 , end = zone . size ( ) ; i < end & & ! nonalpha ; + + i )
nonalpha = ! isalpha ( zone [ i ] ) ;
if ( nonalpha )
return QDateTime ( ) ;
// TODO: Attempt to recognize the time zone abbreviation?
negOffset = true ; // unknown time zone: RFC 2822 treats as '-0000'
// Ported to Qt from KDElibs4
QDateTime RssParser : : parseDate ( const QString & string )
{
const QString str = string . trimmed ( ) ;
if ( str . isEmpty ( ) )
return QDateTime : : currentDateTime ( ) ;
int nyear = 6 ; // indexes within string to values
int nmonth = 4 ;
int nday = 2 ;
int nwday = 1 ;
int nhour = 7 ;
int nmin = 8 ;
int nsec = 9 ;
// Also accept obsolete form "Weekday, DD-Mon-YY HH:MM:SS ±hhmm"
QRegExp rx ( " ^(?:([A-Z][a-z]+) , \ \ s * ) ? ( \ \ d { 1 , 2 } ) ( \ \ s + | - ) ( [ ^ - \ \ s ] + ) ( \ \ s + | - ) ( \ \ d { 2 , 4 } ) \ \ s + ( \ \ d \ \ d ) : ( \ \ d \ \ d ) ( ? : : ( \ \ d \ \ d ) ) ? \ \ s + ( \ \ S + ) $ " );
QStringList parts ;
if ( ! str . indexOf ( rx ) ) {
// Check that if date has '-' separators, both separators are '-'.
parts = rx . capturedTexts ( ) ;
bool h1 = ( parts [ 3 ] = = QLatin1String ( " - " ) ) ;
bool h2 = ( parts [ 5 ] = = QLatin1String ( " - " ) ) ;
if ( h1 ! = h2 )
return QDateTime : : currentDateTime ( ) ;
}
else {
// Check for the obsolete form "Wdy Mon DD HH:MM:SS YYYY"
rx = QRegExp ( " ^([A-Z][a-z]+) \\ s+( \\ S+) \\ s+( \\ d \\ d) \\ s+( \\ d \\ d):( \\ d \\ d):( \\ d \\ d) \\ s+( \\ d \\ d \\ d \\ d)$ " ) ;
if ( str . indexOf ( rx ) )
return QDateTime : : currentDateTime ( ) ;
nyear = 7 ;
nmonth = 2 ;
nday = 3 ;
nwday = 1 ;
nhour = 4 ;
nmin = 5 ;
nsec = 6 ;
parts = rx . capturedTexts ( ) ;
}
bool ok [ 4 ] ;
const int day = parts [ nday ] . toInt ( & ok [ 0 ] ) ;
int year = parts [ nyear ] . toInt ( & ok [ 1 ] ) ;
const int hour = parts [ nhour ] . toInt ( & ok [ 2 ] ) ;
const int minute = parts [ nmin ] . toInt ( & ok [ 3 ] ) ;
if ( ! ok [ 0 ] | | ! ok [ 1 ] | | ! ok [ 2 ] | | ! ok [ 3 ] )
return QDateTime : : currentDateTime ( ) ;
int second = 0 ;
if ( ! parts [ nsec ] . isEmpty ( ) ) {
second = parts [ nsec ] . toInt ( & ok [ 0 ] ) ;
if ( ! ok [ 0 ] )
return QDateTime : : currentDateTime ( ) ;
}
bool leapSecond = ( second = = 60 ) ;
if ( leapSecond )
second = 59 ; // apparently a leap second - validate below, once time zone is known
int month = 0 ;
for ( ; ( month < 12 ) & & ( parts [ nmonth ] ! = shortMonth [ month ] ) ; + + month ) ;
int dayOfWeek = - 1 ;
if ( ! parts [ nwday ] . isEmpty ( ) ) {
// Look up the weekday name
while ( + + dayOfWeek < 7 & & ( shortDay [ dayOfWeek ] ! = parts [ nwday ] ) ) ;
if ( dayOfWeek > = 7 )
for ( dayOfWeek = 0 ; dayOfWeek < 7 & & ( longDay [ dayOfWeek ] ! = parts [ nwday ] ) ; + + dayOfWeek ) ;
}
// if (month >= 12 || dayOfWeek >= 7
// || (dayOfWeek < 0 && format == RFCDateDay))
// return QDateTime;
int i = parts [ nyear ] . size ( ) ;
if ( i < 4 ) {
// It's an obsolete year specification with less than 4 digits
year + = ( i = = 2 & & year < 50 ) ? 2000 : 1900 ;
}
// Parse the UTC offset part
int offset = 0 ; // set default to '-0000'
bool negOffset = false ;
if ( parts . count ( ) > 10 ) {
rx = QRegExp ( " ^([+-])( \\ d \\ d)( \\ d \\ d)$ " ) ;
if ( ! parts [ 10 ] . indexOf ( rx ) ) {
// It's a UTC offset ±hhmm
parts = rx . capturedTexts ( ) ;
offset = parts [ 2 ] . toInt ( & ok [ 0 ] ) * 3600 ;
int offsetMin = parts [ 3 ] . toInt ( & ok [ 1 ] ) ;
if ( ! ok [ 0 ] | | ! ok [ 1 ] | | offsetMin > 59 )
return QDateTime ( ) ;
offset + = offsetMin * 60 ;
negOffset = ( parts [ 1 ] = = QLatin1String ( " - " ) ) ;
if ( negOffset )
offset = - offset ;
}
else {
// Check for an obsolete time zone name
QByteArray zone = parts [ 10 ] . toLatin1 ( ) ;
if ( zone . length ( ) = = 1 & & isalpha ( zone [ 0 ] ) & & toupper ( zone [ 0 ] ) ! = ' J ' ) {
negOffset = true ; // military zone: RFC 2822 treats as '-0000'
}
else if ( zone ! = " UT " & & zone ! = " GMT " ) { // treated as '+0000'
offset = ( zone = = " EDT " )
? - 4 * 3600
: ( ( zone = = " EST " ) | | ( zone = = " CDT " ) )
? - 5 * 3600
: ( ( zone = = " CST " ) | | ( zone = = " MDT " ) )
? - 6 * 3600
: ( zone = = " MST " | | zone = = " PDT " )
? - 7 * 3600
: ( zone = = " PST " )
? - 8 * 3600
: 0 ;
if ( ! offset ) {
// Check for any other alphabetic time zone
bool nonalpha = false ;
for ( int i = 0 , end = zone . size ( ) ; ( i < end ) & & ! nonalpha ; + + i )
nonalpha = ! isalpha ( zone [ i ] ) ;
if ( nonalpha )
return QDateTime ( ) ;
// TODO: Attempt to recognize the time zone abbreviation?
negOffset = true ; // unknown time zone: RFC 2822 treats as '-0000'
}
}
}
}
}
}
QDate qdate ( year , month + 1 , day ) ; // convert date, and check for out-of-range
if ( ! qdate . isValid ( ) )
return QDateTime : : currentDateTime ( ) ;
QTime qTime ( hour , minute , second ) ;
QDateTime result ( qdate , qTime , Qt : : UTC ) ;
if ( offset )
result = result . addSecs ( - offset ) ;
if ( ! result . isValid ( ) )
return QDateTime : : currentDateTime ( ) ; // invalid date/time
if ( leapSecond ) {
// Validate a leap second time. Leap seconds are inserted after 23:59:59 UTC.
// Convert the time to UTC and check that it is 00:00:00.
if ( ( hour * 3600 + minute * 60 + 60 - offset + 86400 * 5 ) % 86400 ) // (max abs(offset) is 100 hours)
return QDateTime : : currentDateTime ( ) ; // the time isn't the last second of the day
}
return result ;
QDate qdate ( year , month + 1 , day ) ; // convert date, and check for out-of-range
if ( ! qdate . isValid ( ) )
return QDateTime : : currentDateTime ( ) ;
QTime qTime ( hour , minute , second ) ;
QDateTime result ( qdate , qTime , Qt : : UTC ) ;
if ( offset )
result = result . addSecs ( - offset ) ;
if ( ! result . isValid ( ) )
return QDateTime : : currentDateTime ( ) ; // invalid date/time
if ( leapSecond ) {
// Validate a leap second time. Leap seconds are inserted after 23:59:59 UTC.
// Convert the time to UTC and check that it is 00:00:00.
if ( ( hour * 3600 + minute * 60 + 60 - offset + 86400 * 5 ) % 86400 ) // (max abs(offset) is 100 hours)
return QDateTime : : currentDateTime ( ) ; // the time isn't the last second of the day
}
return result ;
}
RssParser : : RssParser ( QObject * parent ) :
QThread ( parent ) , m_running ( true )
RssParser : : RssParser ( QObject * parent )
: QThread ( parent )
, m_running ( true )
{
start ( ) ;
start ( ) ;
}
RssParser : : ~ RssParser ( )
{
m_running = false ;
m_waitCondition . wakeOne ( ) ;
wait ( ) ;
m_running = false ;
m_waitCondition . wakeOne ( ) ;
wait ( ) ;
}
void RssParser : : parseRssFile ( const QString & feedUrl , const QString & filePath )
void RssParser : : parseRssFile ( const QString & feedUrl , const QString & filePath )
{
qDebug ( ) < < Q_FUNC_INFO < < feedUrl < < filePath ;
m_mutex . lock ( ) ;
ParsingJob job = { feedUrl , Utils : : Fs : : fromNativePath ( filePath ) } ;
m_queue . enqueue ( job ) ;
// Wake up thread.
if ( m_queue . count ( ) = = 1 ) {
qDebug ( ) < < Q_FUNC_INFO < < " Waking up thread " ;
m_waitCondition . wakeOne ( ) ;
}
m_mutex . unlock ( ) ;
qDebug ( ) < < Q_FUNC_INFO < < feedUrl < < filePath ;
m_mutex . lock ( ) ;
ParsingJob job = { feedUrl , Utils : : Fs : : fromNativePath ( filePath ) } ;
m_queue . enqueue ( job ) ;
// Wake up thread.
if ( m_queue . count ( ) = = 1 ) {
qDebug ( ) < < Q_FUNC_INFO < < " Waking up thread " ;
m_waitCondition . wakeOne ( ) ;
}
m_mutex . unlock ( ) ;
}
void RssParser : : clearFeedData ( const QString & feedUrl )
{
m_mutex . lock ( ) ;
m_lastBuildDates . remove ( feedUrl ) ;
m_mutex . unlock ( ) ;
m_mutex . lock ( ) ;
m_lastBuildDates . remove ( feedUrl ) ;
m_mutex . unlock ( ) ;
}
void RssParser : : run ( )
{
while ( m_running ) {
m_mutex . lock ( ) ;
if ( ! m_queue . empty ( ) ) {
ParsingJob job = m_queue . dequeue ( ) ;
m_mutex . unlock ( ) ;
parseFeed ( job ) ;
} else {
qDebug ( ) < < Q_FUNC_INFO < < " Thread is waiting. " ;
m_waitCondition . wait ( & m_mutex ) ;
qDebug ( ) < < Q_FUNC_INFO < < " Thread woke up. " ;
m_mutex . unlock ( ) ;
while ( m_running ) {
m_mutex . lock ( ) ;
if ( ! m_queue . empty ( ) ) {
ParsingJob job = m_queue . dequeue ( ) ;
m_mutex . unlock ( ) ;
parseFeed ( job ) ;
}
else {
qDebug ( ) < < Q_FUNC_INFO < < " Thread is waiting. " ;
m_waitCondition . wait ( & m_mutex ) ;
qDebug ( ) < < Q_FUNC_INFO < < " Thread woke up. " ;
m_mutex . unlock ( ) ;
}
}
}
}
void RssParser : : parseRssArticle ( QXmlStreamReader & xml , const QString & feedUrl )
void RssParser : : parseRssArticle ( QXmlStreamReader & xml , const QString & feedUrl )
{
QVariantHash article ;
while ( ! xml . atEnd ( ) ) {
xml . readNext ( ) ;
if ( xml . isEndElement ( ) & & xml . name ( ) = = " item " )
break ;
if ( xml . isStartElement ( ) ) {
if ( xml . name ( ) = = " title " )
article [ " title " ] = xml . readElementText ( ) . trimmed ( ) ;
else if ( xml . name ( ) = = " enclosure " ) {
if ( xml . attributes ( ) . value ( " type " ) = = " application/x-bittorrent " )
article [ " torrent_url " ] = xml . attributes ( ) . value ( " url " ) . toString ( ) ;
}
else if ( xml . name ( ) = = " link " ) {
QString link = xml . readElementText ( ) . trimmed ( ) ;
if ( link . startsWith ( " magnet: " , Qt : : CaseInsensitive ) )
article [ " torrent_url " ] = link ; // magnet link instead of a news URL
else
article [ " news_link " ] = link ;
}
else if ( xml . name ( ) = = " description " )
article [ " description " ] = xml . readElementText ( ) . trimmed ( ) ;
else if ( xml . name ( ) = = " pubDate " )
article [ " date " ] = parseDate ( xml . readElementText ( ) . trimmed ( ) ) ;
else if ( xml . name ( ) = = " author " )
article [ " author " ] = xml . readElementText ( ) . trimmed ( ) ;
else if ( xml . name ( ) = = " guid " )
article [ " id " ] = xml . readElementText ( ) . trimmed ( ) ;
QVariantHash article ;
while ( ! xml . atEnd ( ) ) {
xml . readNext ( ) ;
if ( xml . isEndElement ( ) & & xml . name ( ) = = " item " )
break ;
if ( xml . isStartElement ( ) ) {
if ( xml . name ( ) = = " title " ) {
article [ " title " ] = xml . readElementText ( ) . trimmed ( ) ;
}
else if ( xml . name ( ) = = " enclosure " ) {
if ( xml . attributes ( ) . value ( " type " ) = = " application/x-bittorrent " )
article [ " torrent_url " ] = xml . attributes ( ) . value ( " url " ) . toString ( ) ;
}
else if ( xml . name ( ) = = " link " ) {
QString link = xml . readElementText ( ) . trimmed ( ) ;
if ( link . startsWith ( " magnet: " , Qt : : CaseInsensitive ) )
article [ " torrent_url " ] = link ; // magnet link instead of a news URL
else
article [ " news_link " ] = link ;
}
else if ( xml . name ( ) = = " description " ) {
article [ " description " ] = xml . readElementText ( ) . trimmed ( ) ;
}
else if ( xml . name ( ) = = " pubDate " ) {
article [ " date " ] = parseDate ( xml . readElementText ( ) . trimmed ( ) ) ;
}
else if ( xml . name ( ) = = " author " ) {
article [ " author " ] = xml . readElementText ( ) . trimmed ( ) ;
}
else if ( xml . name ( ) = = " guid " ) {
article [ " id " ] = xml . readElementText ( ) . trimmed ( ) ;
}
}
}
}
if ( ! article . contains ( " torrent_url " ) & & article . contains ( " news_link " ) )
article [ " torrent_url " ] = article [ " news_link " ] ;
if ( ! article . contains ( " torrent_url " ) & & article . contains ( " news_link " ) )
article [ " torrent_url " ] = article [ " news_link " ] ;
if ( ! article . contains ( " id " ) ) {
// Item does not have a guid, fall back to some other identifier
const QString link = article . value ( " news_link " ) . toString ( ) ;
if ( ! link . isEmpty ( ) )
article [ " id " ] = link ;
else {
const QString title = article . value ( " title " ) . toString ( ) ;
if ( ! title . isEmpty ( ) )
article [ " id " ] = title ;
else {
qWarning ( ) < < " Item has no guid, link or title, ignoring it... " ;
return ;
}
if ( ! article . contains ( " id " ) ) {
// Item does not have a guid, fall back to some other identifier
const QString link = article . value ( " news_link " ) . toString ( ) ;
if ( ! link . isEmpty ( ) ) {
article [ " id " ] = link ;
}
else {
const QString title = article . value ( " title " ) . toString ( ) ;
if ( ! title . isEmpty ( ) ) {
article [ " id " ] = title ;
}
else {
qWarning ( ) < < " Item has no guid, link or title, ignoring it... " ;
return ;
}
}
}
}
emit newArticle ( feedUrl , article ) ;
emit newArticle ( feedUrl , article ) ;
}
void RssParser : : parseRSSChannel ( QXmlStreamReader & xml , const QString & feedUrl )
void RssParser : : parseRSSChannel ( QXmlStreamReader & xml , const QString & feedUrl )
{
qDebug ( ) < < Q_FUNC_INFO < < feedUrl ;
Q_ASSERT ( xml . isStartElement ( ) & & xml . name ( ) = = " channel " ) ;
while ( ! xml . atEnd ( ) ) {
xml . readNext ( ) ;
if ( xml . isStartElement ( ) ) {
if ( xml . name ( ) = = " title " ) {
QString title = xml . readElementText ( ) ;
emit feedTitle ( feedUrl , title ) ;
}
else if ( xml . name ( ) = = " lastBuildDate " ) {
QString lastBuildDate = xml . readElementText ( ) ;
if ( ! lastBuildDate . isEmpty ( ) ) {
QMutexLocker locker ( & m_mutex ) ;
if ( m_lastBuildDates . value ( feedUrl , " " ) = = lastBuildDate ) {
qDebug ( ) < < " The RSS feed has not changed since last time, aborting parsing. " ;
return ;
}
m_lastBuildDates [ feedUrl ] = lastBuildDate ;
qDebug ( ) < < Q_FUNC_INFO < < feedUrl ;
Q_ASSERT ( xml . isStartElement ( ) & & xml . name ( ) = = " channel " ) ;
while ( ! xml . atEnd ( ) ) {
xml . readNext ( ) ;
if ( xml . isStartElement ( ) ) {
if ( xml . name ( ) = = " title " ) {
QString title = xml . readElementText ( ) ;
emit feedTitle ( feedUrl , title ) ;
}
else if ( xml . name ( ) = = " lastBuildDate " ) {
QString lastBuildDate = xml . readElementText ( ) ;
if ( ! lastBuildDate . isEmpty ( ) ) {
QMutexLocker locker ( & m_mutex ) ;
if ( m_lastBuildDates . value ( feedUrl , " " ) = = lastBuildDate ) {
qDebug ( ) < < " The RSS feed has not changed since last time, aborting parsing. " ;
return ;
}
m_lastBuildDates [ feedUrl ] = lastBuildDate ;
}
}
else if ( xml . name ( ) = = " item " ) {
parseRssArticle ( xml , feedUrl ) ;
}
}
}
else if ( xml . name ( ) = = " item " ) {
parseRssArticle ( xml , feedUrl ) ;
}
}
}
}
void RssParser : : parseAtomArticle ( QXmlStreamReader & xml , const QString & feedUrl , const QString & baseUrl )
void RssParser : : parseAtomArticle ( QXmlStreamReader & xml , const QString & feedUrl , const QString & baseUrl )
{
QVariantHash article ;
bool double_content = false ;
while ( ! xml . atEnd ( ) ) {
xml . readNext ( ) ;
if ( xml . isEndElement ( ) & & xml . name ( ) = = " entry " )
break ;
if ( xml . isStartElement ( ) ) {
if ( xml . name ( ) = = " title " ) {
article [ " title " ] = xml . readElementText ( ) . trimmed ( ) ;
}
else if ( xml . name ( ) = = " link " ) {
QString link = ( xml . attributes ( ) . isEmpty ( ) ?
xml . readElementText ( ) . trimmed ( ) :
xml . attributes ( ) . value ( " href " ) . toString ( ) ) ;
if ( link . startsWith ( " magnet: " , Qt : : CaseInsensitive ) )
article [ " torrent_url " ] = link ; // magnet link instead of a news URL
else
// Atom feeds can have relative links, work around this and
// take the stress of figuring article full URI from UI
// Assemble full URI
article [ " news_link " ] = ( baseUrl . isEmpty ( ) ? link : baseUrl + link ) ;
}
else if ( xml . name ( ) = = " summary " | | xml . name ( ) = = " content " ) {
if ( double_content ) { // Duplicate content -> ignore
xml . readNext ( ) ;
while ( xml . name ( ) ! = " summary " & & xml . name ( ) ! = " content " )
xml . readNext ( ) ;
continue ;
}
QVariantHash article ;
bool doubleContent = false ;
// Try to also parse broken articles, which don't use html '&' escapes
// Actually works great for non-broken content too
QString feedText = xml . readElementText ( QXmlStreamReader : : IncludeChildElements ) ;
if ( ! feedText . isEmpty ( ) )
article [ " description " ] = feedText . trimmed ( ) ;
double_content = true ;
}
else if ( xml . name ( ) = = " updated " ) {
// ATOM uses standard compliant date, don't do fancy stuff
QDateTime articleDate = QDateTime : : fromString ( xml . readElementText ( ) . trimmed ( ) , Qt : : ISODate ) ;
article [ " date " ] = ( articleDate . isValid ( ) ?
articleDate :
QDateTime : : currentDateTime ( ) ) ;
}
else if ( xml . name ( ) = = " author " ) {
while ( ! xml . atEnd ( ) ) {
xml . readNext ( ) ;
while ( xml . name ( ) ! = " author " ) {
if ( xml . name ( ) = = " name " )
article [ " author " ] = xml . readElementText ( ) . trimmed ( ) ;
xml . readNext ( ) ;
if ( xml . isEndElement ( ) & & ( xml . name ( ) = = " entry " ) )
break ;
if ( xml . isStartElement ( ) ) {
if ( xml . name ( ) = = " title " ) {
article [ " title " ] = xml . readElementText ( ) . trimmed ( ) ;
}
else if ( xml . name ( ) = = " link " ) {
QString link = ( xml . attributes ( ) . isEmpty ( ) ?
xml . readElementText ( ) . trimmed ( ) :
xml . attributes ( ) . value ( " href " ) . toString ( ) ) ;
if ( link . startsWith ( " magnet: " , Qt : : CaseInsensitive ) )
article [ " torrent_url " ] = link ; // magnet link instead of a news URL
else
// Atom feeds can have relative links, work around this and
// take the stress of figuring article full URI from UI
// Assemble full URI
article [ " news_link " ] = ( baseUrl . isEmpty ( ) ? link : baseUrl + link ) ;
}
else if ( ( xml . name ( ) = = " summary " ) | | ( xml . name ( ) = = " content " ) ) {
if ( doubleContent ) { // Duplicate content -> ignore
xml . readNext ( ) ;
while ( ( xml . name ( ) ! = " summary " ) & & ( xml . name ( ) ! = " content " ) )
xml . readNext ( ) ;
continue ;
}
// Try to also parse broken articles, which don't use html '&' escapes
// Actually works great for non-broken content too
QString feedText = xml . readElementText ( QXmlStreamReader : : IncludeChildElements ) ;
if ( ! feedText . isEmpty ( ) )
article [ " description " ] = feedText . trimmed ( ) ;
doubleContent = true ;
}
else if ( xml . name ( ) = = " updated " ) {
// ATOM uses standard compliant date, don't do fancy stuff
QDateTime articleDate = QDateTime : : fromString ( xml . readElementText ( ) . trimmed ( ) , Qt : : ISODate ) ;
article [ " date " ] = ( articleDate . isValid ( ) ? articleDate : QDateTime : : currentDateTime ( ) ) ;
}
else if ( xml . name ( ) = = " author " ) {
xml . readNext ( ) ;
while ( xml . name ( ) ! = " author " ) {
if ( xml . name ( ) = = " name " )
article [ " author " ] = xml . readElementText ( ) . trimmed ( ) ;
xml . readNext ( ) ;
}
}
else if ( xml . name ( ) = = " id " ) {
article [ " id " ] = xml . readElementText ( ) . trimmed ( ) ;
}
}
}
else if ( xml . name ( ) = = " id " )
article [ " id " ] = xml . readElementText ( ) . trimmed ( ) ;
}
}
if ( ! article . contains ( " torrent_url " ) & & article . contains ( " news_link " ) )
article [ " torrent_url " ] = article [ " news_link " ] ;
if ( ! article . contains ( " torrent_url " ) & & article . contains ( " news_link " ) )
article [ " torrent_url " ] = article [ " news_link " ] ;
if ( ! article . contains ( " id " ) ) {
// Item does not have a guid, fall back to some other identifier
const QString link = article . value ( " news_link " ) . toString ( ) ;
if ( ! link . isEmpty ( ) )
article [ " id " ] = link ;
else {
const QString title = article . value ( " title " ) . toString ( ) ;
if ( ! title . isEmpty ( ) )
article [ " id " ] = title ;
else {
qWarning ( ) < < " Item has no guid, link or title, ignoring it... " ;
return ;
}
if ( ! article . contains ( " id " ) ) {
// Item does not have a guid, fall back to some other identifier
const QString link = article . value ( " news_link " ) . toString ( ) ;
if ( ! link . isEmpty ( ) ) {
article [ " id " ] = link ;
}
else {
const QString title = article . value ( " title " ) . toString ( ) ;
if ( ! title . isEmpty ( ) ) {
article [ " id " ] = title ;
}
else {
qWarning ( ) < < " Item has no guid, link or title, ignoring it... " ;
return ;
}
}
}
}
emit newArticle ( feedUrl , article ) ;
emit newArticle ( feedUrl , article ) ;
}
void RssParser : : parseAtomChannel ( QXmlStreamReader & xml , const QString & feedUrl )
void RssParser : : parseAtomChannel ( QXmlStreamReader & xml , const QString & feedUrl )
{
qDebug ( ) < < Q_FUNC_INFO < < feedUrl ;
Q_ASSERT ( xml . isStartElement ( ) & & xml . name ( ) = = " feed " ) ;
QString baseURL = xml . attributes ( ) . value ( " xml:base " ) . toString ( ) ;
while ( ! xml . atEnd ( ) ) {
xml . readNext ( ) ;
if ( xml . isStartElement ( ) ) {
if ( xml . name ( ) = = " title " ) {
QString title = xml . readElementText ( ) ;
emit feedTitle ( feedUrl , title ) ;
}
else if ( xml . name ( ) = = " updated " ) {
QString lastBuildDate = xml . readElementText ( ) ;
if ( ! lastBuildDate . isEmpty ( ) ) {
QMutexLocker locker ( & m_mutex ) ;
if ( m_lastBuildDates . value ( feedUrl ) = = lastBuildDate ) {
qDebug ( ) < < " The RSS feed has not changed since last time, aborting parsing. " ;
return ;
}
m_lastBuildDates [ feedUrl ] = lastBuildDate ;
qDebug ( ) < < Q_FUNC_INFO < < feedUrl ;
Q_ASSERT ( xml . isStartElement ( ) & & xml . name ( ) = = " feed " ) ;
QString baseURL = xml . attributes ( ) . value ( " xml:base " ) . toString ( ) ;
while ( ! xml . atEnd ( ) ) {
xml . readNext ( ) ;
if ( xml . isStartElement ( ) ) {
if ( xml . name ( ) = = " title " ) {
QString title = xml . readElementText ( ) ;
emit feedTitle ( feedUrl , title ) ;
}
else if ( xml . name ( ) = = " updated " ) {
QString lastBuildDate = xml . readElementText ( ) ;
if ( ! lastBuildDate . isEmpty ( ) ) {
QMutexLocker locker ( & m_mutex ) ;
if ( m_lastBuildDates . value ( feedUrl ) = = lastBuildDate ) {
qDebug ( ) < < " The RSS feed has not changed since last time, aborting parsing. " ;
return ;
}
m_lastBuildDates [ feedUrl ] = lastBuildDate ;
}
}
else if ( xml . name ( ) = = " entry " ) {
parseAtomArticle ( xml , feedUrl , baseURL ) ;
}
}
}
else if ( xml . name ( ) = = " entry " ) {
parseAtomArticle ( xml , feedUrl , baseURL ) ;
}
}
}
}
// read and create items from a rss document
void RssParser : : parseFeed ( const ParsingJob & job )
void RssParser : : parseFeed ( const ParsingJob & job )
{
qDebug ( ) < < Q_FUNC_INFO < < job . feedUrl < < job . filePath ;
QFile fileRss ( job . filePath ) ;
if ( ! fileRss . open ( QIODevice : : ReadOnly | QIODevice : : Text ) ) {
reportFailure ( job , tr ( " Failed to open downloaded RSS file. " ) ) ;
return ;
}
QXmlStreamReader xml ( & fileRss ) ;
bool found_channel = false ;
while ( xml . readNextStartElement ( ) ) {
if ( xml . name ( ) = = " rss " ) {
// Find channels
while ( xml . readNextStartElement ( ) ) {
if ( xml . name ( ) = = " channel " ) {
parseRSSChannel ( xml , job . feedUrl ) ;
found_channel = true ;
break ;
} else {
qDebug ( ) < < " Skip rss item: " < < xml . name ( ) ;
xml . skipCurrentElement ( ) ;
qDebug ( ) < < Q_FUNC_INFO < < job . feedUrl < < job . filePath ;
QFile fileRss ( job . filePath ) ;
if ( ! fileRss . open ( QIODevice : : ReadOnly | QIODevice : : Text ) ) {
reportFailure ( job , tr ( " Failed to open downloaded RSS file. " ) ) ;
return ;
}
QXmlStreamReader xml ( & fileRss ) ;
bool foundChannel = false ;
while ( xml . readNextStartElement ( ) ) {
if ( xml . name ( ) = = " rss " ) {
// Find channels
while ( xml . readNextStartElement ( ) ) {
if ( xml . name ( ) = = " channel " ) {
parseRSSChannel ( xml , job . feedUrl ) ;
foundChannel = true ;
break ;
}
else {
qDebug ( ) < < " Skip rss item: " < < xml . name ( ) ;
xml . skipCurrentElement ( ) ;
}
}
break ;
}
else if ( xml . name ( ) = = " feed " ) { // Atom feed
parseAtomChannel ( xml , job . feedUrl ) ;
foundChannel = true ;
break ;
}
else {
qDebug ( ) < < " Skip root item: " < < xml . name ( ) ;
xml . skipCurrentElement ( ) ;
}
}
break ;
}
else if ( xml . name ( ) = = " feed " ) { // Atom feed
parseAtomChannel ( xml , job . feedUrl ) ;
found_channel = true ;
break ;
} else {
qDebug ( ) < < " Skip root item: " < < xml . name ( ) ;
xml . skipCurrentElement ( ) ;
if ( xml . hasError ( ) ) {
reportFailure ( job , xml . errorString ( ) ) ;
return ;
}
}
if ( xml . hasError ( ) ) {
reportFailure ( job , xml . errorString ( ) ) ;
return ;
}
if ( ! found_channel ) {
reportFailure ( job , tr ( " Invalid RSS feed at '%1'. " ) . arg ( job . feedUrl ) ) ;
return ;
}
// Clean up
fileRss . close ( ) ;
emit feedParsingFinished ( job . feedUrl , QString ( ) ) ;
Utils : : Fs : : forceRemove ( job . filePath ) ;
if ( ! foundChannel ) {
reportFailure ( job , tr ( " Invalid RSS feed at '%1'. " ) . arg ( job . feedUrl ) ) ;
return ;
}
// Clean up
fileRss . close ( ) ;
emit feedParsingFinished ( job . feedUrl , QString ( ) ) ;
Utils : : Fs : : forceRemove ( job . filePath ) ;
}
void RssParser : : reportFailure ( const ParsingJob & job , const QString & error )
void RssParser : : reportFailure ( const ParsingJob & job , const QString & error )
{
emit feedParsingFinished ( job . feedUrl , error ) ;
Utils : : Fs : : forceRemove ( job . filePath ) ;
emit feedParsingFinished ( job . feedUrl , error ) ;
Utils : : Fs : : forceRemove ( job . filePath ) ;
}