Browse Source

Fix regex RSS matching. Closes #6337.

--HG--
branch : magao-dev
adaptive-webui-19844
Tim Delaney 8 years ago
parent
commit
c0339d4f6a
  1. 99
      src/base/rss/rssdownloadrule.cpp
  2. 2
      src/base/rss/rssdownloadrule.h
  3. 15
      src/gui/rss/automatedrssdownloader.cpp

99
src/base/rss/rssdownloadrule.cpp

@ -50,48 +50,51 @@ DownloadRule::DownloadRule() @@ -50,48 +50,51 @@ DownloadRule::DownloadRule()
{
}
bool DownloadRule::matches(const QString &articleTitle) const
bool DownloadRule::matches(const QString &articleTitle, const QString &expression) const
{
QRegExp whitespace("\\s+");
static QRegExp whitespace("\\s+");
if (expression.isEmpty()) {
// A regex of the form "expr|" will always match, so do the same for wildcards
return true;
}
else if (m_useRegex) {
QRegExp reg(expression, Qt::CaseInsensitive, QRegExp::RegExp);
return reg.indexIn(articleTitle) > -1;
}
else {
// Only match if every wildcard token (separated by spaces) is present in the article name.
// Order of wildcard tokens is unimportant (if order is important, they should have used *).
foreach (const QString &wildcard, expression.split(whitespace, QString::SplitBehavior::SkipEmptyParts)) {
QRegExp reg(wildcard, Qt::CaseInsensitive, QRegExp::Wildcard);
if (reg.indexIn(articleTitle) == -1)
return false;
}
}
return true;
}
bool DownloadRule::matches(const QString &articleTitle) const
{
if (!m_mustContain.empty()) {
bool logged = false;
bool foundMustContain = true;
bool foundMustContain = false;
// Each expression is either a regex, or a set of wildcards separated by whitespace.
// Accept if any complete expression matches.
foreach (const QString &expression, m_mustContain) {
if (expression.isEmpty())
continue;
if (!logged) {
qDebug() << "Checking matching expressions:" << m_mustContain.join("|");
qDebug() << "Checking matching" << (m_useRegex ? "regex:" : "wildcard expressions:") << m_mustContain.join("|");
logged = true;
}
if (m_useRegex) {
QRegExp reg(expression, Qt::CaseInsensitive, QRegExp::RegExp);
if (reg.indexIn(articleTitle) > -1)
foundMustContain = true;
}
else {
// Only accept if every wildcard token (separated by spaces) is present in the article name.
// Order of wildcard tokens is unimportant (if order is important, they should have used *).
foundMustContain = true;
foreach (const QString &wildcard, expression.split(whitespace, QString::SplitBehavior::SkipEmptyParts)) {
QRegExp reg(wildcard, Qt::CaseInsensitive, QRegExp::Wildcard);
if (reg.indexIn(articleTitle) == -1) {
foundMustContain = false;
break;
}
}
}
// A regex of the form "expr|" will always match, so do the same for wildcards
foundMustContain = matches(articleTitle, expression);
if (foundMustContain) {
qDebug() << "Found matching expression:" << expression;
qDebug() << "Found matching" << (m_useRegex ? "regex:" : "wildcard expression:") << expression;
break;
}
}
@ -106,38 +109,14 @@ bool DownloadRule::matches(const QString &articleTitle) const @@ -106,38 +109,14 @@ bool DownloadRule::matches(const QString &articleTitle) const
// Each expression is either a regex, or a set of wildcards separated by whitespace.
// Reject if any complete expression matches.
foreach (const QString &expression, m_mustNotContain) {
if (expression.isEmpty())
continue;
if (!logged) {
qDebug() << "Checking not matching expressions:" << m_mustNotContain.join("|");
qDebug() << "Checking not matching" << (m_useRegex ? "regex:" : "wildcard expressions:") << m_mustNotContain.join("|");
logged = true;
}
if (m_useRegex) {
QRegExp reg(expression, Qt::CaseInsensitive, QRegExp::RegExp);
if (reg.indexIn(articleTitle) > -1) {
qDebug() << "Found not matching expression:" << expression;
return false;
}
}
// Only reject if every wildcard token (separated by spaces) is present in the article name.
// Order of wildcard tokens is unimportant (if order is important, they should have used *).
bool foundMustNotContain = true;
foreach (const QString &wildcard, expression.split(whitespace, QString::SplitBehavior::SkipEmptyParts)) {
QRegExp reg(wildcard, Qt::CaseInsensitive, QRegExp::Wildcard);
if (reg.indexIn(articleTitle) == -1) {
foundMustNotContain = false;
break;
}
}
if (foundMustNotContain) {
qDebug()<< "Found not matching expression:" << expression;
// A regex of the form "expr|" will always match, so do the same for wildcards
if (matches(articleTitle, expression)) {
qDebug() << "Found not matching" << (m_useRegex ? "regex:" : "wildcard expression:") << expression;
return false;
}
}
@ -241,6 +220,10 @@ void DownloadRule::setMustContain(const QString &tokens) @@ -241,6 +220,10 @@ void DownloadRule::setMustContain(const QString &tokens)
m_mustContain = QStringList() << tokens;
else
m_mustContain = tokens.split("|");
// Check for single empty string - if so, no condition
if ((m_mustContain.size() == 1) && m_mustContain[0].isEmpty())
m_mustContain.clear();
}
void DownloadRule::setMustNotContain(const QString &tokens)
@ -249,6 +232,10 @@ void DownloadRule::setMustNotContain(const QString &tokens) @@ -249,6 +232,10 @@ void DownloadRule::setMustNotContain(const QString &tokens)
m_mustNotContain = QStringList() << tokens;
else
m_mustNotContain = tokens.split("|");
// Check for single empty string - if so, no condition
if ((m_mustNotContain.size() == 1) && m_mustNotContain[0].isEmpty())
m_mustNotContain.clear();
}
QStringList DownloadRule::rssFeeds() const

2
src/base/rss/rssdownloadrule.h

@ -88,6 +88,8 @@ namespace Rss @@ -88,6 +88,8 @@ namespace Rss
bool operator==(const DownloadRule &other) const;
private:
bool matches(const QString &articleTitle, const QString &expression) const;
QString m_name;
QStringList m_mustContain;
QStringList m_mustNotContain;

15
src/gui/rss/automatedrssdownloader.cpp

@ -673,8 +673,6 @@ void AutomatedRssDownloader::updateFieldsToolTips(bool regex) @@ -673,8 +673,6 @@ void AutomatedRssDownloader::updateFieldsToolTips(bool regex)
QString tip;
if (regex) {
tip = "<p>" + tr("Regex mode: use Perl-like regular expressions") + "</p>";
ui->lineContains->setToolTip(tip);
ui->lineNotContains->setToolTip(tip);
}
else {
tip = "<p>" + tr("Wildcard mode: you can use") + "<ul>"
@ -683,9 +681,18 @@ void AutomatedRssDownloader::updateFieldsToolTips(bool regex) @@ -683,9 +681,18 @@ void AutomatedRssDownloader::updateFieldsToolTips(bool regex)
+ "<li>" + tr("Whitespaces count as AND operators (all words, any order)") + "</li>"
+ "<li>" + tr("| is used as OR operator") + "</li></ul></p>"
+ "<p>" + tr("If word order is important use * instead of whitespace.") + "</p>";
ui->lineContains->setToolTip(tip);
ui->lineNotContains->setToolTip(tip);
}
// Whether regex or wildcard, warn about a potential gotcha for users.
// Explanatory string broken over multiple lines for readability (and multiple
// statements to prevent uncrustify indenting excessively.
tip += "<p>";
tip += tr("An expression with an empty %1 clause (e.g. %2)",
"We talk about regex/wildcards in the RSS filters section here."
" So a valid sentence would be: An expression with an empty | clause (e.g. expr|)"
).arg("<tt>|</tt>").arg("<tt>expr|</tt>");
ui->lineContains->setToolTip(tip + tr(" will match all articles.") + "</p>");
ui->lineNotContains->setToolTip(tip + tr(" will exclude all articles.") + "</p>");
}
void AutomatedRssDownloader::updateMustLineValidity()

Loading…
Cancel
Save