From f9abd254f48de58b2f6e057ea5af870ab8eff1b6 Mon Sep 17 00:00:00 2001 From: Tim Delaney Date: Sat, 11 Feb 2017 16:33:18 +1100 Subject: [PATCH 1/2] Use Perl-compatible regexes for RSS rules. Closes #6367. --HG-- branch : magao-dev --- src/base/rss/rssdownloadrule.cpp | 63 ++++++++++++++------------ src/base/rss/rssdownloadrule.h | 6 ++- src/base/utils/string.cpp | 11 +++++ src/base/utils/string.h | 2 + src/gui/rss/automatedrssdownloader.cpp | 37 +++++++++++---- src/gui/rss/automatedrssdownloader.h | 4 +- 6 files changed, 81 insertions(+), 42 deletions(-) diff --git a/src/base/rss/rssdownloadrule.cpp b/src/base/rss/rssdownloadrule.cpp index eef217f0d..5f897a3d7 100644 --- a/src/base/rss/rssdownloadrule.cpp +++ b/src/base/rss/rssdownloadrule.cpp @@ -28,14 +28,16 @@ * Contact : chris@qbittorrent.org */ -#include #include #include +#include +#include #include #include #include "base/preferences.h" #include "base/utils/fs.h" +#include "base/utils/string.h" #include "rssfeed.h" #include "rssarticle.h" #include "rssdownloadrule.h" @@ -52,23 +54,23 @@ DownloadRule::DownloadRule() bool DownloadRule::matches(const QString &articleTitle, const QString &expression) const { - static QRegExp whitespace("\\s+"); + static QRegularExpression whitespace("\\s+"); if (expression.isEmpty()) { // A regex of the form "expr|" will always match, so do the same for wildcards return true; } else if (m_useRegex) { - QRegExp reg(expression, Qt::CaseInsensitive, QRegExp::RegExp); - return reg.indexIn(articleTitle) > -1; + QRegularExpression reg(expression, QRegularExpression::CaseInsensitiveOption); + return reg.match(articleTitle).hasMatch(); } else { // Only match if every wildcard token (separated by spaces) is present in the article name. // Order of wildcard tokens is unimportant (if order is important, they should have used *). foreach (const QString &wildcard, expression.split(whitespace, QString::SplitBehavior::SkipEmptyParts)) { - QRegExp reg(wildcard, Qt::CaseInsensitive, QRegExp::Wildcard); + QRegularExpression reg(Utils::String::wildcardToRegex(wildcard), QRegularExpression::CaseInsensitiveOption); - if (reg.indexIn(articleTitle) == -1) + if (!reg.match(articleTitle).hasMatch()) return false; } } @@ -124,14 +126,15 @@ bool DownloadRule::matches(const QString &articleTitle) const if (!m_episodeFilter.isEmpty()) { qDebug() << "Checking episode filter:" << m_episodeFilter; - QRegExp f("(^\\d{1,4})x(.*;$)"); - int pos = f.indexIn(m_episodeFilter); + QRegularExpression f("(^\\d{1,4})x(.*;$)"); + QRegularExpressionMatch matcher = f.match(m_episodeFilter); + bool matched = matcher.hasMatch(); - if (pos < 0) + if (!matched) return false; - QString s = f.cap(1); - QStringList eps = f.cap(2).split(";"); + QString s = matcher.captured(1); + QStringList eps = matcher.captured(2).split(";"); int sOurs = s.toInt(); foreach (QString ep, eps) { @@ -145,22 +148,24 @@ bool DownloadRule::matches(const QString &articleTitle) const if (ep.indexOf('-') != -1) { // Range detected QString partialPattern1 = "\\bs0?(\\d{1,4})[ -_\\.]?e(0?\\d{1,4})(?:\\D|\\b)"; QString partialPattern2 = "\\b(\\d{1,4})x(0?\\d{1,4})(?:\\D|\\b)"; - QRegExp reg(partialPattern1, Qt::CaseInsensitive); + QRegularExpression reg(partialPattern1, QRegularExpression::CaseInsensitiveOption); if (ep.endsWith('-')) { // Infinite range int epOurs = ep.left(ep.size() - 1).toInt(); // Extract partial match from article and compare as digits - pos = reg.indexIn(articleTitle); + matcher = reg.match(articleTitle); + matched = matcher.hasMatch(); - if (pos == -1) { - reg = QRegExp(partialPattern2, Qt::CaseInsensitive); - pos = reg.indexIn(articleTitle); + if (!matched) { + reg = QRegularExpression(partialPattern2, QRegularExpression::CaseInsensitiveOption); + matcher = reg.match(articleTitle); + matched = matcher.hasMatch(); } - if (pos != -1) { - int sTheirs = reg.cap(1).toInt(); - int epTheirs = reg.cap(2).toInt(); + if (matched) { + int sTheirs = matcher.captured(1).toInt(); + int epTheirs = matcher.captured(2).toInt(); if (((sTheirs == sOurs) && (epTheirs >= epOurs)) || (sTheirs > sOurs)) { qDebug() << "Matched episode:" << ep; qDebug() << "Matched article:" << articleTitle; @@ -178,16 +183,18 @@ bool DownloadRule::matches(const QString &articleTitle) const int epOursLast = range.last().toInt(); // Extract partial match from article and compare as digits - pos = reg.indexIn(articleTitle); + matcher = reg.match(articleTitle); + matched = matcher.hasMatch(); - if (pos == -1) { - reg = QRegExp(partialPattern2, Qt::CaseInsensitive); - pos = reg.indexIn(articleTitle); + if (!matched) { + reg = QRegularExpression(partialPattern2, QRegularExpression::CaseInsensitiveOption); + matcher = reg.match(articleTitle); + matched = matcher.hasMatch(); } - if (pos != -1) { - int sTheirs = reg.cap(1).toInt(); - int epTheirs = reg.cap(2).toInt(); + if (matched) { + int sTheirs = matcher.captured(1).toInt(); + int epTheirs = matcher.captured(2).toInt(); if ((sTheirs == sOurs) && ((epOursFirst <= epTheirs) && (epOursLast >= epTheirs))) { qDebug() << "Matched episode:" << ep; qDebug() << "Matched article:" << articleTitle; @@ -198,8 +205,8 @@ bool DownloadRule::matches(const QString &articleTitle) const } else { // Single number QString expStr("\\b(?:s0?" + s + "[ -_\\.]?" + "e0?" + ep + "|" + s + "x" + "0?" + ep + ")(?:\\D|\\b)"); - QRegExp reg(expStr, Qt::CaseInsensitive); - if (reg.indexIn(articleTitle) != -1) { + QRegularExpression reg(expStr, QRegularExpression::CaseInsensitiveOption); + if (reg.match(articleTitle).hasMatch()) { qDebug() << "Matched episode:" << ep; qDebug() << "Matched article:" << articleTitle; return true; diff --git a/src/base/rss/rssdownloadrule.h b/src/base/rss/rssdownloadrule.h index ac728f190..6b64943be 100644 --- a/src/base/rss/rssdownloadrule.h +++ b/src/base/rss/rssdownloadrule.h @@ -31,10 +31,12 @@ #ifndef RSSDOWNLOADRULE_H #define RSSDOWNLOADRULE_H -#include +#include #include #include -#include +#include + +class QRegularExpression; namespace Rss { diff --git a/src/base/utils/string.cpp b/src/base/utils/string.cpp index 51e4d62ac..fad3cc0ed 100644 --- a/src/base/utils/string.cpp +++ b/src/base/utils/string.cpp @@ -37,6 +37,7 @@ #ifdef QBT_USES_QT5 #include #endif +#include #ifdef Q_OS_MAC #include #endif @@ -211,3 +212,13 @@ bool Utils::String::slowEquals(const QByteArray &a, const QByteArray &b) return (diff == 0); } + +// This is marked as internal in QRegExp.cpp, but is exported. The alternative would be to +// copy the code from QRegExp::wc2rx(). +QString qt_regexp_toCanonical(const QString &pattern, QRegExp::PatternSyntax patternSyntax); + +QString Utils::String::wildcardToRegex(const QString &pattern) +{ + return qt_regexp_toCanonical(pattern, QRegExp::Wildcard); +} + diff --git a/src/base/utils/string.h b/src/base/utils/string.h index 8ca2f2e45..44661d814 100644 --- a/src/base/utils/string.h +++ b/src/base/utils/string.h @@ -49,6 +49,8 @@ namespace Utils bool naturalCompareCaseSensitive(const QString &left, const QString &right); bool naturalCompareCaseInsensitive(const QString &left, const QString &right); + + QString wildcardToRegex(const QString &pattern); } } diff --git a/src/gui/rss/automatedrssdownloader.cpp b/src/gui/rss/automatedrssdownloader.cpp index cf6bf0dd2..30d13091e 100644 --- a/src/gui/rss/automatedrssdownloader.cpp +++ b/src/gui/rss/automatedrssdownloader.cpp @@ -33,6 +33,7 @@ #include #include #include +#include #include "base/bittorrent/session.h" #include "base/preferences.h" @@ -73,8 +74,8 @@ AutomatedRssDownloader::AutomatedRssDownloader(const QWeakPointer Q_ASSERT(ok); m_ruleList = manager.toStrongRef()->downloadRules(); m_editableRuleList = new Rss::DownloadRuleList; // Read rule list from disk - m_episodeRegex = new QRegExp("^(^\\d{1,4}x(\\d{1,4}(-(\\d{1,4})?)?;){1,}){1,1}", - Qt::CaseInsensitive); + m_episodeRegex = new QRegularExpression("^(^\\d{1,4}x(\\d{1,4}(-(\\d{1,4})?)?;){1,}){1,1}", + QRegularExpression::CaseInsensitiveOption); QString tip = "

" + tr("Matches articles based on episode filter.") + "

" + tr("Example: ") + "1x2;8-15;5;30-;" + tr(" will match 2, 5, 8 through 15, 30 and onward episodes of season one", "example X will match") + "

"; tip += "

" + tr("Episode filter rules: ") + "

  • " + tr("Season number is a mandatory non-zero value") + "
  • " @@ -672,7 +673,7 @@ void AutomatedRssDownloader::updateFieldsToolTips(bool regex) { QString tip; if (regex) { - tip = "

    " + tr("Regex mode: use Perl-like regular expressions") + "

    "; + tip = "

    " + tr("Regex mode: use Perl-compatible regular expressions") + "

    "; } else { tip = "

    " + tr("Wildcard mode: you can use") + "

      " @@ -698,17 +699,23 @@ void AutomatedRssDownloader::updateFieldsToolTips(bool regex) void AutomatedRssDownloader::updateMustLineValidity() { const QString text = ui->lineContains->text(); + bool isRegex = ui->checkRegex->isChecked(); bool valid = true; + QString error; if (!text.isEmpty()) { QStringList tokens; - if (ui->checkRegex->isChecked()) + if (isRegex) tokens << text; else - tokens << text.split("|"); + foreach (const QString &token, text.split("|")) + tokens << Utils::String::wildcardToRegex(token); + foreach (const QString &token, tokens) { - QRegExp reg(token, Qt::CaseInsensitive, ui->checkRegex->isChecked() ? QRegExp::RegExp : QRegExp::Wildcard); + QRegularExpression reg(token, QRegularExpression::CaseInsensitiveOption); if (!reg.isValid()) { + if (isRegex) + error = tr("Position %1: %2").arg(reg.patternErrorOffset()).arg(reg.errorString()); valid = false; break; } @@ -718,27 +725,35 @@ void AutomatedRssDownloader::updateMustLineValidity() if (valid) { ui->lineContains->setStyleSheet(""); ui->lbl_must_stat->setPixmap(QPixmap()); + ui->lbl_must_stat->setToolTip(QLatin1String("")); } else { ui->lineContains->setStyleSheet("QLineEdit { color: #ff0000; }"); ui->lbl_must_stat->setPixmap(GuiIconProvider::instance()->getIcon("task-attention").pixmap(16, 16)); + ui->lbl_must_stat->setToolTip(error); } } void AutomatedRssDownloader::updateMustNotLineValidity() { const QString text = ui->lineNotContains->text(); + bool isRegex = ui->checkRegex->isChecked(); bool valid = true; + QString error; if (!text.isEmpty()) { QStringList tokens; - if (ui->checkRegex->isChecked()) + if (isRegex) tokens << text; else - tokens << text.split("|"); + foreach (const QString &token, text.split("|")) + tokens << Utils::String::wildcardToRegex(token); + foreach (const QString &token, tokens) { - QRegExp reg(token, Qt::CaseInsensitive, ui->checkRegex->isChecked() ? QRegExp::RegExp : QRegExp::Wildcard); + QRegularExpression reg(token, QRegularExpression::CaseInsensitiveOption); if (!reg.isValid()) { + if (isRegex) + error = tr("Position %1: %2").arg(reg.patternErrorOffset()).arg(reg.errorString()); valid = false; break; } @@ -748,17 +763,19 @@ void AutomatedRssDownloader::updateMustNotLineValidity() if (valid) { ui->lineNotContains->setStyleSheet(""); ui->lbl_mustnot_stat->setPixmap(QPixmap()); + ui->lbl_mustnot_stat->setToolTip(QLatin1String("")); } else { ui->lineNotContains->setStyleSheet("QLineEdit { color: #ff0000; }"); ui->lbl_mustnot_stat->setPixmap(GuiIconProvider::instance()->getIcon("task-attention").pixmap(16, 16)); + ui->lbl_mustnot_stat->setToolTip(error); } } void AutomatedRssDownloader::updateEpisodeFilterValidity() { const QString text = ui->lineEFilter->text(); - bool valid = text.isEmpty() || m_episodeRegex->indexIn(text) != -1; + bool valid = text.isEmpty() || m_episodeRegex->match(text).hasMatch(); if (valid) { ui->lineEFilter->setStyleSheet(""); diff --git a/src/gui/rss/automatedrssdownloader.h b/src/gui/rss/automatedrssdownloader.h index 3e1f68f2c..6c124bad0 100644 --- a/src/gui/rss/automatedrssdownloader.h +++ b/src/gui/rss/automatedrssdownloader.h @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -58,6 +57,7 @@ namespace Rss QT_BEGIN_NAMESPACE class QListWidgetItem; +class QRegularExpression; QT_END_NAMESPACE class AutomatedRssDownloader: public QDialog @@ -113,7 +113,7 @@ private: QListWidgetItem *m_editedRule; Rss::DownloadRuleList *m_ruleList; Rss::DownloadRuleList *m_editableRuleList; - QRegExp *m_episodeRegex; + QRegularExpression *m_episodeRegex; QShortcut *editHotkey; QShortcut *deleteHotkey; QSet> m_treeListEntries; From a844ccb06a83d3386ac3abc2e6ddd76e3a4eb2a2 Mon Sep 17 00:00:00 2001 From: Tim Delaney Date: Sat, 11 Feb 2017 18:43:50 +1100 Subject: [PATCH 2/2] Cache rule regular expressions for performance --HG-- branch : magao-dev --- src/base/rss/rssdownloadrule.cpp | 41 ++++++++++++++++++++++++++------ src/base/rss/rssdownloadrule.h | 4 ++++ 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/src/base/rss/rssdownloadrule.cpp b/src/base/rss/rssdownloadrule.cpp index 5f897a3d7..afa054ea2 100644 --- a/src/base/rss/rssdownloadrule.cpp +++ b/src/base/rss/rssdownloadrule.cpp @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -49,9 +50,29 @@ DownloadRule::DownloadRule() , m_useRegex(false) , m_apstate(USE_GLOBAL) , m_ignoreDays(0) + , m_cachedRegexes(new QHash) { } +DownloadRule::~DownloadRule() +{ + delete m_cachedRegexes; +} + +QRegularExpression DownloadRule::getRegex(const QString &expression, bool isRegex) const +{ + // Use a cache of regexes so we don't have to continually recompile - big performance increase. + // The cache is cleared whenever the regex/wildcard, must or must not contain fields or + // episode filter are modified. + Q_ASSERT(!expression.isEmpty()); + QRegularExpression regex((*m_cachedRegexes)[expression]); + + if (!regex.pattern().isEmpty()) + return regex; + + return (*m_cachedRegexes)[expression] = QRegularExpression(isRegex ? expression : Utils::String::wildcardToRegex(expression), QRegularExpression::CaseInsensitiveOption); +} + bool DownloadRule::matches(const QString &articleTitle, const QString &expression) const { static QRegularExpression whitespace("\\s+"); @@ -61,14 +82,14 @@ bool DownloadRule::matches(const QString &articleTitle, const QString &expressio return true; } else if (m_useRegex) { - QRegularExpression reg(expression, QRegularExpression::CaseInsensitiveOption); + QRegularExpression reg(getRegex(expression)); return reg.match(articleTitle).hasMatch(); } else { // Only match if every wildcard token (separated by spaces) is present in the article name. // Order of wildcard tokens is unimportant (if order is important, they should have used *). foreach (const QString &wildcard, expression.split(whitespace, QString::SplitBehavior::SkipEmptyParts)) { - QRegularExpression reg(Utils::String::wildcardToRegex(wildcard), QRegularExpression::CaseInsensitiveOption); + QRegularExpression reg(getRegex(wildcard, false)); if (!reg.match(articleTitle).hasMatch()) return false; @@ -126,7 +147,7 @@ bool DownloadRule::matches(const QString &articleTitle) const if (!m_episodeFilter.isEmpty()) { qDebug() << "Checking episode filter:" << m_episodeFilter; - QRegularExpression f("(^\\d{1,4})x(.*;$)"); + QRegularExpression f(getRegex("(^\\d{1,4})x(.*;$)")); QRegularExpressionMatch matcher = f.match(m_episodeFilter); bool matched = matcher.hasMatch(); @@ -148,7 +169,7 @@ bool DownloadRule::matches(const QString &articleTitle) const if (ep.indexOf('-') != -1) { // Range detected QString partialPattern1 = "\\bs0?(\\d{1,4})[ -_\\.]?e(0?\\d{1,4})(?:\\D|\\b)"; QString partialPattern2 = "\\b(\\d{1,4})x(0?\\d{1,4})(?:\\D|\\b)"; - QRegularExpression reg(partialPattern1, QRegularExpression::CaseInsensitiveOption); + QRegularExpression reg(getRegex(partialPattern1)); if (ep.endsWith('-')) { // Infinite range int epOurs = ep.left(ep.size() - 1).toInt(); @@ -158,7 +179,7 @@ bool DownloadRule::matches(const QString &articleTitle) const matched = matcher.hasMatch(); if (!matched) { - reg = QRegularExpression(partialPattern2, QRegularExpression::CaseInsensitiveOption); + reg = QRegularExpression(getRegex(partialPattern2)); matcher = reg.match(articleTitle); matched = matcher.hasMatch(); } @@ -187,7 +208,7 @@ bool DownloadRule::matches(const QString &articleTitle) const matched = matcher.hasMatch(); if (!matched) { - reg = QRegularExpression(partialPattern2, QRegularExpression::CaseInsensitiveOption); + reg = QRegularExpression(getRegex(partialPattern2)); matcher = reg.match(articleTitle); matched = matcher.hasMatch(); } @@ -205,7 +226,7 @@ bool DownloadRule::matches(const QString &articleTitle) const } else { // Single number QString expStr("\\b(?:s0?" + s + "[ -_\\.]?" + "e0?" + ep + "|" + s + "x" + "0?" + ep + ")(?:\\D|\\b)"); - QRegularExpression reg(expStr, QRegularExpression::CaseInsensitiveOption); + QRegularExpression reg(getRegex(expStr)); if (reg.match(articleTitle).hasMatch()) { qDebug() << "Matched episode:" << ep; qDebug() << "Matched article:" << articleTitle; @@ -223,6 +244,8 @@ bool DownloadRule::matches(const QString &articleTitle) const void DownloadRule::setMustContain(const QString &tokens) { + m_cachedRegexes->clear(); + if (m_useRegex) m_mustContain = QStringList() << tokens; else @@ -235,6 +258,8 @@ void DownloadRule::setMustContain(const QString &tokens) void DownloadRule::setMustNotContain(const QString &tokens) { + m_cachedRegexes->clear(); + if (m_useRegex) m_mustNotContain = QStringList() << tokens; else @@ -384,6 +409,7 @@ bool DownloadRule::useRegex() const void DownloadRule::setUseRegex(bool enabled) { m_useRegex = enabled; + m_cachedRegexes->clear(); } QString DownloadRule::episodeFilter() const @@ -394,6 +420,7 @@ QString DownloadRule::episodeFilter() const void DownloadRule::setEpisodeFilter(const QString &e) { m_episodeFilter = e; + m_cachedRegexes->clear(); } QStringList DownloadRule::findMatchingArticles(const FeedPtr &feed) const diff --git a/src/base/rss/rssdownloadrule.h b/src/base/rss/rssdownloadrule.h index 6b64943be..7c7b1a053 100644 --- a/src/base/rss/rssdownloadrule.h +++ b/src/base/rss/rssdownloadrule.h @@ -36,6 +36,7 @@ #include #include +template class QHash; class QRegularExpression; namespace Rss @@ -57,6 +58,7 @@ namespace Rss }; DownloadRule(); + ~DownloadRule(); static DownloadRulePtr fromVariantHash(const QVariantHash &ruleHash); QVariantHash toVariantHash() const; @@ -91,6 +93,7 @@ namespace Rss private: bool matches(const QString &articleTitle, const QString &expression) const; + QRegularExpression getRegex(const QString &expression, bool isRegex = true) const; QString m_name; QStringList m_mustContain; @@ -104,6 +107,7 @@ namespace Rss AddPausedState m_apstate; QDateTime m_lastMatch; int m_ignoreDays; + mutable QHash *m_cachedRegexes; }; }