From 256f6954c548a827447eaf26d4972188dd09a4b4 Mon Sep 17 00:00:00 2001 From: "Vladimir Golovnev (Glassez)" Date: Thu, 17 May 2018 21:34:50 +0300 Subject: [PATCH 1/3] Improve code of RSS auto-downloading rule Closes #8933. --- src/base/rss/rss_autodownloadrule.cpp | 267 ++++++++++++-------------- src/base/rss/rss_autodownloadrule.h | 6 +- 2 files changed, 128 insertions(+), 145 deletions(-) diff --git a/src/base/rss/rss_autodownloadrule.cpp b/src/base/rss/rss_autodownloadrule.cpp index 2c5530695..e6755dd18 100644 --- a/src/base/rss/rss_autodownloadrule.cpp +++ b/src/base/rss/rss_autodownloadrule.cpp @@ -39,6 +39,7 @@ #include #include +#include "../global.h" #include "../preferences.h" #include "../tristatebool.h" #include "../utils/fs.h" @@ -192,197 +193,175 @@ QRegularExpression AutoDownloadRule::cachedRegex(const QString &expression, bool // The cache is cleared whenever the regex/wildcard, must or must not contain fields or // episode filter are modified. Q_ASSERT(!expression.isEmpty()); - QRegularExpression regex(m_dataPtr->cachedRegexes[expression]); - if (!regex.pattern().isEmpty()) - return regex; + QRegularExpression ®ex = m_dataPtr->cachedRegexes[expression]; + if (regex.pattern().isEmpty()) { + regex = QRegularExpression { + (isRegex ? expression : Utils::String::wildcardToRegex(expression)) + , QRegularExpression::CaseInsensitiveOption}; + } - return m_dataPtr->cachedRegexes[expression] = QRegularExpression(isRegex ? expression : Utils::String::wildcardToRegex(expression), QRegularExpression::CaseInsensitiveOption); + return regex; } -bool AutoDownloadRule::matches(const QString &articleTitle, const QString &expression) const +bool AutoDownloadRule::matchesExpression(const QString &articleTitle, const QString &expression) const { - static QRegularExpression whitespace("\\s+"); + const QRegularExpression whitespace {"\\s+"}; if (expression.isEmpty()) { // A regex of the form "expr|" will always match, so do the same for wildcards return true; } - else if (m_dataPtr->useRegex) { + + if (m_dataPtr->useRegex) { QRegularExpression reg(cachedRegex(expression)); return reg.match(articleTitle).hasMatch(); } - else { - // Only match if every wildcard token (separated by spaces) is present in the article name. - // Order of wildcard tokens is unimportant (if order is important, they should have used *). - foreach (const QString &wildcard, expression.split(whitespace, QString::SplitBehavior::SkipEmptyParts)) { - QRegularExpression reg(cachedRegex(wildcard, false)); - if (!reg.match(articleTitle).hasMatch()) - return false; - } + // Only match if every wildcard token (separated by spaces) is present in the article name. + // Order of wildcard tokens is unimportant (if order is important, they should have used *). + const QStringList wildcards {expression.split(whitespace, QString::SplitBehavior::SkipEmptyParts)}; + for (const QString &wildcard : wildcards) { + const QRegularExpression reg {cachedRegex(wildcard, false)}; + if (!reg.match(articleTitle).hasMatch()) + return false; } return true; } -bool AutoDownloadRule::matches(const QString &articleTitle) const +bool AutoDownloadRule::matchesMustContainExpression(const QString &articleTitle) const +{ + if (m_dataPtr->mustContain.empty()) + return true; + + // Each expression is either a regex, or a set of wildcards separated by whitespace. + // Accept if any complete expression matches. + for (const QString &expression : qAsConst(m_dataPtr->mustContain)) { + // A regex of the form "expr|" will always match, so do the same for wildcards + if (matchesExpression(articleTitle, expression)) + return true; + } + + return false; +} + +bool AutoDownloadRule::matchesMustNotContainExpression(const QString& articleTitle) const +{ + if (m_dataPtr->mustNotContain.empty()) + return true; + + // Each expression is either a regex, or a set of wildcards separated by whitespace. + // Reject if any complete expression matches. + for (const QString &expression : qAsConst(m_dataPtr->mustNotContain)) { + // A regex of the form "expr|" will always match, so do the same for wildcards + if (matchesExpression(articleTitle, expression)) + return false; + } + + return true; +} + +bool AutoDownloadRule::matchesEpisodeFilterExpression(const QString& articleTitle) const { // Reset the lastComputedEpisode, we don't want to leak it between matches m_dataPtr->lastComputedEpisode.clear(); - if (!m_dataPtr->mustContain.empty()) { - bool logged = false; - bool foundMustContain = false; + if (m_dataPtr->episodeFilter.isEmpty()) + return true; - // Each expression is either a regex, or a set of wildcards separated by whitespace. - // Accept if any complete expression matches. - foreach (const QString &expression, m_dataPtr->mustContain) { - if (!logged) { -// qDebug() << "Checking matching" << (m_dataPtr->useRegex ? "regex:" : "wildcard expressions:") << m_dataPtr->mustContain.join("|"); - logged = true; - } + const QRegularExpression filterRegex {cachedRegex("(^\\d{1,4})x(.*;$)")}; + const QRegularExpressionMatch matcher {filterRegex.match(m_dataPtr->episodeFilter)}; + if (!matcher.hasMatch()) + return false; - // A regex of the form "expr|" will always match, so do the same for wildcards - foundMustContain = matches(articleTitle, expression); + const QString season {matcher.captured(1)}; + const QStringList episodes {matcher.captured(2).split(';')}; + const int seasonOurs {season.toInt()}; - if (foundMustContain) { -// qDebug() << "Found matching" << (m_dataPtr->useRegex ? "regex:" : "wildcard expression:") << expression; - break; - } - } + for (QString episode : episodes) { + if (episode.isEmpty()) + continue; - if (!foundMustContain) - return false; - } + // We need to trim leading zeroes, but if it's all zeros then we want episode zero. + while ((episode.size() > 1) && episode.startsWith('0')) + episode = episode.right(episode.size() - 1); - if (!m_dataPtr->mustNotContain.empty()) { - bool logged = false; + if (episode.indexOf('-') != -1) { // Range detected + const QString partialPattern1 {"\\bs0?(\\d{1,4})[ -_\\.]?e(0?\\d{1,4})(?:\\D|\\b)"}; + const QString partialPattern2 {"\\b(\\d{1,4})x(0?\\d{1,4})(?:\\D|\\b)"}; - // Each expression is either a regex, or a set of wildcards separated by whitespace. - // Reject if any complete expression matches. - foreach (const QString &expression, m_dataPtr->mustNotContain) { - if (!logged) { -// qDebug() << "Checking not matching" << (m_dataPtr->useRegex ? "regex:" : "wildcard expressions:") << m_dataPtr->mustNotContain.join("|"); - logged = true; - } + // Extract partial match from article and compare as digits + QRegularExpressionMatch matcher = cachedRegex(partialPattern1).match(articleTitle); + bool matched = matcher.hasMatch(); - // A regex of the form "expr|" will always match, so do the same for wildcards - if (matches(articleTitle, expression)) { -// qDebug() << "Found not matching" << (m_dataPtr->useRegex ? "regex:" : "wildcard expression:") << expression; - return false; + if (!matched) { + matcher = cachedRegex(partialPattern2).match(articleTitle); + matched = matcher.hasMatch(); } - } - } - - if (!m_dataPtr->episodeFilter.isEmpty()) { -// qDebug() << "Checking episode filter:" << m_dataPtr->episodeFilter; - QRegularExpression f(cachedRegex("(^\\d{1,4})x(.*;$)")); - QRegularExpressionMatch matcher = f.match(m_dataPtr->episodeFilter); - bool matched = matcher.hasMatch(); - if (!matched) - return false; + if (matched) { + const int seasonTheirs {matcher.captured(1).toInt()}; + const int episodeTheirs {matcher.captured(2).toInt()}; - QString s = matcher.captured(1); - QStringList eps = matcher.captured(2).split(";"); - int sOurs = s.toInt(); - - foreach (QString ep, eps) { - if (ep.isEmpty()) - continue; - - // We need to trim leading zeroes, but if it's all zeros then we want episode zero. - while (ep.size() > 1 && ep.startsWith("0")) - ep = ep.right(ep.size() - 1); - - if (ep.indexOf('-') != -1) { // Range detected - QString partialPattern1 = "\\bs0?(\\d{1,4})[ -_\\.]?e(0?\\d{1,4})(?:\\D|\\b)"; - QString partialPattern2 = "\\b(\\d{1,4})x(0?\\d{1,4})(?:\\D|\\b)"; - QRegularExpression reg(cachedRegex(partialPattern1)); - - if (ep.endsWith('-')) { // Infinite range - int epOurs = ep.leftRef(ep.size() - 1).toInt(); - - // Extract partial match from article and compare as digits - matcher = reg.match(articleTitle); - matched = matcher.hasMatch(); - - if (!matched) { - reg = QRegularExpression(cachedRegex(partialPattern2)); - matcher = reg.match(articleTitle); - matched = matcher.hasMatch(); - } - - if (matched) { - int sTheirs = matcher.captured(1).toInt(); - int epTheirs = matcher.captured(2).toInt(); - if (((sTheirs == sOurs) && (epTheirs >= epOurs)) || (sTheirs > sOurs)) { -// qDebug() << "Matched episode:" << ep; -// qDebug() << "Matched article:" << articleTitle; - return true; - } - } + if (episode.endsWith('-')) { // Infinite range + const int episodeOurs {episode.leftRef(episode.size() - 1).toInt()}; + if (((seasonTheirs == seasonOurs) && (episodeTheirs >= episodeOurs)) || (seasonTheirs > seasonOurs)) + return true; } else { // Normal range - QStringList range = ep.split('-'); + const QStringList range {episode.split('-')}; Q_ASSERT(range.size() == 2); if (range.first().toInt() > range.last().toInt()) continue; // Ignore this subrule completely - int epOursFirst = range.first().toInt(); - int epOursLast = range.last().toInt(); - - // Extract partial match from article and compare as digits - matcher = reg.match(articleTitle); - matched = matcher.hasMatch(); - - if (!matched) { - reg = QRegularExpression(cachedRegex(partialPattern2)); - matcher = reg.match(articleTitle); - matched = matcher.hasMatch(); - } - - if (matched) { - int sTheirs = matcher.captured(1).toInt(); - int epTheirs = matcher.captured(2).toInt(); - if ((sTheirs == sOurs) && ((epOursFirst <= epTheirs) && (epOursLast >= epTheirs))) { -// qDebug() << "Matched episode:" << ep; -// qDebug() << "Matched article:" << articleTitle; - return true; - } - } - } - } - else { // Single number - QString expStr("\\b(?:s0?" + s + "[ -_\\.]?" + "e0?" + ep + "|" + s + "x" + "0?" + ep + ")(?:\\D|\\b)"); - QRegularExpression reg(cachedRegex(expStr)); - if (reg.match(articleTitle).hasMatch()) { -// qDebug() << "Matched episode:" << ep; -// qDebug() << "Matched article:" << articleTitle; - return true; + const int episodeOursFirst {range.first().toInt()}; + const int episodeOursLast {range.last().toInt()}; + if ((seasonTheirs == seasonOurs) && ((episodeOursFirst <= episodeTheirs) && (episodeOursLast >= episodeTheirs))) + return true; } } } - - return false; + else { // Single number + const QString expStr {QString("\\b(?:s0?%1[ -_\\.]?e0?%2|%1x0?%2)(?:\\D|\\b)").arg(season, episode)}; + if (cachedRegex(expStr).match(articleTitle).hasMatch()) + return true; + } } - if (useSmartFilter()) { - // now see if this episode has been downloaded before - const QString episodeStr = computeEpisodeName(articleTitle); + return false; +} + +bool AutoDownloadRule::matchesSmartEpisodeFilter(const QString& articleTitle) const +{ + if (!useSmartFilter()) + return true; + + const QString episodeStr = computeEpisodeName(articleTitle); + if (episodeStr.isEmpty()) + return true; - if (!episodeStr.isEmpty()) { - bool previouslyMatched = m_dataPtr->previouslyMatchedEpisodes.contains(episodeStr); - bool isRepack = articleTitle.contains("REPACK", Qt::CaseInsensitive) || articleTitle.contains("PROPER", Qt::CaseInsensitive); - if (previouslyMatched && !isRepack) - return false; + // See if this episode has been downloaded before + const bool previouslyMatched = m_dataPtr->previouslyMatchedEpisodes.contains(episodeStr); + const bool isRepack = articleTitle.contains("REPACK", Qt::CaseInsensitive) || articleTitle.contains("PROPER", Qt::CaseInsensitive); + if (previouslyMatched && !isRepack) + return false; - m_dataPtr->lastComputedEpisode = episodeStr; - } - } + m_dataPtr->lastComputedEpisode = episodeStr; + return true; +} + +bool AutoDownloadRule::matches(const QString &articleTitle) const +{ + if (!matchesMustContainExpression(articleTitle)) + return false; + if (!matchesMustNotContainExpression(articleTitle)) + return false; + if (!matchesEpisodeFilterExpression(articleTitle)) + return false; + if (!matchesSmartEpisodeFilter(articleTitle)) + return false; -// qDebug() << "Matched article:" << articleTitle; return true; } diff --git a/src/base/rss/rss_autodownloadrule.h b/src/base/rss/rss_autodownloadrule.h index ec6c76979..2802f38be 100644 --- a/src/base/rss/rss_autodownloadrule.h +++ b/src/base/rss/rss_autodownloadrule.h @@ -95,7 +95,11 @@ namespace RSS static AutoDownloadRule fromLegacyDict(const QVariantHash &dict); private: - bool matches(const QString &articleTitle, const QString &expression) const; + bool matchesMustContainExpression(const QString &articleTitle) const; + bool matchesMustNotContainExpression(const QString &articleTitle) const; + bool matchesEpisodeFilterExpression(const QString &articleTitle) const; + bool matchesSmartEpisodeFilter(const QString &articleTitle) const; + bool matchesExpression(const QString &articleTitle, const QString &expression) const; QRegularExpression cachedRegex(const QString &expression, bool isRegex = true) const; QSharedDataPointer m_dataPtr; From 844f76c2cadf7b2aa321dfe25efc2609b7977b61 Mon Sep 17 00:00:00 2001 From: "Vladimir Golovnev (Glassez)" Date: Fri, 18 May 2018 14:41:52 +0300 Subject: [PATCH 2/3] Make "Ignoring days" to behave like other filters This prevents confusing in GUI when it shows matched RSS articles which be really ignored by the rule. --- src/base/rss/rss_autodownloader.cpp | 14 +---------- src/base/rss/rss_autodownloadrule.cpp | 32 ++++++++++++++++++-------- src/base/rss/rss_autodownloadrule.h | 4 ++-- src/gui/rss/automatedrssdownloader.cpp | 4 +++- 4 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/base/rss/rss_autodownloader.cpp b/src/base/rss/rss_autodownloader.cpp index 1e070f26f..007bbae54 100644 --- a/src/base/rss/rss_autodownloader.cpp +++ b/src/base/rss/rss_autodownloader.cpp @@ -365,19 +365,7 @@ void AutoDownloader::processJob(const QSharedPointer &job) for (AutoDownloadRule &rule: m_rules) { if (!rule.isEnabled()) continue; if (!rule.feedURLs().contains(job->feedURL)) continue; - if (!rule.matches(job->articleData.value(Article::KeyTitle).toString())) continue; - - auto articleDate = job->articleData.value(Article::KeyDate).toDateTime(); - // if rule is in ignoring state do nothing with matched torrent - if (rule.ignoreDays() > 0) { - if (rule.lastMatch().isValid()) { - if (articleDate < rule.lastMatch().addDays(rule.ignoreDays())) - return; - } - } - - rule.setLastMatch(articleDate); - rule.appendLastComputedEpisode(); + if (!rule.accepts(job->articleData)) continue; m_dirty = true; storeDeferred(); diff --git a/src/base/rss/rss_autodownloadrule.cpp b/src/base/rss/rss_autodownloadrule.cpp index e6755dd18..643abbde0 100644 --- a/src/base/rss/rss_autodownloadrule.cpp +++ b/src/base/rss/rss_autodownloadrule.cpp @@ -351,8 +351,15 @@ bool AutoDownloadRule::matchesSmartEpisodeFilter(const QString& articleTitle) co return true; } -bool AutoDownloadRule::matches(const QString &articleTitle) const +bool AutoDownloadRule::matches(const QVariantHash &articleData) const { + const QDateTime articleDate {articleData[Article::KeyDate].toDateTime()}; + if (ignoreDays() > 0) { + if (lastMatch().isValid() && (articleDate < lastMatch().addDays(ignoreDays()))) + return false; + } + + const QString articleTitle {articleData[Article::KeyTitle].toString()}; if (!matchesMustContainExpression(articleTitle)) return false; if (!matchesMustNotContainExpression(articleTitle)) @@ -365,6 +372,20 @@ bool AutoDownloadRule::matches(const QString &articleTitle) const return true; } +bool AutoDownloadRule::accepts(const QVariantHash &articleData) +{ + if (!matches(articleData)) + return false; + + setLastMatch(articleData[Article::KeyDate].toDateTime()); + + if (!m_dataPtr->lastComputedEpisode.isEmpty()) { + // TODO: probably need to add a marker for PROPER/REPACK to avoid duplicate downloads + m_dataPtr->previouslyMatchedEpisodes.append(m_dataPtr->lastComputedEpisode); + m_dataPtr->lastComputedEpisode.clear(); + } +} + AutoDownloadRule &AutoDownloadRule::operator=(const AutoDownloadRule &other) { m_dataPtr = other.m_dataPtr; @@ -621,15 +642,6 @@ void AutoDownloadRule::setPreviouslyMatchedEpisodes(const QStringList &previousl m_dataPtr->previouslyMatchedEpisodes = previouslyMatchedEpisodes; } -void AutoDownloadRule::appendLastComputedEpisode() -{ - if (!m_dataPtr->lastComputedEpisode.isEmpty()) { - // TODO: probably need to add a marker for PROPER/REPACK to avoid duplicate downloads - m_dataPtr->previouslyMatchedEpisodes.append(m_dataPtr->lastComputedEpisode); - m_dataPtr->lastComputedEpisode.clear(); - } -} - QString AutoDownloadRule::episodeFilter() const { return m_dataPtr->episodeFilter; diff --git a/src/base/rss/rss_autodownloadrule.h b/src/base/rss/rss_autodownloadrule.h index 2802f38be..15fd113fb 100644 --- a/src/base/rss/rss_autodownloadrule.h +++ b/src/base/rss/rss_autodownloadrule.h @@ -71,7 +71,6 @@ namespace RSS QString episodeFilter() const; void setEpisodeFilter(const QString &e); - void appendLastComputedEpisode(); QStringList previouslyMatchedEpisodes() const; void setPreviouslyMatchedEpisodes(const QStringList &previouslyMatchedEpisodes); @@ -82,7 +81,8 @@ namespace RSS QString assignedCategory() const; void setCategory(const QString &category); - bool matches(const QString &articleTitle) const; + bool matches(const QVariantHash &articleData) const; + bool accepts(const QVariantHash &articleData); AutoDownloadRule &operator=(const AutoDownloadRule &other); bool operator==(const AutoDownloadRule &other) const; diff --git a/src/gui/rss/automatedrssdownloader.cpp b/src/gui/rss/automatedrssdownloader.cpp index 38d8c0867..4fd05440e 100644 --- a/src/gui/rss/automatedrssdownloader.cpp +++ b/src/gui/rss/automatedrssdownloader.cpp @@ -114,6 +114,8 @@ AutomatedRssDownloader::AutomatedRssDownloader(QWidget *parent) connect(m_ui->checkRegex, &QCheckBox::stateChanged, this, &AutomatedRssDownloader::updateMustLineValidity); connect(m_ui->checkRegex, &QCheckBox::stateChanged, this, &AutomatedRssDownloader::updateMustNotLineValidity); connect(m_ui->checkSmart, &QCheckBox::stateChanged, this, &AutomatedRssDownloader::handleRuleDefinitionChanged); + connect(m_ui->spinIgnorePeriod, static_cast(&QSpinBox::valueChanged) + , this, &AutomatedRssDownloader::handleRuleDefinitionChanged); connect(m_ui->listFeeds, &QListWidget::itemChanged, this, &AutomatedRssDownloader::handleFeedCheckStateChange); @@ -581,7 +583,7 @@ void AutomatedRssDownloader::updateMatchingArticles() QStringList matchingArticles; foreach (auto article, feed->articles()) - if (rule.matches(article->title())) + if (rule.matches(article->data())) matchingArticles << article->title(); if (!matchingArticles.isEmpty()) addFeedArticlesToTree(feed, matchingArticles); From 53b9bcaaac92b309d31611b2dc8a470d6d22ce22 Mon Sep 17 00:00:00 2001 From: "Vladimir Golovnev (Glassez)" Date: Fri, 18 May 2018 14:55:41 +0300 Subject: [PATCH 3/3] Place "Use Smart Episode Filter" more correctly --- src/gui/rss/automatedrssdownloader.ui | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/gui/rss/automatedrssdownloader.ui b/src/gui/rss/automatedrssdownloader.ui index 5ae379665..dd48649dd 100644 --- a/src/gui/rss/automatedrssdownloader.ui +++ b/src/gui/rss/automatedrssdownloader.ui @@ -106,17 +106,6 @@ - - - - Smart Episode Filter will check the episode number to prevent downloading of duplicates. -Supports the formats: S01E01, 1x1, 2017.01.01 and 01.01.2017 (Date formats also support - as a separator) - - - Use Smart Episode Filter - - - @@ -191,6 +180,17 @@ Supports the formats: S01E01, 1x1, 2017.01.01 and 01.01.2017 (Date formats also + + + + Smart Episode Filter will check the episode number to prevent downloading of duplicates. +Supports the formats: S01E01, 1x1, 2017.01.01 and 01.01.2017 (Date formats also support - as a separator) + + + Use Smart Episode Filter + + +