Browse Source

Merge pull request #8945 from glassez/rss-rule

Improve code of  RSS auto-downloading rule. Closes #8933
adaptive-webui-19844
Vladimir Golovnev 7 years ago committed by GitHub
parent
commit
0aee1cf7d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 14
      src/base/rss/rss_autodownloader.cpp
  2. 223
      src/base/rss/rss_autodownloadrule.cpp
  3. 10
      src/base/rss/rss_autodownloadrule.h
  4. 4
      src/gui/rss/automatedrssdownloader.cpp
  5. 22
      src/gui/rss/automatedrssdownloader.ui

14
src/base/rss/rss_autodownloader.cpp

@ -365,19 +365,7 @@ void AutoDownloader::processJob(const QSharedPointer<ProcessingJob> &job) @@ -365,19 +365,7 @@ void AutoDownloader::processJob(const QSharedPointer<ProcessingJob> &job)
for (AutoDownloadRule &rule: m_rules) {
if (!rule.isEnabled()) continue;
if (!rule.feedURLs().contains(job->feedURL)) continue;
if (!rule.matches(job->articleData.value(Article::KeyTitle).toString())) continue;
auto articleDate = job->articleData.value(Article::KeyDate).toDateTime();
// if rule is in ignoring state do nothing with matched torrent
if (rule.ignoreDays() > 0) {
if (rule.lastMatch().isValid()) {
if (articleDate < rule.lastMatch().addDays(rule.ignoreDays()))
return;
}
}
rule.setLastMatch(articleDate);
rule.appendLastComputedEpisode();
if (!rule.accepts(job->articleData)) continue;
m_dirty = true;
storeDeferred();

223
src/base/rss/rss_autodownloadrule.cpp

@ -39,6 +39,7 @@ @@ -39,6 +39,7 @@
#include <QString>
#include <QStringList>
#include "../global.h"
#include "../preferences.h"
#include "../tristatebool.h"
#include "../utils/fs.h"
@ -192,200 +193,199 @@ QRegularExpression AutoDownloadRule::cachedRegex(const QString &expression, bool @@ -192,200 +193,199 @@ QRegularExpression AutoDownloadRule::cachedRegex(const QString &expression, bool
// The cache is cleared whenever the regex/wildcard, must or must not contain fields or
// episode filter are modified.
Q_ASSERT(!expression.isEmpty());
QRegularExpression regex(m_dataPtr->cachedRegexes[expression]);
if (!regex.pattern().isEmpty())
return regex;
QRegularExpression &regex = m_dataPtr->cachedRegexes[expression];
if (regex.pattern().isEmpty()) {
regex = QRegularExpression {
(isRegex ? expression : Utils::String::wildcardToRegex(expression))
, QRegularExpression::CaseInsensitiveOption};
}
return m_dataPtr->cachedRegexes[expression] = QRegularExpression(isRegex ? expression : Utils::String::wildcardToRegex(expression), QRegularExpression::CaseInsensitiveOption);
return regex;
}
bool AutoDownloadRule::matches(const QString &articleTitle, const QString &expression) const
bool AutoDownloadRule::matchesExpression(const QString &articleTitle, const QString &expression) const
{
static QRegularExpression whitespace("\\s+");
const QRegularExpression whitespace {"\\s+"};
if (expression.isEmpty()) {
// A regex of the form "expr|" will always match, so do the same for wildcards
return true;
}
else if (m_dataPtr->useRegex) {
if (m_dataPtr->useRegex) {
QRegularExpression reg(cachedRegex(expression));
return reg.match(articleTitle).hasMatch();
}
else {
// Only match if every wildcard token (separated by spaces) is present in the article name.
// Order of wildcard tokens is unimportant (if order is important, they should have used *).
foreach (const QString &wildcard, expression.split(whitespace, QString::SplitBehavior::SkipEmptyParts)) {
QRegularExpression reg(cachedRegex(wildcard, false));
const QStringList wildcards {expression.split(whitespace, QString::SplitBehavior::SkipEmptyParts)};
for (const QString &wildcard : wildcards) {
const QRegularExpression reg {cachedRegex(wildcard, false)};
if (!reg.match(articleTitle).hasMatch())
return false;
}
}
return true;
}
bool AutoDownloadRule::matches(const QString &articleTitle) const
bool AutoDownloadRule::matchesMustContainExpression(const QString &articleTitle) const
{
// Reset the lastComputedEpisode, we don't want to leak it between matches
m_dataPtr->lastComputedEpisode.clear();
if (!m_dataPtr->mustContain.empty()) {
bool logged = false;
bool foundMustContain = false;
if (m_dataPtr->mustContain.empty())
return true;
// Each expression is either a regex, or a set of wildcards separated by whitespace.
// Accept if any complete expression matches.
foreach (const QString &expression, m_dataPtr->mustContain) {
if (!logged) {
// qDebug() << "Checking matching" << (m_dataPtr->useRegex ? "regex:" : "wildcard expressions:") << m_dataPtr->mustContain.join("|");
logged = true;
}
for (const QString &expression : qAsConst(m_dataPtr->mustContain)) {
// A regex of the form "expr|" will always match, so do the same for wildcards
foundMustContain = matches(articleTitle, expression);
if (foundMustContain) {
// qDebug() << "Found matching" << (m_dataPtr->useRegex ? "regex:" : "wildcard expression:") << expression;
break;
}
if (matchesExpression(articleTitle, expression))
return true;
}
if (!foundMustContain)
return false;
}
if (!m_dataPtr->mustNotContain.empty()) {
bool logged = false;
bool AutoDownloadRule::matchesMustNotContainExpression(const QString& articleTitle) const
{
if (m_dataPtr->mustNotContain.empty())
return true;
// Each expression is either a regex, or a set of wildcards separated by whitespace.
// Reject if any complete expression matches.
foreach (const QString &expression, m_dataPtr->mustNotContain) {
if (!logged) {
// qDebug() << "Checking not matching" << (m_dataPtr->useRegex ? "regex:" : "wildcard expressions:") << m_dataPtr->mustNotContain.join("|");
logged = true;
}
for (const QString &expression : qAsConst(m_dataPtr->mustNotContain)) {
// A regex of the form "expr|" will always match, so do the same for wildcards
if (matches(articleTitle, expression)) {
// qDebug() << "Found not matching" << (m_dataPtr->useRegex ? "regex:" : "wildcard expression:") << expression;
if (matchesExpression(articleTitle, expression))
return false;
}
}
return true;
}
if (!m_dataPtr->episodeFilter.isEmpty()) {
// qDebug() << "Checking episode filter:" << m_dataPtr->episodeFilter;
QRegularExpression f(cachedRegex("(^\\d{1,4})x(.*;$)"));
QRegularExpressionMatch matcher = f.match(m_dataPtr->episodeFilter);
bool matched = matcher.hasMatch();
bool AutoDownloadRule::matchesEpisodeFilterExpression(const QString& articleTitle) const
{
// Reset the lastComputedEpisode, we don't want to leak it between matches
m_dataPtr->lastComputedEpisode.clear();
if (m_dataPtr->episodeFilter.isEmpty())
return true;
if (!matched)
const QRegularExpression filterRegex {cachedRegex("(^\\d{1,4})x(.*;$)")};
const QRegularExpressionMatch matcher {filterRegex.match(m_dataPtr->episodeFilter)};
if (!matcher.hasMatch())
return false;
QString s = matcher.captured(1);
QStringList eps = matcher.captured(2).split(";");
int sOurs = s.toInt();
const QString season {matcher.captured(1)};
const QStringList episodes {matcher.captured(2).split(';')};
const int seasonOurs {season.toInt()};
foreach (QString ep, eps) {
if (ep.isEmpty())
for (QString episode : episodes) {
if (episode.isEmpty())
continue;
// We need to trim leading zeroes, but if it's all zeros then we want episode zero.
while (ep.size() > 1 && ep.startsWith("0"))
ep = ep.right(ep.size() - 1);
while ((episode.size() > 1) && episode.startsWith('0'))
episode = episode.right(episode.size() - 1);
if (ep.indexOf('-') != -1) { // Range detected
QString partialPattern1 = "\\bs0?(\\d{1,4})[ -_\\.]?e(0?\\d{1,4})(?:\\D|\\b)";
QString partialPattern2 = "\\b(\\d{1,4})x(0?\\d{1,4})(?:\\D|\\b)";
QRegularExpression reg(cachedRegex(partialPattern1));
if (ep.endsWith('-')) { // Infinite range
int epOurs = ep.leftRef(ep.size() - 1).toInt();
if (episode.indexOf('-') != -1) { // Range detected
const QString partialPattern1 {"\\bs0?(\\d{1,4})[ -_\\.]?e(0?\\d{1,4})(?:\\D|\\b)"};
const QString partialPattern2 {"\\b(\\d{1,4})x(0?\\d{1,4})(?:\\D|\\b)"};
// Extract partial match from article and compare as digits
matcher = reg.match(articleTitle);
matched = matcher.hasMatch();
QRegularExpressionMatch matcher = cachedRegex(partialPattern1).match(articleTitle);
bool matched = matcher.hasMatch();
if (!matched) {
reg = QRegularExpression(cachedRegex(partialPattern2));
matcher = reg.match(articleTitle);
matcher = cachedRegex(partialPattern2).match(articleTitle);
matched = matcher.hasMatch();
}
if (matched) {
int sTheirs = matcher.captured(1).toInt();
int epTheirs = matcher.captured(2).toInt();
if (((sTheirs == sOurs) && (epTheirs >= epOurs)) || (sTheirs > sOurs)) {
// qDebug() << "Matched episode:" << ep;
// qDebug() << "Matched article:" << articleTitle;
const int seasonTheirs {matcher.captured(1).toInt()};
const int episodeTheirs {matcher.captured(2).toInt()};
if (episode.endsWith('-')) { // Infinite range
const int episodeOurs {episode.leftRef(episode.size() - 1).toInt()};
if (((seasonTheirs == seasonOurs) && (episodeTheirs >= episodeOurs)) || (seasonTheirs > seasonOurs))
return true;
}
}
}
else { // Normal range
QStringList range = ep.split('-');
const QStringList range {episode.split('-')};
Q_ASSERT(range.size() == 2);
if (range.first().toInt() > range.last().toInt())
continue; // Ignore this subrule completely
int epOursFirst = range.first().toInt();
int epOursLast = range.last().toInt();
// Extract partial match from article and compare as digits
matcher = reg.match(articleTitle);
matched = matcher.hasMatch();
if (!matched) {
reg = QRegularExpression(cachedRegex(partialPattern2));
matcher = reg.match(articleTitle);
matched = matcher.hasMatch();
}
if (matched) {
int sTheirs = matcher.captured(1).toInt();
int epTheirs = matcher.captured(2).toInt();
if ((sTheirs == sOurs) && ((epOursFirst <= epTheirs) && (epOursLast >= epTheirs))) {
// qDebug() << "Matched episode:" << ep;
// qDebug() << "Matched article:" << articleTitle;
const int episodeOursFirst {range.first().toInt()};
const int episodeOursLast {range.last().toInt()};
if ((seasonTheirs == seasonOurs) && ((episodeOursFirst <= episodeTheirs) && (episodeOursLast >= episodeTheirs)))
return true;
}
}
}
}
else { // Single number
QString expStr("\\b(?:s0?" + s + "[ -_\\.]?" + "e0?" + ep + "|" + s + "x" + "0?" + ep + ")(?:\\D|\\b)");
QRegularExpression reg(cachedRegex(expStr));
if (reg.match(articleTitle).hasMatch()) {
// qDebug() << "Matched episode:" << ep;
// qDebug() << "Matched article:" << articleTitle;
const QString expStr {QString("\\b(?:s0?%1[ -_\\.]?e0?%2|%1x0?%2)(?:\\D|\\b)").arg(season, episode)};
if (cachedRegex(expStr).match(articleTitle).hasMatch())
return true;
}
}
}
return false;
}
if (useSmartFilter()) {
// now see if this episode has been downloaded before
bool AutoDownloadRule::matchesSmartEpisodeFilter(const QString& articleTitle) const
{
if (!useSmartFilter())
return true;
const QString episodeStr = computeEpisodeName(articleTitle);
if (episodeStr.isEmpty())
return true;
if (!episodeStr.isEmpty()) {
bool previouslyMatched = m_dataPtr->previouslyMatchedEpisodes.contains(episodeStr);
bool isRepack = articleTitle.contains("REPACK", Qt::CaseInsensitive) || articleTitle.contains("PROPER", Qt::CaseInsensitive);
// See if this episode has been downloaded before
const bool previouslyMatched = m_dataPtr->previouslyMatchedEpisodes.contains(episodeStr);
const bool isRepack = articleTitle.contains("REPACK", Qt::CaseInsensitive) || articleTitle.contains("PROPER", Qt::CaseInsensitive);
if (previouslyMatched && !isRepack)
return false;
m_dataPtr->lastComputedEpisode = episodeStr;
return true;
}
bool AutoDownloadRule::matches(const QVariantHash &articleData) const
{
const QDateTime articleDate {articleData[Article::KeyDate].toDateTime()};
if (ignoreDays() > 0) {
if (lastMatch().isValid() && (articleDate < lastMatch().addDays(ignoreDays())))
return false;
}
// qDebug() << "Matched article:" << articleTitle;
const QString articleTitle {articleData[Article::KeyTitle].toString()};
if (!matchesMustContainExpression(articleTitle))
return false;
if (!matchesMustNotContainExpression(articleTitle))
return false;
if (!matchesEpisodeFilterExpression(articleTitle))
return false;
if (!matchesSmartEpisodeFilter(articleTitle))
return false;
return true;
}
bool AutoDownloadRule::accepts(const QVariantHash &articleData)
{
if (!matches(articleData))
return false;
setLastMatch(articleData[Article::KeyDate].toDateTime());
if (!m_dataPtr->lastComputedEpisode.isEmpty()) {
// TODO: probably need to add a marker for PROPER/REPACK to avoid duplicate downloads
m_dataPtr->previouslyMatchedEpisodes.append(m_dataPtr->lastComputedEpisode);
m_dataPtr->lastComputedEpisode.clear();
}
}
AutoDownloadRule &AutoDownloadRule::operator=(const AutoDownloadRule &other)
{
m_dataPtr = other.m_dataPtr;
@ -642,15 +642,6 @@ void AutoDownloadRule::setPreviouslyMatchedEpisodes(const QStringList &previousl @@ -642,15 +642,6 @@ void AutoDownloadRule::setPreviouslyMatchedEpisodes(const QStringList &previousl
m_dataPtr->previouslyMatchedEpisodes = previouslyMatchedEpisodes;
}
void AutoDownloadRule::appendLastComputedEpisode()
{
if (!m_dataPtr->lastComputedEpisode.isEmpty()) {
// TODO: probably need to add a marker for PROPER/REPACK to avoid duplicate downloads
m_dataPtr->previouslyMatchedEpisodes.append(m_dataPtr->lastComputedEpisode);
m_dataPtr->lastComputedEpisode.clear();
}
}
QString AutoDownloadRule::episodeFilter() const
{
return m_dataPtr->episodeFilter;

10
src/base/rss/rss_autodownloadrule.h

@ -71,7 +71,6 @@ namespace RSS @@ -71,7 +71,6 @@ namespace RSS
QString episodeFilter() const;
void setEpisodeFilter(const QString &e);
void appendLastComputedEpisode();
QStringList previouslyMatchedEpisodes() const;
void setPreviouslyMatchedEpisodes(const QStringList &previouslyMatchedEpisodes);
@ -82,7 +81,8 @@ namespace RSS @@ -82,7 +81,8 @@ namespace RSS
QString assignedCategory() const;
void setCategory(const QString &category);
bool matches(const QString &articleTitle) const;
bool matches(const QVariantHash &articleData) const;
bool accepts(const QVariantHash &articleData);
AutoDownloadRule &operator=(const AutoDownloadRule &other);
bool operator==(const AutoDownloadRule &other) const;
@ -95,7 +95,11 @@ namespace RSS @@ -95,7 +95,11 @@ namespace RSS
static AutoDownloadRule fromLegacyDict(const QVariantHash &dict);
private:
bool matches(const QString &articleTitle, const QString &expression) const;
bool matchesMustContainExpression(const QString &articleTitle) const;
bool matchesMustNotContainExpression(const QString &articleTitle) const;
bool matchesEpisodeFilterExpression(const QString &articleTitle) const;
bool matchesSmartEpisodeFilter(const QString &articleTitle) const;
bool matchesExpression(const QString &articleTitle, const QString &expression) const;
QRegularExpression cachedRegex(const QString &expression, bool isRegex = true) const;
QSharedDataPointer<AutoDownloadRuleData> m_dataPtr;

4
src/gui/rss/automatedrssdownloader.cpp

@ -114,6 +114,8 @@ AutomatedRssDownloader::AutomatedRssDownloader(QWidget *parent) @@ -114,6 +114,8 @@ AutomatedRssDownloader::AutomatedRssDownloader(QWidget *parent)
connect(m_ui->checkRegex, &QCheckBox::stateChanged, this, &AutomatedRssDownloader::updateMustLineValidity);
connect(m_ui->checkRegex, &QCheckBox::stateChanged, this, &AutomatedRssDownloader::updateMustNotLineValidity);
connect(m_ui->checkSmart, &QCheckBox::stateChanged, this, &AutomatedRssDownloader::handleRuleDefinitionChanged);
connect(m_ui->spinIgnorePeriod, static_cast<void (QSpinBox::*)(int)>(&QSpinBox::valueChanged)
, this, &AutomatedRssDownloader::handleRuleDefinitionChanged);
connect(m_ui->listFeeds, &QListWidget::itemChanged, this, &AutomatedRssDownloader::handleFeedCheckStateChange);
@ -581,7 +583,7 @@ void AutomatedRssDownloader::updateMatchingArticles() @@ -581,7 +583,7 @@ void AutomatedRssDownloader::updateMatchingArticles()
QStringList matchingArticles;
foreach (auto article, feed->articles())
if (rule.matches(article->title()))
if (rule.matches(article->data()))
matchingArticles << article->title();
if (!matchingArticles.isEmpty())
addFeedArticlesToTree(feed, matchingArticles);

22
src/gui/rss/automatedrssdownloader.ui

@ -106,17 +106,6 @@ @@ -106,17 +106,6 @@
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="checkSmart">
<property name="toolTip">
<string>Smart Episode Filter will check the episode number to prevent downloading of duplicates.
Supports the formats: S01E01, 1x1, 2017.01.01 and 01.01.2017 (Date formats also support - as a separator)</string>
</property>
<property name="text">
<string>Use Smart Episode Filter</string>
</property>
</widget>
</item>
<item>
<layout class="QGridLayout" name="gridLayout">
<item row="0" column="0">
@ -191,6 +180,17 @@ Supports the formats: S01E01, 1x1, 2017.01.01 and 01.01.2017 (Date formats also @@ -191,6 +180,17 @@ Supports the formats: S01E01, 1x1, 2017.01.01 and 01.01.2017 (Date formats also
</item>
</layout>
</item>
<item>
<widget class="QCheckBox" name="checkSmart">
<property name="toolTip">
<string>Smart Episode Filter will check the episode number to prevent downloading of duplicates.
Supports the formats: S01E01, 1x1, 2017.01.01 and 01.01.2017 (Date formats also support - as a separator)</string>
</property>
<property name="text">
<string>Use Smart Episode Filter</string>
</property>
</widget>
</item>
<item>
<widget class="Line" name="line">
<property name="orientation">

Loading…
Cancel
Save