Browse Source

Optimize DAT filter parsing. Parse in memory and in place instead of copying strings around.

adaptive-webui-19844
sledgehammer999 8 years ago
parent
commit
51cf6bd2a5
No known key found for this signature in database
GPG Key ID: 6E4A2D025B7CC9A2
  1. 282
      src/base/bittorrent/private/filterparserthread.cpp
  2. 7
      src/base/bittorrent/private/filterparserthread.h

282
src/base/bittorrent/private/filterparserthread.cpp

@ -32,7 +32,8 @@
#include <QDataStream> #include <QDataStream>
#include <QFile> #include <QFile>
#include <QStringList>
#include <cctype>
#include "base/logger.h" #include "base/logger.h"
@ -40,43 +41,48 @@ namespace libt = libtorrent;
namespace namespace
{ {
class DATIPv4Parser { class IPv4Parser
{
public: public:
const char* tryParse(const char* str, bool& ok) { bool tryParse(const char *str)
unsigned char number = 0; {
unsigned char octetIndex = 0;
const char* numberStart = str; const char *octetStart = str;
char* endptr; char *endptr;
for (; *str; ++str) { for (; *str; ++str) {
if (*str == '.') { if (*str == '.') {
m_buf[number++] = static_cast<unsigned char>(strtol(numberStart, &endptr, 10)); long int extractedNum = strtol(octetStart, &endptr, 10);
if (endptr != str) { if ((extractedNum >= 0L) && (extractedNum <= 255L))
break; m_buf[octetIndex++] = static_cast<unsigned char>(extractedNum);
} else
if (number == 4) { // an IP might end with '.': 192.168.1.2. return false;
ok = true;
return str + 1; if (endptr != str)
} return false;
numberStart = str + 1; if (octetIndex == 4)
return true;
octetStart = str + 1;
} }
} }
#if 1
// The following is needed for parsing of a string with IP, but in the eMule files there is always a space after an IP, if (str != octetStart) {
// and this case is handled above long int extractedNum = strtol(octetStart, &endptr, 10);
if (str != numberStart) { if ((extractedNum >= 0L) && (extractedNum <= 255L))
m_buf[number] = static_cast<unsigned char>(strtol(numberStart, &endptr, 10)); m_buf[octetIndex] = static_cast<unsigned char>(strtol(octetStart, &endptr, 10));
if (endptr == str && number == 3) { else
ok = true; return false;
return str + 1;
} if ((endptr == str) && (octetIndex == 3))
return true;
} }
#endif
ok = false; return false;
return str + 1;
} }
libt::address_v4::bytes_type parsed() const { libt::address_v4::bytes_type parsed() const
{
return m_buf; return m_buf;
} }
@ -84,17 +90,15 @@ namespace
libt::address_v4::bytes_type m_buf; libt::address_v4::bytes_type m_buf;
}; };
bool parseIPAddress(const QByteArray &_ip, libt::address &address) bool parseIPAddress(const char *data, libt::address &address)
{ {
DATIPv4Parser parser; IPv4Parser parser;
boost::system::error_code ec; boost::system::error_code ec;
bool ok = false;
parser.tryParse(_ip.constData(), ok); if (parser.tryParse(data))
if (ok)
address = libt::address_v4(parser.parsed()); address = libt::address_v4(parser.parsed());
else else
address = libt::address::from_string(_ip.constData(), ec); address = libt::address_v6::from_string(data, ec);
return !ec; return !ec;
} }
@ -124,69 +128,124 @@ int FilterParserThread::parseDATFilterFile()
return ruleCount; return ruleCount;
} }
const QByteArray data = file.readAll(); static const int bufferSize = 2 * 1024 * 1024; // 2 MiB
std::vector<char> buffer(bufferSize, 0); // seems a bit faster than QVector
qint64 bytesRead = 0;
int offset = 0;
int start = 0; int start = 0;
int endOfLine = -1; int endOfLine = -1;
int nbLine = 0;
while (true) { while (true) {
start = endOfLine + 1; bytesRead = file.read(buffer.data() + offset, bufferSize - offset);
if (start >= data.size()) if (bytesRead < 0)
break;
int dataSize = bytesRead + offset;
if (bytesRead == 0 && dataSize == 0)
break; break;
if (data[start] == '#' || (data[start] == '/' && data[start + 1] == '/'))
continue;
endOfLine = data.indexOf('\n', start); for (start = 0; start < dataSize; ++start) {
if (endOfLine == -1) break; endOfLine = -1;
// The file might have ended without the last line having a newline
if (!(bytesRead == 0 && dataSize > 0)) {
for (int i = start; i < dataSize; ++i) {
if (buffer[i] == '\n') {
endOfLine = i;
// We need to NULL the newline in case the line has only an IP range.
// In that case the parser won't work for the end IP, because it ends
// with the newline and not with a number.
buffer[i] = '\0';
break;
}
}
}
else {
endOfLine = dataSize;
buffer[dataSize] = '\0';
}
QVector<int> delimIndices = indicesOfDelimiters(data, ',', start, endOfLine); if (endOfLine == -1) {
// read the next chunk from file
// but first move(copy) the leftover data to the front of the buffer
offset = dataSize - start;
memmove(buffer.data(), buffer.data() + start, offset);
break;
}
else {
++nbLine;
}
// Check if there is at least one item (ip range) if ((buffer[start] == '#')
if (delimIndices.isEmpty()) || ((buffer[start] == '/') && ((start + 1 < dataSize) && (buffer[start + 1] == '/')))) {
continue; start = endOfLine;
continue;
}
// Check if there is an access value (apparently not mandatory) // Each line should follow this format:
if (delimIndices.size()) { // 001.009.096.105 - 001.009.096.105 , 000 , Some organization
// There is possibly one // The 3rd entry is access level and if above 127 the IP range isn't blocked.
const QByteArray accesscode(data.constData() + delimIndices[0] + 1, (delimIndices[1] - 1) - (delimIndices[0] + 1) + 1); int firstComma = findAndNullDelimiter(buffer.data(), ',', start, endOfLine);
const int nbAccess = accesscode.toInt(); if (firstComma != -1)
// Ignoring this rule because access value is too high findAndNullDelimiter(buffer.data(), ',', firstComma + 1, endOfLine);
if (nbAccess > 127)
// Check if there is an access value (apparently not mandatory)
if (firstComma != -1) {
// There is possibly one
const long int nbAccess = strtol(buffer.data() + firstComma + 1, nullptr, 10);
// Ignoring this rule because access value is too high
if (nbAccess > 127L) {
start = endOfLine;
continue;
}
}
// IP Range should be split by a dash
int endOfIPRange = ((firstComma == -1) ? (endOfLine - 1) : (firstComma - 1));
int delimIP = findAndNullDelimiter(buffer.data(), '-', start, endOfIPRange);
if (delimIP == -1) {
Logger::instance()->addMessage(tr("IP filter line %1 is malformed.").arg(nbLine), Log::CRITICAL);
start = endOfLine;
continue; continue;
} }
// IP Range should be split by a dash libt::address startAddr;
QVector<int> delimIP = indicesOfDelimiters(data, '-', start, delimIndices[0] - 1); int newStart = trim(buffer.data(), start, delimIP - 1);
if (delimIP.size() != 1) { if (!parseIPAddress(buffer.data() + newStart, startAddr)) {
//Logger::instance()->addMessage(tr("IP filter line %1 is malformed. Line is: %2").arg(nbLine).arg(QString(line)), Log::CRITICAL); Logger::instance()->addMessage(tr("IP filter line %1 is malformed. Start IP of the range is malformed.").arg(nbLine), Log::CRITICAL);
continue; start = endOfLine;
} continue;
}
libt::address startAddr; libt::address endAddr;
if (!parseIPAddress(trim(data, start, delimIP[0] - 1), startAddr)) { newStart = trim(buffer.data(), delimIP + 1, endOfIPRange);
//Logger::instance()->addMessage(tr("IP filter line %1 is malformed. Start IP of the range is malformed: %2").arg(nbLine).arg(QString(IPs.at(0))), Log::CRITICAL); if (!parseIPAddress(buffer.data() + newStart, endAddr)) {
continue; Logger::instance()->addMessage(tr("IP filter line %1 is malformed. End IP of the range is malformed.").arg(nbLine), Log::CRITICAL);
} start = endOfLine;
continue;
}
libt::address endAddr; if ((startAddr.is_v4() != endAddr.is_v4())
if (!parseIPAddress(trim(data, delimIP[0] + 1, delimIndices[0] - 1), endAddr)) { || (startAddr.is_v6() != endAddr.is_v6())) {
//Logger::instance()->addMessage(tr("IP filter line %1 is malformed. End IP of the range is malformed: %2").arg(nbLine).arg(QString(IPs.at(1))), Log::CRITICAL); Logger::instance()->addMessage(tr("IP filter line %1 is malformed. One IP is IPv4 and the other is IPv6!").arg(nbLine), Log::CRITICAL);
continue; start = endOfLine;
} continue;
}
if (startAddr.is_v4() != endAddr.is_v4() start = endOfLine;
|| startAddr.is_v6() != endAddr.is_v6()) {
//Logger::instance()->addMessage(tr("IP filter line %1 is malformed. One IP is IPv4 and the other is IPv6!").arg(nbLine), Log::CRITICAL);
continue;
}
// Now Add to the filter // Now Add to the filter
try { try {
m_filter.add_rule(startAddr, endAddr, libt::ip_filter::blocked); m_filter.add_rule(startAddr, endAddr, libt::ip_filter::blocked);
++ruleCount; ++ruleCount;
} }
catch(std::exception &) { catch (std::exception &e) {
//Logger::instance()->addMessage(tr("IP filter exception thrown for line %1. Line is: %2").arg(nbLine).arg(QString(line)), Log::CRITICAL); Logger::instance()->addMessage(tr("IP filter exception thrown for line %1. Exception is: %2").arg(nbLine)
.arg(QString::fromLocal8Bit(e.what())), Log::CRITICAL);
}
} }
if (start >= dataSize)
offset = 0;
} }
return ruleCount; return ruleCount;
@ -215,31 +274,31 @@ int FilterParserThread::parseP2PFilterFile()
// Line is split by : // Line is split by :
QList<QByteArray> partsList = line.split(':'); QList<QByteArray> partsList = line.split(':');
if (partsList.size() < 2) { if (partsList.size() < 2) {
Logger::instance()->addMessage(tr("IP filter line %1 is malformed. Line is: %2").arg(nbLine).arg(QString(line)), Log::CRITICAL); Logger::instance()->addMessage(tr("IP filter line %1 is malformed.").arg(nbLine), Log::CRITICAL);
continue; continue;
} }
// Get IP range // Get IP range
QList<QByteArray> IPs = partsList.last().split('-'); QList<QByteArray> IPs = partsList.last().split('-');
if (IPs.size() != 2) { if (IPs.size() != 2) {
Logger::instance()->addMessage(tr("IP filter line %1 is malformed. Line is: %2").arg(nbLine).arg(QString(line)), Log::CRITICAL); Logger::instance()->addMessage(tr("IP filter line %1 is malformed.").arg(nbLine), Log::CRITICAL);
continue; continue;
} }
libt::address startAddr; libt::address startAddr;
if (!parseIPAddress(IPs.at(0), startAddr)) { if (!parseIPAddress(IPs.at(0), startAddr)) {
Logger::instance()->addMessage(tr("IP filter line %1 is malformed. Start IP of the range is malformed: %2").arg(nbLine).arg(QString(IPs.at(0))), Log::CRITICAL); Logger::instance()->addMessage(tr("IP filter line %1 is malformed. Start IP of the range is malformed.").arg(nbLine), Log::CRITICAL);
continue; continue;
} }
libt::address endAddr; libt::address endAddr;
if (!parseIPAddress(IPs.at(1), endAddr)) { if (!parseIPAddress(IPs.at(1), endAddr)) {
Logger::instance()->addMessage(tr("IP filter line %1 is malformed. End IP of the range is malformed: %2").arg(nbLine).arg(QString(IPs.at(1))), Log::CRITICAL); Logger::instance()->addMessage(tr("IP filter line %1 is malformed. End IP of the range is malformed.").arg(nbLine), Log::CRITICAL);
continue; continue;
} }
if (startAddr.is_v4() != endAddr.is_v4() if ((startAddr.is_v4() != endAddr.is_v4())
|| startAddr.is_v6() != endAddr.is_v6()) { || (startAddr.is_v6() != endAddr.is_v6())) {
Logger::instance()->addMessage(tr("IP filter line %1 is malformed. One IP is IPv4 and the other is IPv6!").arg(nbLine), Log::CRITICAL); Logger::instance()->addMessage(tr("IP filter line %1 is malformed. One IP is IPv4 and the other is IPv6!").arg(nbLine), Log::CRITICAL);
continue; continue;
} }
@ -248,8 +307,9 @@ int FilterParserThread::parseP2PFilterFile()
m_filter.add_rule(startAddr, endAddr, libt::ip_filter::blocked); m_filter.add_rule(startAddr, endAddr, libt::ip_filter::blocked);
++ruleCount; ++ruleCount;
} }
catch(std::exception &) { catch (std::exception &e) {
Logger::instance()->addMessage(tr("IP filter exception thrown for line %1. Line is: %2").arg(nbLine).arg(QString(line)), Log::CRITICAL); Logger::instance()->addMessage(tr("IP filter exception thrown for line %1. Exception is: %2").arg(nbLine)
.arg(QString::fromLocal8Bit(e.what())), Log::CRITICAL);
} }
} }
@ -440,45 +500,39 @@ void FilterParserThread::run()
qDebug("IP Filter thread: finished parsing, filter applied"); qDebug("IP Filter thread: finished parsing, filter applied");
} }
QVector<int> FilterParserThread::indicesOfDelimiters(const QByteArray &data, const char delimiter, const int start, const int end) int FilterParserThread::findAndNullDelimiter(char *const data, char delimiter, int start, int end)
{ {
if (start >= end) return QVector<int>(); for (int i = start; i <= end; ++i) {
if (data[i] == delimiter) {
QVector<int> indices; data[i] = '\0';
int index = start; return i;
while (true) { }
index = data.indexOf(delimiter, index);
if (index == -1 || index >= end)
break;
indices.append(index);
++index;
} }
return indices; return -1;
} }
QByteArray FilterParserThread::trim(const QByteArray &data, const int start, const int end) int FilterParserThread::trim(char* const data, int start, int end)
{ {
if (start >= end) return QByteArray(); if (start >= end) return start;
int newStart = start;
int first = start;
int last = end;
for (int i = start; i <= end; ++i) { for (int i = start; i <= end; ++i) {
if (data[i] != ' ') { if (isspace(data[i]) != 0) {
first = i; data[i] = '\0';
}
else {
newStart = i;
break; break;
} }
} }
for (int i = end; i >= start; --i) { for (int i = end; i >= start; --i) {
if (data[i] != ' ') { if (isspace(data[i]) != 0)
last = i; data[i] = '\0';
else
break; break;
}
} }
if (first >= last) return QByteArray(); return newStart;
return QByteArray(data.constData() + first, last - first + 1);
} }

7
src/base/bittorrent/private/filterparserthread.h

@ -36,9 +36,6 @@
#include <libtorrent/ip_filter.hpp> #include <libtorrent/ip_filter.hpp>
class QDataStream; class QDataStream;
class QStringList;
class QByteArray;
template<typename T> class QVector;
class FilterParserThread : public QThread class FilterParserThread : public QThread
{ {
@ -58,8 +55,8 @@ protected:
void run(); void run();
private: private:
QVector<int> indicesOfDelimiters(const QByteArray &data, const char delimiter, const int start, const int end); int findAndNullDelimiter(char *const data, char delimiter, int start, int end);
QByteArray trim(const QByteArray &data, const int start, const int end); int trim(char *const data, int start, int end);
int parseDATFilterFile(); int parseDATFilterFile();
int parseP2PFilterFile(); int parseP2PFilterFile();
int getlineInStream(QDataStream &stream, std::string &name, char delim); int getlineInStream(QDataStream &stream, std::string &name, char delim);

Loading…
Cancel
Save