/* opentracker/scan_urlencoded_query.c
   (mirror of git://erdgeist.org/opentracker) */

/* This software was written by Dirk Engling <erdgeist@erdgeist.org>
   It is considered beerware. Prost. Skol. Cheers or whatever.

   $id$ */

/* Opentracker */
#include "scan_urlencoded_query.h"

/* Libowfat */
#include "scan.h"

/* System */
#include <limits.h>
#include <string.h>

/* Idea is to do an in-place replacement or guarantee at least
   strlen( string ) bytes in deststring.
   See http://www.ietf.org/rfc/rfc2396.txt
     unreserved = alphanum | mark
     mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
   We add '%' to the matrix to not stop at encoded chars.
   After losing too many requests to being too strict, add the following
   characters to the unreserved matrix
     relax = "+" | "," | "/" | ";" | "<" | ">" | ":"
*/

/* Character classification table, indexed by the unsigned byte value.

   For each character it records whether that character is non-terminating
   in one of the three scan states 'path', 'param' and 'value' from
   /path?param=value&param=value; this is encoded in bit 0, 1 and 2
   respectively.

   Bit 3 (the top bit of the lower nibble) marks a hard terminator, where
   the whole scanning process should terminate. The table sets it for \0,
   \n, \r and space, and additionally for the characters
   " # $ [ \ ] ^ ` { | } which are never accepted in any scan state.

   All remaining control characters, '&', DEL and every byte >= 0x80 carry
   no bits at all.
*/
static const unsigned char is_unreserved[256] = {
  /* 0x00 - 0x1f: control characters */
  8,0,0,0,0,0,0,0,0,0,8,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  /* 0x20 - 0x3f: space, punctuation, digits; '=' is value-only, '?' is param|value */
  8,7,8,8,8,7,0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,4,7,6,
  /* 0x40 - 0x5f: '@' is value-only, then upper case letters */
  4,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,7,
  /* 0x60 - 0x7f: lower case letters */
  8,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,7,0,
  /* 0x80 - 0xff: never accepted unencoded */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};

/* Convert one hexadecimal digit character to its numeric value.

   Accepts '0'-'9', 'A'-'F' and 'a'-'f' and returns the nibble 0..15.
   Every other input yields 0xff, which callers use as the error marker. */
static unsigned char fromhex(unsigned char x) {
  /* Rebase on '0'; anything below '0' wraps around to a large value */
  x -= '0';
  if (x <= 9)
    return x;

  /* Clear bit 5 to fold lower case onto upper case, then rebase on 'A' */
  x &= ~0x20;
  x -= 'A' - '0';
  if (x < 6)
    return x + 10;

  return 0xff;
}
2007-12-17 13:23:27 +00:00
/* Skip the value of a param=value pair */
2024-04-15 00:39:02 +02:00
void scan_urlencoded_skipvalue(char **string) {
const unsigned char *s = *(const unsigned char **)string;
unsigned char f;
2007-12-17 13:23:27 +00:00
/* Since we are asked to skip the 'value', we assume to stop at
terminators for a 'value' string position */
2024-04-15 00:39:02 +02:00
while ((f = is_unreserved[*s++]) & SCAN_SEARCHPATH_VALUE)
;
2007-12-17 13:23:27 +00:00
/* If we stopped at a hard terminator like \0 or \n, make the
next scan_urlencoded_query encounter it again */
2024-04-15 00:39:02 +02:00
if (f & SCAN_SEARCHPATH_TERMINATOR)
--s;
2007-12-17 13:23:27 +00:00
2024-04-15 00:39:02 +02:00
*string = (char *)s;
}
2024-04-15 00:39:02 +02:00
int scan_find_keywords(const ot_keywords *keywords, char **string, SCAN_SEARCHPATH_FLAG flags) {
char *deststring = *string;
ssize_t match_length = scan_urlencoded_query(string, deststring, flags);
2024-04-15 00:39:02 +02:00
if (match_length < 0)
return match_length;
if (match_length == 0)
return -3;
2024-04-15 00:39:02 +02:00
while (keywords->key) {
if (!strncmp(keywords->key, deststring, match_length) && !keywords->key[match_length])
return keywords->value;
keywords++;
}
return -3;
}
/* Decode one url-encoded token from *string into deststring.

   string      in/out: scan position; only updated on success
   deststring  output buffer; may be the same memory as *string, since
               every byte written consumes at least one byte of input.
               The output is not NUL terminated.
   flags       scan state that selects which characters are non-terminating

   Returns the number of bytes written to deststring (possibly 0),
   -1 on a malformed %-escape or a terminating character that is not
   valid in the current scan state, and -2 if the scan started directly
   on a hard terminator. */
ssize_t scan_urlencoded_query(char **string, char *deststring, SCAN_SEARCHPATH_FLAG flags) {
  /* Work on unsigned bytes so table lookups stay in range for bytes >= 0x80 */
  const unsigned char *s = (const unsigned char *)*string;
  unsigned char *d = (unsigned char *)deststring;
  unsigned char b, c;

  /* This is the main decoding loop.
     'flags' determines which characters are non-terminating in the current
     context (ie. stop at '=' and '&' if scanning for a 'param'; stop at '?'
     if scanning for the path) */
  while (is_unreserved[c = *s++] & flags) {

    /* When encountering an url escaped character, try to decode.
       A NUL inside the escape is rejected by fromhex, so the scan never
       runs past the end of the input string. */
    if (c == '%') {
      if ((b = fromhex(*s++)) == 0xff)
        return -1;
      if ((c = fromhex(*s++)) == 0xff)
        return -1;
      c |= (b << 4);
    }

    /* Write (possibly decoded) character to output */
    *d++ = c;
  }

  /* c now holds the raw character that ended the loop */
  switch (c) {
  case 0:
  case '\r':
  case '\n':
  case ' ':
    /* If we started scanning on a hard terminator, indicate we've finished */
    if (d == (unsigned char *)deststring)
      return -2;

    /* Else make the next call to scan_urlencoded_query encounter it again */
    --s;
    break;
  case '?':
    /* '?' only ends the path */
    if (flags != SCAN_PATH)
      return -1;
    break;
  case '=':
    /* '=' only ends a param name */
    if (flags != SCAN_SEARCHPATH_PARAM)
      return -1;
    break;
  case '&':
    if (flags == SCAN_PATH)
      return -1;
    /* A param ended by '&' has no value: leave the '&' in place so the
       following value scan stops on it immediately */
    if (flags == SCAN_SEARCHPATH_PARAM)
      --s;
    break;
  default:
    return -1;
  }

  *string = (char *)s;
  return d - (unsigned char *)deststring;
}

/* Parse an optionally negative decimal integer from a fixed-length buffer.

   data  buffer to parse; need not be NUL terminated
   len   number of valid bytes in data
   tmp   receives the parsed value; 0 if no digit was consumed

   Returns the number of bytes left unparsed, so 0 means the whole buffer
   was consumed. Parsing stops at the first non-digit and also at the first
   digit that would take the value outside the range of int; in the latter
   case *tmp holds the value accumulated so far and the result is non-zero. */
ssize_t scan_fixed_int(char *data, size_t len, int *tmp) {
  int negative = 0;
  /* Accumulate as a non-positive number so that INT_MIN is representable;
     'limit' is the most negative value the accumulator may reach */
  int limit = -INT_MAX;
  int value = 0;

  *tmp = 0;

  /* Never look at data[0] of an empty buffer */
  if (len > 0 && *data == '-') {
    negative = 1;
    limit = INT_MIN;
    ++data;
    --len;
  }

  while (len > 0 && *data >= '0' && *data <= '9') {
    int digit = *data - '0';

    /* Would 10 * value - digit drop below limit? Division truncates
       towards zero, which rounds the negative bound upwards. */
    if (value < (limit + digit) / 10)
      break;

    value = 10 * value - digit;
    ++data;
    --len;
  }

  *tmp = negative ? value : -value;
  return (ssize_t)len;
}