1
0
mirror of git://erdgeist.org/opentracker synced 2025-01-12 07:50:52 +00:00
opentracker/scan_urlencoded_query.c
erdgeist 243d5961d0 fixed one performance bug, where "skipping values from a &param=values pair" was requested, the requestor ended up with "values" to be parsed again.
improved performance of fromhex
improved performance of is_unreserved() by moving it all into a simple byte array
improved performance of %41 => 'A' conversion by reordering variables
2007-10-18 00:21:22 +00:00

98 lines
2.9 KiB
C

/* This software was written by Dirk Engling <erdgeist@erdgeist.org>
It is considered beerware. Prost. Skol. Cheers or whatever. */
#include "scan.h"
#include "scan_urlencoded_query.h"
/* The idea is to either decode in place or to guarantee at least
strlen( string ) bytes in deststring.
See http://www.ietf.org/rfc/rfc2396.txt
unreserved = alphanum | mark
mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
we add '%' to the matrix to not stop at encoded chars.
After losing too many requests to being too strict, we also add the following characters to the unreserved matrix:
relax = "+" | "," | "/" | ";" | "<" | ">" | ":"
*/
/* Lookup table indexed by byte value. A non-zero entry means the scanner
   keeps consuming that byte; a zero entry ends the current token.
   Each row below covers 32 consecutive byte values. */
static const unsigned char is_unreserved[256] = {
/* 0x00-0x1f: control characters, none set */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
/* 0x20-0x3f: punctuation and digits; everything set except ' ', '"', '#', '$', '&', '=' and '?' */
0,1,0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,
/* 0x40-0x5f: only 'A'-'Z' and '_' set */
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,
/* 0x60-0x7f: only 'a'-'z' and '~' set */
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,0,
/* 0x80-0xff: none set */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
/* Convert one ASCII hex digit (either case) to its value 0..15.
   Any other byte yields the sentinel 0xff. */
static unsigned char fromhex(unsigned char c) {
  unsigned char folded;

  if (c >= '0' && c <= '9')
    return (unsigned char)(c - '0');

  /* Clearing bit 0x20 maps 'a'..'f' onto 'A'..'F'. */
  folded = (unsigned char)(c & 0xdf);
  if (folded < 'A' || folded > 'F')
    return 0xff;

  return (unsigned char)(folded - 'A' + 10);
}
/* Decode one URL-encoded token starting at *string.

   Bytes flagged in is_unreserved[] are consumed one at a time and "%XX"
   escapes are replaced by the byte they encode. Decoded bytes are stored
   through deststring when it is non-NULL; with a NULL deststring the token
   is only skipped. Output never gets ahead of input, so deststring may be
   the same buffer as *string. No terminating NUL is written.

   The first byte that is not flagged in the table is handled as follows:
     NUL, CR, LF, ' '  ends the token and is left unconsumed
     '?'               ends the token (and is consumed) if flags == SCAN_PATH;
                       otherwise it is copied literally and scanning resumes
     '='               ends the token (and is consumed) only if
                       flags == SCAN_SEARCHPATH_PARAM; otherwise an error
     '&'               error if flags == SCAN_PATH; left unconsumed if
                       flags == SCAN_SEARCHPATH_PARAM; consumed otherwise
     anything else     error

   Returns the number of bytes stored (0 when deststring is NULL) and
   advances *string. Returns -1 on a malformed escape or an unexpected byte,
   and -2 when NUL/CR/LF/space is hit before anything was stored (only
   detectable with a non-NULL deststring). *string is not advanced on error. */
ssize_t scan_urlencoded_query(char **string, char *deststring, int flags) {
  /* Convert the pointer value; do not reinterpret the char* object itself. */
  const unsigned char *s = (const unsigned char *)*string;
  unsigned char *d = (unsigned char *)deststring;
  unsigned char b, c;

retry_parsing:
  while( is_unreserved[ c = *s++ ] ) {
    if( c == '%' ) {
      /* fromhex() yields 0xff for any non-hex byte, which includes the NUL
         ending a truncated escape, so we never read past the terminator. */
      if( ( b = fromhex( *s++ ) ) == 0xff ) return -1;
      if( ( c = fromhex( *s++ ) ) == 0xff ) return -1;
      c |= (unsigned char)( b << 4 );
    }
    if( d ) *d++ = c;
  }

  switch( c ) {
  case 0: case '\r': case '\n': case ' ':
    /* Started right on a hard terminator: nothing left to scan. */
    if( d && ( d == (unsigned char *)deststring ) ) return -2;
    /* Leave the terminator in place so the next call sees it again. */
    --s;
    break;
  case '?':
    if( flags != SCAN_PATH ) {
      /* Outside the path a '?' is ordinary data. */
      if( d ) *d++ = c;
      goto retry_parsing;
    }
    break;
  case '=':
    if( flags != SCAN_SEARCHPATH_PARAM ) return -1;
    break;
  case '&':
    if( flags == SCAN_PATH ) return -1;
    /* A parameter without a value: leave '&' for the next call. */
    if( flags == SCAN_SEARCHPATH_PARAM ) --s;
    break;
  default:
    return -1;
  }

  *string = (char *)s;
  /* Subtracting two null pointers is undefined in C; report 0 explicitly
     when the token was only skipped. */
  return d ? d - (unsigned char *)deststring : 0;
}
/* Parse the run of decimal digits at the start of a length-bounded buffer.

   data  buffer to read; at most len bytes are examined, no NUL is required
   len   number of readable bytes in data
   tmp   receives the parsed value, or 0 if data does not start with a digit

   Returns the number of bytes left unparsed, so a return value equal to len
   means no digit was found.

   The value is accumulated in unsigned arithmetic: a digit run too large for
   an int wraps around instead of triggering signed-overflow undefined
   behaviour. Callers that need range checking must do it themselves. */
ssize_t scan_fixed_int( char *data, size_t len, int *tmp ) {
  unsigned int value = 0;

  while( len > 0 && *data >= '0' && *data <= '9' ) {
    value = 10u * value + (unsigned int)( *data++ - '0' );
    --len;
  }

  *tmp = (int)value;
  return (ssize_t)len;
}
/* Parse a dotted-quad IPv4 address ("a.b.c.d") from a length-bounded buffer.

   data  buffer to read; at most len bytes are examined, no NUL is required
   len   number of readable bytes in data
   ip    receives the four octets in the order they appear

   Returns the number of bytes left after the fourth octet, so 0 means the
   whole buffer was consumed by the address.
   Returns -1 if a separating '.' is missing, if an octet exceeds 255, or if
   the input ends where an octet is expected.
   If a non-digit byte sits where an octet is expected, the (non-zero) number
   of bytes remaining at that point is returned.
   Entries of ip before the failing octet may already have been written.

   Digits are accumulated here rather than through scan_fixed_int() so that
   the value can be capped before it overflows and then range-checked. */
ssize_t scan_fixed_ip( char *data, size_t len, unsigned char ip[4] ) {
  int i;

  for( i = 0; i < 4; ++i ) {
    unsigned int octet = 0;
    size_t digits = 0;

    while( len > 0 && *data >= '0' && *data <= '9' ) {
      /* Stop growing once past 255: the octet is already invalid and
         this keeps arbitrarily long digit runs from overflowing. */
      if( octet <= 255 ) octet = 10 * octet + (unsigned int)( *data - '0' );
      ++data; --len; ++digits;
    }

    /* Missing octet. Never report 0 here: 0 means "fully parsed", and
       ip[i] has not been written. */
    if( !digits ) return len ? (ssize_t)len : -1;

    /* Storing a larger value would silently truncate it. */
    if( octet > 255 ) return -1;
    ip[i] = (unsigned char)octet;

    if( i < 3 ) {
      if( !len || *data != '.' ) return -1;
      --len; ++data;
    }
  }

  return (ssize_t)len;
}