You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1722 lines
48 KiB
1722 lines
48 KiB
//========= Copyright Valve Corporation, All rights reserved. ============// |
|
// |
|
// Purpose: |
|
// |
|
// $NoKeywords: $ |
|
// |
|
//=============================================================================// |
|
/* |
|
* |
|
* Copyright (c) 1998-9 |
|
* Dr John Maddock |
|
* |
|
* Permission to use, copy, modify, distribute and sell this software |
|
* and its documentation for any purpose is hereby granted without fee, |
|
* provided that the above copyright notice appear in all copies and |
|
* that both that copyright notice and this permission notice appear |
|
* in supporting documentation. Dr John Maddock makes no representations |
|
* about the suitability of this software for any purpose. |
|
* It is provided "as is" without express or implied warranty. |
|
* |
|
*/ |
|
|
|
/* |
|
* FILE regex.h |
|
* VERSION 2.12 |
|
*/ |
|
|
|
|
|
/* start with C compatibility API */
|
|
|
#ifndef __REGEX_H |
|
#define __REGEX_H |
|
|
|
#include <cregex> |
|
|
|
#ifdef __cplusplus |
|
|
|
// what follows is all C++ don't include in C builds!! |
|
|
|
#include <new.h> |
|
#if !defined(JM_NO_TYPEINFO) |
|
#include <typeinfo> |
|
#endif |
|
#include <string.h> |
|
#include <jm/jstack.h> |
|
#include <jm/re_raw.h> |
|
#include <jm/re_nls.h> |
|
#include <jm/regfac.h> |
|
#include <jm/re_cls.h> |
|
#include <jm/re_coll.h> |
|
#include <jm/re_kmp.h> |
|
|
|
|
|
JM_NAMESPACE(__JM) |
|
|
|
// |
|
// define error handling classes
|
#if !defined(JM_NO_EXCEPTIONS) && !defined(JM_NO_EXCEPTION_H) |
|
// standard classes are available: |
|
|
|
// bad_expression (standard-library variant)
//
// Exception class derived from __JM_STD::exception.  The payload member
// `code` is a string when RE_LOCALE_CPP is defined and an unsigned number
// otherwise.  what() is only declared here; its definition lives elsewhere.
class JM_IX_DECL bad_expression : public __JM_STD::exception
{
#ifndef RE_LOCALE_CPP
   unsigned int code;            // numeric payload
public:
   bad_expression(unsigned int err) : code(err) {}
#else
   __JM_STD::string code;        // textual payload
public:
   bad_expression(const __JM_STD::string& s) : code(s) {}
#endif

   // Copies the exception base sub-object and the payload.
   bad_expression(const bad_expression& other)
      : __JM_STD::exception(other), code(other.code) {}

   // Assigns the base sub-object first, then the payload.
   bad_expression& operator=(const bad_expression& other)
   {
#ifndef _MSC_VER
      __JM_STD::exception::operator=(other);
#else
      // Under _MSC_VER the base operator is reached through an explicit upcast.
      static_cast<__JM_STD::exception*>(this)->operator=(other);
#endif
      code = other.code;
      return *this;
   }

   virtual const char* what()const throw();
};
|
|
|
#elif !defined(JM_NO_EXCEPTIONS) |
|
// no standard classes, do it ourselves: |
|
|
|
class JM_IX_DECL bad_expression |
|
{ |
|
#ifdef RE_LOCALE_CPP |
|
__JM_STD::string code; |
|
public: |
|
bad_expression(const __JM_STD::string& s) : code(s) {} |
|
#else |
|
unsigned int code; |
|
public: |
|
bad_expression(unsigned int err) : code(err) {} |
|
#endif |
|
bad_expression(const bad_expression& e) : code(e.code) {} |
|
bad_expression& operator=(const bad_expression& e) { code = e.code; return *this; } |
|
virtual const char* what()const throw(); |
|
}; |
|
|
|
#endif |
|
|
|
// |
|
// define default traits classes for char and wchar_t types: |
|
// |
|
|
|
struct re_set_long; |
|
struct re_syntax_base; |
|
|
|
// char_syntax_type
//
// Numbering of the syntactic roles a pattern character can have.  The values
// run contiguously from 0; syntax_max is one greater than the last entry.
enum char_syntax_type
{
   syntax_char          = 0,
   syntax_open_bracket  = 1,    // (
   syntax_close_bracket = 2,    // )
   syntax_dollar        = 3,    // $
   syntax_caret         = 4,    // ^
   syntax_dot           = 5,    // .
   syntax_star          = 6,    // *
   syntax_plus          = 7,    // +
   syntax_question      = 8,    // ?
   syntax_open_set      = 9,    // [
   syntax_close_set     = 10,   // ]
   syntax_or            = 11,   // |
   syntax_slash         = 12,   // the backslash character
   syntax_hash          = 13,   // #
   syntax_dash          = 14,   // -
   syntax_open_brace    = 15,   // {
   syntax_close_brace   = 16,   // }
   syntax_digit         = 17,   // 0-9
   syntax_b             = 18,   // for \b
   syntax_B             = 19,   // for \B
   syntax_left_word     = 20,   // for \<
   syntax_right_word    = 21,   // for \>
   syntax_w             = 22,   // for \w
   syntax_W             = 23,   // for \W
   syntax_start_buffer  = 24,   // for \`
   syntax_end_buffer    = 25,   // for \'
   syntax_newline       = 26,   // for newline alt
   syntax_comma         = 27,   // for {x,y}

   syntax_a             = 28,   // for \a
   syntax_f             = 29,   // for \f
   syntax_n             = 30,   // for \n
   syntax_r             = 31,   // for \r
   syntax_t             = 32,   // for \t
   syntax_v             = 33,   // for \v
   syntax_x             = 34,   // for \xdd
   syntax_c             = 35,   // for \cx
   syntax_colon         = 36,   // for [:...:]
   syntax_equal         = 37,   // for [=...=]

   // perl ops:
   syntax_e             = 38,   // for \e
   syntax_l             = 39,   // for \l
   syntax_L             = 40,   // for \L
   syntax_u             = 41,   // for \u
   syntax_U             = 42,   // for \U
   syntax_s             = 43,   // for \s
   syntax_S             = 44,   // for \S
   syntax_d             = 45,   // for \d
   syntax_D             = 46,   // for \D
   syntax_E             = 47,   // for \Q\E
   syntax_Q             = 48,   // for \Q\E
   syntax_X             = 49,   // for \X
   syntax_C             = 50,   // for \C
   syntax_Z             = 51,   // for \Z
   syntax_G             = 52,   // for \G

   syntax_max           = 53
};
|
|
|
template <class charT> |
|
class char_regex_traits |
|
{ |
|
public: |
|
typedef charT char_type; |
|
// |
|
// uchar_type is the same size as char_type |
|
// but must be unsigned: |
|
typedef charT uchar_type; |
|
// |
|
// size_type is normally the same as charT |
|
// but could be unsigned int to improve performance |
|
// of narrow character types, NB must be unsigned: |
|
typedef jm_uintfast32_t size_type; |
|
|
|
// length: |
|
// returns the length of a null terminated string |
|
// can be left unimplimented for non-character types. |
|
static size_t length(const char_type* ); |
|
|
|
// syntax_type |
|
// returns the syntax type of a given charT |
|
// translates customised syntax to a unified enum. |
|
static unsigned int syntax_type(size_type c); |
|
|
|
// translate: |
|
// |
|
static charT RE_CALL translate(charT c, bool icase |
|
#ifdef RE_LOCALE_CPP |
|
, const __JM_STD::locale& |
|
#endif |
|
); |
|
|
|
// transform: |
|
// |
|
// converts a string into a sort key for locale dependant |
|
// character ranges. |
|
static void RE_CALL transform(re_str<charT>& out, const re_str<charT>& in |
|
#ifdef RE_LOCALE_CPP |
|
, const __JM_STD::locale& |
|
#endif |
|
); |
|
|
|
// transform_primary: |
|
// |
|
// converts a string into a primary sort key for locale dependant |
|
// equivalence classes. |
|
static void RE_CALL transform_primary(re_str<charT>& out, const re_str<charT>& in |
|
#ifdef RE_LOCALE_CPP |
|
, const __JM_STD::locale& |
|
#endif |
|
); |
|
|
|
// is_separator |
|
// returns true if c is a newline character |
|
static bool RE_CALL is_separator(charT c); |
|
|
|
// is_combining |
|
// returns true if the character is a unicode |
|
// combining character |
|
static bool RE_CALL is_combining(charT c); |
|
|
|
// is_class |
|
// returns true if the character is a member |
|
// of the specified character class |
|
static bool RE_CALL is_class(charT c, jm_uintfast32_t f |
|
#ifdef RE_LOCALE_CPP |
|
, const __JM_STD::locale& |
|
#endif |
|
); |
|
|
|
// toi |
|
// converts c to integer |
|
static int RE_CALL toi(charT c |
|
#ifdef RE_LOCALE_CPP |
|
, const __JM_STD::locale& |
|
#endif |
|
); |
|
|
|
// toi |
|
// converts multi-character value to int |
|
// updating first as required |
|
static int RE_CALL toi(const charT*& first, const charT* last, int radix |
|
#ifdef RE_LOCALE_CPP |
|
, const __JM_STD::locale& |
|
#endif |
|
); |
|
|
|
// lookup_classname |
|
// parses a class declaration of the form [:class:] |
|
// On entry first points to the first character of the class name. |
|
// |
|
static jm_uintfast32_t RE_CALL lookup_classname(const charT* first, const charT* last |
|
#ifdef RE_LOCALE_CPP |
|
, const __JM_STD::locale& |
|
#endif |
|
); |
|
|
|
// lookup_collatename |
|
// parses a collating element declaration of the form [.collating_name.] |
|
// On entry first points to the first character of the collating element name. |
|
// |
|
static bool RE_CALL lookup_collatename(re_str<charT>& s, const charT* first, const charT* last |
|
#ifdef RE_LOCALE_CPP |
|
, const __JM_STD::locale& |
|
#endif |
|
); |
|
|
|
}; |
|
|
|
// char_regex_traits<char>
//
// Traits for narrow characters.  Each operation has two bodies: with
// RE_LOCALE_CPP it goes through facets of the supplied locale, otherwise it
// uses the library's global tables and re_* helper functions.
JM_TEMPLATE_SPECIALISE
class char_regex_traits<char>
{
public:
   typedef char char_type;
   typedef unsigned char uchar_type;
   typedef unsigned int size_type;

   // Length of the null terminated string p (strlen).
   static size_t RE_CALL length(const char_type* p)
   {
      return strlen(p);
   }

   // Syntax type of c: from the regfacet<char> facet, or from re_syntax_map.
   static unsigned int RE_CALL syntax_type(size_type c
#ifdef RE_LOCALE_CPP
      , const __JM_STD::locale& l
#endif
      )
   {
#ifndef RE_LOCALE_CPP
      return re_syntax_map[c];
#else
      return JM_USE_FACET(l, regfacet<char>).syntax_type((char)c);
#endif
   }

   // Returns c unchanged unless icase is set, in which case c is lower-cased.
   static char RE_CALL translate(char c, bool icase
#ifdef RE_LOCALE_CPP
      , const __JM_STD::locale& l
#endif
      )
   {
      if(!icase)
         return c;
#ifndef RE_LOCALE_CPP
      // index through uchar_type so the table index is never negative
      return re_lower_case_map[(size_type)(uchar_type)c];
#else
      return JM_USE_FACET(l, __JM_STD::ctype<char>).tolower((char_type)c);
#endif
   }

   // Writes the collation transform of `in` to `out`.
   static void RE_CALL transform(re_str<char>& out, const re_str<char>& in
#ifdef RE_LOCALE_CPP
      , const __JM_STD::locale& l
#endif
      )
   {
#ifdef RE_LOCALE_CPP
      out = JM_USE_FACET(l, __JM_STD::collate<char>).transform(in.c_str(), in.c_str() + in.size()).c_str();
#else
      re_transform(out, in);
#endif
   }

   // Calls transform() and then shortens the key: via re_trunc_primary()
   // under RE_LOCALE_W32, otherwise by storing a 0 at index
   // in.size() + out.size() / 4 when that index lies inside `out`.
   static void RE_CALL transform_primary(re_str<char>& out, const re_str<char>& in
#ifdef RE_LOCALE_CPP
      , const __JM_STD::locale& l
#endif
      )
   {
      transform(out, in MAYBE_PASS_LOCALE(l));
#ifndef RE_LOCALE_W32
      unsigned cut = in.size() + out.size() / 4;
      if(cut < out.size())
         out[cut] = 0;
#else
      re_trunc_primary(out);
#endif
   }

   // True for '\n' and '\r'.
   static bool RE_CALL is_separator(char c)
   {
      return JM_MAKE_BOOL((c == '\r') || (c == '\n'));
   }

   // Always false for narrow characters.
   static bool RE_CALL is_combining(char)
   {
      return false;
   }

   // True when c belongs to any of the classes selected by the mask f.
   static bool RE_CALL is_class(char c, jm_uintfast32_t f
#ifdef RE_LOCALE_CPP
      , const __JM_STD::locale& l
#endif
      )
   {
#ifndef RE_LOCALE_CPP
      return JM_MAKE_BOOL(re_class_map[(size_type)(uchar_type)c] & f);
#else
      // ctype<char> handles the base classes; underscore and blank are
      // tested by hand.
      return JM_USE_FACET(l, __JM_STD::ctype<char>).is((__JM_STD::ctype<char>::mask)(f & char_class_all_base), c)
         || ((f & char_class_underscore) && (c == '_'))
         || ((f & char_class_blank) && ((c == ' ') || (c == '\t')));
#endif
   }

   // Integer value of the single character c (forwards to re_toi).
   static int RE_CALL toi(char c
#ifdef RE_LOCALE_CPP
      , const __JM_STD::locale& l
#endif
      )
   {
      return re_toi(c MAYBE_PASS_LOCALE(l));
   }

   // Integer value of [first, last) in the given radix (forwards to re_toi);
   // first is passed by reference.
   static int RE_CALL toi(const char*& first, const char* last, int radix
#ifdef RE_LOCALE_CPP
      , const __JM_STD::locale& l
#endif
      )
   {
      return re_toi(first, last, radix MAYBE_PASS_LOCALE(l));
   }

   // Class mask for the class name held in [first, last).
   static jm_uintfast32_t RE_CALL lookup_classname(const char* first, const char* last
#ifdef RE_LOCALE_CPP
      , const __JM_STD::locale& l
#endif
      )
   {
#ifndef RE_LOCALE_CPP
      return re_lookup_class(first, last);
#else
      return JM_USE_FACET(l, regfacet<char>).lookup_classname(first, last);
#endif
   }

   // Looks up the collating element named by [first, last); the result is
   // written to s and the lookup's bool result is returned.
   static bool RE_CALL lookup_collatename(re_str<char>& s, const char* first, const char* last
#ifdef RE_LOCALE_CPP
      , const __JM_STD::locale& l
#endif
      )
   {
#ifndef RE_LOCALE_CPP
      return re_lookup_collate(s, first, last);
#else
      re_str<char> name(first, last);
      return JM_USE_FACET(l, regfacet<char>).lookup_collatename(s, name);
#endif
   }
};
|
|
|
#ifndef JM_NO_WCSTRING |
|
// char_regex_traits<wchar_t>
//
// Traits for wide characters.  Each operation has two bodies: with
// RE_LOCALE_CPP it goes through facets of the supplied locale, otherwise it
// uses the library's global tables and re_* helper functions.
//
// The table-driven paths decide "is this code below 256?" on the full-width
// unsigned value of the character.  Testing the value after a cast to the
// 16-bit uchar_type (or testing a possibly negative wchar_t) would send
// characters above 0xFFFF, or negative values on platforms where wchar_t is
// signed, to the wrong table slot.
JM_TEMPLATE_SPECIALISE
class char_regex_traits<wchar_t>
{
public:
   typedef wchar_t char_type;
   typedef unsigned short uchar_type;
   typedef unsigned int size_type;

   // Length of the null terminated string p (wcslen).
   static size_t RE_CALL length(const char_type* p)
   {
      return wcslen(p);
   }

   // Syntax type of c: from the regfacet<wchar_t> facet, or from
   // re_get_syntax_type().
   static unsigned int RE_CALL syntax_type(size_type c
#ifdef RE_LOCALE_CPP
      , const __JM_STD::locale& l
#endif
      )
   {
#ifdef RE_LOCALE_CPP
      return JM_USE_FACET(l, regfacet<wchar_t>).syntax_type((wchar_t)c);
#else
      return re_get_syntax_type(c);
#endif
   }

   // Returns c unchanged unless icase is set, in which case c is lower-cased.
   static wchar_t RE_CALL translate(wchar_t c, bool icase
#ifdef RE_LOCALE_CPP
      , const __JM_STD::locale& l
#endif
      )
   {
      if(!icase)
         return c;
#ifdef RE_LOCALE_CPP
      return JM_USE_FACET(l, __JM_STD::ctype<wchar_t>).tolower((char_type)c);
#else
      // Compare as unsigned so a negative wchar_t cannot select the table.
      return ((size_type)c < 256) ? re_lower_case_map_w[(size_type)c] : re_wtolower(c);
#endif
   }

   // Writes the collation transform of `in` to `out`.
   static void RE_CALL transform(re_str<wchar_t>& out, const re_str<wchar_t>& in
#ifdef RE_LOCALE_CPP
      , const __JM_STD::locale& l
#endif
      )
   {
#ifndef RE_LOCALE_CPP
      re_transform(out, in);
#else
      out = JM_USE_FACET(l, __JM_STD::collate<wchar_t>).transform(in.c_str(), in.c_str() + in.size()).c_str();
#endif
   }

   // Calls transform() and then shortens the key: via re_trunc_primary()
   // under RE_LOCALE_W32, otherwise by storing a 0 at index
   // in.size() + out.size() / 4 when that index lies inside `out`.
   static void RE_CALL transform_primary(re_str<wchar_t>& out, const re_str<wchar_t>& in
#ifdef RE_LOCALE_CPP
      , const __JM_STD::locale& l
#endif
      )
   {
      transform(out, in MAYBE_PASS_LOCALE(l));
#ifdef RE_LOCALE_W32
      re_trunc_primary(out);
#else
      unsigned n = in.size() + out.size() / 4;
      if(n < out.size())
         out[n] = 0;
#endif
   }

   // True for '\n', '\r', U+2028 and U+2029.
   static bool RE_CALL is_separator(wchar_t c)
   {
      return JM_MAKE_BOOL((c == L'\n') || (c == L'\r') || (c == (wchar_t)0x2028) || (c == (wchar_t)0x2029));
   }

   // Forwards to re_is_combining().
   static bool RE_CALL is_combining(wchar_t c)
   {
      return re_is_combining(c);
   }

   // True when c belongs to any of the classes selected by the mask f.
   static bool RE_CALL is_class(wchar_t c, jm_uintfast32_t f
#ifdef RE_LOCALE_CPP
      , const __JM_STD::locale& l
#endif
      )
   {
#ifdef RE_LOCALE_CPP
      // ctype<wchar_t> handles the base classes; underscore, blank and the
      // "unicode" class (any code above 255) are tested by hand.
      if(JM_USE_FACET(l, __JM_STD::ctype<wchar_t>).is((__JM_STD::ctype<wchar_t>::mask)(f & char_class_all_base), c))
         return true;
      if((f & char_class_underscore) && (c == '_'))
         return true;
      if((f & char_class_blank) && ((c == ' ') || (c == '\t')))
         return true;
      if((f & char_class_unicode) && (c > (size_type)(uchar_type)255))
         return true;
      return false;
#else
      // Decide on the full-width value: only genuine codes 0..255 use the
      // table, everything else goes to re_iswclass().
      return JM_MAKE_BOOL(((size_type)c < 256) ? (re_unicode_classes[(size_type)c] & f) : re_iswclass(c, f));
#endif
   }

   // Integer value of the single character c (forwards to re_toi).
   static int RE_CALL toi(wchar_t c
#ifdef RE_LOCALE_CPP
      , const __JM_STD::locale& l
#endif
      )
   {
      return re_toi(c MAYBE_PASS_LOCALE(l));
   }

   // Integer value of [first, last) in the given radix (forwards to re_toi);
   // first is passed by reference.
   static int RE_CALL toi(const wchar_t*& first, const wchar_t* last, int radix
#ifdef RE_LOCALE_CPP
      , const __JM_STD::locale& l
#endif
      )
   {
      return re_toi(first, last, radix MAYBE_PASS_LOCALE(l));
   }

   // Class mask for the class name held in [first, last).
   static jm_uintfast32_t RE_CALL lookup_classname(const wchar_t* first, const wchar_t* last
#ifdef RE_LOCALE_CPP
      , const __JM_STD::locale& l
#endif
      )
   {
#ifdef RE_LOCALE_CPP
      return JM_USE_FACET(l, regfacet<wchar_t>).lookup_classname(first, last);
#else
      return re_lookup_class(first, last);
#endif
   }

   // Looks up the collating element named by [first, last); the result is
   // written to s and the lookup's bool result is returned.
   static bool RE_CALL lookup_collatename(re_str<wchar_t>& s, const wchar_t* first, const wchar_t* last
#ifdef RE_LOCALE_CPP
      , const __JM_STD::locale& l
#endif
      )
   {
#ifdef RE_LOCALE_CPP
      re_str<wchar_t> n(first, last);
      return JM_USE_FACET(l, regfacet<wchar_t>).lookup_collatename(s, n);
#else
      return re_lookup_collate(s, first, last);
#endif
   }
};
|
#endif |
|
|
|
// |
|
// class char_regex_traits_i |
|
// provides case insensitive traits classes: |
|
template <class charT> |
|
class char_regex_traits_i : public char_regex_traits<charT> {}; |
|
|
|
// char_regex_traits_i<char>
//
// Narrow-character traits whose translate() lower-cases every character,
// ignoring the bool argument.  Everything else is inherited from
// char_regex_traits<char>.
JM_TEMPLATE_SPECIALISE
class char_regex_traits_i<char> : public char_regex_traits<char>
{
public:
   typedef char char_type;
   typedef unsigned char uchar_type;
   typedef unsigned int size_type;
   typedef char_regex_traits<char> base_type;

   // Lower-cases c: through ctype<char> of the locale under RE_LOCALE_CPP,
   // otherwise through re_lower_case_map.
   static char RE_CALL translate(char c, bool
#ifdef RE_LOCALE_CPP
      , const __JM_STD::locale& l
#endif
      )
   {
#ifndef RE_LOCALE_CPP
      // index through uchar_type so the table index is never negative
      return re_lower_case_map[(size_type)(uchar_type)c];
#else
      return JM_USE_FACET(l, __JM_STD::ctype<char>).tolower((char_type)c);
#endif
   }
};
|
|
|
#ifndef JM_NO_WCSTRING |
|
// char_regex_traits_i<wchar_t>
//
// Wide-character traits whose translate() lower-cases every character,
// ignoring the bool argument, and whose lookup_classname() widens the
// "upper" class to "alpha".  Everything else is inherited from
// char_regex_traits<wchar_t>.
JM_TEMPLATE_SPECIALISE
class char_regex_traits_i<wchar_t> : public char_regex_traits<wchar_t>
{
public:
   typedef wchar_t char_type;
   typedef unsigned short uchar_type;
   typedef unsigned int size_type;
   typedef char_regex_traits<wchar_t> base_type;

   // Lower-cases c: through ctype<wchar_t> of the locale under
   // RE_LOCALE_CPP, otherwise through re_lower_case_map_w for codes below
   // 256 and re_wtolower() for the rest.
   static wchar_t RE_CALL translate(wchar_t c, bool
#ifdef RE_LOCALE_CPP
      , const __JM_STD::locale& l
#endif
      )
   {
#ifdef RE_LOCALE_CPP
      return JM_USE_FACET(l, __JM_STD::ctype<wchar_t>).tolower((char_type)c);
#else
      // Compare as unsigned: where wchar_t is signed, a negative value must
      // not pass the "< 256" test and index outside the table.
      return ((size_type)c < 256) ? re_lower_case_map_w[(size_type)c] : re_wtolower(c);
#endif
   }

   // Same as the base lookup, except that a result containing every
   // char_class_upper bit also gets the char_class_alpha bits.
   static jm_uintfast32_t RE_CALL lookup_classname(const wchar_t* first, const wchar_t* last
#ifdef RE_LOCALE_CPP
      , const __JM_STD::locale& l
#endif
      )
   {
      jm_uintfast32_t result = char_regex_traits<wchar_t>::lookup_classname(first, last MAYBE_PASS_LOCALE(l));
      if((result & char_class_upper) == char_class_upper)
         result |= char_class_alpha;
      return result;
   }
};
|
#endif |
|
|
|
// mask_type
// Two independent bits (take, skip) plus names for their union.
enum mask_type
{
   mask_take = 1,
   mask_skip = 2,
   mask_any  = mask_take | mask_skip,   // both bits set (3)
   mask_all  = mask_any                 // alias of mask_any
};
|
|
|
// Empty tag types.  is_byte<>::width_type resolves to one of them, and they
// appear as the last parameter of overloaded members such as can_start().
struct __narrow_type
{
};

struct __wide_type
{
};
|
|
|
template <class charT> |
|
class is_byte; |
|
|
|
JM_TEMPLATE_SPECIALISE |
|
class is_byte<char> |
|
{ |
|
public: |
|
typedef __narrow_type width_type; |
|
}; |
|
|
|
JM_TEMPLATE_SPECIALISE |
|
class is_byte<unsigned char> |
|
{ |
|
public: |
|
typedef __narrow_type width_type; |
|
}; |
|
|
|
JM_TEMPLATE_SPECIALISE |
|
class is_byte<signed char> |
|
{ |
|
public: |
|
typedef __narrow_type width_type; |
|
}; |
|
|
|
template <class charT> |
|
class is_byte |
|
{ |
|
public: |
|
typedef __wide_type width_type; |
|
}; |
|
|
|
|
|
// |
|
// compiled structures |
|
// |
|
// the following defs describe the format of the compiled string |
|
// |
|
|
|
//
// enum syntax_element_type
// describes the type of a record.
// The enumerators are numbered consecutively from 0; the value of each is
// noted alongside it.
enum syntax_element_type
{
   syntax_element_startmark = 0,       //  0
   syntax_element_endmark,             //  1
   syntax_element_literal,             //  2
   syntax_element_start_line,          //  3
   syntax_element_end_line,            //  4
   syntax_element_wild,                //  5
   syntax_element_match,               //  6
   syntax_element_word_boundary,       //  7
   syntax_element_within_word,         //  8
   syntax_element_word_start,          //  9
   syntax_element_word_end,            // 10
   syntax_element_buffer_start,        // 11
   syntax_element_buffer_end,          // 12
   syntax_element_backref,             // 13
   syntax_element_long_set,            // 14
   syntax_element_set,                 // 15
   syntax_element_jump,                // 16
   syntax_element_alt,                 // 17
   syntax_element_rep,                 // 18
   syntax_element_combining,           // 19
   syntax_element_soft_buffer_end,     // 20
   syntax_element_restart_continue     // 21
};
|
|
|
union offset_type |
|
{ |
|
re_syntax_base* p; |
|
unsigned i; |
|
}; |
|
|
|
// |
|
// struct re_syntax_base |
|
// base class for all syntax types: |
|
struct re_syntax_base |
|
{ |
|
syntax_element_type type; |
|
offset_type next; |
|
unsigned int can_be_null; |
|
}; |
|
|
|
// |
|
// struct re_brace |
|
// marks start or end of (...) |
|
struct re_brace : public re_syntax_base |
|
{ |
|
unsigned int index; |
|
}; |
|
|
|
// |
|
// struct re_literal |
|
// marks a literal string and |
|
// is followed by an array of charT[length]: |
|
struct re_literal : public re_syntax_base |
|
{ |
|
unsigned int length; |
|
}; |
|
|
|
// |
|
// struct re_long_set |
|
// provides data for sets [...] containing |
|
// wide characters |
|
struct re_set_long : public re_syntax_base |
|
{ |
|
unsigned int csingles, cranges, cequivalents; |
|
jm_uintfast32_t cclasses; |
|
bool isnot; |
|
}; |
|
|
|
// |
|
// struct re_set |
|
// provides a map of bools for sets containing |
|
// narrow, single byte characters. |
|
struct re_set : public re_syntax_base |
|
{ |
|
unsigned char __map[256]; |
|
}; |
|
|
|
// |
|
// struct re_jump |
|
// provides alternative next destination |
|
struct re_jump : public re_syntax_base |
|
{ |
|
offset_type alt; |
|
unsigned char __map[256]; |
|
}; |
|
|
|
// |
|
// struct re_repeat |
|
// provides repeat expressions |
|
struct re_repeat : public re_jump |
|
{ |
|
unsigned min, max; |
|
int id; |
|
bool leading; |
|
}; |
|
|
|
|
|
// |
|
// enum re_jump_size_type |
|
// provides compiled size of re_jump |
|
// allowing for trailing alignment |
|
// provide this so we know how many |
|
// bytes to insert |
|
enum re_jump_size_type |
|
{ |
|
re_jump_size = (sizeof(re_jump) + sizeof(padding) - 1) & ~(sizeof(padding) - 1), |
|
re_repeater_size = (sizeof(re_repeat) + sizeof(padding) - 1) & ~(sizeof(padding) - 1) |
|
}; |
|
|
|
|
|
// |
|
// class basic_regex |
|
// handles error codes and flags |
|
|
|
class JM_IX_DECL regbase |
|
{ |
|
protected: |
|
#ifdef RE_LOCALE_CPP |
|
__JM_STD::locale locale_inst; |
|
#endif |
|
jm_uintfast32_t _flags; |
|
unsigned int code; |
|
public: |
|
enum flag_type |
|
{ |
|
escape_in_lists = 1, // '\' special inside [...] |
|
char_classes = escape_in_lists << 1, // [[:CLASS:]] allowed |
|
intervals = char_classes << 1, // {x,y} allowed |
|
limited_ops = intervals << 1, // all of + ? and | are normal characters |
|
newline_alt = limited_ops << 1, // \n is the same as | |
|
bk_plus_qm = newline_alt << 1, // uses \+ and \? |
|
bk_braces = bk_plus_qm << 1, // uses \{ and \} |
|
bk_parens = bk_braces << 1, // uses \( and \) |
|
bk_refs = bk_parens << 1, // \d allowed |
|
bk_vbar = bk_refs << 1, // uses \| |
|
use_except = bk_vbar << 1, // exception on error |
|
failbit = use_except << 1, // error flag |
|
literal = failbit << 1, // all characters are literals |
|
icase = literal << 1, // characters are matched regardless of case |
|
nocollate = icase << 1, // don't use locale specific collation |
|
|
|
basic = char_classes | intervals | limited_ops | bk_braces | bk_parens | bk_refs, |
|
extended = char_classes | intervals | bk_refs, |
|
normal = escape_in_lists | char_classes | intervals | bk_refs | nocollate |
|
}; |
|
|
|
enum restart_info |
|
{ |
|
restart_any = 0, |
|
restart_word = 1, |
|
restart_line = 2, |
|
restart_buf = 3, |
|
restart_continue = 4, |
|
restart_lit = 5, |
|
restart_fixed_lit = 6 |
|
}; |
|
|
|
unsigned int RE_CALL error_code()const |
|
{ |
|
return code; |
|
} |
|
|
|
void RE_CALL fail(unsigned int err); |
|
|
|
jm_uintfast32_t RE_CALL flags()const |
|
{ |
|
return _flags; |
|
} |
|
#ifdef RE_LOCALE_CPP |
|
__JM_STD::string RE_CALL errmsg()const |
|
{ |
|
return re_get_error_str(code, locale_inst); |
|
} |
|
#else |
|
const char* RE_CALL errmsg()const |
|
{ |
|
return re_get_error_str(code); |
|
} |
|
#endif |
|
|
|
regbase(); |
|
regbase(const regbase& b); |
|
|
|
#ifdef RE_LOCALE_CPP |
|
__JM_STD::locale RE_CALL imbue(const __JM_STD::locale& l); |
|
|
|
const __JM_STD::locale& RE_CALL locale()const |
|
{ |
|
return locale_inst; |
|
} |
|
#endif |
|
}; |
|
|
|
// |
|
// some forward declarations: |
|
|
|
template <class iterator, class Allocator JM_DEF_ALLOC_PARAM(iterator) > |
|
class reg_match; |
|
|
|
template <class iterator, class Allocator> |
|
class __priv_match_data; |
|
|
|
|
|
// |
|
// class reg_expression |
|
// represents the compiled |
|
// regular expression: |
|
// |
|
|
|
#if defined(JM_NO_TEMPLATE_SWITCH_MERGE) && !defined(JM_NO_NAMESPACES) |
|
// |
|
// Ugly ugly hack,
// templates don't merge if they contain switch statements so declare these
// templates in unnamed namespace (ie with internal linkage), each translation
// unit then gets its own local copy, it works seamlessly but bloats the app.
|
namespace{ |
|
#endif |
|
|
|
template <class charT, class traits JM_TRICKY_DEFAULT_PARAM(char_regex_traits<charT>), class Allocator JM_DEF_ALLOC_PARAM(charT) > |
|
class reg_expression : public regbase |
|
{ |
|
public: |
|
// typedefs: |
|
typedef Allocator alloc_type; |
|
typedef typename REBIND_TYPE(charT, alloc_type)::size_type size_type; |
|
typedef charT value_type; |
|
typedef charT char_type; |
|
typedef traits traits_type; |
|
typedef typename traits_type::size_type traits_size_type; |
|
typedef typename traits_type::uchar_type traits_uchar_type; |
|
|
|
private: |
|
#if defined(RE_LOCALE_C) || defined(RE_LOCALE_W32) |
|
re_initialiser<charT> locale_initialiser; |
|
#endif |
|
raw_storage<Allocator> data; |
|
unsigned _restart_type; |
|
unsigned marks; |
|
int repeats; |
|
unsigned char* startmap; |
|
charT* _expression; |
|
unsigned int _leading_len; |
|
const charT* _leading_string; |
|
unsigned int _leading_string_len; |
|
kmp_info<charT>* pkmp; |
|
|
|
void RE_CALL compile_maps(); |
|
void RE_CALL compile_map(re_syntax_base* node, unsigned char* __map, unsigned int* pnull, unsigned char mask, re_syntax_base* terminal = NULL)const; |
|
bool RE_CALL probe_start(re_syntax_base* node, charT c, re_syntax_base* terminal)const; |
|
bool RE_CALL probe_start_null(re_syntax_base* node, re_syntax_base* terminal)const; |
|
void RE_CALL fixup_apply(re_syntax_base* b, unsigned cbraces); |
|
void RE_CALL move_offsets(re_syntax_base* j, unsigned size); |
|
re_syntax_base* RE_CALL compile_set(const charT*& first, const charT* last); |
|
re_syntax_base* RE_CALL compile_set_aux(jstack<re_str<charT>, Allocator>& singles, jstack<re_str<charT>, Allocator>& ranges, jstack<jm_uintfast32_t, Allocator>& classes, jstack<re_str<charT>, Allocator>& equivalents, bool isnot, const __narrow_type&); |
|
re_syntax_base* RE_CALL compile_set_aux(jstack<re_str<charT>, Allocator>& singles, jstack<re_str<charT>, Allocator>& ranges, jstack<jm_uintfast32_t, Allocator>& classes, jstack<re_str<charT>, Allocator>& equivalents, bool isnot, const __wide_type&); |
|
re_syntax_base* RE_CALL compile_set_simple(re_syntax_base* dat, unsigned long cls, bool isnot = false); |
|
unsigned int RE_CALL parse_inner_set(const charT*& first, const charT* last); |
|
|
|
re_syntax_base* RE_CALL add_simple(re_syntax_base* dat, syntax_element_type type, unsigned int size = sizeof(re_syntax_base)); |
|
re_syntax_base* RE_CALL add_literal(re_syntax_base* dat, charT c); |
|
charT RE_CALL parse_escape(const charT*& first, const charT* last); |
|
void RE_CALL parse_range(const charT*& first, const charT* last, unsigned& min, unsigned& max); |
|
bool RE_CALL skip_space(const charT*& first, const charT* last); |
|
unsigned int RE_CALL probe_restart(re_syntax_base* dat); |
|
unsigned int RE_CALL fixup_leading_rep(re_syntax_base* dat, re_syntax_base* end); |
|
|
|
public: |
|
unsigned int RE_CALL set_expression(const charT* p, const charT* end, jm_uintfast32_t f = regbase::normal); |
|
unsigned int RE_CALL set_expression(const charT* p, jm_uintfast32_t f = regbase::normal) { return set_expression(p, p + traits_type::length(p), f); } |
|
reg_expression(const Allocator& a = Allocator()); |
|
reg_expression(const charT* p, jm_uintfast32_t f = regbase::normal, const Allocator& a = Allocator()); |
|
reg_expression(const charT* p1, const charT* p2, jm_uintfast32_t f = regbase::normal, const Allocator& a = Allocator()); |
|
reg_expression(const charT* p, size_type len, jm_uintfast32_t f, const Allocator& a = Allocator()); |
|
reg_expression(const reg_expression&); |
|
~reg_expression(); |
|
reg_expression& RE_CALL operator=(const reg_expression&); |
|
|
|
#ifndef JM_NO_MEMBER_TEMPLATES |
|
|
|
template <class ST, class SA> |
|
unsigned int RE_CALL set_expression(const __JM_STD::basic_string<charT, ST, SA>& p, jm_uintfast32_t f = regbase::normal) |
|
{ return set_expression(p.data(), p.data() + p.size(), f); } |
|
|
|
template <class ST, class SA> |
|
reg_expression(const __JM_STD::basic_string<charT, ST, SA>& p, jm_uintfast32_t f = regbase::normal, const Allocator& a = Allocator()) |
|
: data(a), pkmp(0) { set_expression(p, f); } |
|
|
|
#elif !defined(JM_NO_STRING_DEF_ARGS) |
|
unsigned int RE_CALL set_expression(const __JM_STD::basic_string<charT>& p, jm_uintfast32_t f = regbase::normal) |
|
{ return set_expression(p.data(), p.data() + p.size(), f); } |
|
|
|
reg_expression(const __JM_STD::basic_string<charT>& p, jm_uintfast32_t f = regbase::normal, const Allocator& a = Allocator()) |
|
: data(a), pkmp(0) { set_expression(p, f); } |
|
|
|
#endif |
|
|
|
|
|
bool RE_CALL operator==(const reg_expression&); |
|
bool RE_CALL operator<(const reg_expression&); |
|
alloc_type RE_CALL allocator()const; |
|
const charT* RE_CALL expression()const { return _expression; } |
|
unsigned RE_CALL mark_count()const { return marks; } |
|
|
|
#if !defined(JM_NO_TEMPLATE_FRIEND) && (!defined(JM_NO_TEMPLATE_SWITCH_MERGE) || defined(JM_NO_NAMESPACES)) |
|
#if 0 |
|
template <class Predicate, class I, class charT, class traits, class A, class A2> |
|
friend unsigned int reg_grep2(Predicate foo, I first, I last, const reg_expression<charT, traits, A>& e, unsigned flags, A2 a); |
|
|
|
template <class I, class A, class charT, class traits, class A2> |
|
friend bool query_match(I first, I last, reg_match<I, A>& m, const reg_expression<charT, traits, A2>& e, unsigned flags); |
|
|
|
template <class I, class A, class charT, class traits, class A2> |
|
friend bool query_match_aux(I first, I last, reg_match<I, A>& m, const reg_expression<charT, traits, A2>& e, |
|
unsigned flags, __priv_match_data<I, A>& pd, I* restart); |
|
|
|
template <class I, class A, class charT, class traits, class A2> |
|
friend bool reg_search(I first, I last, reg_match<I, A>& m, const reg_expression<charT, traits, A2>& e, unsigned flags); |
|
private: |
|
#endif |
|
#endif |
|
|
|
int RE_CALL repeat_count() const { return repeats; } |
|
unsigned int RE_CALL restart_type()const { return _restart_type; } |
|
const re_syntax_base* RE_CALL first()const { return (const re_syntax_base*)data.data(); } |
|
const unsigned char* RE_CALL get_map()const { return startmap; } |
|
unsigned int RE_CALL leading_length()const { return _leading_len; } |
|
const kmp_info<charT>* get_kmp()const { return pkmp; } |
|
static bool RE_CALL can_start(charT c, const unsigned char* __map, unsigned char mask, const __wide_type&); |
|
static bool RE_CALL can_start(charT c, const unsigned char* __map, unsigned char mask, const __narrow_type&); |
|
}; |
|
|
|
#if defined(JM_NO_TEMPLATE_SWITCH_MERGE) && !defined(JM_NO_NAMESPACES) |
|
} // namespace |
|
#endif |
|
|
|
|
|
// |
|
// class reg_match and reg_match_base |
|
// handles what matched where |
|
|
|
// sub_match<iterator>
//
// A pair of iterators [first, second) plus a `matched` flag, with
// conversions to string and integer types.  The string and int/unsigned
// conversions are defined out of line.
template <class iterator>
struct sub_match
{
   iterator first;
   iterator second;
   bool matched;

#ifndef JM_NO_MEMBER_TEMPLATES
   template <class charT, class traits, class Allocator>
   operator __JM_STD::basic_string<charT, traits, Allocator> ()const;
#elif !defined(JM_NO_STRING_DEF_ARGS)
   operator __JM_STD::basic_string<char> ()const;
   operator __JM_STD::basic_string<wchar_t> ()const;
#endif

   operator int()const;
   operator unsigned int()const;

   // Narrowing conversions built on the int / unsigned int conversions.
   operator short()const
   {
      return (short)(int)(*this);
   }
   operator unsigned short()const
   {
      return (unsigned short)(unsigned int)(*this);
   }

   // Value-initialises both iterators (null for pointer iterators) so that a
   // default-constructed sub_match can be copied safely.
   sub_match() : first(), second(), matched(false) {}

   // Empty, unmatched range positioned at i.
   sub_match(iterator i) : first(i), second(i), matched(false) {}
};
|
|
|
#ifndef JM_NO_MEMBER_TEMPLATES |
|
template <class iterator> |
|
template <class charT, class traits, class Allocator> |
|
sub_match<iterator>::operator __JM_STD::basic_string<charT, traits, Allocator> ()const |
|
{ |
|
#if !defined(JM_NO_EXCEPTIONS) && !defined(JM_NO_TYPEINFO) |
|
if(typeid(charT) != typeid(*first)) |
|
throw __JM_STD::bad_cast(); |
|
#endif |
|
__JM_STD::basic_string<charT, traits, Allocator> result; |
|
iterator i = first; |
|
while(i != second) |
|
{ |
|
result.append(1, *i); |
|
++i; |
|
} |
|
return result; |
|
} |
|
#elif !defined(JM_NO_STRING_DEF_ARGS) |
|
template <class iterator> |
|
sub_match<iterator>::operator __JM_STD::basic_string<char> ()const |
|
{ |
|
#if !defined(JM_NO_EXCEPTIONS) && !defined(JM_NO_TYPEINFO) |
|
if(typeid(char) != typeid(*first)) |
|
throw __JM_STD::bad_cast(); |
|
#endif |
|
__JM_STD::basic_string<char> result; |
|
iterator i = first; |
|
while(i != second) |
|
{ |
|
result.append(1, *i); |
|
++i; |
|
} |
|
return result; |
|
} |
|
template <class iterator> |
|
sub_match<iterator>::operator __JM_STD::basic_string<wchar_t> ()const |
|
{ |
|
#if !defined(JM_NO_EXCEPTIONS) && !defined(JM_NO_TYPEINFO) |
|
if(typeid(wchar_t) != typeid(*first)) |
|
throw __JM_STD::bad_cast(); |
|
#endif |
|
__JM_STD::basic_string<wchar_t> result; |
|
iterator i = first; |
|
while(i != second) |
|
{ |
|
result.append(1, *i); |
|
++i; |
|
} |
|
return result; |
|
} |
|
#endif |
|
template <class iterator> |
|
sub_match<iterator>::operator int()const |
|
{ |
|
iterator i = first; |
|
int neg = 1; |
|
if((i != second) && (*i == '-')) |
|
{ |
|
neg = -1; |
|
++i; |
|
} |
|
neg *= (int)re_toi(i, second, 10 MAYBE_PASS_LOCALE(__JM_STD::locale())); |
|
#if !defined(JM_NO_EXCEPTIONS) && !defined(JM_NO_TYPEINFO) |
|
if(i != second) |
|
{ |
|
throw __JM_STD::bad_cast(); |
|
} |
|
#endif |
|
return neg; |
|
} |
|
template <class iterator> |
|
sub_match<iterator>::operator unsigned int()const |
|
{ |
|
iterator i = first; |
|
unsigned int result = (int)re_toi(i, second, 10 MAYBE_PASS_LOCALE(__JM_STD::locale())); |
|
#if !defined(JM_NO_EXCEPTIONS) && !defined(JM_NO_TYPEINFO) |
|
if(i != second) |
|
{ |
|
throw __JM_STD::bad_cast(); |
|
} |
|
#endif |
|
return result; |
|
} |
|
|
|
|
|
template <class iterator, class Allocator JM_DEF_ALLOC_PARAM(iterator) > |
|
class reg_match_base |
|
{ |
|
public: |
|
typedef Allocator alloc_type; |
|
typedef typename REBIND_TYPE(iterator, Allocator)::size_type size_type; |
|
typedef JM_MAYBE_TYPENAME REBIND_TYPE(char, Allocator) c_alloc; |
|
typedef iterator value_type; |
|
|
|
protected: |
|
struct reference : public c_alloc |
|
{ |
|
unsigned int cmatches; |
|
unsigned count; |
|
sub_match<iterator> head, tail, null; |
|
unsigned int lines; |
|
iterator line_pos; |
|
reference(const Allocator& a) : c_alloc(a) { } |
|
}; |
|
|
|
reference* ref; |
|
|
|
void RE_CALL cow(); |
|
|
|
// protected contructor for derived class... |
|
reg_match_base(bool){} |
|
void RE_CALL free(); |
|
|
|
public: |
|
|
|
reg_match_base(const Allocator& a = Allocator()); |
|
|
|
reg_match_base(const reg_match_base& m) |
|
{ |
|
ref = m.ref; |
|
++(ref->count); |
|
} |
|
|
|
reg_match_base& RE_CALL operator=(const reg_match_base& m); |
|
|
|
~reg_match_base() |
|
{ |
|
free(); |
|
} |
|
|
|
size_type RE_CALL size()const |
|
{ |
|
return ref->cmatches; |
|
} |
|
|
|
const sub_match<iterator>& RE_CALL operator[](int n) const |
|
{ |
|
if((n >= 0) && ((unsigned int)n < ref->cmatches)) |
|
return *(sub_match<iterator>*)((char*)ref + sizeof(reference) + sizeof(sub_match<iterator>)*n); |
|
return (n == -1) ? ref->head : (n == -2) ? ref->tail : ref->null; |
|
} |
|
|
|
Allocator RE_CALL allocator()const; |
|
|
|
size_t RE_CALL length()const |
|
{ |
|
jm_assert(ref->cmatches); |
|
size_t n = 0; |
|
JM_DISTANCE(((sub_match<iterator>*)(ref+1))->first, ((sub_match<iterator>*)(ref+1))->second, n); |
|
return n; |
|
} |
|
|
|
unsigned int RE_CALL line()const |
|
{ |
|
return ref->lines; |
|
} |
|
|
|
iterator RE_CALL line_start()const |
|
{ |
|
return ref->line_pos; |
|
} |
|
|
|
void swap(reg_match_base& that) |
|
{ |
|
reference* t = that.ref; |
|
that.ref = ref; |
|
ref = t; |
|
} |
|
|
|
friend class reg_match<iterator, Allocator>; |
|
#if !defined(JM_NO_TEMPLATE_FRIEND) && (!defined(JM_NO_TEMPLATE_SWITCH_MERGE) || defined(JM_NO_NAMESPACES)) |
|
private: |
|
template <class Predicate, class I, class charT, class traits, class A, class A2> |
|
friend unsigned int reg_grep2(Predicate foo, I first, I last, const reg_expression<charT, traits, A>& e, unsigned flags, A2 a); |
|
|
|
template <class I, class A, class charT, class traits, class A2> |
|
friend bool query_match(I first, I last, reg_match<I, A>& m, const reg_expression<charT, traits, A2>& e, unsigned flags); |
|
|
|
template <class I, class A, class charT, class traits, class A2> |
|
friend bool query_match_aux(I first, I last, reg_match<I, A>& m, const reg_expression<charT, traits, A2>& e, |
|
unsigned flags, __priv_match_data<I, A>& pd, I* restart); |
|
|
|
template <class I, class A, class charT, class traits, class A2> |
|
friend bool reg_search(I first, I last, reg_match<I, A>& m, const reg_expression<charT, traits, A2>& e, unsigned flags); |
|
#endif |
|
void RE_CALL set_size(size_type n); |
|
void RE_CALL set_size(size_type n, iterator i, iterator j); |
|
void RE_CALL maybe_assign(const reg_match_base& m); |
|
void RE_CALL init_fail(iterator i, iterator j); |
|
|
|
void RE_CALL set_first(iterator i) |
|
{ |
|
cow(); |
|
((sub_match<iterator>*)(ref+1))->first = i; |
|
ref->head.second = i; |
|
ref->head.matched = (ref->head.first == ref->head.second) ? false : true; |
|
} |
|
|
|
void RE_CALL set_first(iterator i, size_t pos) |
|
{ |
|
cow(); |
|
((sub_match<iterator>*)((char*)ref + sizeof(reference) + sizeof(sub_match<iterator>) * pos))->first = i; |
|
if(pos == 0) |
|
{ |
|
ref->head.second = i; |
|
ref->head.matched = (ref->head.first == ref->head.second) ? false : true; |
|
} |
|
} |
|
|
|
void RE_CALL set_second(iterator i) |
|
{ |
|
cow(); |
|
((sub_match<iterator>*)(ref+1))->second = i; |
|
((sub_match<iterator>*)(ref+1))->matched = true; |
|
ref->tail.first = i; |
|
ref->tail.matched = (ref->tail.first == ref->tail.second) ? false : true; |
|
} |
|
|
|
void RE_CALL set_second(iterator i, size_t pos) |
|
{ |
|
cow(); |
|
((sub_match<iterator>*)((char*)ref + sizeof(reference) + sizeof(sub_match<iterator>) * pos))->second = i; |
|
((sub_match<iterator>*)((char*)ref + sizeof(reference) + sizeof(sub_match<iterator>) * pos))->matched = true; |
|
if(pos == 0) |
|
{ |
|
ref->tail.first = i; |
|
ref->tail.matched = (ref->tail.first == ref->tail.second) ? false : true; |
|
} |
|
} |
|
|
|
void RE_CALL set_line(unsigned int i, iterator pos) |
|
{ |
|
ref->lines = i; |
|
ref->line_pos = pos; |
|
} |
|
}; |
|
|
|
template <class iterator, class Allocator> |
|
reg_match_base<iterator, Allocator>::reg_match_base(const Allocator& a) |
|
{ |
|
ref = (reference*)c_alloc(a).allocate(sizeof(sub_match<iterator>) + sizeof(reference)); |
|
#ifndef JM_NO_EXCEPTIONS |
|
try |
|
{ |
|
#endif |
|
new (ref) reference(a); |
|
ref->cmatches = 1; |
|
ref->count = 1; |
|
// construct the sub_match<iterator>: |
|
#ifndef JM_NO_EXCEPTIONS |
|
try |
|
{ |
|
#endif |
|
new ((sub_match<iterator>*)(ref+1)) sub_match<iterator>(); |
|
#ifndef JM_NO_EXCEPTIONS |
|
} |
|
catch(...) |
|
{ |
|
jm_destroy(ref); |
|
throw; |
|
} |
|
#endif |
|
#ifndef JM_NO_EXCEPTIONS |
|
} |
|
catch(...) |
|
{ |
|
c_alloc(a).deallocate((char*)(void*)ref, sizeof(sub_match<iterator>) + sizeof(reference)); |
|
throw; |
|
} |
|
#endif |
|
} |
|
|
|
template <class iterator, class Allocator> |
|
Allocator RE_CALL reg_match_base<iterator, Allocator>::allocator()const |
|
{ |
|
return *((c_alloc*)ref); |
|
} |
|
|
|
template <class iterator, class Allocator> |
|
inline reg_match_base<iterator, Allocator>& RE_CALL reg_match_base<iterator, Allocator>::operator=(const reg_match_base<iterator, Allocator>& m) |
|
{ |
|
if(ref != m.ref) |
|
{ |
|
free(); |
|
ref = m.ref; |
|
++(ref->count); |
|
} |
|
return *this; |
|
} |
|
|
|
|
|
template <class iterator, class Allocator> |
|
void RE_CALL reg_match_base<iterator, Allocator>::free() |
|
{ |
|
if(--(ref->count) == 0) |
|
{ |
|
c_alloc a(*ref); |
|
sub_match<iterator>* p1, *p2; |
|
p1 = (sub_match<iterator>*)(ref+1); |
|
p2 = p1 + ref->cmatches; |
|
while(p1 != p2) |
|
{ |
|
jm_destroy(p1); |
|
++p1; |
|
} |
|
jm_destroy(ref); |
|
a.deallocate((char*)(void*)ref, sizeof(sub_match<iterator>) * ref->cmatches + sizeof(reference)); |
|
} |
|
} |
|
|
|
template <class iterator, class Allocator> |
|
void RE_CALL reg_match_base<iterator, Allocator>::set_size(size_type n) |
|
{ |
|
if(ref->cmatches != n) |
|
{ |
|
reference* newref = (reference*)ref->allocate(sizeof(sub_match<iterator>) * n + sizeof(reference)); |
|
#ifndef JM_NO_EXCEPTIONS |
|
try |
|
{ |
|
#endif |
|
new (newref) reference(*ref); |
|
newref->count = 1; |
|
newref->cmatches = n; |
|
sub_match<iterator>* p1, *p2; |
|
p1 = (sub_match<iterator>*)(newref+1); |
|
p2 = p1 + newref->cmatches; |
|
#ifndef JM_NO_EXCEPTIONS |
|
try |
|
{ |
|
#endif |
|
while(p1 != p2) |
|
{ |
|
new (p1) sub_match<iterator>(); |
|
++p1; |
|
} |
|
free(); |
|
#ifndef JM_NO_EXCEPTIONS |
|
} |
|
catch(...) |
|
{ |
|
p2 = (sub_match<iterator>*)(newref+1); |
|
while(p2 != p1) |
|
{ |
|
jm_destroy(p2); |
|
++p2; |
|
} |
|
jm_destroy(ref); |
|
throw; |
|
} |
|
#endif |
|
ref = newref; |
|
#ifndef JM_NO_EXCEPTIONS |
|
} |
|
catch(...) |
|
{ |
|
ref->deallocate((char*)(void*)newref, sizeof(sub_match<iterator>) * n + sizeof(reference)); |
|
throw; |
|
} |
|
#endif |
|
} |
|
} |
|
|
|
template <class iterator, class Allocator> |
|
void RE_CALL reg_match_base<iterator, Allocator>::set_size(size_type n, iterator i, iterator j) |
|
{ |
|
if(ref->cmatches != n) |
|
{ |
|
reference* newref = (reference*)ref->allocate(sizeof(sub_match<iterator>) * n + sizeof(reference));; |
|
#ifndef JM_NO_EXCEPTIONS |
|
try{ |
|
#endif |
|
new (newref) reference(*ref); |
|
newref->count = 1; |
|
newref->cmatches = n; |
|
sub_match<iterator>* p1, *p2; |
|
p1 = (sub_match<iterator>*)(newref+1); |
|
p2 = p1 + newref->cmatches; |
|
#ifndef JM_NO_EXCEPTIONS |
|
try |
|
{ |
|
#endif |
|
while(p1 != p2) |
|
{ |
|
new (p1) sub_match<iterator>(j); |
|
++p1; |
|
} |
|
free(); |
|
#ifndef JM_NO_EXCEPTIONS |
|
} |
|
catch(...) |
|
{ |
|
p2 = (sub_match<iterator>*)(newref+1); |
|
while(p2 != p1) |
|
{ |
|
jm_destroy(p2); |
|
++p2; |
|
} |
|
jm_destroy(ref); |
|
throw; |
|
} |
|
#endif |
|
ref = newref; |
|
#ifndef JM_NO_EXCEPTIONS |
|
} |
|
catch(...) |
|
{ |
|
ref->deallocate((char*)(void*)newref, sizeof(sub_match<iterator>) * n + sizeof(reference)); |
|
throw; |
|
} |
|
#endif |
|
} |
|
else |
|
{ |
|
cow(); |
|
// set iterators to be i, matched to false: |
|
sub_match<iterator>* p1, *p2; |
|
p1 = (sub_match<iterator>*)(ref+1); |
|
p2 = p1 + ref->cmatches; |
|
while(p1 != p2) |
|
{ |
|
p1->first = j; |
|
p1->second = j; |
|
p1->matched = false; |
|
++p1; |
|
} |
|
} |
|
ref->head.first = i; |
|
ref->tail.second = j; |
|
ref->head.matched = ref->tail.matched = true; |
|
ref->null.first = ref->null.second = j; |
|
ref->null.matched = false; |
|
} |
|
|
|
template <class iterator, class Allocator> |
|
inline void RE_CALL reg_match_base<iterator, Allocator>::init_fail(iterator i, iterator j) |
|
{ |
|
set_size(ref->cmatches, i, j); |
|
} |
|
|
|
template <class iterator, class Allocator> |
|
void RE_CALL reg_match_base<iterator, Allocator>::maybe_assign(const reg_match_base<iterator, Allocator>& m) |
|
{ |
|
sub_match<iterator>* p1, *p2; |
|
p1 = (sub_match<iterator>*)(ref+1); |
|
p2 = (sub_match<iterator>*)(m.ref+1); |
|
unsigned int len1, len2; |
|
unsigned int i; |
|
for(i = 0; i < ref->cmatches; ++i) |
|
{ |
|
len1 = len2 = 0; |
|
JM_DISTANCE(p1->first, p1->second, len1); |
|
JM_DISTANCE(p2->first, p2->second, len2); |
|
if((len1 != len2) || ((p1->matched == false) && (p2->matched == true))) |
|
break; |
|
if((p1->matched == true) && (p2->matched == false)) |
|
return; |
|
++p1; |
|
++p2; |
|
} |
|
if(i == ref->cmatches) |
|
return; |
|
if((len2 > len1) || ((p1->matched == false) && (p2->matched == true)) ) |
|
*this = m; |
|
} |
|
|
|
template <class iterator, class Allocator> |
|
void RE_CALL reg_match_base<iterator, Allocator>::cow() |
|
{ |
|
if(ref->count > 1) |
|
{ |
|
reference* newref = (reference*)ref->allocate(sizeof(sub_match<iterator>) * ref->cmatches + sizeof(reference)); |
|
#ifndef JM_NO_EXCEPTIONS |
|
try{ |
|
#endif |
|
new (newref) reference(*ref); |
|
newref->count = 1; |
|
sub_match<iterator>* p1, *p2, *p3; |
|
p1 = (sub_match<iterator>*)(newref+1); |
|
p2 = p1 + newref->cmatches; |
|
p3 = (sub_match<iterator>*)(ref+1); |
|
#ifndef JM_NO_EXCEPTIONS |
|
try{ |
|
#endif |
|
while(p1 != p2) |
|
{ |
|
new (p1) sub_match<iterator>(*p3); |
|
++p1; |
|
++p3; |
|
} |
|
#ifndef JM_NO_EXCEPTIONS |
|
} |
|
catch(...) |
|
{ |
|
p2 = (sub_match<iterator>*)(newref+1); |
|
while(p2 != p1) |
|
{ |
|
jm_destroy(p2); |
|
++p2; |
|
} |
|
jm_destroy(ref); |
|
throw; |
|
} |
|
#endif |
|
--(ref->count); |
|
ref = newref; |
|
#ifndef JM_NO_EXCEPTIONS |
|
} |
|
catch(...) |
|
{ |
|
ref->deallocate((char*)(void*)newref, sizeof(sub_match<iterator>) * ref->cmatches + sizeof(reference)); |
|
throw; |
|
} |
|
#endif |
|
} |
|
} |
|
|
|
// |
|
// class reg_match |
|
// encapsulates reg_match_base, does a deep copy rather than |
|
// reference counting to ensure thread safety when copying |
|
// other reg_match instances |
|
|
|
template <class iterator, class Allocator> |
|
class reg_match : public reg_match_base<iterator, Allocator> |
|
{ |
|
public: |
|
reg_match(const Allocator& a = Allocator()) |
|
: reg_match_base<iterator, Allocator>(a){} |
|
|
|
reg_match(const reg_match_base<iterator, Allocator>& m) |
|
: reg_match_base<iterator, Allocator>(m){} |
|
|
|
reg_match& operator=(const reg_match_base<iterator, Allocator>& m) |
|
{ |
|
// shallow copy |
|
reg_match_base<iterator, Allocator>::operator=(m); |
|
return *this; |
|
} |
|
|
|
reg_match(const reg_match& m); |
|
reg_match& operator=(const reg_match& m); |
|
|
|
}; |
|
|
|
template <class iterator, class Allocator> |
|
reg_match<iterator, Allocator>::reg_match(const reg_match<iterator, Allocator>& m) |
|
: reg_match_base<iterator, Allocator>(false) |
|
{ |
|
reg_match_base<iterator, Allocator>::ref = (typename reg_match_base<iterator, Allocator>::reference *)m.ref->allocate(sizeof(sub_match<iterator>) * m.ref->cmatches + sizeof(typename reg_match_base<iterator, Allocator>::reference)); |
|
#ifndef JM_NO_EXCEPTIONS |
|
try{ |
|
#endif |
|
new (reg_match_base<iterator, Allocator>::ref) typename reg_match_base<iterator, Allocator>::reference(*m.ref); |
|
reg_match_base<iterator, Allocator>::ref->count = 1; |
|
sub_match<iterator>* p1, *p2, *p3; |
|
p1 = (sub_match<iterator>*)(reg_match_base<iterator, Allocator>::ref+1); |
|
p2 = p1 + reg_match_base<iterator, Allocator>::ref->cmatches; |
|
p3 = (sub_match<iterator>*)(m.ref+1); |
|
#ifndef JM_NO_EXCEPTIONS |
|
try{ |
|
#endif |
|
while(p1 != p2) |
|
{ |
|
new (p1) sub_match<iterator>(*p3); |
|
++p1; |
|
++p3; |
|
} |
|
#ifndef JM_NO_EXCEPTIONS |
|
} |
|
catch(...) |
|
{ |
|
p2 = (sub_match<iterator>*)(reg_match_base<iterator, Allocator>::ref+1); |
|
while(p2 != p1) |
|
{ |
|
jm_destroy(p2); |
|
++p2; |
|
} |
|
jm_destroy(ref); |
|
throw; |
|
} |
|
} |
|
catch(...) |
|
{ |
|
m.ref->deallocate((char*)(void*)reg_match_base<iterator, Allocator>::ref, sizeof(sub_match<iterator>) * m.ref->cmatches + sizeof(typename reg_match_base<iterator, Allocator>::reference)); |
|
throw; |
|
} |
|
#endif |
|
} |
|
|
|
template <class iterator, class Allocator> |
|
reg_match<iterator, Allocator>& reg_match<iterator, Allocator>::operator=(const reg_match<iterator, Allocator>& m) |
|
{ |
|
reg_match<iterator, Allocator> t(m); |
|
this->swap(t); |
|
return *this; |
|
} |
|
|
|
|
|
// Forward declaration only; the definition is not part of this section.
// Takes the iterator pair [next, last), a re_set_long and the expression
// the set belongs to, and returns an iterator.
template <class iterator, class charT, class traits_type, class Allocator> |

iterator RE_CALL re_is_set_member(iterator next, |

iterator last, |

re_set_long* set, |

const reg_expression<charT, traits_type, Allocator>& e); |
|
|
|
JM_END_NAMESPACE // namespace regex |
|
|
|
#include <jm/regcomp.h> |
|
|
|
JM_NAMESPACE(__JM) |
|
|
|
// Convenience typedefs for the common character types.
// regex / wregex: expressions over char / wchar_t using char_regex_traits
// and the default allocator selected by JM_DEF_ALLOC.
typedef reg_expression<char, char_regex_traits<char>, JM_DEF_ALLOC(char)> regex; |

#ifndef JM_NO_WCSTRING |

typedef reg_expression<wchar_t, char_regex_traits<wchar_t>, JM_DEF_ALLOC(wchar_t)> wregex; |

#endif |



// cmatch / wcmatch: match results over const char* / const wchar_t*,
// using the allocator type of the corresponding expression typedef.
typedef reg_match<const char*, regex::alloc_type> cmatch; |

#ifndef JM_NO_WCSTRING |

typedef reg_match<const wchar_t*, wregex::alloc_type> wcmatch; |

#endif |
|
|
|
JM_END_NAMESPACE // namespace regex |
|
|
|
#include <jm/regmatch.h> |
|
#include <jm/regfmt.h> |
|
|
|
// When namespaces and using-declarations are both available, bring the
// names listed below from namespace __JM into the enclosing scope.
#if !defined(JM_NO_NAMESPACES) && !defined(JM_NO_USING) |



#ifndef JM_NO_EXCEPTIONS |

using __JM::bad_expression; |

#endif |

using __JM::char_regex_traits; |

using __JM::char_regex_traits_i; |

using __JM::regbase; |

using __JM::reg_expression; |

using __JM::reg_match; |

using __JM::reg_match_base; |

using __JM::sub_match; |

using __JM::regex; |

using __JM::cmatch; |

// the wide character typedefs exist only when JM_NO_WCSTRING is not defined
#ifndef JM_NO_WCSTRING |

using __JM::wregex; |

using __JM::wcmatch; |

#endif |

using __JM::query_match; |

using __JM::reg_search; |

using __JM::reg_grep; |

using __JM::reg_format; |

using __JM::reg_merge; |

using __JM::jm_def_alloc; |



#endif |
|
|
|
#endif // __cplusplus |
|
|
|
#endif // include |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|