You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1707 lines
55 KiB
1707 lines
55 KiB
//========= Copyright Valve Corporation, All rights reserved. ============// |
|
// |
|
// Purpose: |
|
// |
|
// $NoKeywords: $ |
|
// |
|
//=============================================================================// |
|
/* |
|
* |
|
* Copyright (c) 1998-9 |
|
* Dr John Maddock |
|
* |
|
* Permission to use, copy, modify, distribute and sell this software |
|
* and its documentation for any purpose is hereby granted without fee, |
|
* provided that the above copyright notice appear in all copies and |
|
* that both that copyright notice and this permission notice appear |
|
* in supporting documentation. Dr John Maddock makes no representations |
|
* about the suitability of this software for any purpose. |
|
* It is provided "as is" without express or implied warranty. |
|
* |
|
*/ |
|
|
|
/* |
|
* FILE regmatch.h |
|
* VERSION 2.12 |
|
* regular expression matching algorithms |
|
*/ |
|
|
|
|
|
#ifndef __REGMATCH_H |
|
#define __REGMATCH_H |
|
|
|
|
|
JM_NAMESPACE(__JM) |
|
|
|
template <class iterator, class charT, class traits_type, class Allocator> |
|
iterator RE_CALL re_is_set_member(iterator next, |
|
iterator last, |
|
re_set_long* set, |
|
const reg_expression<charT, traits_type, Allocator>& e) |
|
{ |
|
const charT* p = (const charT*)(set+1); |
|
iterator ptr; |
|
unsigned int i; |
|
bool icase = e.flags() & regbase::icase; |
|
|
|
// try and match a single character, could be a multi-character |
|
// collating element... |
|
for(i = 0; i < set->csingles; ++i) |
|
{ |
|
ptr = next; |
|
while(*p && (ptr != last)) |
|
{ |
|
if(traits_type::translate(*ptr, icase MAYBE_PASS_LOCALE(e.locale())) != *p) |
|
break; |
|
++p; |
|
++ptr; |
|
} |
|
if(*p == 0) // if null we've matched |
|
return set->isnot ? next : (ptr == next) ? ++next : ptr; |
|
|
|
while(*p)++p; |
|
++p; // skip null |
|
} |
|
|
|
charT col = traits_type::translate(*next, icase MAYBE_PASS_LOCALE(e.locale())); |
|
|
|
|
|
if(set->cranges || set->cequivalents) |
|
{ |
|
re_str<charT> s2(col); |
|
re_str<charT> s1; |
|
// |
|
// try and match a range, NB only a single character can match |
|
if(set->cranges) |
|
{ |
|
if(e.flags() & regbase::nocollate) |
|
s1 = s2; |
|
else |
|
traits_type::transform(s1, s2 MAYBE_PASS_LOCALE(e.locale())); |
|
for(i = 0; i < set->cranges; ++i) |
|
{ |
|
if(s1 <= p) |
|
{ |
|
while(*p)++p; |
|
++p; |
|
if(s1 >= p) |
|
return set->isnot ? next : ++next; |
|
} |
|
else |
|
{ |
|
// skip first string |
|
while(*p)++p; |
|
++p; |
|
} |
|
// skip second string |
|
while(*p)++p; |
|
++p; |
|
} |
|
} |
|
// |
|
// try and match an equivalence class, NB only a single character can match |
|
if(set->cequivalents) |
|
{ |
|
traits_type::transform_primary(s1, s2 MAYBE_PASS_LOCALE(e.locale())); |
|
for(i = 0; i < set->cequivalents; ++i) |
|
{ |
|
if(s1 == p) |
|
return set->isnot ? next : ++next; |
|
// skip string |
|
while(*p)++p; |
|
++p; |
|
} |
|
} |
|
} |
|
|
|
if(traits_type::is_class(col, set->cclasses MAYBE_PASS_LOCALE(e.locale())) == true) |
|
return set->isnot ? next : ++next; |
|
return set->isnot ? ++next : next; |
|
} |
|
|
|
template <class iterator, class Allocator> |
|
class __priv_match_data |
|
{ |
|
public: |
|
typedef JM_MAYBE_TYPENAME REBIND_TYPE(int, Allocator) i_alloc; |
|
typedef JM_MAYBE_TYPENAME REBIND_TYPE(iterator, Allocator) it_alloc; |
|
|
|
reg_match_base<iterator, Allocator> temp_match; |
|
// failure stacks: |
|
jstack<reg_match_base<iterator, Allocator>, Allocator> matches; |
|
jstack<iterator, Allocator> prev_pos; |
|
jstack<const re_syntax_base*, Allocator> prev_record; |
|
jstack<int, Allocator> prev_acc; |
|
int* accumulators; |
|
unsigned int caccumulators; |
|
iterator* loop_starts; |
|
|
|
__priv_match_data(const reg_match_base<iterator, Allocator>&); |
|
|
|
~__priv_match_data() |
|
{ |
|
free(); |
|
} |
|
void free(); |
|
void set_accumulator_size(unsigned int size); |
|
int* get_accumulators() |
|
{ |
|
return accumulators; |
|
} |
|
iterator* get_loop_starts() |
|
{ |
|
return loop_starts; |
|
} |
|
}; |
|
|
|
template <class iterator, class Allocator> |
|
__priv_match_data<iterator, Allocator>::__priv_match_data(const reg_match_base<iterator, Allocator>& m) |
|
: temp_match(m), matches(64, m.allocator()), prev_pos(64, m.allocator()), prev_record(64, m.allocator()) |
|
{ |
|
accumulators = 0; |
|
caccumulators = 0; |
|
loop_starts = 0; |
|
} |
|
|
|
template <class iterator, class Allocator> |
|
void __priv_match_data<iterator, Allocator>::set_accumulator_size(unsigned int size) |
|
{ |
|
if(size > caccumulators) |
|
{ |
|
free(); |
|
caccumulators = size; |
|
accumulators = i_alloc(temp_match.allocator()).allocate(caccumulators); |
|
loop_starts = it_alloc(temp_match.allocator()).allocate(caccumulators); |
|
for(unsigned i = 0; i < caccumulators; ++i) |
|
new (loop_starts + i) iterator(); |
|
} |
|
} |
|
|
|
template <class iterator, class Allocator> |
|
void __priv_match_data<iterator, Allocator>::free() |
|
{ |
|
if(caccumulators) |
|
{ |
|
//REBIND_INSTANCE(int, Allocator, temp_match.allocator()).deallocate(accumulators, caccumulators); |
|
i_alloc temp1(temp_match.allocator()); |
|
temp1.deallocate(accumulators, caccumulators); |
|
for(unsigned i = 0; i < caccumulators; ++i) |
|
jm_destroy(loop_starts + i); |
|
//REBIND_INSTANCE(iterator, Allocator, temp_match.allocator()).deallocate(loop_starts, caccumulators); |
|
it_alloc temp2(temp_match.allocator()); |
|
temp2.deallocate(loop_starts, caccumulators); |
|
} |
|
} |
|
|
|
// |
|
// proc query_match |
|
// returns true if the specified regular expression matches |
|
// at position first. Fills in what matched in m. |
|
// |
|
template <class iterator, class Allocator, class charT, class traits, class Allocator2> |
|
bool query_match(iterator first, iterator last, reg_match<iterator, Allocator>& m, const reg_expression<charT, traits, Allocator2>& e, unsigned flags = match_default) |
|
{ |
|
// prepare m for failure: |
|
if((flags & match_init) == 0) |
|
{ |
|
m.set_size(e.mark_count(), first, last); |
|
} |
|
__priv_match_data<iterator, Allocator> pd(m); |
|
iterator restart; |
|
return query_match_aux(first, last, m, e, flags, pd, &restart); |
|
} |
|
|
|
// |
|
// query_match convenience interfaces: |
|
#ifndef JM_NO_PARTIAL_FUNC_SPEC |
|
// |
|
// this isn't really a partial specialisation, but template function |
|
// overloading - if the compiler doesn't support partial specialisation |
|
// then it really won't support this either: |
|
template <class charT, class Allocator, class traits, class Allocator2> |
|
inline bool query_match(const charT* str, |
|
reg_match<const charT*, Allocator>& m, |
|
const reg_expression<charT, traits, Allocator2>& e, |
|
unsigned flags = match_default) |
|
{ |
|
return query_match(str, str + traits::length(str), m, e, flags); |
|
} |
|
|
|
#ifndef JM_NO_STRING_H |
|
template <class ST, class SA, class Allocator, class charT, class traits, class Allocator2> |
|
inline bool query_match(const __JM_STD::basic_string<charT, ST, SA>& s, |
|
reg_match<typename __JM_STD::basic_string<charT, ST, SA>::const_iterator, Allocator>& m, |
|
const reg_expression<charT, traits, Allocator2>& e, |
|
unsigned flags = match_default) |
|
{ |
|
return query_match(s.begin(), s.end(), m, e, flags); |
|
} |
|
#endif |
|
#else // partial specialisation |
|
inline bool query_match(const char* str, |
|
cmatch& m, |
|
const regex& e, |
|
unsigned flags = match_default) |
|
{ |
|
return query_match(str, str + regex::traits_type::length(str), m, e, flags); |
|
} |
|
#ifndef JM_NO_WCSTRING |
|
inline bool query_match(const wchar_t* str, |
|
wcmatch& m, |
|
const wregex& e, |
|
unsigned flags = match_default) |
|
{ |
|
return query_match(str, str + wregex::traits_type::length(str), m, e, flags); |
|
} |
|
#endif |
|
#ifndef JM_NO_STRING_H |
|
// Non-template convenience overload for a narrow string object: matches
// over [s.begin(), s.end()).
inline bool query_match(const __JM_STD::string& s,
                        reg_match<__JM_STD::string::const_iterator, regex::alloc_type>& m,
                        const regex& e,
                        unsigned flags = match_default)
{
   const __JM_STD::string::const_iterator text_begin = s.begin();
   const __JM_STD::string::const_iterator text_end = s.end();
   return query_match(text_begin, text_end, m, e, flags);
}
|
#if !defined(JM_NO_STRING_DEF_ARGS) && !defined(JM_NO_WCSTRING) |
|
// Non-template convenience overload for a wide string object: matches
// over [s.begin(), s.end()).
inline bool query_match(const __JM_STD::basic_string<wchar_t>& s,
                        reg_match<__JM_STD::basic_string<wchar_t>::const_iterator, wregex::alloc_type>& m,
                        const wregex& e,
                        unsigned flags = match_default)
{
   const __JM_STD::basic_string<wchar_t>::const_iterator text_begin = s.begin();
   const __JM_STD::basic_string<wchar_t>::const_iterator text_end = s.end();
   return query_match(text_begin, text_end, m, e, flags);
}
|
#endif |
|
|
|
#endif |
|
|
|
#endif |
|
|
|
|
|
#if defined(JM_NO_TEMPLATE_SWITCH_MERGE) && !defined(JM_NO_NAMESPACES) |
|
//
// Ugly ugly hack:
// templates don't merge if they contain switch statements, so declare these
// templates in an unnamed namespace (i.e. with internal linkage); each translation
// unit then gets its own local copy. It works seamlessly but bloats the app.
|
namespace{ |
|
#endif |
|
|
|
template <class iterator, class Allocator, class charT, class traits, class Allocator2> |
|
bool query_match_aux(iterator first, |
|
iterator last, |
|
reg_match<iterator, Allocator>& m, |
|
const reg_expression<charT, traits, Allocator2>& e, |
|
unsigned flags, |
|
__priv_match_data<iterator, Allocator>& pd, |
|
iterator* restart) |
|
{ |
|
if(e.flags() & regbase::failbit) |
|
return false; |
|
|
|
typedef typename traits::size_type traits_size_type; |
|
typedef typename traits::uchar_type traits_uchar_type; |
|
typedef typename is_byte<charT>::width_type width_type; |
|
|
|
#ifdef RE_LOCALE_CPP |
|
const __JM_STD::locale& locale_inst = e.locale(); |
|
#endif |
|
|
|
// declare some local aliases to reduce pointer loads |
|
// good optimising compilers should make this unnecessary!! |
|
jstack<reg_match_base<iterator, Allocator>, Allocator>& matches = pd.matches; |
|
jstack<iterator, Allocator>& prev_pos = pd.prev_pos; |
|
jstack<const re_syntax_base*, Allocator>& prev_record = pd.prev_record; |
|
jstack<int, Allocator>& prev_acc = pd.prev_acc; |
|
reg_match_base<iterator, Allocator>& temp_match = pd.temp_match; |
|
temp_match.set_first(first); |
|
|
|
//temp_match.set_size(e.mark_count(), first, last); |
|
register const re_syntax_base* ptr = e.first(); |
|
bool match_found = false; |
|
bool need_push_match = (e.mark_count() > 1); |
|
int cur_acc = -1; // no active accumulator |
|
pd.set_accumulator_size(e.repeat_count()); |
|
int* accumulators = pd.get_accumulators(); |
|
iterator* start_loop = pd.get_loop_starts(); |
|
int k; // for loops |
|
bool icase = e.flags() & regbase::icase; |
|
*restart = first; |
|
iterator base = first; |
|
|
|
// prepare m for failure: |
|
/* |
|
if((flags & match_init) == 0) |
|
{ |
|
m.init_fail(first, last); |
|
} */ |
|
|
|
retry: |
|
|
|
while(first != last) |
|
{ |
|
jm_assert(ptr); |
|
switch(ptr->type) |
|
{ |
|
case syntax_element_match: |
|
match_jump: |
|
{ |
|
// match found, save then fallback in case we missed a |
|
// longer one. |
|
if((flags & match_not_null) && (first == temp_match[0].first)) |
|
goto failure; |
|
temp_match.set_second(first); |
|
m.maybe_assign(temp_match); |
|
match_found = true; |
|
if((flags & match_any) || ((first == last) && (need_push_match == false))) |
|
{ |
|
// either we don't care what we match or we've matched |
|
// the whole string and can't match anything longer. |
|
while(matches.empty() == false) |
|
matches.pop(); |
|
while(prev_pos.empty() == false) |
|
prev_pos.pop(); |
|
while(prev_record.empty() == false) |
|
prev_record.pop(); |
|
while(prev_acc.empty() == false) |
|
prev_acc.pop(); |
|
return true; |
|
} |
|
} |
|
goto failure; |
|
case syntax_element_startmark: |
|
temp_match.set_first(first, ((re_brace*)ptr)->index); |
|
ptr = ptr->next.p; |
|
break; |
|
case syntax_element_endmark: |
|
temp_match.set_second(first, ((re_brace*)ptr)->index); |
|
ptr = ptr->next.p; |
|
break; |
|
case syntax_element_literal: |
|
{ |
|
unsigned int len = ((re_literal*)ptr)->length; |
|
charT* what = (charT*)(((re_literal*)ptr) + 1); |
|
// |
|
// compare string with what we stored in |
|
// our records: |
|
for(unsigned int i = 0; i < len; ++i, ++first) |
|
{ |
|
if((first == last) || (traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst)) != what[i])) |
|
goto failure; |
|
} |
|
ptr = ptr->next.p; |
|
break; |
|
} |
|
case syntax_element_start_line: |
|
outer_line_check: |
|
if(first == temp_match[0].first) |
|
{ |
|
// we're at the start of the buffer |
|
if(flags & match_prev_avail) |
|
{ |
|
inner_line_check: |
|
// check the previous value even though its before |
|
// the start of our "buffer". |
|
iterator t(first); |
|
--t; |
|
if(traits::is_separator(*t) && !((*t == '\r') && (*first == '\n')) ) |
|
{ |
|
ptr = ptr->next.p; |
|
continue; |
|
} |
|
goto failure; |
|
} |
|
if((flags & match_not_bol) == 0) |
|
{ |
|
ptr = ptr->next.p; |
|
continue; |
|
} |
|
goto failure; |
|
} |
|
// we're in the middle of the string |
|
goto inner_line_check; |
|
case syntax_element_end_line: |
|
// we're not yet at the end so *first is always valid: |
|
if(traits::is_separator(*first)) |
|
{ |
|
if((first != base) || (flags & match_prev_avail)) |
|
{ |
|
// check that we're not in the middle of \r\n sequence |
|
iterator t(first); |
|
--t; |
|
if((*t == '\r') && (*first == '\n')) |
|
{ |
|
goto failure; |
|
} |
|
} |
|
ptr = ptr->next.p; |
|
continue; |
|
} |
|
goto failure; |
|
case syntax_element_wild: |
|
// anything except possibly NULL or \n: |
|
if(traits::is_separator(*first)) |
|
{ |
|
if(flags & match_not_dot_newline) |
|
goto failure; |
|
ptr = ptr->next.p; |
|
++first; |
|
continue; |
|
} |
|
if(*first == charT(0)) |
|
{ |
|
if(flags & match_not_dot_null) |
|
goto failure; |
|
ptr = ptr->next.p; |
|
++first; |
|
continue; |
|
} |
|
ptr = ptr->next.p; |
|
++first; |
|
break; |
|
case syntax_element_word_boundary: |
|
{ |
|
// prev and this character must be opposites: |
|
bool b = traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst)); |
|
if((first == temp_match[0].first) && ((flags & match_prev_avail) == 0)) |
|
{ |
|
if(flags & match_not_bow) |
|
b ^= true; |
|
else |
|
b ^= false; |
|
} |
|
else |
|
{ |
|
--first; |
|
b ^= traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst)); |
|
++first; |
|
} |
|
if(b) |
|
{ |
|
ptr = ptr->next.p; |
|
continue; |
|
} |
|
goto failure; |
|
} |
|
case syntax_element_within_word: |
|
// both prev and this character must be char_class_word: |
|
if(traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst))) |
|
{ |
|
bool b; |
|
if((first == temp_match[0].first) && ((flags & match_prev_avail) == 0)) |
|
b = false; |
|
else |
|
{ |
|
--first; |
|
b = traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst)); |
|
++first; |
|
} |
|
if(b) |
|
{ |
|
ptr = ptr->next.p; |
|
continue; |
|
} |
|
} |
|
goto failure; |
|
case syntax_element_word_start: |
|
if((first == temp_match[0].first) && ((flags & match_prev_avail) == 0)) |
|
{ |
|
// start of buffer: |
|
if(flags & match_not_bow) |
|
goto failure; |
|
if(traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst))) |
|
{ |
|
ptr = ptr->next.p; |
|
continue; |
|
} |
|
goto failure; |
|
} |
|
// otherwise inside buffer: |
|
if(traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst))) |
|
{ |
|
iterator t(first); |
|
--t; |
|
if(traits::is_class(*t, char_class_word MAYBE_PASS_LOCALE(locale_inst)) == false) |
|
{ |
|
ptr = ptr->next.p; |
|
continue; |
|
} |
|
} |
|
goto failure; // if we fall through to here then we've failed |
|
case syntax_element_word_end: |
|
if((first == temp_match[0].first) && ((flags & match_prev_avail) == 0)) |
|
goto failure; // start of buffer can't be end of word |
|
|
|
// otherwise inside buffer: |
|
if(traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst)) == false) |
|
{ |
|
iterator t(first); |
|
--t; |
|
if(traits::is_class(*t, char_class_word MAYBE_PASS_LOCALE(locale_inst))) |
|
{ |
|
ptr = ptr->next.p; |
|
continue; |
|
} |
|
} |
|
goto failure; // if we fall through to here then we've failed |
|
case syntax_element_buffer_start: |
|
if((first != temp_match[0].first) || (flags & match_not_bob)) |
|
goto failure; |
|
// OK match: |
|
ptr = ptr->next.p; |
|
break; |
|
case syntax_element_buffer_end: |
|
if((first != last) || (flags & match_not_eob)) |
|
goto failure; |
|
// OK match: |
|
ptr = ptr->next.p; |
|
break; |
|
case syntax_element_backref: |
|
{ |
|
// compare with what we previously matched: |
|
iterator i = temp_match[((re_brace*)ptr)->index].first; |
|
iterator j = temp_match[((re_brace*)ptr)->index].second; |
|
while(i != j) |
|
{ |
|
if((first == last) || (traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst)) != traits::translate(*i, icase MAYBE_PASS_LOCALE(locale_inst)))) |
|
goto failure; |
|
++i; |
|
++first; |
|
} |
|
ptr = ptr->next.p; |
|
break; |
|
} |
|
case syntax_element_long_set: |
|
{ |
|
// let the traits class do the work: |
|
iterator t = re_is_set_member(first, last, (re_set_long*)ptr, e); |
|
if(t != first) |
|
{ |
|
ptr = ptr->next.p; |
|
first = t; |
|
continue; |
|
} |
|
goto failure; |
|
} |
|
case syntax_element_set: |
|
// lookup character in table: |
|
if(((re_set*)ptr)->__map[(traits_uchar_type)traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst))]) |
|
{ |
|
ptr = ptr->next.p; |
|
++first; |
|
continue; |
|
} |
|
goto failure; |
|
case syntax_element_jump: |
|
ptr = ((re_jump*)ptr)->alt.p; |
|
continue; |
|
case syntax_element_alt: |
|
{ |
|
// alt_jump: |
|
if(reg_expression<charT, traits, Allocator2>::can_start(*first, ((re_jump*)ptr)->__map, (unsigned char)mask_take, width_type())) |
|
{ |
|
// we can take the first alternative, |
|
// see if we need to push next alternative: |
|
if(reg_expression<charT, traits, Allocator2>::can_start(*first, ((re_jump*)ptr)->__map, mask_skip, width_type())) |
|
{ |
|
if(need_push_match) |
|
matches.push(temp_match); |
|
for(k = 0; k <= cur_acc; ++k) |
|
prev_pos.push(start_loop[k]); |
|
prev_pos.push(first); |
|
prev_record.push(ptr); |
|
for(k = 0; k <= cur_acc; ++k) |
|
prev_acc.push(accumulators[k]); |
|
prev_acc.push(cur_acc); |
|
} |
|
ptr = ptr->next.p; |
|
continue; |
|
} |
|
if(reg_expression<charT, traits, Allocator2>::can_start(*first, ((re_jump*)ptr)->__map, mask_skip, width_type())) |
|
{ |
|
ptr = ((re_jump*)ptr)->alt.p; |
|
continue; |
|
} |
|
goto failure; // neither option is possible |
|
} |
|
case syntax_element_rep: |
|
{ |
|
// repeater_jump: |
|
// if we're moving to a higher id (nested repeats etc) |
|
// zero out our accumualtors: |
|
if(cur_acc < ((re_repeat*)ptr)->id) |
|
{ |
|
cur_acc = ((re_repeat*)ptr)->id; |
|
accumulators[cur_acc] = 0; |
|
start_loop[cur_acc] = iterator(); |
|
} |
|
|
|
cur_acc = ((re_repeat*)ptr)->id; |
|
|
|
if(((re_repeat*)ptr)->leading) |
|
*restart = first; |
|
|
|
//charT c = traits::translate(*first MAYBE_PASS_LOCALE(locale_inst)); |
|
|
|
// first of all test for special case where this is last element, |
|
// if that is the case then repeat as many times as possible: |
|
|
|
if(((re_repeat*)ptr)->alt.p->type == syntax_element_match) |
|
{ |
|
// see if we can take the repeat: |
|
if(((unsigned int)accumulators[cur_acc] < ((re_repeat*)ptr)->max) |
|
&& reg_expression<charT, traits, Allocator2>::can_start(*first, ((re_repeat*)ptr)->__map, mask_take, width_type())) |
|
{ |
|
// push terminating match as fallback: |
|
if((unsigned int)accumulators[cur_acc] >= ((re_repeat*)ptr)->min) |
|
{ |
|
if((prev_record.empty() == false) && (prev_record.peek() == ((re_repeat*)ptr)->alt.p)) |
|
{ |
|
// we already have the required fallback |
|
// don't add any more, just update this one: |
|
if(need_push_match) |
|
matches.peek() = temp_match; |
|
prev_pos.peek() = first; |
|
} |
|
else |
|
{ |
|
if(need_push_match) |
|
matches.push(temp_match); |
|
prev_pos.push(first); |
|
prev_record.push(((re_repeat*)ptr)->alt.p); |
|
} |
|
} |
|
// move to next item in list: |
|
if(first != start_loop[cur_acc]) |
|
{ |
|
++accumulators[cur_acc]; |
|
ptr = ptr->next.p; |
|
start_loop[cur_acc] = first; |
|
continue; |
|
} |
|
goto failure; |
|
} |
|
// see if we can skip the repeat: |
|
if(((unsigned int)accumulators[cur_acc] >= ((re_repeat*)ptr)->min) |
|
&& reg_expression<charT, traits, Allocator2>::can_start(*first, ((re_repeat*)ptr)->__map, mask_skip, width_type())) |
|
{ |
|
ptr = ((re_repeat*)ptr)->alt.p; |
|
continue; |
|
} |
|
// otherwise fail: |
|
goto failure; |
|
} |
|
|
|
// see if we can skip the repeat: |
|
if(((unsigned int)accumulators[cur_acc] >= ((re_repeat*)ptr)->min) |
|
&& reg_expression<charT, traits, Allocator2>::can_start(*first, ((re_repeat*)ptr)->__map, mask_skip, width_type())) |
|
{ |
|
// see if we can push failure info: |
|
if(((unsigned int)accumulators[cur_acc] < ((re_repeat*)ptr)->max) |
|
&& reg_expression<charT, traits, Allocator2>::can_start(*first, ((re_repeat*)ptr)->__map, mask_take, width_type())) |
|
{ |
|
// check to see if the last loop matched a NULL string |
|
// if so then we really don't want to loop again: |
|
if(((unsigned int)accumulators[cur_acc] == ((re_repeat*)ptr)->min) |
|
|| (first != start_loop[cur_acc])) |
|
{ |
|
if(need_push_match) |
|
matches.push(temp_match); |
|
prev_pos.push(first); |
|
prev_record.push(ptr); |
|
for(k = 0; k <= cur_acc; ++k) |
|
prev_acc.push(accumulators[k]); |
|
//prev_acc.push(cur_acc); |
|
} |
|
} |
|
ptr = ((re_repeat*)ptr)->alt.p; |
|
continue; |
|
} |
|
|
|
// otherwise see if we can take the repeat: |
|
if(((unsigned int)accumulators[cur_acc] < ((re_repeat*)ptr)->max) |
|
&& reg_expression<charT, traits, Allocator2>::can_start(*first, ((re_repeat*)ptr)->__map, mask_take, width_type()) && |
|
(first != start_loop[cur_acc])) |
|
{ |
|
// move to next item in list: |
|
++accumulators[cur_acc]; |
|
ptr = ptr->next.p; |
|
start_loop[cur_acc] = first; |
|
continue; |
|
} |
|
|
|
// if we get here then neither option is allowed so fail: |
|
goto failure; |
|
|
|
} |
|
case syntax_element_combining: |
|
if(traits::is_combining(traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst)))) |
|
goto failure; |
|
++first; |
|
while((first != last) && traits::is_combining(traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst))))++first; |
|
ptr = ptr->next.p; |
|
continue; |
|
case syntax_element_soft_buffer_end: |
|
{ |
|
if(flags & match_not_eob) |
|
goto failure; |
|
iterator p(first); |
|
while((p != last) && traits::is_separator(traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst))))++p; |
|
if(p != last) |
|
goto failure; |
|
ptr = ptr->next.p; |
|
continue; |
|
} |
|
case syntax_element_restart_continue: |
|
if(first != temp_match[-1].first) |
|
goto failure; |
|
ptr = ptr->next.p; |
|
continue; |
|
default: |
|
jm_assert(0); // should never get to here!! |
|
return false; |
|
} |
|
} |
|
|
|
// |
|
// if we get to here then we've run out of characters to match against, |
|
// we could however still have non-character regex items left |
|
if(ptr->can_be_null == 0) |
|
goto failure; |
|
while(true) |
|
{ |
|
jm_assert(ptr); |
|
switch(ptr->type) |
|
{ |
|
case syntax_element_match: |
|
goto match_jump; |
|
case syntax_element_startmark: |
|
temp_match.set_first(first, ((re_brace*)ptr)->index); |
|
ptr = ptr->next.p; |
|
break; |
|
case syntax_element_endmark: |
|
temp_match.set_second(first, ((re_brace*)ptr)->index); |
|
ptr = ptr->next.p; |
|
break; |
|
case syntax_element_start_line: |
|
goto outer_line_check; |
|
case syntax_element_end_line: |
|
// we're at the end so *first is never valid: |
|
if((flags & match_not_eol) == 0) |
|
{ |
|
ptr = ptr->next.p; |
|
continue; |
|
} |
|
goto failure; |
|
case syntax_element_word_boundary: |
|
case syntax_element_word_end: |
|
if(((flags & match_not_eow) == 0) && (first != temp_match[0].first)) |
|
{ |
|
iterator t(first); |
|
--t; |
|
if(traits::is_class(*t, char_class_word MAYBE_PASS_LOCALE(locale_inst))) |
|
{ |
|
ptr = ptr->next.p; |
|
continue; |
|
} |
|
} |
|
goto failure; |
|
case syntax_element_buffer_end: |
|
case syntax_element_soft_buffer_end: |
|
if(flags & match_not_eob) |
|
goto failure; |
|
// OK match: |
|
ptr = ptr->next.p; |
|
break; |
|
case syntax_element_jump: |
|
ptr = ((re_jump*)ptr)->alt.p; |
|
continue; |
|
case syntax_element_alt: |
|
if(ptr->can_be_null & mask_take) |
|
{ |
|
// we can test the first alternative, |
|
// see if we need to push next alternative: |
|
if(ptr->can_be_null & mask_skip) |
|
{ |
|
if(need_push_match) |
|
matches.push(temp_match); |
|
for(k = 0; k <= cur_acc; ++k) |
|
prev_pos.push(start_loop[k]); |
|
prev_pos.push(first); |
|
prev_record.push(ptr); |
|
for(k = 0; k <= cur_acc; ++k) |
|
prev_acc.push(accumulators[k]); |
|
prev_acc.push(cur_acc); |
|
} |
|
ptr = ptr->next.p; |
|
continue; |
|
} |
|
if(ptr->can_be_null & mask_skip) |
|
{ |
|
ptr = ((re_jump*)ptr)->alt.p; |
|
continue; |
|
} |
|
goto failure; // neither option is possible |
|
case syntax_element_rep: |
|
// if we're moving to a higher id (nested repeats etc) |
|
// zero out our accumualtors: |
|
if(cur_acc < ((re_repeat*)ptr)->id) |
|
{ |
|
cur_acc = ((re_repeat*)ptr)->id; |
|
accumulators[cur_acc] = 0; |
|
start_loop[cur_acc] = first; |
|
} |
|
|
|
cur_acc = ((re_repeat*)ptr)->id; |
|
|
|
// see if we can skip the repeat: |
|
if(((unsigned int)accumulators[cur_acc] >= ((re_repeat*)ptr)->min) |
|
&& (ptr->can_be_null & mask_skip)) |
|
{ |
|
// don't push failure info, there's no point: |
|
ptr = ((re_repeat*)ptr)->alt.p; |
|
continue; |
|
} |
|
|
|
// otherwise see if we can take the repeat: |
|
if(((unsigned int)accumulators[cur_acc] < ((re_repeat*)ptr)->max) |
|
&& ((ptr->can_be_null & (mask_take | mask_skip)) == (mask_take | mask_skip))) |
|
{ |
|
// move to next item in list: |
|
++accumulators[cur_acc]; |
|
ptr = ptr->next.p; |
|
start_loop[cur_acc] = first; |
|
continue; |
|
} |
|
|
|
// if we get here then neither option is allowed so fail: |
|
goto failure; |
|
case syntax_element_restart_continue: |
|
if(first != temp_match[-1].first) |
|
goto failure; |
|
ptr = ptr->next.p; |
|
continue; |
|
default: |
|
goto failure; |
|
} |
|
} |
|
|
|
failure: |
|
|
|
if(prev_record.empty() == false) |
|
{ |
|
ptr = prev_record.peek(); |
|
switch(ptr->type) |
|
{ |
|
case syntax_element_alt: |
|
// get next alternative: |
|
ptr = ((re_jump*)ptr)->alt.p; |
|
if(need_push_match) |
|
matches.pop(temp_match); |
|
prev_acc.pop(cur_acc); |
|
for(k = cur_acc; k >= 0; --k) |
|
prev_acc.pop(accumulators[k]); |
|
prev_pos.pop(first); |
|
for(k = cur_acc; k >= 0; --k) |
|
prev_pos.pop(start_loop[k]); |
|
prev_record.pop(); |
|
goto retry; |
|
case syntax_element_rep: |
|
// we're doing least number of repeats first, |
|
// increment count and repeat again: |
|
if(need_push_match) |
|
matches.pop(temp_match); |
|
prev_pos.pop(first); |
|
cur_acc = ((re_repeat*)ptr)->id; |
|
for(k = cur_acc; k >= 0; --k) |
|
prev_acc.pop(accumulators[k]); |
|
prev_record.pop(); |
|
if((unsigned int)++accumulators[cur_acc] > ((re_repeat*)ptr)->max) |
|
goto failure; // repetions exhausted. |
|
ptr = ptr->next.p; |
|
start_loop[cur_acc] = first; |
|
goto retry; |
|
case syntax_element_match: |
|
if(need_push_match) |
|
matches.pop(temp_match); |
|
prev_pos.pop(first); |
|
prev_record.pop(); |
|
goto retry; |
|
default: |
|
jm_assert(0); |
|
// mustn't get here!! |
|
} |
|
} |
|
|
|
if(match_found) |
|
return true; |
|
|
|
// if we get to here then everything has failed |
|
// and no match was found: |
|
return false; |
|
} |
|
#if defined(JM_NO_TEMPLATE_SWITCH_MERGE) && !defined(JM_NO_NAMESPACES) |
|
} // namespace |
|
#endif |
|
|
|
|
|
//
// Advances first up to last.  Each '\n' stepped over adds one to clines
// and sets last_line to the position just after that newline.
//
template <class iterator>
void __skip_and_inc(unsigned int& clines, iterator& last_line, iterator& first, const iterator last)
{
   for(; first != last; )
   {
      if(*first != '\n')
      {
         ++first;
         continue;
      }
      // a newline: the next line starts immediately after it
      last_line = ++first;
      ++clines;
   }
}
|
|
|
//
// Moves first back by len positions.  clines is reduced by one for every
// '\n' found on the way.  If at least one newline was found, last_line is
// recomputed by scanning backwards from the new position: it ends up just
// after the nearest preceding '\n', or at base if there is none.
// When no newline was found last_line is left alone.
//
template <class iterator>
void __skip_and_dec(unsigned int& clines, iterator& last_line, iterator& first, iterator base, unsigned int len)
{
   bool crossed_newline = false;
   for(unsigned int remaining = len; remaining != 0; --remaining)
   {
      --first;
      if(*first == '\n')
      {
         --clines;
         crossed_newline = true;
      }
   }

   if(!crossed_newline)
      return;

   last_line = first;
   if(last_line == base)
      return;

   // walk back to the newline that ends the previous line (or to base)
   --last_line;
   while((last_line != base) && (*last_line != '\n'))
      --last_line;

   // the line starts after the newline, not on it
   if(*last_line == '\n')
      ++last_line;
}
|
|
|
//
// Advances first by exactly one position.  If the character stepped over
// is '\n', clines is incremented and last_line is set to the new position.
// NB *first is read, so first must be dereferenceable.
//
template <class iterator>
inline void __inc_one(unsigned int& clines, iterator& last_line, iterator& first)
{
   if(*first != '\n')
   {
      ++first;
      return;
   }
   last_line = ++first;
   ++clines;
}
|
|
|
template <class iterator, class Allocator> |
|
struct grep_search_predicate |
|
{ |
|
reg_match<iterator, Allocator>* pm; |
|
grep_search_predicate(reg_match<iterator, Allocator>* p) : pm(p) {} |
|
bool operator()(const reg_match<iterator, Allocator>& m) |
|
{ |
|
*pm = static_cast<const reg_match_base<iterator, Allocator>&>(m); |
|
return false; |
|
} |
|
}; |
|
|
|
#if !defined(JM_NO_TEMPLATE_RETURNS) && !defined(JM_NO_PARTIAL_FUNC_SPEC) |
|
|
|
template <class iterator, class Allocator> |
|
inline const reg_match_base<iterator, Allocator>& grep_out_type(const grep_search_predicate<iterator, Allocator>& o, const Allocator&) |
|
{ |
|
return *(o.pm); |
|
} |
|
|
|
#endif |
|
|
|
// Generic overload: the first argument is ignored and the allocator passed
// in is returned unchanged (by reference).
template <class T, class Allocator>
inline const Allocator& grep_out_type(const T& /*unused*/, const Allocator& a)
{
   const Allocator& passthrough = a;
   return passthrough;
}
|
|
|
#if defined(JM_NO_TEMPLATE_SWITCH_MERGE) && !defined(JM_NO_NAMESPACES) |
|
//
// Ugly ugly hack:
// templates don't merge if they contain switch statements, so declare these
// templates in an unnamed namespace (i.e. with internal linkage); each translation
// unit then gets its own local copy. It works seamlessly but bloats the app.
|
namespace{ |
|
#endif |
|
|
|
// |
|
// reg_grep2: |
|
// find all non-overlapping matches within the sequence first last: |
|
// |
|
template <class Predicate, class I, class charT, class traits, class A, class A2> |
|
unsigned int reg_grep2(Predicate foo, I first, I last, const reg_expression<charT, traits, A>& e, unsigned flags, A2 a) |
|
{ |
|
if(e.flags() & regbase::failbit) |
|
return 0; |
|
|
|
typedef typename traits::size_type traits_size_type; |
|
typedef typename traits::uchar_type traits_uchar_type; |
|
typedef typename is_byte<charT>::width_type width_type; |
|
|
|
reg_match<I, A2> m(grep_out_type(foo, a)); |
|
I restart; |
|
m.set_size(e.mark_count(), first, last); |
|
m.set_line(1, first); |
|
|
|
#ifdef RE_LOCALE_CPP |
|
const __JM_STD::locale& locale_inst = e.locale(); |
|
#endif |
|
|
|
unsigned int clines = 1; |
|
unsigned int cmatches = 0; |
|
I last_line = first; |
|
I next_base; |
|
I base = first; |
|
bool need_init; |
|
|
|
flags |= match_init; |
|
|
|
__priv_match_data<I, A2> pd(m); |
|
|
|
const unsigned char* __map = e.get_map(); |
|
unsigned int type; |
|
|
|
if(first == last) |
|
{ |
|
// special case, only test if can_be_null, |
|
// don't dereference any pointers!! |
|
if(e.first()->can_be_null) |
|
if(query_match_aux(first, last, m, e, flags, pd, &restart)) |
|
{ |
|
foo(m); |
|
++cmatches; |
|
} |
|
return cmatches; |
|
} |
|
|
|
// try one time whatever: |
|
if( reg_expression<charT, traits, A>::can_start(*first, __map, (unsigned char)mask_any, width_type() ) ) |
|
{ |
|
if(query_match_aux(first, last, m, e, flags, pd, &restart)) |
|
{ |
|
++cmatches; |
|
if(foo(m) == false) |
|
return cmatches; |
|
// update to end of what matched |
|
// trying to match again with match_not_null set if this |
|
// is a null match... |
|
need_init = true; |
|
if(first == m[0].second) |
|
{ |
|
next_base = m[0].second; |
|
pd.temp_match.init_fail(next_base, last); |
|
m.init_fail(next_base, last); |
|
if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart)) |
|
{ |
|
++cmatches; |
|
if(foo(m) == false) |
|
return cmatches; |
|
} |
|
else |
|
{ |
|
need_init = false; |
|
for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); |
|
if(restart != last) |
|
++restart; |
|
__skip_and_inc(clines, last_line, first, restart); |
|
} |
|
} |
|
if(need_init) |
|
{ |
|
__skip_and_inc(clines, last_line, first, m[0].second); |
|
next_base = m[0].second; |
|
pd.temp_match.init_fail(next_base, last); |
|
m.init_fail(next_base, last); |
|
} |
|
} |
|
else |
|
{ |
|
for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); |
|
if(restart != last) |
|
++restart; |
|
__skip_and_inc(clines, last_line, first, restart); |
|
} |
|
} |
|
else |
|
__inc_one(clines, last_line, first); |
|
flags |= match_prev_avail | match_not_bob; |
|
|
|
|
|
// depending on what the first record is we may be able to |
|
// optimise the search: |
|
type = (flags & match_continuous) ? regbase::restart_continue : e.restart_type(); |
|
|
|
if(type == regbase::restart_buf) |
|
return cmatches; |
|
|
|
switch(type) |
|
{ |
|
case regbase::restart_lit: |
|
case regbase::restart_fixed_lit: |
|
{ |
|
const kmp_info<charT>* info = e.get_kmp(); |
|
int len = info->len; |
|
const charT* x = info->pstr; |
|
int j = 0; |
|
bool icase = e.flags() & regbase::icase; |
|
while (first != last) |
|
{ |
|
while((j > -1) && (x[j] != traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst)))) |
|
j = info->kmp_next[j]; |
|
__inc_one(clines, last_line, first); |
|
++j; |
|
if(j >= len) |
|
{ |
|
if(type == regbase::restart_fixed_lit) |
|
{ |
|
__skip_and_dec(clines, last_line, first, base, j); |
|
restart = first; |
|
restart += len; |
|
m.set_first(first); |
|
m.set_second(restart); |
|
m.set_line(clines, last_line); |
|
++cmatches; |
|
if(foo(m) == false) |
|
return cmatches; |
|
__skip_and_inc(clines, last_line, first, restart); |
|
next_base = m[0].second; |
|
pd.temp_match.init_fail(next_base, last); |
|
m.init_fail(next_base, last); |
|
j = 0; |
|
} |
|
else |
|
{ |
|
restart = first; |
|
__skip_and_dec(clines, last_line, first, base, j); |
|
if(query_match_aux(first, last, m, e, flags, pd, &restart)) |
|
{ |
|
|
|
m.set_line(clines, last_line); |
|
++cmatches; |
|
if(foo(m) == false) |
|
return cmatches; |
|
// update to end of what matched |
|
__skip_and_inc(clines, last_line, first, m[0].second); |
|
next_base = m[0].second; |
|
pd.temp_match.init_fail(next_base, last); |
|
m.init_fail(next_base, last); |
|
j = 0; |
|
} |
|
else |
|
{ |
|
for(int k = 0; (restart != first) && (k < j); ++k, --restart); |
|
if(restart != last) |
|
++restart; |
|
__skip_and_inc(clines, last_line, first, restart); |
|
j = 0; //we could do better than this... |
|
} |
|
} |
|
} |
|
} |
|
break; |
|
} |
|
case regbase::restart_any: |
|
{ |
|
while(first != last) |
|
{ |
|
if( reg_expression<charT, traits, A>::can_start(*first, __map, (unsigned char)mask_any, width_type()) ) |
|
{ |
|
if(query_match_aux(first, last, m, e, flags, pd, &restart)) |
|
{ |
|
|
|
m.set_line(clines, last_line); |
|
++cmatches; |
|
if(foo(m) == false) |
|
return cmatches; |
|
// update to end of what matched |
|
// trying to match again with match_not_null set if this |
|
// is a null match... |
|
need_init = true; |
|
if(first == m[0].second) |
|
{ |
|
next_base = m[0].second; |
|
pd.temp_match.init_fail(next_base, last); |
|
m.init_fail(next_base, last); |
|
if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart)) |
|
{ |
|
m.set_line(clines, last_line); |
|
++cmatches; |
|
if(foo(m) == false) |
|
return cmatches; |
|
} |
|
else |
|
{ |
|
need_init = false; |
|
for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); |
|
if(restart != last) |
|
++restart; |
|
__skip_and_inc(clines, last_line, first, restart); |
|
} |
|
} |
|
if(need_init) |
|
{ |
|
__skip_and_inc(clines, last_line, first, m[0].second); |
|
next_base = m[0].second; |
|
pd.temp_match.init_fail(next_base, last); |
|
m.init_fail(next_base, last); |
|
} |
|
continue; |
|
} |
|
else |
|
{ |
|
for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); |
|
if(restart != last) |
|
++restart; |
|
__skip_and_inc(clines, last_line, first, restart); |
|
} |
|
} |
|
else |
|
__inc_one(clines, last_line, first); |
|
} |
|
} |
|
break; |
|
case regbase::restart_word: |
|
{ |
|
// do search optimised for word starts: |
|
while(first != last) |
|
{ |
|
--first; |
|
if(*first == '\n') |
|
--clines; |
|
// skip the word characters: |
|
while((first != last) && traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst))) |
|
++first; |
|
// now skip the white space: |
|
while((first != last) && (traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst)) == false)) |
|
__inc_one(clines, last_line, first); |
|
if(first == last) |
|
break; |
|
|
|
if( reg_expression<charT, traits, A>::can_start(*first, __map, (unsigned char)mask_any, width_type()) ) |
|
{ |
|
if(query_match_aux(first, last, m, e, flags, pd, &restart)) |
|
{ |
|
m.set_line(clines, last_line); |
|
++cmatches; |
|
if(foo(m) == false) |
|
return cmatches; |
|
// update to end of what matched |
|
// trying to match again with match_not_null set if this |
|
// is a null match... |
|
need_init = true; |
|
if(first == m[0].second) |
|
{ |
|
next_base = m[0].second; |
|
pd.temp_match.init_fail(next_base, last); |
|
m.init_fail(next_base, last); |
|
if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart)) |
|
{ |
|
m.set_line(clines, last_line); |
|
++cmatches; |
|
if(foo(m) == false) |
|
return cmatches; |
|
} |
|
else |
|
{ |
|
need_init = false; |
|
for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); |
|
if(restart != last) |
|
++restart; |
|
__skip_and_inc(clines, last_line, first, restart); |
|
} |
|
} |
|
if(need_init) |
|
{ |
|
__skip_and_inc(clines, last_line, first, m[0].second); |
|
next_base = m[0].second; |
|
pd.temp_match.init_fail(next_base, last); |
|
m.init_fail(next_base, last); |
|
} |
|
} |
|
else |
|
{ |
|
for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); |
|
if(restart != last) |
|
++restart; |
|
__skip_and_inc(clines, last_line, first, restart); |
|
} |
|
} |
|
else |
|
__inc_one(clines, last_line, first); |
|
} |
|
} |
|
break; |
|
case regbase::restart_line: |
|
{ |
|
// do search optimised for line starts: |
|
while(first != last) |
|
{ |
|
// find first charcter after a line break: |
|
--first; |
|
if(*first == '\n') |
|
--clines; |
|
while((first != last) && (*first != '\n')) |
|
++first; |
|
if(first == last) |
|
break; |
|
++first; |
|
if(first == last) |
|
break; |
|
|
|
++clines; |
|
last_line = first; |
|
|
|
if( reg_expression<charT, traits, A>::can_start(*first, __map, (unsigned char)mask_any, width_type()) ) |
|
{ |
|
if(query_match_aux(first, last, m, e, flags, pd, &restart)) |
|
{ |
|
m.set_line(clines, last_line); |
|
++cmatches; |
|
if(foo(m) == false) |
|
return cmatches; |
|
// update to end of what matched |
|
// trying to match again with match_not_null set if this |
|
// is a null match... |
|
need_init = true; |
|
if(first == m[0].second) |
|
{ |
|
next_base = m[0].second; |
|
pd.temp_match.init_fail(next_base, last); |
|
m.init_fail(next_base, last); |
|
if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart)) |
|
{ |
|
m.set_line(clines, last_line); |
|
++cmatches; |
|
if(foo(m) == false) |
|
return cmatches; |
|
} |
|
else |
|
{ |
|
need_init = false; |
|
for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); |
|
if(restart != last) |
|
++restart; |
|
__skip_and_inc(clines, last_line, first, restart); |
|
} |
|
} |
|
if(need_init) |
|
{ |
|
__skip_and_inc(clines, last_line, first, m[0].second); |
|
next_base = m[0].second; |
|
pd.temp_match.init_fail(next_base, last); |
|
m.init_fail(next_base, last); |
|
} |
|
} |
|
else |
|
{ |
|
for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); |
|
if(restart != last) |
|
++restart; |
|
__skip_and_inc(clines, last_line, first, restart); |
|
} |
|
} |
|
else |
|
__inc_one(clines, last_line, first); |
|
} |
|
} |
|
break; |
|
case regbase::restart_continue: |
|
{ |
|
while(first != last) |
|
{ |
|
if( reg_expression<charT, traits, A>::can_start(*first, __map, (unsigned char)mask_any, width_type()) ) |
|
{ |
|
if(query_match_aux(first, last, m, e, flags, pd, &restart)) |
|
{ |
|
m.set_line(clines, last_line); |
|
++cmatches; |
|
if(foo(m) == false) |
|
return cmatches; |
|
// update to end of what matched |
|
// trying to match again with match_not_null set if this |
|
// is a null match... |
|
if(first == m[0].second) |
|
{ |
|
next_base = m[0].second; |
|
pd.temp_match.init_fail(next_base, last); |
|
m.init_fail(next_base, last); |
|
if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart)) |
|
{ |
|
m.set_line(clines, last_line); |
|
++cmatches; |
|
if(foo(m) == false) |
|
return cmatches; |
|
} |
|
else |
|
return cmatches; // can't continue from null match |
|
} |
|
__skip_and_inc(clines, last_line, first, m[0].second); |
|
next_base = m[0].second; |
|
pd.temp_match.init_fail(next_base, last); |
|
m.init_fail(next_base, last); |
|
continue; |
|
} |
|
} |
|
return cmatches; |
|
} |
|
} |
|
break; |
|
} |
|
|
|
|
|
// finally check trailing null string: |
|
if(e.first()->can_be_null) |
|
{ |
|
if(query_match_aux(first, last, m, e, flags, pd, &restart)) |
|
{ |
|
m.set_line(clines, last_line); |
|
++cmatches; |
|
if(foo(m) == false) |
|
return cmatches; |
|
} |
|
} |
|
|
|
return cmatches; |
|
} |
|
#if defined(JM_NO_TEMPLATE_SWITCH_MERGE) && !defined(JM_NO_NAMESPACES) |
|
} // namespace |
|
#endif |
|
|
|
|
|
template <class iterator, class Allocator, class charT, class traits, class Allocator2> |
|
bool reg_search(iterator first, iterator last, reg_match<iterator, Allocator>& m, const reg_expression<charT, traits, Allocator2>& e, unsigned flags = match_default) |
|
{ |
|
if(e.flags() & regbase::failbit) |
|
return false; |
|
|
|
typedef typename traits::size_type traits_size_type; |
|
typedef typename traits::uchar_type traits_uchar_type; |
|
|
|
// prepare m for failure: |
|
if((flags & match_init) == 0) |
|
{ |
|
m.set_size(e.mark_count(), first, last); |
|
} |
|
|
|
flags |= match_init; |
|
return reg_grep2(grep_search_predicate<iterator, Allocator>(&m), first, last, e, flags, m.allocator()); |
|
} |
|
|
|
// |
|
// reg_search convenience interfaces: |
|
#ifndef JM_NO_PARTIAL_FUNC_SPEC |
|
// |
|
// this isn't really a partial specialisation, but template function |
|
// overloading - if the compiler doesn't support partial specialisation |
|
// then it really won't support this either: |
|
template <class charT, class Allocator, class traits, class Allocator2> |
|
inline bool reg_search(const charT* str, |
|
reg_match<const charT*, Allocator>& m, |
|
const reg_expression<charT, traits, Allocator2>& e, |
|
unsigned flags = match_default) |
|
{ |
|
return reg_search(str, str + traits::length(str), m, e, flags); |
|
} |
|
|
|
#ifndef JM_NO_STRING_H |
|
template <class ST, class SA, class Allocator, class charT, class traits, class Allocator2> |
|
inline bool reg_search(const __JM_STD::basic_string<charT, ST, SA>& s, |
|
reg_match<typename __JM_STD::basic_string<charT, ST, SA>::const_iterator, Allocator>& m, |
|
const reg_expression<charT, traits, Allocator2>& e, |
|
unsigned flags = match_default) |
|
{ |
|
return reg_search(s.begin(), s.end(), m, e, flags); |
|
} |
|
#endif |
|
#else // partial specialisation |
|
inline bool reg_search(const char* str, |
|
cmatch& m, |
|
const regex& e, |
|
unsigned flags = match_default) |
|
{ |
|
return reg_search(str, str + regex::traits_type::length(str), m, e, flags); |
|
} |
|
#ifndef JM_NO_WCSTRING |
|
inline bool reg_search(const wchar_t* str, |
|
wcmatch& m, |
|
const wregex& e, |
|
unsigned flags = match_default) |
|
{ |
|
return reg_search(str, str + wregex::traits_type::length(str), m, e, flags); |
|
} |
|
#endif |
|
#ifndef JM_NO_STRING_H |
|
// non-template string overload (JM_NO_PARTIAL_FUNC_SPEC builds):
// searches [s.begin(), s.end())
inline bool reg_search(const __JM_STD::string& s,
                       reg_match<__JM_STD::string::const_iterator, regex::alloc_type>& m,
                       const regex& e,
                       unsigned flags = match_default)
{
   const __JM_STD::string::const_iterator seq_begin = s.begin();
   const __JM_STD::string::const_iterator seq_end = s.end();
   return reg_search(seq_begin, seq_end, m, e, flags);
}
|
#if !defined(JM_NO_STRING_DEF_ARGS) && !defined(JM_NO_WCSTRING) |
|
// non-template basic_string<wchar_t> overload (JM_NO_PARTIAL_FUNC_SPEC
// builds): searches [s.begin(), s.end())
inline bool reg_search(const __JM_STD::basic_string<wchar_t>& s,
                       reg_match<__JM_STD::basic_string<wchar_t>::const_iterator, wregex::alloc_type>& m,
                       const wregex& e,
                       unsigned flags = match_default)
{
   const __JM_STD::basic_string<wchar_t>::const_iterator seq_begin = s.begin();
   const __JM_STD::basic_string<wchar_t>::const_iterator seq_end = s.end();
   return reg_search(seq_begin, seq_end, m, e, flags);
}
|
#endif |
|
|
|
#endif |
|
|
|
#endif |
|
|
|
|
|
// |
|
// reg_grep: |
|
// find all non-overlapping matches within the sequence first last: |
|
// |
|
template <class Predicate, class iterator, class charT, class traits, class Allocator> |
|
inline unsigned int reg_grep(Predicate foo, iterator first, iterator last, const reg_expression<charT, traits, Allocator>& e, unsigned flags = match_default) |
|
{ |
|
return reg_grep2(foo, first, last, e, flags, e.allocator()); |
|
} |
|
|
|
// |
|
// reg_grep convenience interfaces: |
|
#ifndef JM_NO_PARTIAL_FUNC_SPEC |
|
// |
|
// this isn't really a partial specialisation, but template function |
|
// overloading - if the compiler doesn't support partial specialisation |
|
// then it really won't support this either: |
|
template <class Predicate, class charT, class Allocator, class traits> |
|
inline bool reg_grep(Predicate foo, const charT* str, |
|
const reg_expression<charT, traits, Allocator>& e, |
|
unsigned flags = match_default) |
|
{ |
|
return reg_grep(foo, str, str + traits::length(str), e, flags); |
|
} |
|
|
|
#ifndef JM_NO_STRING_H |
|
template <class Predicate, class ST, class SA, class Allocator, class charT, class traits> |
|
inline bool reg_grep(Predicate foo, const __JM_STD::basic_string<charT, ST, SA>& s, |
|
const reg_expression<charT, traits, Allocator>& e, |
|
unsigned flags = match_default) |
|
{ |
|
return reg_grep(foo, s.begin(), s.end(), e, flags); |
|
} |
|
#endif |
|
#else // partial specialisation |
|
inline bool reg_grep(bool (*foo)(const cmatch&), const char* str, |
|
const regex& e, |
|
unsigned flags = match_default) |
|
{ |
|
return reg_grep(foo, str, str + regex::traits_type::length(str), e, flags); |
|
} |
|
#ifndef JM_NO_WCSTRING |
|
inline bool reg_grep(bool (*foo)(const wcmatch&), const wchar_t* str, |
|
const wregex& e, |
|
unsigned flags = match_default) |
|
{ |
|
return reg_grep(foo, str, str + wregex::traits_type::length(str), e, flags); |
|
} |
|
#endif |
|
#ifndef JM_NO_STRING_H |
|
// non-template string overload (JM_NO_PARTIAL_FUNC_SPEC builds):
// greps [s.begin(), s.end()); the result is true when at least one match
// was counted
inline bool reg_grep(bool (*foo)(const reg_match<__JM_STD::string::const_iterator, regex::alloc_type>&), const __JM_STD::string& s,
                     const regex& e,
                     unsigned flags = match_default)
{
   const __JM_STD::string::const_iterator seq_begin = s.begin();
   const __JM_STD::string::const_iterator seq_end = s.end();
   return reg_grep(foo, seq_begin, seq_end, e, flags) != 0;
}
|
#if !defined(JM_NO_STRING_DEF_ARGS) && !defined(JM_NO_WCSTRING) |
|
inline bool reg_grep(bool (*foo)(const reg_match<__JM_STD::basic_string<wchar_t>::const_iterator, wregex::alloc_type>&), |
|
const __JM_STD::basic_string<wchar_t>& s, |
|
const wregex& e, |
|
unsigned flags = match_default) |
|
{ |
|
return reg_grep(foo, s.begin(), s.end(), e, flags); |
|
} |
|
#endif |
|
|
|
#endif |
|
|
|
#endif |
|
|
|
|
|
//
// finally for compatibility with version 1.x of the library
// we need a form of reg_grep that takes an output iterator
// as its first argument:
//
|
|
|
// |
|
// struct grep_match: |
|
// stores what matched during a reg_grep, |
|
// the output iterator type passed to reg_grep must have an |
|
// operator*() that returns a type with an |
|
// operator=(const grep_match<iterator, Allocator>&); |
|
// |
|
template <class iterator, class Allocator> |
|
struct grep_match |
|
{ |
|
unsigned int line; |
|
iterator line_start; |
|
reg_match<iterator, Allocator> what; |
|
|
|
grep_match(Allocator a = Allocator()) : what(a) {} |
|
|
|
grep_match(unsigned int l, iterator p1, const reg_match<iterator, Allocator>& m) |
|
: what(m) { line = l; line_start = p1; } |
|
|
|
bool operator == (const grep_match& ) |
|
{ return false; } |
|
|
|
bool operator < (const grep_match&) |
|
{ return false; } |
|
}; |
|
|
|
template <class O, class I, class A> |
|
struct grep_adaptor |
|
{ |
|
O oi; |
|
reg_match<I, A> m; |
|
grep_adaptor(O i, A a) : m(a), oi(i) {} |
|
bool operator()(const reg_match_base<I, A>& w) |
|
{ |
|
m.what = w; |
|
m.line = w.line(); |
|
m.line_start = w.line_start(); |
|
*oi = m; |
|
++oi; |
|
return true; |
|
} |
|
}; |
|
|
|
template <class Out, class iterator, class charT, class traits, class Allocator> |
|
inline unsigned int reg_grep_old(Out oi, iterator first, iterator last, const reg_expression<charT, traits, Allocator>& e, unsigned flags = match_default) |
|
{ |
|
return reg_grep2(grep_adaptor<Out, iterator, Allocator>(oi, e.allocator()), first, last, e, flags, e.allocator()); |
|
} |
|
|
|
|
|
|
|
JM_END_NAMESPACE // namespace regex |
|
|
|
#endif // __REGMATCH_H |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|