//========= Copyright Valve Corporation, All rights reserved. ============// // // Purpose: // // $NoKeywords: $ // //=============================================================================// /* * * Copyright (c) 1998-9 * Dr John Maddock * * Permission to use, copy, modify, distribute and sell this software * and its documentation for any purpose is hereby granted without fee, * provided that the above copyright notice appear in all copies and * that both that copyright notice and this permission notice appear * in supporting documentation. Dr John Maddock makes no representations * about the suitability of this software for any purpose. * It is provided "as is" without express or implied warranty. * */ /* * FILE regmatch.h * VERSION 2.12 * regular expression matching algorithms */ #ifndef __REGMATCH_H #define __REGMATCH_H JM_NAMESPACE(__JM) template iterator RE_CALL re_is_set_member(iterator next, iterator last, re_set_long* set, const reg_expression& e) { const charT* p = (const charT*)(set+1); iterator ptr; unsigned int i; bool icase = e.flags() & regbase::icase; // try and match a single character, could be a multi-character // collating element... for(i = 0; i < set->csingles; ++i) { ptr = next; while(*p && (ptr != last)) { if(traits_type::translate(*ptr, icase MAYBE_PASS_LOCALE(e.locale())) != *p) break; ++p; ++ptr; } if(*p == 0) // if null we've matched return set->isnot ? next : (ptr == next) ? ++next : ptr; while(*p)++p; ++p; // skip null } charT col = traits_type::translate(*next, icase MAYBE_PASS_LOCALE(e.locale())); if(set->cranges || set->cequivalents) { re_str s2(col); re_str s1; // // try and match a range, NB only a single character can match if(set->cranges) { if(e.flags() & regbase::nocollate) s1 = s2; else traits_type::transform(s1, s2 MAYBE_PASS_LOCALE(e.locale())); for(i = 0; i < set->cranges; ++i) { if(s1 <= p) { while(*p)++p; ++p; if(s1 >= p) return set->isnot ? next : ++next; } else { // skip first string while(*p)++p; ++p; } // skip second string while(*p)++p; ++p; } } // // try and match an equivalence class, NB only a single character can match if(set->cequivalents) { traits_type::transform_primary(s1, s2 MAYBE_PASS_LOCALE(e.locale())); for(i = 0; i < set->cequivalents; ++i) { if(s1 == p) return set->isnot ? next : ++next; // skip string while(*p)++p; ++p; } } } if(traits_type::is_class(col, set->cclasses MAYBE_PASS_LOCALE(e.locale())) == true) return set->isnot ? next : ++next; return set->isnot ? ++next : next; } template class __priv_match_data { public: typedef JM_MAYBE_TYPENAME REBIND_TYPE(int, Allocator) i_alloc; typedef JM_MAYBE_TYPENAME REBIND_TYPE(iterator, Allocator) it_alloc; reg_match_base temp_match; // failure stacks: jstack, Allocator> matches; jstack prev_pos; jstack prev_record; jstack prev_acc; int* accumulators; unsigned int caccumulators; iterator* loop_starts; __priv_match_data(const reg_match_base&); ~__priv_match_data() { free(); } void free(); void set_accumulator_size(unsigned int size); int* get_accumulators() { return accumulators; } iterator* get_loop_starts() { return loop_starts; } }; template __priv_match_data::__priv_match_data(const reg_match_base& m) : temp_match(m), matches(64, m.allocator()), prev_pos(64, m.allocator()), prev_record(64, m.allocator()) { accumulators = 0; caccumulators = 0; loop_starts = 0; } template void __priv_match_data::set_accumulator_size(unsigned int size) { if(size > caccumulators) { free(); caccumulators = size; accumulators = i_alloc(temp_match.allocator()).allocate(caccumulators); loop_starts = it_alloc(temp_match.allocator()).allocate(caccumulators); for(unsigned i = 0; i < caccumulators; ++i) new (loop_starts + i) iterator(); } } template void __priv_match_data::free() { if(caccumulators) { //REBIND_INSTANCE(int, Allocator, temp_match.allocator()).deallocate(accumulators, caccumulators); i_alloc temp1(temp_match.allocator()); temp1.deallocate(accumulators, caccumulators); for(unsigned i = 0; i < caccumulators; ++i) jm_destroy(loop_starts + i); //REBIND_INSTANCE(iterator, Allocator, temp_match.allocator()).deallocate(loop_starts, caccumulators); it_alloc temp2(temp_match.allocator()); temp2.deallocate(loop_starts, caccumulators); } } // // proc query_match // returns true if the specified regular expression matches // at position first. Fills in what matched in m. // template bool query_match(iterator first, iterator last, reg_match& m, const reg_expression& e, unsigned flags = match_default) { // prepare m for failure: if((flags & match_init) == 0) { m.set_size(e.mark_count(), first, last); } __priv_match_data pd(m); iterator restart; return query_match_aux(first, last, m, e, flags, pd, &restart); } // // query_match convenience interfaces: #ifndef JM_NO_PARTIAL_FUNC_SPEC // // this isn't really a partial specialisation, but template function // overloading - if the compiler doesn't support partial specialisation // then it really won't support this either: template inline bool query_match(const charT* str, reg_match& m, const reg_expression& e, unsigned flags = match_default) { return query_match(str, str + traits::length(str), m, e, flags); } #ifndef JM_NO_STRING_H template inline bool query_match(const __JM_STD::basic_string& s, reg_match::const_iterator, Allocator>& m, const reg_expression& e, unsigned flags = match_default) { return query_match(s.begin(), s.end(), m, e, flags); } #endif #else // partial specialisation inline bool query_match(const char* str, cmatch& m, const regex& e, unsigned flags = match_default) { return query_match(str, str + regex::traits_type::length(str), m, e, flags); } #ifndef JM_NO_WCSTRING inline bool query_match(const wchar_t* str, wcmatch& m, const wregex& e, unsigned flags = match_default) { return query_match(str, str + wregex::traits_type::length(str), m, e, flags); } #endif #ifndef JM_NO_STRING_H inline bool query_match(const __JM_STD::string& s, reg_match<__JM_STD::string::const_iterator, regex::alloc_type>& m, const regex& e, unsigned flags = match_default) { return query_match(s.begin(), s.end(), m, e, flags); } #if !defined(JM_NO_STRING_DEF_ARGS) && !defined(JM_NO_WCSTRING) inline bool query_match(const __JM_STD::basic_string& s, reg_match<__JM_STD::basic_string::const_iterator, wregex::alloc_type>& m, const wregex& e, unsigned flags = match_default) { return query_match(s.begin(), s.end(), m, e, flags); } #endif #endif #endif #if defined(JM_NO_TEMPLATE_SWITCH_MERGE) && !defined(JM_NO_NAMESPACES) // // Ugly ugly hack, // template don't merge if they contain switch statements so declare these // templates in unnamed namespace (ie with internal linkage), each translation // unit then gets its own local copy, it works seemlessly but bloats the app. namespace{ #endif template bool query_match_aux(iterator first, iterator last, reg_match& m, const reg_expression& e, unsigned flags, __priv_match_data& pd, iterator* restart) { if(e.flags() & regbase::failbit) return false; typedef typename traits::size_type traits_size_type; typedef typename traits::uchar_type traits_uchar_type; typedef typename is_byte::width_type width_type; #ifdef RE_LOCALE_CPP const __JM_STD::locale& locale_inst = e.locale(); #endif // declare some local aliases to reduce pointer loads // good optimising compilers should make this unnecessary!! jstack, Allocator>& matches = pd.matches; jstack& prev_pos = pd.prev_pos; jstack& prev_record = pd.prev_record; jstack& prev_acc = pd.prev_acc; reg_match_base& temp_match = pd.temp_match; temp_match.set_first(first); //temp_match.set_size(e.mark_count(), first, last); register const re_syntax_base* ptr = e.first(); bool match_found = false; bool need_push_match = (e.mark_count() > 1); int cur_acc = -1; // no active accumulator pd.set_accumulator_size(e.repeat_count()); int* accumulators = pd.get_accumulators(); iterator* start_loop = pd.get_loop_starts(); int k; // for loops bool icase = e.flags() & regbase::icase; *restart = first; iterator base = first; // prepare m for failure: /* if((flags & match_init) == 0) { m.init_fail(first, last); } */ retry: while(first != last) { jm_assert(ptr); switch(ptr->type) { case syntax_element_match: match_jump: { // match found, save then fallback in case we missed a // longer one. if((flags & match_not_null) && (first == temp_match[0].first)) goto failure; temp_match.set_second(first); m.maybe_assign(temp_match); match_found = true; if((flags & match_any) || ((first == last) && (need_push_match == false))) { // either we don't care what we match or we've matched // the whole string and can't match anything longer. while(matches.empty() == false) matches.pop(); while(prev_pos.empty() == false) prev_pos.pop(); while(prev_record.empty() == false) prev_record.pop(); while(prev_acc.empty() == false) prev_acc.pop(); return true; } } goto failure; case syntax_element_startmark: temp_match.set_first(first, ((re_brace*)ptr)->index); ptr = ptr->next.p; break; case syntax_element_endmark: temp_match.set_second(first, ((re_brace*)ptr)->index); ptr = ptr->next.p; break; case syntax_element_literal: { unsigned int len = ((re_literal*)ptr)->length; charT* what = (charT*)(((re_literal*)ptr) + 1); // // compare string with what we stored in // our records: for(unsigned int i = 0; i < len; ++i, ++first) { if((first == last) || (traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst)) != what[i])) goto failure; } ptr = ptr->next.p; break; } case syntax_element_start_line: outer_line_check: if(first == temp_match[0].first) { // we're at the start of the buffer if(flags & match_prev_avail) { inner_line_check: // check the previous value even though its before // the start of our "buffer". iterator t(first); --t; if(traits::is_separator(*t) && !((*t == '\r') && (*first == '\n')) ) { ptr = ptr->next.p; continue; } goto failure; } if((flags & match_not_bol) == 0) { ptr = ptr->next.p; continue; } goto failure; } // we're in the middle of the string goto inner_line_check; case syntax_element_end_line: // we're not yet at the end so *first is always valid: if(traits::is_separator(*first)) { if((first != base) || (flags & match_prev_avail)) { // check that we're not in the middle of \r\n sequence iterator t(first); --t; if((*t == '\r') && (*first == '\n')) { goto failure; } } ptr = ptr->next.p; continue; } goto failure; case syntax_element_wild: // anything except possibly NULL or \n: if(traits::is_separator(*first)) { if(flags & match_not_dot_newline) goto failure; ptr = ptr->next.p; ++first; continue; } if(*first == charT(0)) { if(flags & match_not_dot_null) goto failure; ptr = ptr->next.p; ++first; continue; } ptr = ptr->next.p; ++first; break; case syntax_element_word_boundary: { // prev and this character must be opposites: bool b = traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst)); if((first == temp_match[0].first) && ((flags & match_prev_avail) == 0)) { if(flags & match_not_bow) b ^= true; else b ^= false; } else { --first; b ^= traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst)); ++first; } if(b) { ptr = ptr->next.p; continue; } goto failure; } case syntax_element_within_word: // both prev and this character must be char_class_word: if(traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst))) { bool b; if((first == temp_match[0].first) && ((flags & match_prev_avail) == 0)) b = false; else { --first; b = traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst)); ++first; } if(b) { ptr = ptr->next.p; continue; } } goto failure; case syntax_element_word_start: if((first == temp_match[0].first) && ((flags & match_prev_avail) == 0)) { // start of buffer: if(flags & match_not_bow) goto failure; if(traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst))) { ptr = ptr->next.p; continue; } goto failure; } // otherwise inside buffer: if(traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst))) { iterator t(first); --t; if(traits::is_class(*t, char_class_word MAYBE_PASS_LOCALE(locale_inst)) == false) { ptr = ptr->next.p; continue; } } goto failure; // if we fall through to here then we've failed case syntax_element_word_end: if((first == temp_match[0].first) && ((flags & match_prev_avail) == 0)) goto failure; // start of buffer can't be end of word // otherwise inside buffer: if(traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst)) == false) { iterator t(first); --t; if(traits::is_class(*t, char_class_word MAYBE_PASS_LOCALE(locale_inst))) { ptr = ptr->next.p; continue; } } goto failure; // if we fall through to here then we've failed case syntax_element_buffer_start: if((first != temp_match[0].first) || (flags & match_not_bob)) goto failure; // OK match: ptr = ptr->next.p; break; case syntax_element_buffer_end: if((first != last) || (flags & match_not_eob)) goto failure; // OK match: ptr = ptr->next.p; break; case syntax_element_backref: { // compare with what we previously matched: iterator i = temp_match[((re_brace*)ptr)->index].first; iterator j = temp_match[((re_brace*)ptr)->index].second; while(i != j) { if((first == last) || (traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst)) != traits::translate(*i, icase MAYBE_PASS_LOCALE(locale_inst)))) goto failure; ++i; ++first; } ptr = ptr->next.p; break; } case syntax_element_long_set: { // let the traits class do the work: iterator t = re_is_set_member(first, last, (re_set_long*)ptr, e); if(t != first) { ptr = ptr->next.p; first = t; continue; } goto failure; } case syntax_element_set: // lookup character in table: if(((re_set*)ptr)->__map[(traits_uchar_type)traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst))]) { ptr = ptr->next.p; ++first; continue; } goto failure; case syntax_element_jump: ptr = ((re_jump*)ptr)->alt.p; continue; case syntax_element_alt: { // alt_jump: if(reg_expression::can_start(*first, ((re_jump*)ptr)->__map, (unsigned char)mask_take, width_type())) { // we can take the first alternative, // see if we need to push next alternative: if(reg_expression::can_start(*first, ((re_jump*)ptr)->__map, mask_skip, width_type())) { if(need_push_match) matches.push(temp_match); for(k = 0; k <= cur_acc; ++k) prev_pos.push(start_loop[k]); prev_pos.push(first); prev_record.push(ptr); for(k = 0; k <= cur_acc; ++k) prev_acc.push(accumulators[k]); prev_acc.push(cur_acc); } ptr = ptr->next.p; continue; } if(reg_expression::can_start(*first, ((re_jump*)ptr)->__map, mask_skip, width_type())) { ptr = ((re_jump*)ptr)->alt.p; continue; } goto failure; // neither option is possible } case syntax_element_rep: { // repeater_jump: // if we're moving to a higher id (nested repeats etc) // zero out our accumualtors: if(cur_acc < ((re_repeat*)ptr)->id) { cur_acc = ((re_repeat*)ptr)->id; accumulators[cur_acc] = 0; start_loop[cur_acc] = iterator(); } cur_acc = ((re_repeat*)ptr)->id; if(((re_repeat*)ptr)->leading) *restart = first; //charT c = traits::translate(*first MAYBE_PASS_LOCALE(locale_inst)); // first of all test for special case where this is last element, // if that is the case then repeat as many times as possible: if(((re_repeat*)ptr)->alt.p->type == syntax_element_match) { // see if we can take the repeat: if(((unsigned int)accumulators[cur_acc] < ((re_repeat*)ptr)->max) && reg_expression::can_start(*first, ((re_repeat*)ptr)->__map, mask_take, width_type())) { // push terminating match as fallback: if((unsigned int)accumulators[cur_acc] >= ((re_repeat*)ptr)->min) { if((prev_record.empty() == false) && (prev_record.peek() == ((re_repeat*)ptr)->alt.p)) { // we already have the required fallback // don't add any more, just update this one: if(need_push_match) matches.peek() = temp_match; prev_pos.peek() = first; } else { if(need_push_match) matches.push(temp_match); prev_pos.push(first); prev_record.push(((re_repeat*)ptr)->alt.p); } } // move to next item in list: if(first != start_loop[cur_acc]) { ++accumulators[cur_acc]; ptr = ptr->next.p; start_loop[cur_acc] = first; continue; } goto failure; } // see if we can skip the repeat: if(((unsigned int)accumulators[cur_acc] >= ((re_repeat*)ptr)->min) && reg_expression::can_start(*first, ((re_repeat*)ptr)->__map, mask_skip, width_type())) { ptr = ((re_repeat*)ptr)->alt.p; continue; } // otherwise fail: goto failure; } // see if we can skip the repeat: if(((unsigned int)accumulators[cur_acc] >= ((re_repeat*)ptr)->min) && reg_expression::can_start(*first, ((re_repeat*)ptr)->__map, mask_skip, width_type())) { // see if we can push failure info: if(((unsigned int)accumulators[cur_acc] < ((re_repeat*)ptr)->max) && reg_expression::can_start(*first, ((re_repeat*)ptr)->__map, mask_take, width_type())) { // check to see if the last loop matched a NULL string // if so then we really don't want to loop again: if(((unsigned int)accumulators[cur_acc] == ((re_repeat*)ptr)->min) || (first != start_loop[cur_acc])) { if(need_push_match) matches.push(temp_match); prev_pos.push(first); prev_record.push(ptr); for(k = 0; k <= cur_acc; ++k) prev_acc.push(accumulators[k]); //prev_acc.push(cur_acc); } } ptr = ((re_repeat*)ptr)->alt.p; continue; } // otherwise see if we can take the repeat: if(((unsigned int)accumulators[cur_acc] < ((re_repeat*)ptr)->max) && reg_expression::can_start(*first, ((re_repeat*)ptr)->__map, mask_take, width_type()) && (first != start_loop[cur_acc])) { // move to next item in list: ++accumulators[cur_acc]; ptr = ptr->next.p; start_loop[cur_acc] = first; continue; } // if we get here then neither option is allowed so fail: goto failure; } case syntax_element_combining: if(traits::is_combining(traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst)))) goto failure; ++first; while((first != last) && traits::is_combining(traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst))))++first; ptr = ptr->next.p; continue; case syntax_element_soft_buffer_end: { if(flags & match_not_eob) goto failure; iterator p(first); while((p != last) && traits::is_separator(traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst))))++p; if(p != last) goto failure; ptr = ptr->next.p; continue; } case syntax_element_restart_continue: if(first != temp_match[-1].first) goto failure; ptr = ptr->next.p; continue; default: jm_assert(0); // should never get to here!! return false; } } // // if we get to here then we've run out of characters to match against, // we could however still have non-character regex items left if(ptr->can_be_null == 0) goto failure; while(true) { jm_assert(ptr); switch(ptr->type) { case syntax_element_match: goto match_jump; case syntax_element_startmark: temp_match.set_first(first, ((re_brace*)ptr)->index); ptr = ptr->next.p; break; case syntax_element_endmark: temp_match.set_second(first, ((re_brace*)ptr)->index); ptr = ptr->next.p; break; case syntax_element_start_line: goto outer_line_check; case syntax_element_end_line: // we're at the end so *first is never valid: if((flags & match_not_eol) == 0) { ptr = ptr->next.p; continue; } goto failure; case syntax_element_word_boundary: case syntax_element_word_end: if(((flags & match_not_eow) == 0) && (first != temp_match[0].first)) { iterator t(first); --t; if(traits::is_class(*t, char_class_word MAYBE_PASS_LOCALE(locale_inst))) { ptr = ptr->next.p; continue; } } goto failure; case syntax_element_buffer_end: case syntax_element_soft_buffer_end: if(flags & match_not_eob) goto failure; // OK match: ptr = ptr->next.p; break; case syntax_element_jump: ptr = ((re_jump*)ptr)->alt.p; continue; case syntax_element_alt: if(ptr->can_be_null & mask_take) { // we can test the first alternative, // see if we need to push next alternative: if(ptr->can_be_null & mask_skip) { if(need_push_match) matches.push(temp_match); for(k = 0; k <= cur_acc; ++k) prev_pos.push(start_loop[k]); prev_pos.push(first); prev_record.push(ptr); for(k = 0; k <= cur_acc; ++k) prev_acc.push(accumulators[k]); prev_acc.push(cur_acc); } ptr = ptr->next.p; continue; } if(ptr->can_be_null & mask_skip) { ptr = ((re_jump*)ptr)->alt.p; continue; } goto failure; // neither option is possible case syntax_element_rep: // if we're moving to a higher id (nested repeats etc) // zero out our accumualtors: if(cur_acc < ((re_repeat*)ptr)->id) { cur_acc = ((re_repeat*)ptr)->id; accumulators[cur_acc] = 0; start_loop[cur_acc] = first; } cur_acc = ((re_repeat*)ptr)->id; // see if we can skip the repeat: if(((unsigned int)accumulators[cur_acc] >= ((re_repeat*)ptr)->min) && (ptr->can_be_null & mask_skip)) { // don't push failure info, there's no point: ptr = ((re_repeat*)ptr)->alt.p; continue; } // otherwise see if we can take the repeat: if(((unsigned int)accumulators[cur_acc] < ((re_repeat*)ptr)->max) && ((ptr->can_be_null & (mask_take | mask_skip)) == (mask_take | mask_skip))) { // move to next item in list: ++accumulators[cur_acc]; ptr = ptr->next.p; start_loop[cur_acc] = first; continue; } // if we get here then neither option is allowed so fail: goto failure; case syntax_element_restart_continue: if(first != temp_match[-1].first) goto failure; ptr = ptr->next.p; continue; default: goto failure; } } failure: if(prev_record.empty() == false) { ptr = prev_record.peek(); switch(ptr->type) { case syntax_element_alt: // get next alternative: ptr = ((re_jump*)ptr)->alt.p; if(need_push_match) matches.pop(temp_match); prev_acc.pop(cur_acc); for(k = cur_acc; k >= 0; --k) prev_acc.pop(accumulators[k]); prev_pos.pop(first); for(k = cur_acc; k >= 0; --k) prev_pos.pop(start_loop[k]); prev_record.pop(); goto retry; case syntax_element_rep: // we're doing least number of repeats first, // increment count and repeat again: if(need_push_match) matches.pop(temp_match); prev_pos.pop(first); cur_acc = ((re_repeat*)ptr)->id; for(k = cur_acc; k >= 0; --k) prev_acc.pop(accumulators[k]); prev_record.pop(); if((unsigned int)++accumulators[cur_acc] > ((re_repeat*)ptr)->max) goto failure; // repetions exhausted. ptr = ptr->next.p; start_loop[cur_acc] = first; goto retry; case syntax_element_match: if(need_push_match) matches.pop(temp_match); prev_pos.pop(first); prev_record.pop(); goto retry; default: jm_assert(0); // mustn't get here!! } } if(match_found) return true; // if we get to here then everything has failed // and no match was found: return false; } #if defined(JM_NO_TEMPLATE_SWITCH_MERGE) && !defined(JM_NO_NAMESPACES) } // namespace #endif template void __skip_and_inc(unsigned int& clines, iterator& last_line, iterator& first, const iterator last) { while(first != last) { if(*first == '\n') { last_line = ++first; ++clines; } else ++first; } } template void __skip_and_dec(unsigned int& clines, iterator& last_line, iterator& first, iterator base, unsigned int len) { bool need_line = false; for(unsigned int i = 0; i < len; ++i) { --first; if(*first == '\n') { need_line = true; --clines; } } if(need_line) { last_line = first; if(last_line != base) --last_line; else return; while((last_line != base) && (*last_line != '\n')) --last_line; if(*last_line == '\n') ++last_line; } } template inline void __inc_one(unsigned int& clines, iterator& last_line, iterator& first) { if(*first == '\n') { last_line = ++first; ++clines; } else ++first; } template struct grep_search_predicate { reg_match* pm; grep_search_predicate(reg_match* p) : pm(p) {} bool operator()(const reg_match& m) { *pm = static_cast&>(m); return false; } }; #if !defined(JM_NO_TEMPLATE_RETURNS) && !defined(JM_NO_PARTIAL_FUNC_SPEC) template inline const reg_match_base& grep_out_type(const grep_search_predicate& o, const Allocator&) { return *(o.pm); } #endif template inline const Allocator& grep_out_type(const T&, const Allocator& a) { return a; } #if defined(JM_NO_TEMPLATE_SWITCH_MERGE) && !defined(JM_NO_NAMESPACES) // // Ugly ugly hack, // template don't merge if they contain switch statements so declare these // templates in unnamed namespace (ie with internal linkage), each translation // unit then gets its own local copy, it works seemlessly but bloats the app. namespace{ #endif // // reg_grep2: // find all non-overlapping matches within the sequence first last: // template unsigned int reg_grep2(Predicate foo, I first, I last, const reg_expression& e, unsigned flags, A2 a) { if(e.flags() & regbase::failbit) return 0; typedef typename traits::size_type traits_size_type; typedef typename traits::uchar_type traits_uchar_type; typedef typename is_byte::width_type width_type; reg_match m(grep_out_type(foo, a)); I restart; m.set_size(e.mark_count(), first, last); m.set_line(1, first); #ifdef RE_LOCALE_CPP const __JM_STD::locale& locale_inst = e.locale(); #endif unsigned int clines = 1; unsigned int cmatches = 0; I last_line = first; I next_base; I base = first; bool need_init; flags |= match_init; __priv_match_data pd(m); const unsigned char* __map = e.get_map(); unsigned int type; if(first == last) { // special case, only test if can_be_null, // don't dereference any pointers!! if(e.first()->can_be_null) if(query_match_aux(first, last, m, e, flags, pd, &restart)) { foo(m); ++cmatches; } return cmatches; } // try one time whatever: if( reg_expression::can_start(*first, __map, (unsigned char)mask_any, width_type() ) ) { if(query_match_aux(first, last, m, e, flags, pd, &restart)) { ++cmatches; if(foo(m) == false) return cmatches; // update to end of what matched // trying to match again with match_not_null set if this // is a null match... need_init = true; if(first == m[0].second) { next_base = m[0].second; pd.temp_match.init_fail(next_base, last); m.init_fail(next_base, last); if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart)) { ++cmatches; if(foo(m) == false) return cmatches; } else { need_init = false; for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); if(restart != last) ++restart; __skip_and_inc(clines, last_line, first, restart); } } if(need_init) { __skip_and_inc(clines, last_line, first, m[0].second); next_base = m[0].second; pd.temp_match.init_fail(next_base, last); m.init_fail(next_base, last); } } else { for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); if(restart != last) ++restart; __skip_and_inc(clines, last_line, first, restart); } } else __inc_one(clines, last_line, first); flags |= match_prev_avail | match_not_bob; // depending on what the first record is we may be able to // optimise the search: type = (flags & match_continuous) ? regbase::restart_continue : e.restart_type(); if(type == regbase::restart_buf) return cmatches; switch(type) { case regbase::restart_lit: case regbase::restart_fixed_lit: { const kmp_info* info = e.get_kmp(); int len = info->len; const charT* x = info->pstr; int j = 0; bool icase = e.flags() & regbase::icase; while (first != last) { while((j > -1) && (x[j] != traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst)))) j = info->kmp_next[j]; __inc_one(clines, last_line, first); ++j; if(j >= len) { if(type == regbase::restart_fixed_lit) { __skip_and_dec(clines, last_line, first, base, j); restart = first; restart += len; m.set_first(first); m.set_second(restart); m.set_line(clines, last_line); ++cmatches; if(foo(m) == false) return cmatches; __skip_and_inc(clines, last_line, first, restart); next_base = m[0].second; pd.temp_match.init_fail(next_base, last); m.init_fail(next_base, last); j = 0; } else { restart = first; __skip_and_dec(clines, last_line, first, base, j); if(query_match_aux(first, last, m, e, flags, pd, &restart)) { m.set_line(clines, last_line); ++cmatches; if(foo(m) == false) return cmatches; // update to end of what matched __skip_and_inc(clines, last_line, first, m[0].second); next_base = m[0].second; pd.temp_match.init_fail(next_base, last); m.init_fail(next_base, last); j = 0; } else { for(int k = 0; (restart != first) && (k < j); ++k, --restart); if(restart != last) ++restart; __skip_and_inc(clines, last_line, first, restart); j = 0; //we could do better than this... } } } } break; } case regbase::restart_any: { while(first != last) { if( reg_expression::can_start(*first, __map, (unsigned char)mask_any, width_type()) ) { if(query_match_aux(first, last, m, e, flags, pd, &restart)) { m.set_line(clines, last_line); ++cmatches; if(foo(m) == false) return cmatches; // update to end of what matched // trying to match again with match_not_null set if this // is a null match... need_init = true; if(first == m[0].second) { next_base = m[0].second; pd.temp_match.init_fail(next_base, last); m.init_fail(next_base, last); if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart)) { m.set_line(clines, last_line); ++cmatches; if(foo(m) == false) return cmatches; } else { need_init = false; for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); if(restart != last) ++restart; __skip_and_inc(clines, last_line, first, restart); } } if(need_init) { __skip_and_inc(clines, last_line, first, m[0].second); next_base = m[0].second; pd.temp_match.init_fail(next_base, last); m.init_fail(next_base, last); } continue; } else { for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); if(restart != last) ++restart; __skip_and_inc(clines, last_line, first, restart); } } else __inc_one(clines, last_line, first); } } break; case regbase::restart_word: { // do search optimised for word starts: while(first != last) { --first; if(*first == '\n') --clines; // skip the word characters: while((first != last) && traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst))) ++first; // now skip the white space: while((first != last) && (traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst)) == false)) __inc_one(clines, last_line, first); if(first == last) break; if( reg_expression::can_start(*first, __map, (unsigned char)mask_any, width_type()) ) { if(query_match_aux(first, last, m, e, flags, pd, &restart)) { m.set_line(clines, last_line); ++cmatches; if(foo(m) == false) return cmatches; // update to end of what matched // trying to match again with match_not_null set if this // is a null match... need_init = true; if(first == m[0].second) { next_base = m[0].second; pd.temp_match.init_fail(next_base, last); m.init_fail(next_base, last); if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart)) { m.set_line(clines, last_line); ++cmatches; if(foo(m) == false) return cmatches; } else { need_init = false; for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); if(restart != last) ++restart; __skip_and_inc(clines, last_line, first, restart); } } if(need_init) { __skip_and_inc(clines, last_line, first, m[0].second); next_base = m[0].second; pd.temp_match.init_fail(next_base, last); m.init_fail(next_base, last); } } else { for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); if(restart != last) ++restart; __skip_and_inc(clines, last_line, first, restart); } } else __inc_one(clines, last_line, first); } } break; case regbase::restart_line: { // do search optimised for line starts: while(first != last) { // find first charcter after a line break: --first; if(*first == '\n') --clines; while((first != last) && (*first != '\n')) ++first; if(first == last) break; ++first; if(first == last) break; ++clines; last_line = first; if( reg_expression::can_start(*first, __map, (unsigned char)mask_any, width_type()) ) { if(query_match_aux(first, last, m, e, flags, pd, &restart)) { m.set_line(clines, last_line); ++cmatches; if(foo(m) == false) return cmatches; // update to end of what matched // trying to match again with match_not_null set if this // is a null match... need_init = true; if(first == m[0].second) { next_base = m[0].second; pd.temp_match.init_fail(next_base, last); m.init_fail(next_base, last); if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart)) { m.set_line(clines, last_line); ++cmatches; if(foo(m) == false) return cmatches; } else { need_init = false; for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); if(restart != last) ++restart; __skip_and_inc(clines, last_line, first, restart); } } if(need_init) { __skip_and_inc(clines, last_line, first, m[0].second); next_base = m[0].second; pd.temp_match.init_fail(next_base, last); m.init_fail(next_base, last); } } else { for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); if(restart != last) ++restart; __skip_and_inc(clines, last_line, first, restart); } } else __inc_one(clines, last_line, first); } } break; case regbase::restart_continue: { while(first != last) { if( reg_expression::can_start(*first, __map, (unsigned char)mask_any, width_type()) ) { if(query_match_aux(first, last, m, e, flags, pd, &restart)) { m.set_line(clines, last_line); ++cmatches; if(foo(m) == false) return cmatches; // update to end of what matched // trying to match again with match_not_null set if this // is a null match... if(first == m[0].second) { next_base = m[0].second; pd.temp_match.init_fail(next_base, last); m.init_fail(next_base, last); if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart)) { m.set_line(clines, last_line); ++cmatches; if(foo(m) == false) return cmatches; } else return cmatches; // can't continue from null match } __skip_and_inc(clines, last_line, first, m[0].second); next_base = m[0].second; pd.temp_match.init_fail(next_base, last); m.init_fail(next_base, last); continue; } } return cmatches; } } break; } // finally check trailing null string: if(e.first()->can_be_null) { if(query_match_aux(first, last, m, e, flags, pd, &restart)) { m.set_line(clines, last_line); ++cmatches; if(foo(m) == false) return cmatches; } } return cmatches; } #if defined(JM_NO_TEMPLATE_SWITCH_MERGE) && !defined(JM_NO_NAMESPACES) } // namespace #endif template bool reg_search(iterator first, iterator last, reg_match& m, const reg_expression& e, unsigned flags = match_default) { if(e.flags() & regbase::failbit) return false; typedef typename traits::size_type traits_size_type; typedef typename traits::uchar_type traits_uchar_type; // prepare m for failure: if((flags & match_init) == 0) { m.set_size(e.mark_count(), first, last); } flags |= match_init; return reg_grep2(grep_search_predicate(&m), first, last, e, flags, m.allocator()); } // // reg_search convenience interfaces: #ifndef JM_NO_PARTIAL_FUNC_SPEC // // this isn't really a partial specialisation, but template function // overloading - if the compiler doesn't support partial specialisation // then it really won't support this either: template inline bool reg_search(const charT* str, reg_match& m, const reg_expression& e, unsigned flags = match_default) { return reg_search(str, str + traits::length(str), m, e, flags); } #ifndef JM_NO_STRING_H template inline bool reg_search(const __JM_STD::basic_string& s, reg_match::const_iterator, Allocator>& m, const reg_expression& e, unsigned flags = match_default) { return reg_search(s.begin(), s.end(), m, e, flags); } #endif #else // partial specialisation inline bool reg_search(const char* str, cmatch& m, const regex& e, unsigned flags = match_default) { return reg_search(str, str + regex::traits_type::length(str), m, e, flags); } #ifndef JM_NO_WCSTRING inline bool reg_search(const wchar_t* str, wcmatch& m, const wregex& e, unsigned flags = match_default) { return reg_search(str, str + wregex::traits_type::length(str), m, e, flags); } #endif #ifndef JM_NO_STRING_H inline bool reg_search(const __JM_STD::string& s, reg_match<__JM_STD::string::const_iterator, regex::alloc_type>& m, const regex& e, unsigned flags = match_default) { return reg_search(s.begin(), s.end(), m, e, flags); } #if !defined(JM_NO_STRING_DEF_ARGS) && !defined(JM_NO_WCSTRING) inline bool reg_search(const __JM_STD::basic_string& s, reg_match<__JM_STD::basic_string::const_iterator, wregex::alloc_type>& m, const wregex& e, unsigned flags = match_default) { return reg_search(s.begin(), s.end(), m, e, flags); } #endif #endif #endif // // reg_grep: // find all non-overlapping matches within the sequence first last: // template inline unsigned int reg_grep(Predicate foo, iterator first, iterator last, const reg_expression& e, unsigned flags = match_default) { return reg_grep2(foo, first, last, e, flags, e.allocator()); } // // reg_grep convenience interfaces: #ifndef JM_NO_PARTIAL_FUNC_SPEC // // this isn't really a partial specialisation, but template function // overloading - if the compiler doesn't support partial specialisation // then it really won't support this either: template inline bool reg_grep(Predicate foo, const charT* str, const reg_expression& e, unsigned flags = match_default) { return reg_grep(foo, str, str + traits::length(str), e, flags); } #ifndef JM_NO_STRING_H template inline bool reg_grep(Predicate foo, const __JM_STD::basic_string& s, const reg_expression& e, unsigned flags = match_default) { return reg_grep(foo, s.begin(), s.end(), e, flags); } #endif #else // partial specialisation inline bool reg_grep(bool (*foo)(const cmatch&), const char* str, const regex& e, unsigned flags = match_default) { return reg_grep(foo, str, str + regex::traits_type::length(str), e, flags); } #ifndef JM_NO_WCSTRING inline bool reg_grep(bool (*foo)(const wcmatch&), const wchar_t* str, const wregex& e, unsigned flags = match_default) { return reg_grep(foo, str, str + wregex::traits_type::length(str), e, flags); } #endif #ifndef JM_NO_STRING_H inline bool reg_grep(bool (*foo)(const reg_match<__JM_STD::string::const_iterator, regex::alloc_type>&), const __JM_STD::string& s, const regex& e, unsigned flags = match_default) { return reg_grep(foo, s.begin(), s.end(), e, flags); } #if !defined(JM_NO_STRING_DEF_ARGS) && !defined(JM_NO_WCSTRING) inline bool reg_grep(bool (*foo)(const reg_match<__JM_STD::basic_string::const_iterator, wregex::alloc_type>&), const __JM_STD::basic_string& s, const wregex& e, unsigned flags = match_default) { return reg_grep(foo, s.begin(), s.end(), e, flags); } #endif #endif #endif // // finally for compatablity with version 1.x of the library // we need a form of reg_grep that takes an output iterator // as its first argument: // // // struct grep_match: // stores what matched during a reg_grep, // the output iterator type passed to reg_grep must have an // operator*() that returns a type with an // operator=(const grep_match&); // template struct grep_match { unsigned int line; iterator line_start; reg_match what; grep_match(Allocator a = Allocator()) : what(a) {} grep_match(unsigned int l, iterator p1, const reg_match& m) : what(m) { line = l; line_start = p1; } bool operator == (const grep_match& ) { return false; } bool operator < (const grep_match&) { return false; } }; template struct grep_adaptor { O oi; reg_match m; grep_adaptor(O i, A a) : m(a), oi(i) {} bool operator()(const reg_match_base& w) { m.what = w; m.line = w.line(); m.line_start = w.line_start(); *oi = m; ++oi; return true; } }; template inline unsigned int reg_grep_old(Out oi, iterator first, iterator last, const reg_expression& e, unsigned flags = match_default) { return reg_grep2(grep_adaptor(oi, e.allocator()), first, last, e, flags, e.allocator()); } JM_END_NAMESPACE // namespace regex #endif // __REGMATCH_H