]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /* |
2 | * | |
3 | * Copyright (c) 2002 | |
4 | * John Maddock | |
5 | * | |
6 | * Use, modification and distribution are subject to the | |
7 | * Boost Software License, Version 1.0. (See accompanying file | |
8 | * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
9 | * | |
10 | */ | |
11 | ||
12 | /* | |
13 | * LOCATION: see http://www.boost.org for most recent version. | |
14 | * FILE perl_matcher_common.cpp | |
15 | * VERSION see <boost/version.hpp> | |
16 | * DESCRIPTION: Definitions of perl_matcher member functions that are | |
17 | * specific to the recursive implementation. | |
18 | */ | |
19 | ||
20 | #ifndef BOOST_REGEX_V4_PERL_MATCHER_RECURSIVE_HPP | |
21 | #define BOOST_REGEX_V4_PERL_MATCHER_RECURSIVE_HPP | |
22 | ||
23 | #ifdef BOOST_MSVC | |
24 | #pragma warning(push) | |
25 | #pragma warning(disable: 4103) | |
26 | #endif | |
27 | #ifdef BOOST_HAS_ABI_HEADERS | |
28 | # include BOOST_ABI_PREFIX | |
29 | #endif | |
30 | #ifdef BOOST_MSVC | |
31 | #pragma warning(pop) | |
32 | #endif | |
33 | ||
34 | #ifdef BOOST_MSVC | |
35 | #pragma warning(push) | |
36 | #pragma warning(disable: 4800) | |
37 | #endif | |
38 | ||
39 | namespace boost{ | |
40 | namespace BOOST_REGEX_DETAIL_NS{ | |
41 | ||
42 | template <class BidiIterator> | |
43 | class backup_subex | |
44 | { | |
45 | int index; | |
46 | sub_match<BidiIterator> sub; | |
47 | public: | |
48 | template <class A> | |
49 | backup_subex(const match_results<BidiIterator, A>& w, int i) | |
50 | : index(i), sub(w[i], false) {} | |
51 | template <class A> | |
52 | void restore(match_results<BidiIterator, A>& w) | |
53 | { | |
54 | w.set_first(sub.first, index, index == 0); | |
55 | w.set_second(sub.second, index, sub.matched, index == 0); | |
56 | } | |
57 | const sub_match<BidiIterator>& get() { return sub; } | |
58 | }; | |
59 | ||
60 | template <class BidiIterator, class Allocator, class traits> | |
61 | bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states() | |
62 | { | |
63 | static matcher_proc_type const s_match_vtable[34] = | |
64 | { | |
65 | (&perl_matcher<BidiIterator, Allocator, traits>::match_startmark), | |
66 | &perl_matcher<BidiIterator, Allocator, traits>::match_endmark, | |
67 | &perl_matcher<BidiIterator, Allocator, traits>::match_literal, | |
68 | &perl_matcher<BidiIterator, Allocator, traits>::match_start_line, | |
69 | &perl_matcher<BidiIterator, Allocator, traits>::match_end_line, | |
70 | &perl_matcher<BidiIterator, Allocator, traits>::match_wild, | |
71 | &perl_matcher<BidiIterator, Allocator, traits>::match_match, | |
72 | &perl_matcher<BidiIterator, Allocator, traits>::match_word_boundary, | |
73 | &perl_matcher<BidiIterator, Allocator, traits>::match_within_word, | |
74 | &perl_matcher<BidiIterator, Allocator, traits>::match_word_start, | |
75 | &perl_matcher<BidiIterator, Allocator, traits>::match_word_end, | |
76 | &perl_matcher<BidiIterator, Allocator, traits>::match_buffer_start, | |
77 | &perl_matcher<BidiIterator, Allocator, traits>::match_buffer_end, | |
78 | &perl_matcher<BidiIterator, Allocator, traits>::match_backref, | |
79 | &perl_matcher<BidiIterator, Allocator, traits>::match_long_set, | |
80 | &perl_matcher<BidiIterator, Allocator, traits>::match_set, | |
81 | &perl_matcher<BidiIterator, Allocator, traits>::match_jump, | |
82 | &perl_matcher<BidiIterator, Allocator, traits>::match_alt, | |
83 | &perl_matcher<BidiIterator, Allocator, traits>::match_rep, | |
84 | &perl_matcher<BidiIterator, Allocator, traits>::match_combining, | |
85 | &perl_matcher<BidiIterator, Allocator, traits>::match_soft_buffer_end, | |
86 | &perl_matcher<BidiIterator, Allocator, traits>::match_restart_continue, | |
87 | // Although this next line *should* be evaluated at compile time, in practice | |
88 | // some compilers (VC++) emit run-time initialisation which breaks thread | |
89 | // safety, so use a dispatch function instead: | |
90 | //(::boost::is_random_access_iterator<BidiIterator>::value ? &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_fast : &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_slow), | |
91 | &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_dispatch, | |
92 | &perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat, | |
93 | &perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat, | |
94 | &perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat, | |
95 | &perl_matcher<BidiIterator, Allocator, traits>::match_backstep, | |
96 | &perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref, | |
97 | &perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case, | |
98 | &perl_matcher<BidiIterator, Allocator, traits>::match_recursion, | |
99 | &perl_matcher<BidiIterator, Allocator, traits>::match_fail, | |
100 | &perl_matcher<BidiIterator, Allocator, traits>::match_accept, | |
101 | &perl_matcher<BidiIterator, Allocator, traits>::match_commit, | |
102 | &perl_matcher<BidiIterator, Allocator, traits>::match_then, | |
103 | }; | |
104 | ||
105 | if(state_count > max_state_count) | |
106 | raise_error(traits_inst, regex_constants::error_complexity); | |
107 | while(pstate) | |
108 | { | |
109 | matcher_proc_type proc = s_match_vtable[pstate->type]; | |
110 | ++state_count; | |
111 | if(!(this->*proc)()) | |
112 | { | |
113 | if((m_match_flags & match_partial) && (position == last) && (position != search_base)) | |
114 | m_has_partial_match = true; | |
115 | return 0; | |
116 | } | |
117 | } | |
118 | return true; | |
119 | } | |
120 | ||
121 | template <class BidiIterator, class Allocator, class traits> | |
122 | bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark() | |
123 | { | |
124 | int index = static_cast<const re_brace*>(pstate)->index; | |
125 | icase = static_cast<const re_brace*>(pstate)->icase; | |
126 | bool r = true; | |
127 | switch(index) | |
128 | { | |
129 | case 0: | |
130 | pstate = pstate->next.p; | |
131 | break; | |
132 | case -1: | |
133 | case -2: | |
134 | { | |
135 | // forward lookahead assert: | |
136 | BidiIterator old_position(position); | |
137 | const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p; | |
138 | pstate = pstate->next.p->next.p; | |
139 | r = match_all_states(); | |
140 | pstate = next_pstate; | |
141 | position = old_position; | |
142 | if((r && (index != -1)) || (!r && (index != -2))) | |
143 | r = false; | |
144 | else | |
145 | r = true; | |
146 | if(r && m_have_accept) | |
147 | r = skip_until_paren(INT_MAX); | |
148 | break; | |
149 | } | |
150 | case -3: | |
151 | { | |
152 | // independent sub-expression: | |
153 | bool old_independent = m_independent; | |
154 | m_independent = true; | |
155 | const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p; | |
156 | pstate = pstate->next.p->next.p; | |
157 | bool can_backtrack = m_can_backtrack; | |
158 | r = match_all_states(); | |
159 | if(r) | |
160 | m_can_backtrack = can_backtrack; | |
161 | pstate = next_pstate; | |
162 | m_independent = old_independent; | |
163 | #ifdef BOOST_REGEX_MATCH_EXTRA | |
164 | if(r && (m_match_flags & match_extra)) | |
165 | { | |
166 | // | |
167 | // our captures have been stored in *m_presult | |
168 | // we need to unpack them, and insert them | |
169 | // back in the right order when we unwind the stack: | |
170 | // | |
171 | unsigned i; | |
172 | match_results<BidiIterator, Allocator> tm(*m_presult); | |
173 | for(i = 0; i < tm.size(); ++i) | |
174 | (*m_presult)[i].get_captures().clear(); | |
175 | // match everything else: | |
176 | r = match_all_states(); | |
177 | // now place the stored captures back: | |
178 | for(i = 0; i < tm.size(); ++i) | |
179 | { | |
180 | typedef typename sub_match<BidiIterator>::capture_sequence_type seq; | |
181 | seq& s1 = (*m_presult)[i].get_captures(); | |
182 | const seq& s2 = tm[i].captures(); | |
183 | s1.insert( | |
184 | s1.end(), | |
185 | s2.begin(), | |
186 | s2.end()); | |
187 | } | |
188 | } | |
189 | #endif | |
190 | if(r && m_have_accept) | |
191 | r = skip_until_paren(INT_MAX); | |
192 | break; | |
193 | } | |
194 | case -4: | |
195 | { | |
196 | // conditional expression: | |
197 | const re_alt* alt = static_cast<const re_alt*>(pstate->next.p); | |
198 | BOOST_ASSERT(alt->type == syntax_element_alt); | |
199 | pstate = alt->next.p; | |
200 | if(pstate->type == syntax_element_assert_backref) | |
201 | { | |
202 | if(!match_assert_backref()) | |
203 | pstate = alt->alt.p; | |
204 | break; | |
205 | } | |
206 | else | |
207 | { | |
208 | // zero width assertion, have to match this recursively: | |
209 | BOOST_ASSERT(pstate->type == syntax_element_startmark); | |
210 | bool negated = static_cast<const re_brace*>(pstate)->index == -2; | |
211 | BidiIterator saved_position = position; | |
212 | const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p; | |
213 | pstate = pstate->next.p->next.p; | |
214 | bool res = match_all_states(); | |
215 | position = saved_position; | |
216 | if(negated) | |
217 | res = !res; | |
218 | if(res) | |
219 | pstate = next_pstate; | |
220 | else | |
221 | pstate = alt->alt.p; | |
222 | break; | |
223 | } | |
224 | } | |
225 | case -5: | |
226 | { | |
227 | // Reset start of $0, since we have a \K escape | |
228 | backup_subex<BidiIterator> sub(*m_presult, 0); | |
229 | m_presult->set_first(position, 0, true); | |
230 | pstate = pstate->next.p; | |
231 | r = match_all_states(); | |
232 | if(r == false) | |
233 | sub.restore(*m_presult); | |
234 | break; | |
235 | } | |
236 | default: | |
237 | { | |
238 | BOOST_ASSERT(index > 0); | |
239 | if((m_match_flags & match_nosubs) == 0) | |
240 | { | |
241 | backup_subex<BidiIterator> sub(*m_presult, index); | |
242 | m_presult->set_first(position, index); | |
243 | pstate = pstate->next.p; | |
244 | r = match_all_states(); | |
245 | if(r == false) | |
246 | sub.restore(*m_presult); | |
247 | #ifdef BOOST_REGEX_MATCH_EXTRA | |
248 | // | |
249 | // we have a match, push the capture information onto the stack: | |
250 | // | |
251 | else if(sub.get().matched && (match_extra & m_match_flags)) | |
252 | ((*m_presult)[index]).get_captures().push_back(sub.get()); | |
253 | #endif | |
254 | } | |
255 | else | |
256 | { | |
257 | pstate = pstate->next.p; | |
258 | } | |
259 | break; | |
260 | } | |
261 | } | |
262 | return r; | |
263 | } | |
264 | ||
265 | template <class BidiIterator, class Allocator, class traits> | |
266 | bool perl_matcher<BidiIterator, Allocator, traits>::match_alt() | |
267 | { | |
268 | bool take_first, take_second; | |
269 | const re_alt* jmp = static_cast<const re_alt*>(pstate); | |
270 | ||
271 | // find out which of these two alternatives we need to take: | |
272 | if(position == last) | |
273 | { | |
274 | take_first = jmp->can_be_null & mask_take; | |
275 | take_second = jmp->can_be_null & mask_skip; | |
276 | } | |
277 | else | |
278 | { | |
279 | take_first = can_start(*position, jmp->_map, (unsigned char)mask_take); | |
280 | take_second = can_start(*position, jmp->_map, (unsigned char)mask_skip); | |
281 | } | |
282 | ||
283 | if(take_first) | |
284 | { | |
285 | // we can take the first alternative, | |
286 | // see if we need to push next alternative: | |
287 | if(take_second) | |
288 | { | |
289 | BidiIterator oldposition(position); | |
290 | const re_syntax_base* old_pstate = jmp->alt.p; | |
291 | pstate = pstate->next.p; | |
292 | bool oldcase = icase; | |
293 | m_have_then = false; | |
294 | if(!match_all_states()) | |
295 | { | |
296 | pstate = old_pstate; | |
297 | position = oldposition; | |
298 | icase = oldcase; | |
299 | if(m_have_then) | |
300 | { | |
301 | m_can_backtrack = true; | |
302 | m_have_then = false; | |
303 | return false; | |
304 | } | |
305 | } | |
306 | m_have_then = false; | |
307 | return m_can_backtrack; | |
308 | } | |
309 | pstate = pstate->next.p; | |
310 | return true; | |
311 | } | |
312 | if(take_second) | |
313 | { | |
314 | pstate = jmp->alt.p; | |
315 | return true; | |
316 | } | |
317 | return false; // neither option is possible | |
318 | } | |
319 | ||
320 | template <class BidiIterator, class Allocator, class traits> | |
321 | bool perl_matcher<BidiIterator, Allocator, traits>::match_rep() | |
322 | { | |
323 | #ifdef BOOST_MSVC | |
324 | #pragma warning(push) | |
325 | #pragma warning(disable:4127 4244) | |
326 | #endif | |
327 | const re_repeat* rep = static_cast<const re_repeat*>(pstate); | |
328 | // | |
329 | // Always copy the repeat count, so that the state is restored | |
330 | // when we exit this scope: | |
331 | // | |
332 | repeater_count<BidiIterator> r(rep->state_id, &next_count, position, this->recursion_stack.size() ? this->recursion_stack.back().idx : INT_MIN + 3); | |
333 | // | |
334 | // If we've had at least one repeat already, and the last one | |
335 | // matched the NULL string then set the repeat count to | |
336 | // maximum: | |
337 | // | |
338 | next_count->check_null_repeat(position, rep->max); | |
339 | ||
340 | // find out which of these two alternatives we need to take: | |
341 | bool take_first, take_second; | |
342 | if(position == last) | |
343 | { | |
344 | take_first = rep->can_be_null & mask_take; | |
345 | take_second = rep->can_be_null & mask_skip; | |
346 | } | |
347 | else | |
348 | { | |
349 | take_first = can_start(*position, rep->_map, (unsigned char)mask_take); | |
350 | take_second = can_start(*position, rep->_map, (unsigned char)mask_skip); | |
351 | } | |
352 | ||
353 | if(next_count->get_count() < rep->min) | |
354 | { | |
355 | // we must take the repeat: | |
356 | if(take_first) | |
357 | { | |
358 | // increase the counter: | |
359 | ++(*next_count); | |
360 | pstate = rep->next.p; | |
361 | return match_all_states(); | |
362 | } | |
363 | return false; | |
364 | } | |
365 | bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); | |
366 | if(greedy) | |
367 | { | |
368 | // try and take the repeat if we can: | |
369 | if((next_count->get_count() < rep->max) && take_first) | |
370 | { | |
371 | // store position in case we fail: | |
372 | BidiIterator pos = position; | |
373 | // increase the counter: | |
374 | ++(*next_count); | |
375 | pstate = rep->next.p; | |
376 | if(match_all_states()) | |
377 | return true; | |
378 | if(!m_can_backtrack) | |
379 | return false; | |
380 | // failed repeat, reset posistion and fall through for alternative: | |
381 | position = pos; | |
382 | } | |
383 | if(take_second) | |
384 | { | |
385 | pstate = rep->alt.p; | |
386 | return true; | |
387 | } | |
388 | return false; // can't take anything, fail... | |
389 | } | |
390 | else // non-greedy | |
391 | { | |
392 | // try and skip the repeat if we can: | |
393 | if(take_second) | |
394 | { | |
395 | // store position in case we fail: | |
396 | BidiIterator pos = position; | |
397 | pstate = rep->alt.p; | |
398 | if(match_all_states()) | |
399 | return true; | |
400 | if(!m_can_backtrack) | |
401 | return false; | |
402 | // failed alternative, reset posistion and fall through for repeat: | |
403 | position = pos; | |
404 | } | |
405 | if((next_count->get_count() < rep->max) && take_first) | |
406 | { | |
407 | // increase the counter: | |
408 | ++(*next_count); | |
409 | pstate = rep->next.p; | |
410 | return match_all_states(); | |
411 | } | |
412 | } | |
413 | return false; | |
414 | #ifdef BOOST_MSVC | |
415 | #pragma warning(pop) | |
416 | #endif | |
417 | } | |
418 | ||
419 | template <class BidiIterator, class Allocator, class traits> | |
420 | bool perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_slow() | |
421 | { | |
422 | #ifdef BOOST_MSVC | |
423 | #pragma warning(push) | |
424 | #pragma warning(disable:4127) | |
425 | #endif | |
426 | unsigned count = 0; | |
427 | const re_repeat* rep = static_cast<const re_repeat*>(pstate); | |
428 | re_syntax_base* psingle = rep->next.p; | |
429 | // match compulsary repeats first: | |
430 | while(count < rep->min) | |
431 | { | |
432 | pstate = psingle; | |
433 | if(!match_wild()) | |
434 | return false; | |
435 | ++count; | |
436 | } | |
437 | bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); | |
438 | if(greedy) | |
439 | { | |
440 | // normal repeat: | |
441 | while(count < rep->max) | |
442 | { | |
443 | pstate = psingle; | |
444 | if(!match_wild()) | |
445 | break; | |
446 | ++count; | |
447 | } | |
448 | if((rep->leading) && (count < rep->max)) | |
449 | restart = position; | |
450 | pstate = rep; | |
451 | return backtrack_till_match(count - rep->min); | |
452 | } | |
453 | else | |
454 | { | |
455 | // non-greedy, keep trying till we get a match: | |
456 | BidiIterator save_pos; | |
457 | do | |
458 | { | |
459 | if((rep->leading) && (rep->max == UINT_MAX)) | |
460 | restart = position; | |
461 | pstate = rep->alt.p; | |
462 | save_pos = position; | |
463 | ++state_count; | |
464 | if(match_all_states()) | |
465 | return true; | |
466 | if((count >= rep->max) || !m_can_backtrack) | |
467 | return false; | |
468 | ++count; | |
469 | pstate = psingle; | |
470 | position = save_pos; | |
471 | if(!match_wild()) | |
472 | return false; | |
473 | }while(true); | |
474 | } | |
475 | #ifdef BOOST_MSVC | |
476 | #pragma warning(pop) | |
477 | #endif | |
478 | } | |
479 | ||
480 | template <class BidiIterator, class Allocator, class traits> | |
481 | bool perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_fast() | |
482 | { | |
483 | #ifdef BOOST_MSVC | |
484 | #pragma warning(push) | |
485 | #pragma warning(disable:4127) | |
486 | #endif | |
487 | if(m_match_flags & match_not_dot_null) | |
488 | return match_dot_repeat_slow(); | |
489 | if((static_cast<const re_dot*>(pstate->next.p)->mask & match_any_mask) == 0) | |
490 | return match_dot_repeat_slow(); | |
491 | // | |
492 | // start by working out how much we can skip: | |
493 | // | |
494 | const re_repeat* rep = static_cast<const re_repeat*>(pstate); | |
495 | #ifdef BOOST_MSVC | |
496 | #pragma warning(push) | |
497 | #pragma warning(disable:4267) | |
498 | #endif | |
499 | bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); | |
500 | std::size_t count = (std::min)(static_cast<std::size_t>(::boost::BOOST_REGEX_DETAIL_NS::distance(position, last)), static_cast<std::size_t>(greedy ? rep->max : rep->min)); | |
501 | if(rep->min > count) | |
502 | { | |
503 | position = last; | |
504 | return false; // not enough text left to match | |
505 | } | |
506 | std::advance(position, count); | |
507 | #ifdef BOOST_MSVC | |
508 | #pragma warning(pop) | |
509 | #endif | |
510 | if((rep->leading) && (count < rep->max) && greedy) | |
511 | restart = position; | |
512 | if(greedy) | |
513 | return backtrack_till_match(count - rep->min); | |
514 | ||
515 | // non-greedy, keep trying till we get a match: | |
516 | BidiIterator save_pos; | |
517 | do | |
518 | { | |
519 | while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip)) | |
520 | { | |
521 | ++position; | |
522 | ++count; | |
523 | } | |
524 | if((rep->leading) && (rep->max == UINT_MAX)) | |
525 | restart = position; | |
526 | pstate = rep->alt.p; | |
527 | save_pos = position; | |
528 | ++state_count; | |
529 | if(match_all_states()) | |
530 | return true; | |
531 | if((count >= rep->max) || !m_can_backtrack) | |
532 | return false; | |
533 | if(save_pos == last) | |
534 | return false; | |
535 | position = ++save_pos; | |
536 | ++count; | |
537 | }while(true); | |
538 | #ifdef BOOST_MSVC | |
539 | #pragma warning(pop) | |
540 | #endif | |
541 | } | |
542 | ||
543 | template <class BidiIterator, class Allocator, class traits> | |
544 | bool perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat() | |
545 | { | |
546 | #ifdef BOOST_MSVC | |
547 | #pragma warning(push) | |
548 | #pragma warning(disable:4127) | |
549 | #pragma warning(disable:4267) | |
550 | #endif | |
551 | #ifdef __BORLANDC__ | |
552 | #pragma option push -w-8008 -w-8066 -w-8004 | |
553 | #endif | |
554 | const re_repeat* rep = static_cast<const re_repeat*>(pstate); | |
555 | BOOST_ASSERT(1 == static_cast<const re_literal*>(rep->next.p)->length); | |
556 | const char_type what = *reinterpret_cast<const char_type*>(static_cast<const re_literal*>(rep->next.p) + 1); | |
557 | // | |
558 | // start by working out how much we can skip: | |
559 | // | |
560 | bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); | |
561 | std::size_t count, desired; | |
562 | if(::boost::is_random_access_iterator<BidiIterator>::value) | |
563 | { | |
564 | desired = | |
565 | (std::min)( | |
566 | (std::size_t)(greedy ? rep->max : rep->min), | |
567 | (std::size_t)::boost::BOOST_REGEX_DETAIL_NS::distance(position, last)); | |
568 | count = desired; | |
569 | ++desired; | |
570 | if(icase) | |
571 | { | |
572 | while(--desired && (traits_inst.translate_nocase(*position) == what)) | |
573 | { | |
574 | ++position; | |
575 | } | |
576 | } | |
577 | else | |
578 | { | |
579 | while(--desired && (traits_inst.translate(*position) == what)) | |
580 | { | |
581 | ++position; | |
582 | } | |
583 | } | |
584 | count = count - desired; | |
585 | } | |
586 | else | |
587 | { | |
588 | count = 0; | |
589 | desired = greedy ? rep->max : rep->min; | |
590 | while((count < desired) && (position != last) && (traits_inst.translate(*position, icase) == what)) | |
591 | { | |
592 | ++position; | |
593 | ++count; | |
594 | } | |
595 | } | |
596 | if((rep->leading) && (count < rep->max) && greedy) | |
597 | restart = position; | |
598 | if(count < rep->min) | |
599 | return false; | |
600 | ||
601 | if(greedy) | |
602 | return backtrack_till_match(count - rep->min); | |
603 | ||
604 | // non-greedy, keep trying till we get a match: | |
605 | BidiIterator save_pos; | |
606 | do | |
607 | { | |
608 | while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip)) | |
609 | { | |
610 | if((traits_inst.translate(*position, icase) == what)) | |
611 | { | |
612 | ++position; | |
613 | ++count; | |
614 | } | |
615 | else | |
616 | return false; // counldn't repeat even though it was the only option | |
617 | } | |
618 | if((rep->leading) && (rep->max == UINT_MAX)) | |
619 | restart = position; | |
620 | pstate = rep->alt.p; | |
621 | save_pos = position; | |
622 | ++state_count; | |
623 | if(match_all_states()) | |
624 | return true; | |
625 | if((count >= rep->max) || !m_can_backtrack) | |
626 | return false; | |
627 | position = save_pos; | |
628 | if(position == last) | |
629 | return false; | |
630 | if(traits_inst.translate(*position, icase) == what) | |
631 | { | |
632 | ++position; | |
633 | ++count; | |
634 | } | |
635 | else | |
636 | { | |
637 | return false; | |
638 | } | |
639 | }while(true); | |
640 | #ifdef __BORLANDC__ | |
641 | #pragma option pop | |
642 | #endif | |
643 | #ifdef BOOST_MSVC | |
644 | #pragma warning(pop) | |
645 | #endif | |
646 | } | |
647 | ||
648 | template <class BidiIterator, class Allocator, class traits> | |
649 | bool perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat() | |
650 | { | |
651 | #ifdef BOOST_MSVC | |
652 | #pragma warning(push) | |
653 | #pragma warning(disable:4127) | |
654 | #endif | |
655 | #ifdef __BORLANDC__ | |
656 | #pragma option push -w-8008 -w-8066 -w-8004 | |
657 | #endif | |
658 | const re_repeat* rep = static_cast<const re_repeat*>(pstate); | |
659 | const unsigned char* map = static_cast<const re_set*>(rep->next.p)->_map; | |
660 | unsigned count = 0; | |
661 | // | |
662 | // start by working out how much we can skip: | |
663 | // | |
664 | bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); | |
665 | std::size_t desired = greedy ? rep->max : rep->min; | |
666 | if(::boost::is_random_access_iterator<BidiIterator>::value) | |
667 | { | |
668 | BidiIterator end = position; | |
669 | // Move end forward by "desired", preferably without using distance or advance if we can | |
670 | // as these can be slow for some iterator types. | |
671 | std::size_t len = (desired == (std::numeric_limits<std::size_t>::max)()) ? 0u : ::boost::BOOST_REGEX_DETAIL_NS::distance(position, last); | |
672 | if(desired >= len) | |
673 | end = last; | |
674 | else | |
675 | std::advance(end, desired); | |
676 | BidiIterator origin(position); | |
677 | while((position != end) && map[static_cast<unsigned char>(traits_inst.translate(*position, icase))]) | |
678 | { | |
679 | ++position; | |
680 | } | |
681 | count = (unsigned)::boost::BOOST_REGEX_DETAIL_NS::distance(origin, position); | |
682 | } | |
683 | else | |
684 | { | |
685 | while((count < desired) && (position != last) && map[static_cast<unsigned char>(traits_inst.translate(*position, icase))]) | |
686 | { | |
687 | ++position; | |
688 | ++count; | |
689 | } | |
690 | } | |
691 | if((rep->leading) && (count < rep->max) && greedy) | |
692 | restart = position; | |
693 | if(count < rep->min) | |
694 | return false; | |
695 | ||
696 | if(greedy) | |
697 | return backtrack_till_match(count - rep->min); | |
698 | ||
699 | // non-greedy, keep trying till we get a match: | |
700 | BidiIterator save_pos; | |
701 | do | |
702 | { | |
703 | while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip)) | |
704 | { | |
705 | if(map[static_cast<unsigned char>(traits_inst.translate(*position, icase))]) | |
706 | { | |
707 | ++position; | |
708 | ++count; | |
709 | } | |
710 | else | |
711 | return false; // counldn't repeat even though it was the only option | |
712 | } | |
713 | if((rep->leading) && (rep->max == UINT_MAX)) | |
714 | restart = position; | |
715 | pstate = rep->alt.p; | |
716 | save_pos = position; | |
717 | ++state_count; | |
718 | if(match_all_states()) | |
719 | return true; | |
720 | if((count >= rep->max) || !m_can_backtrack) | |
721 | return false; | |
722 | position = save_pos; | |
723 | if(position == last) | |
724 | return false; | |
725 | if(map[static_cast<unsigned char>(traits_inst.translate(*position, icase))]) | |
726 | { | |
727 | ++position; | |
728 | ++count; | |
729 | } | |
730 | else | |
731 | { | |
732 | return false; | |
733 | } | |
734 | }while(true); | |
735 | #ifdef __BORLANDC__ | |
736 | #pragma option pop | |
737 | #endif | |
738 | #ifdef BOOST_MSVC | |
739 | #pragma warning(pop) | |
740 | #endif | |
741 | } | |
742 | ||
743 | template <class BidiIterator, class Allocator, class traits> | |
744 | bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat() | |
745 | { | |
746 | #ifdef BOOST_MSVC | |
747 | #pragma warning(push) | |
748 | #pragma warning(disable:4127) | |
749 | #endif | |
750 | #ifdef __BORLANDC__ | |
751 | #pragma option push -w-8008 -w-8066 -w-8004 | |
752 | #endif | |
753 | typedef typename traits::char_class_type char_class_type; | |
754 | const re_repeat* rep = static_cast<const re_repeat*>(pstate); | |
755 | const re_set_long<char_class_type>* set = static_cast<const re_set_long<char_class_type>*>(pstate->next.p); | |
756 | unsigned count = 0; | |
757 | // | |
758 | // start by working out how much we can skip: | |
759 | // | |
760 | bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); | |
761 | std::size_t desired = greedy ? rep->max : rep->min; | |
762 | if(::boost::is_random_access_iterator<BidiIterator>::value) | |
763 | { | |
764 | BidiIterator end = position; | |
765 | // Move end forward by "desired", preferably without using distance or advance if we can | |
766 | // as these can be slow for some iterator types. | |
767 | std::size_t len = (desired == (std::numeric_limits<std::size_t>::max)()) ? 0u : ::boost::BOOST_REGEX_DETAIL_NS::distance(position, last); | |
768 | if(desired >= len) | |
769 | end = last; | |
770 | else | |
771 | std::advance(end, desired); | |
772 | BidiIterator origin(position); | |
773 | while((position != end) && (position != re_is_set_member(position, last, set, re.get_data(), icase))) | |
774 | { | |
775 | ++position; | |
776 | } | |
777 | count = (unsigned)::boost::BOOST_REGEX_DETAIL_NS::distance(origin, position); | |
778 | } | |
779 | else | |
780 | { | |
781 | while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data(), icase))) | |
782 | { | |
783 | ++position; | |
784 | ++count; | |
785 | } | |
786 | } | |
787 | if((rep->leading) && (count < rep->max) && greedy) | |
788 | restart = position; | |
789 | if(count < rep->min) | |
790 | return false; | |
791 | ||
792 | if(greedy) | |
793 | return backtrack_till_match(count - rep->min); | |
794 | ||
795 | // non-greedy, keep trying till we get a match: | |
796 | BidiIterator save_pos; | |
797 | do | |
798 | { | |
799 | while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip)) | |
800 | { | |
801 | if(position != re_is_set_member(position, last, set, re.get_data(), icase)) | |
802 | { | |
803 | ++position; | |
804 | ++count; | |
805 | } | |
806 | else | |
807 | return false; // counldn't repeat even though it was the only option | |
808 | } | |
809 | if((rep->leading) && (rep->max == UINT_MAX)) | |
810 | restart = position; | |
811 | pstate = rep->alt.p; | |
812 | save_pos = position; | |
813 | ++state_count; | |
814 | if(match_all_states()) | |
815 | return true; | |
816 | if((count >= rep->max) || !m_can_backtrack) | |
817 | return false; | |
818 | position = save_pos; | |
819 | if(position == last) | |
820 | return false; | |
821 | if(position != re_is_set_member(position, last, set, re.get_data(), icase)) | |
822 | { | |
823 | ++position; | |
824 | ++count; | |
825 | } | |
826 | else | |
827 | { | |
828 | return false; | |
829 | } | |
830 | }while(true); | |
831 | #ifdef __BORLANDC__ | |
832 | #pragma option pop | |
833 | #endif | |
834 | #ifdef BOOST_MSVC | |
835 | #pragma warning(pop) | |
836 | #endif | |
837 | } | |
838 | ||
839 | template <class BidiIterator, class Allocator, class traits> | |
840 | bool perl_matcher<BidiIterator, Allocator, traits>::backtrack_till_match(std::size_t count) | |
841 | { | |
842 | #ifdef BOOST_MSVC | |
843 | #pragma warning(push) | |
844 | #pragma warning(disable:4127) | |
845 | #endif | |
846 | if(!m_can_backtrack) | |
847 | return false; | |
848 | if((m_match_flags & match_partial) && (position == last)) | |
849 | m_has_partial_match = true; | |
850 | ||
851 | const re_repeat* rep = static_cast<const re_repeat*>(pstate); | |
852 | BidiIterator backtrack = position; | |
853 | if(position == last) | |
854 | { | |
855 | if(rep->can_be_null & mask_skip) | |
856 | { | |
857 | pstate = rep->alt.p; | |
858 | if(match_all_states()) | |
859 | return true; | |
860 | } | |
861 | if(count) | |
862 | { | |
863 | position = --backtrack; | |
864 | --count; | |
865 | } | |
866 | else | |
867 | return false; | |
868 | } | |
869 | do | |
870 | { | |
871 | while(count && !can_start(*position, rep->_map, mask_skip)) | |
872 | { | |
873 | --position; | |
874 | --count; | |
875 | ++state_count; | |
876 | } | |
877 | pstate = rep->alt.p; | |
878 | backtrack = position; | |
879 | if(match_all_states()) | |
880 | return true; | |
881 | if(count == 0) | |
882 | return false; | |
883 | position = --backtrack; | |
884 | ++state_count; | |
885 | --count; | |
886 | }while(true); | |
887 | #ifdef BOOST_MSVC | |
888 | #pragma warning(pop) | |
889 | #endif | |
890 | } | |
891 | ||
892 | template <class BidiIterator, class Allocator, class traits> | |
893 | bool perl_matcher<BidiIterator, Allocator, traits>::match_recursion() | |
894 | { | |
895 | BOOST_ASSERT(pstate->type == syntax_element_recurse); | |
896 | // | |
897 | // Set new call stack: | |
898 | // | |
899 | if(recursion_stack.capacity() == 0) | |
900 | { | |
901 | recursion_stack.reserve(50); | |
902 | } | |
903 | recursion_stack.push_back(recursion_info<results_type>()); | |
904 | recursion_stack.back().preturn_address = pstate->next.p; | |
905 | recursion_stack.back().results = *m_presult; | |
906 | recursion_stack.back().repeater_stack = next_count; | |
907 | pstate = static_cast<const re_jump*>(pstate)->alt.p; | |
908 | recursion_stack.back().idx = static_cast<const re_brace*>(pstate)->index; | |
909 | ||
910 | repeater_count<BidiIterator>* saved = next_count; | |
911 | repeater_count<BidiIterator> r(&next_count); // resets all repeat counts since we're recursing and starting fresh on those | |
912 | next_count = &r; | |
913 | bool can_backtrack = m_can_backtrack; | |
914 | bool result = match_all_states(); | |
915 | m_can_backtrack = can_backtrack; | |
916 | next_count = saved; | |
917 | ||
918 | if(!result) | |
919 | { | |
920 | next_count = recursion_stack.back().repeater_stack; | |
921 | *m_presult = recursion_stack.back().results; | |
922 | recursion_stack.pop_back(); | |
923 | return false; | |
924 | } | |
925 | return true; | |
926 | } | |
927 | ||
928 | template <class BidiIterator, class Allocator, class traits> | |
929 | bool perl_matcher<BidiIterator, Allocator, traits>::match_endmark() | |
930 | { | |
931 | int index = static_cast<const re_brace*>(pstate)->index; | |
932 | icase = static_cast<const re_brace*>(pstate)->icase; | |
933 | if(index > 0) | |
934 | { | |
935 | if((m_match_flags & match_nosubs) == 0) | |
936 | { | |
937 | m_presult->set_second(position, index); | |
938 | } | |
939 | if(!recursion_stack.empty()) | |
940 | { | |
941 | if(index == recursion_stack.back().idx) | |
942 | { | |
943 | recursion_info<results_type> saved = recursion_stack.back(); | |
944 | recursion_stack.pop_back(); | |
945 | pstate = saved.preturn_address; | |
946 | repeater_count<BidiIterator>* saved_count = next_count; | |
947 | next_count = saved.repeater_stack; | |
948 | *m_presult = saved.results; | |
949 | if(!match_all_states()) | |
950 | { | |
951 | recursion_stack.push_back(saved); | |
952 | next_count = saved_count; | |
953 | return false; | |
954 | } | |
955 | } | |
956 | } | |
957 | } | |
958 | else if((index < 0) && (index != -4)) | |
959 | { | |
960 | // matched forward lookahead: | |
961 | pstate = 0; | |
962 | return true; | |
963 | } | |
964 | pstate = pstate ? pstate->next.p : 0; | |
965 | return true; | |
966 | } | |
967 | ||
968 | template <class BidiIterator, class Allocator, class traits> | |
969 | bool perl_matcher<BidiIterator, Allocator, traits>::match_match() | |
970 | { | |
971 | if(!recursion_stack.empty()) | |
972 | { | |
973 | BOOST_ASSERT(0 == recursion_stack.back().idx); | |
974 | const re_syntax_base* saved_state = pstate = recursion_stack.back().preturn_address; | |
975 | *m_presult = recursion_stack.back().results; | |
976 | recursion_stack.pop_back(); | |
977 | if(!match_all_states()) | |
978 | { | |
979 | recursion_stack.push_back(recursion_info<results_type>()); | |
980 | recursion_stack.back().preturn_address = saved_state; | |
981 | recursion_stack.back().results = *m_presult; | |
982 | return false; | |
983 | } | |
984 | return true; | |
985 | } | |
986 | if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first)) | |
987 | return false; | |
988 | if((m_match_flags & match_all) && (position != last)) | |
989 | return false; | |
990 | if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base)) | |
991 | return false; | |
992 | m_presult->set_second(position); | |
993 | pstate = 0; | |
994 | m_has_found_match = true; | |
995 | if((m_match_flags & match_posix) == match_posix) | |
996 | { | |
997 | m_result.maybe_assign(*m_presult); | |
998 | if((m_match_flags & match_any) == 0) | |
999 | return false; | |
1000 | } | |
1001 | #ifdef BOOST_REGEX_MATCH_EXTRA | |
1002 | if(match_extra & m_match_flags) | |
1003 | { | |
1004 | for(unsigned i = 0; i < m_presult->size(); ++i) | |
1005 | if((*m_presult)[i].matched) | |
1006 | ((*m_presult)[i]).get_captures().push_back((*m_presult)[i]); | |
1007 | } | |
1008 | #endif | |
1009 | return true; | |
1010 | } | |
1011 | ||
1012 | template <class BidiIterator, class Allocator, class traits> | |
1013 | bool perl_matcher<BidiIterator, Allocator, traits>::match_commit() | |
1014 | { | |
1015 | m_can_backtrack = false; | |
1016 | int action = static_cast<const re_commit*>(pstate)->action; | |
1017 | switch(action) | |
1018 | { | |
1019 | case commit_commit: | |
1020 | restart = last; | |
1021 | break; | |
1022 | case commit_skip: | |
1023 | restart = position; | |
1024 | break; | |
1025 | } | |
1026 | pstate = pstate->next.p; | |
1027 | return true; | |
1028 | } | |
1029 | ||
1030 | template <class BidiIterator, class Allocator, class traits> | |
1031 | bool perl_matcher<BidiIterator, Allocator, traits>::match_then() | |
1032 | { | |
1033 | pstate = pstate->next.p; | |
1034 | if(match_all_states()) | |
1035 | return true; | |
1036 | m_can_backtrack = false; | |
1037 | m_have_then = true; | |
1038 | return false; | |
1039 | } | |
1040 | ||
1041 | template <class BidiIterator, class Allocator, class traits> | |
1042 | bool perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case() | |
1043 | { | |
1044 | // change our case sensitivity: | |
1045 | bool oldcase = this->icase; | |
1046 | this->icase = static_cast<const re_case*>(pstate)->icase; | |
1047 | pstate = pstate->next.p; | |
1048 | bool result = match_all_states(); | |
1049 | this->icase = oldcase; | |
1050 | return result; | |
1051 | } | |
1052 | ||
1053 | ||
1054 | ||
1055 | template <class BidiIterator, class Allocator, class traits> | |
1056 | bool perl_matcher<BidiIterator, Allocator, traits>::skip_until_paren(int index, bool have_match) | |
1057 | { | |
1058 | while(pstate) | |
1059 | { | |
1060 | if(pstate->type == syntax_element_endmark) | |
1061 | { | |
1062 | if(static_cast<const re_brace*>(pstate)->index == index) | |
1063 | { | |
1064 | if(have_match) | |
1065 | return this->match_endmark(); | |
1066 | pstate = pstate->next.p; | |
1067 | return true; | |
1068 | } | |
1069 | else | |
1070 | { | |
1071 | // Unenclosed closing ), occurs when (*ACCEPT) is inside some other | |
1072 | // parenthesis which may or may not have other side effects associated with it. | |
1073 | bool r = match_endmark(); | |
1074 | m_have_accept = true; | |
1075 | if(!pstate) | |
1076 | return r; | |
1077 | } | |
1078 | continue; | |
1079 | } | |
1080 | else if(pstate->type == syntax_element_match) | |
1081 | return true; | |
1082 | else if(pstate->type == syntax_element_startmark) | |
1083 | { | |
1084 | int idx = static_cast<const re_brace*>(pstate)->index; | |
1085 | pstate = pstate->next.p; | |
1086 | skip_until_paren(idx, false); | |
1087 | continue; | |
1088 | } | |
1089 | pstate = pstate->next.p; | |
1090 | } | |
1091 | return true; | |
1092 | } | |
1093 | ||
1094 | ||
1095 | } // namespace BOOST_REGEX_DETAIL_NS | |
1096 | } // namespace boost | |
1097 | #ifdef BOOST_MSVC | |
1098 | #pragma warning(pop) | |
1099 | #endif | |
1100 | ||
1101 | #ifdef BOOST_MSVC | |
1102 | #pragma warning(push) | |
1103 | #pragma warning(disable: 4103) | |
1104 | #endif | |
1105 | #ifdef BOOST_HAS_ABI_HEADERS | |
1106 | # include BOOST_ABI_SUFFIX | |
1107 | #endif | |
1108 | #ifdef BOOST_MSVC | |
1109 | #pragma warning(pop) | |
1110 | #endif | |
1111 | ||
1112 | #endif | |
1113 |