]>
Commit | Line | Data |
---|---|---|
1e59de90 TL |
1 | /* |
2 | * | |
3 | * Copyright (c) 2002 | |
4 | * John Maddock | |
5 | * | |
6 | * Use, modification and distribution are subject to the | |
7 | * Boost Software License, Version 1.0. (See accompanying file | |
8 | * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
9 | * | |
10 | */ | |
11 | ||
12 | /* | |
13 | * LOCATION: see http://www.boost.org for most recent version. | |
14 | * FILE perl_matcher_common.hpp | |
15 | * VERSION see <boost/version.hpp> | |
16 | * DESCRIPTION: Definitions of perl_matcher member functions that are | |
17 | * common to both the recursive and non-recursive versions. | |
18 | */ | |
19 | ||
20 | #ifndef BOOST_REGEX_V5_PERL_MATCHER_COMMON_HPP | |
21 | #define BOOST_REGEX_V5_PERL_MATCHER_COMMON_HPP | |
22 | ||
23 | #ifdef BOOST_REGEX_MSVC | |
24 | # pragma warning(push) | |
25 | #pragma warning(disable:4459) | |
26 | #if BOOST_REGEX_MSVC < 1910 | |
27 | #pragma warning(disable:4800) | |
28 | #endif | |
29 | #endif | |
30 | ||
31 | namespace boost{ | |
32 | namespace BOOST_REGEX_DETAIL_NS{ | |
33 | ||
34 | #ifdef BOOST_REGEX_MSVC | |
35 | # pragma warning(push) | |
36 | #pragma warning(disable:26812) | |
37 | #endif | |
38 | template <class BidiIterator, class Allocator, class traits> | |
39 | void perl_matcher<BidiIterator, Allocator, traits>::construct_init(const basic_regex<char_type, traits>& e, match_flag_type f) | |
40 | { | |
41 | typedef typename std::iterator_traits<BidiIterator>::iterator_category category; | |
42 | typedef typename basic_regex<char_type, traits>::flag_type expression_flag_type; | |
43 | ||
44 | if(e.empty()) | |
45 | { | |
46 | // precondition failure: e is not a valid regex. | |
47 | std::invalid_argument ex("Invalid regular expression object"); | |
48 | #ifndef BOOST_REGEX_STANDALONE | |
49 | boost::throw_exception(ex); | |
50 | #else | |
51 | throw e; | |
52 | #endif | |
53 | } | |
54 | pstate = 0; | |
55 | m_match_flags = f; | |
56 | estimate_max_state_count(static_cast<category*>(0)); | |
57 | expression_flag_type re_f = re.flags(); | |
58 | icase = re_f & regex_constants::icase; | |
59 | if(!(m_match_flags & (match_perl|match_posix))) | |
60 | { | |
61 | if((re_f & (regbase::main_option_type|regbase::no_perl_ex)) == 0) | |
62 | m_match_flags |= match_perl; | |
63 | else if((re_f & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex)) | |
64 | m_match_flags |= match_perl; | |
65 | else if((re_f & (regbase::main_option_type|regbase::literal)) == (regbase::literal)) | |
66 | m_match_flags |= match_perl; | |
67 | else | |
68 | m_match_flags |= match_posix; | |
69 | } | |
70 | if(m_match_flags & match_posix) | |
71 | { | |
72 | m_temp_match.reset(new match_results<BidiIterator, Allocator>()); | |
73 | m_presult = m_temp_match.get(); | |
74 | } | |
75 | else | |
76 | m_presult = &m_result; | |
77 | m_stack_base = 0; | |
78 | m_backup_state = 0; | |
79 | // find the value to use for matching word boundaries: | |
80 | m_word_mask = re.get_data().m_word_mask; | |
81 | // find bitmask to use for matching '.': | |
82 | match_any_mask = static_cast<unsigned char>((f & match_not_dot_newline) ? BOOST_REGEX_DETAIL_NS::test_not_newline : BOOST_REGEX_DETAIL_NS::test_newline); | |
83 | // Disable match_any if requested in the state machine: | |
84 | if(e.get_data().m_disable_match_any) | |
85 | m_match_flags &= regex_constants::match_not_any; | |
86 | } | |
87 | #ifdef BOOST_REGEX_MSVC | |
88 | # pragma warning(pop) | |
89 | #endif | |
90 | ||
91 | template <class BidiIterator, class Allocator, class traits> | |
92 | void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(std::random_access_iterator_tag*) | |
93 | { | |
94 | // | |
95 | // How many states should we allow our machine to visit before giving up? | |
96 | // This is a heuristic: it takes the greater of O(N^2) and O(NS^2) | |
97 | // where N is the length of the string, and S is the number of states | |
98 | // in the machine. It's tempting to up this to O(N^2S) or even O(N^2S^2) | |
99 | // but these take unreasonably amounts of time to bale out in pathological | |
100 | // cases. | |
101 | // | |
102 | // Calculate NS^2 first: | |
103 | // | |
104 | static const std::ptrdiff_t k = 100000; | |
105 | std::ptrdiff_t dist = std::distance(base, last); | |
106 | if(dist == 0) | |
107 | dist = 1; | |
108 | std::ptrdiff_t states = re.size(); | |
109 | if(states == 0) | |
110 | states = 1; | |
111 | if ((std::numeric_limits<std::ptrdiff_t>::max)() / states < states) | |
112 | { | |
113 | max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2); | |
114 | return; | |
115 | } | |
116 | states *= states; | |
117 | if((std::numeric_limits<std::ptrdiff_t>::max)() / dist < states) | |
118 | { | |
119 | max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2); | |
120 | return; | |
121 | } | |
122 | states *= dist; | |
123 | if((std::numeric_limits<std::ptrdiff_t>::max)() - k < states) | |
124 | { | |
125 | max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2); | |
126 | return; | |
127 | } | |
128 | states += k; | |
129 | ||
130 | max_state_count = states; | |
131 | ||
132 | // | |
133 | // Now calculate N^2: | |
134 | // | |
135 | states = dist; | |
136 | if((std::numeric_limits<std::ptrdiff_t>::max)() / dist < states) | |
137 | { | |
138 | max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2); | |
139 | return; | |
140 | } | |
141 | states *= dist; | |
142 | if((std::numeric_limits<std::ptrdiff_t>::max)() - k < states) | |
143 | { | |
144 | max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2); | |
145 | return; | |
146 | } | |
147 | states += k; | |
148 | // | |
149 | // N^2 can be a very large number indeed, to prevent things getting out | |
150 | // of control, cap the max states: | |
151 | // | |
152 | if(states > BOOST_REGEX_MAX_STATE_COUNT) | |
153 | states = BOOST_REGEX_MAX_STATE_COUNT; | |
154 | // | |
155 | // If (the possibly capped) N^2 is larger than our first estimate, | |
156 | // use this instead: | |
157 | // | |
158 | if(states > max_state_count) | |
159 | max_state_count = states; | |
160 | } | |
161 | ||
162 | template <class BidiIterator, class Allocator, class traits> | |
163 | inline void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(void*) | |
164 | { | |
165 | // we don't know how long the sequence is: | |
166 | max_state_count = BOOST_REGEX_MAX_STATE_COUNT; | |
167 | } | |
168 | ||
169 | template <class BidiIterator, class Allocator, class traits> | |
170 | inline bool perl_matcher<BidiIterator, Allocator, traits>::match() | |
171 | { | |
172 | return match_imp(); | |
173 | } | |
174 | ||
175 | template <class BidiIterator, class Allocator, class traits> | |
176 | bool perl_matcher<BidiIterator, Allocator, traits>::match_imp() | |
177 | { | |
178 | // initialise our stack if we are non-recursive: | |
179 | save_state_init init(&m_stack_base, &m_backup_state); | |
180 | used_block_count = BOOST_REGEX_MAX_BLOCKS; | |
181 | #if !defined(BOOST_NO_EXCEPTIONS) | |
182 | try{ | |
183 | #endif | |
184 | ||
185 | // reset our state machine: | |
186 | position = base; | |
187 | search_base = base; | |
188 | state_count = 0; | |
189 | m_match_flags |= regex_constants::match_all; | |
190 | m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast<typename results_type::size_type>(1u + re.mark_count()), search_base, last); | |
191 | m_presult->set_base(base); | |
192 | m_presult->set_named_subs(this->re.get_named_subs()); | |
193 | if(m_match_flags & match_posix) | |
194 | m_result = *m_presult; | |
195 | verify_options(re.flags(), m_match_flags); | |
196 | if(0 == match_prefix()) | |
197 | return false; | |
198 | return (m_result[0].second == last) && (m_result[0].first == base); | |
199 | ||
200 | #if !defined(BOOST_NO_EXCEPTIONS) | |
201 | } | |
202 | catch(...) | |
203 | { | |
204 | // unwind all pushed states, apart from anything else this | |
205 | // ensures that all the states are correctly destructed | |
206 | // not just the memory freed. | |
207 | while(unwind(true)){} | |
208 | throw; | |
209 | } | |
210 | #endif | |
211 | } | |
212 | ||
213 | template <class BidiIterator, class Allocator, class traits> | |
214 | inline bool perl_matcher<BidiIterator, Allocator, traits>::find() | |
215 | { | |
216 | return find_imp(); | |
217 | } | |
218 | ||
219 | template <class BidiIterator, class Allocator, class traits> | |
220 | bool perl_matcher<BidiIterator, Allocator, traits>::find_imp() | |
221 | { | |
222 | static matcher_proc_type const s_find_vtable[7] = | |
223 | { | |
224 | &perl_matcher<BidiIterator, Allocator, traits>::find_restart_any, | |
225 | &perl_matcher<BidiIterator, Allocator, traits>::find_restart_word, | |
226 | &perl_matcher<BidiIterator, Allocator, traits>::find_restart_line, | |
227 | &perl_matcher<BidiIterator, Allocator, traits>::find_restart_buf, | |
228 | &perl_matcher<BidiIterator, Allocator, traits>::match_prefix, | |
229 | &perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit, | |
230 | &perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit, | |
231 | }; | |
232 | ||
233 | // initialise our stack if we are non-recursive: | |
234 | save_state_init init(&m_stack_base, &m_backup_state); | |
235 | used_block_count = BOOST_REGEX_MAX_BLOCKS; | |
236 | #if !defined(BOOST_NO_EXCEPTIONS) | |
237 | try{ | |
238 | #endif | |
239 | ||
240 | state_count = 0; | |
241 | if((m_match_flags & regex_constants::match_init) == 0) | |
242 | { | |
243 | // reset our state machine: | |
244 | search_base = position = base; | |
245 | pstate = re.get_first_state(); | |
246 | m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast<typename results_type::size_type>(1u + re.mark_count()), base, last); | |
247 | m_presult->set_base(base); | |
248 | m_presult->set_named_subs(this->re.get_named_subs()); | |
249 | m_match_flags |= regex_constants::match_init; | |
250 | } | |
251 | else | |
252 | { | |
253 | // start again: | |
254 | search_base = position = m_result[0].second; | |
255 | // If last match was null and match_not_null was not set then increment | |
256 | // our start position, otherwise we go into an infinite loop: | |
257 | if(((m_match_flags & match_not_null) == 0) && (m_result.length() == 0)) | |
258 | { | |
259 | if(position == last) | |
260 | return false; | |
261 | else | |
262 | ++position; | |
263 | } | |
264 | // reset $` start: | |
265 | m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast<typename results_type::size_type>(1u + re.mark_count()), search_base, last); | |
266 | //if((base != search_base) && (base == backstop)) | |
267 | // m_match_flags |= match_prev_avail; | |
268 | } | |
269 | if(m_match_flags & match_posix) | |
270 | { | |
271 | m_result.set_size(static_cast<typename results_type::size_type>(1u + re.mark_count()), base, last); | |
272 | m_result.set_base(base); | |
273 | } | |
274 | ||
275 | verify_options(re.flags(), m_match_flags); | |
276 | // find out what kind of expression we have: | |
277 | unsigned type = (m_match_flags & match_continuous) ? | |
278 | static_cast<unsigned int>(regbase::restart_continue) | |
279 | : static_cast<unsigned int>(re.get_restart_type()); | |
280 | ||
281 | // call the appropriate search routine: | |
282 | matcher_proc_type proc = s_find_vtable[type]; | |
283 | return (this->*proc)(); | |
284 | ||
285 | #if !defined(BOOST_NO_EXCEPTIONS) | |
286 | } | |
287 | catch(...) | |
288 | { | |
289 | // unwind all pushed states, apart from anything else this | |
290 | // ensures that all the states are correctly destructed | |
291 | // not just the memory freed. | |
292 | while(unwind(true)){} | |
293 | throw; | |
294 | } | |
295 | #endif | |
296 | } | |
297 | ||
298 | template <class BidiIterator, class Allocator, class traits> | |
299 | bool perl_matcher<BidiIterator, Allocator, traits>::match_prefix() | |
300 | { | |
301 | m_has_partial_match = false; | |
302 | m_has_found_match = false; | |
303 | pstate = re.get_first_state(); | |
304 | m_presult->set_first(position); | |
305 | restart = position; | |
306 | match_all_states(); | |
307 | if(!m_has_found_match && m_has_partial_match && (m_match_flags & match_partial)) | |
308 | { | |
309 | m_has_found_match = true; | |
310 | m_presult->set_second(last, 0, false); | |
311 | position = last; | |
312 | if((m_match_flags & match_posix) == match_posix) | |
313 | { | |
314 | m_result.maybe_assign(*m_presult); | |
315 | } | |
316 | } | |
317 | #ifdef BOOST_REGEX_MATCH_EXTRA | |
318 | if(m_has_found_match && (match_extra & m_match_flags)) | |
319 | { | |
320 | // | |
321 | // we have a match, reverse the capture information: | |
322 | // | |
323 | for(unsigned i = 0; i < m_presult->size(); ++i) | |
324 | { | |
325 | typename sub_match<BidiIterator>::capture_sequence_type & seq = ((*m_presult)[i]).get_captures(); | |
326 | std::reverse(seq.begin(), seq.end()); | |
327 | } | |
328 | } | |
329 | #endif | |
330 | if(!m_has_found_match) | |
331 | position = restart; // reset search postion | |
332 | return m_has_found_match; | |
333 | } | |
334 | ||
335 | template <class BidiIterator, class Allocator, class traits> | |
336 | bool perl_matcher<BidiIterator, Allocator, traits>::match_literal() | |
337 | { | |
338 | unsigned int len = static_cast<const re_literal*>(pstate)->length; | |
339 | const char_type* what = reinterpret_cast<const char_type*>(static_cast<const re_literal*>(pstate) + 1); | |
340 | // | |
341 | // compare string with what we stored in | |
342 | // our records: | |
343 | for(unsigned int i = 0; i < len; ++i, ++position) | |
344 | { | |
345 | if((position == last) || (traits_inst.translate(*position, icase) != what[i])) | |
346 | return false; | |
347 | } | |
348 | pstate = pstate->next.p; | |
349 | return true; | |
350 | } | |
351 | ||
352 | template <class BidiIterator, class Allocator, class traits> | |
353 | bool perl_matcher<BidiIterator, Allocator, traits>::match_start_line() | |
354 | { | |
355 | if(position == backstop) | |
356 | { | |
357 | if((m_match_flags & match_prev_avail) == 0) | |
358 | { | |
359 | if((m_match_flags & match_not_bol) == 0) | |
360 | { | |
361 | pstate = pstate->next.p; | |
362 | return true; | |
363 | } | |
364 | return false; | |
365 | } | |
366 | } | |
367 | else if(m_match_flags & match_single_line) | |
368 | return false; | |
369 | ||
370 | // check the previous value character: | |
371 | BidiIterator t(position); | |
372 | --t; | |
373 | if(position != last) | |
374 | { | |
375 | if(is_separator(*t) && !((*t == static_cast<char_type>('\r')) && (*position == static_cast<char_type>('\n'))) ) | |
376 | { | |
377 | pstate = pstate->next.p; | |
378 | return true; | |
379 | } | |
380 | } | |
381 | else if(is_separator(*t)) | |
382 | { | |
383 | pstate = pstate->next.p; | |
384 | return true; | |
385 | } | |
386 | return false; | |
387 | } | |
388 | ||
389 | template <class BidiIterator, class Allocator, class traits> | |
390 | bool perl_matcher<BidiIterator, Allocator, traits>::match_end_line() | |
391 | { | |
392 | if(position != last) | |
393 | { | |
394 | if(m_match_flags & match_single_line) | |
395 | return false; | |
396 | // we're not yet at the end so *first is always valid: | |
397 | if(is_separator(*position)) | |
398 | { | |
399 | if((position != backstop) || (m_match_flags & match_prev_avail)) | |
400 | { | |
401 | // check that we're not in the middle of \r\n sequence | |
402 | BidiIterator t(position); | |
403 | --t; | |
404 | if((*t == static_cast<char_type>('\r')) && (*position == static_cast<char_type>('\n'))) | |
405 | { | |
406 | return false; | |
407 | } | |
408 | } | |
409 | pstate = pstate->next.p; | |
410 | return true; | |
411 | } | |
412 | } | |
413 | else if((m_match_flags & match_not_eol) == 0) | |
414 | { | |
415 | pstate = pstate->next.p; | |
416 | return true; | |
417 | } | |
418 | return false; | |
419 | } | |
420 | ||
421 | template <class BidiIterator, class Allocator, class traits> | |
422 | bool perl_matcher<BidiIterator, Allocator, traits>::match_wild() | |
423 | { | |
424 | if(position == last) | |
425 | return false; | |
426 | if(is_separator(*position) && ((match_any_mask & static_cast<const re_dot*>(pstate)->mask) == 0)) | |
427 | return false; | |
428 | if((*position == char_type(0)) && (m_match_flags & match_not_dot_null)) | |
429 | return false; | |
430 | pstate = pstate->next.p; | |
431 | ++position; | |
432 | return true; | |
433 | } | |
434 | ||
435 | template <class BidiIterator, class Allocator, class traits> | |
436 | bool perl_matcher<BidiIterator, Allocator, traits>::match_word_boundary() | |
437 | { | |
438 | bool b; // indcates whether next character is a word character | |
439 | if(position != last) | |
440 | { | |
441 | // prev and this character must be opposites: | |
442 | b = traits_inst.isctype(*position, m_word_mask); | |
443 | } | |
444 | else | |
445 | { | |
446 | if (m_match_flags & match_not_eow) | |
447 | return false; | |
448 | b = false; | |
449 | } | |
450 | if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) | |
451 | { | |
452 | if(m_match_flags & match_not_bow) | |
453 | return false; | |
454 | else | |
455 | b ^= false; | |
456 | } | |
457 | else | |
458 | { | |
459 | --position; | |
460 | b ^= traits_inst.isctype(*position, m_word_mask); | |
461 | ++position; | |
462 | } | |
463 | if(b) | |
464 | { | |
465 | pstate = pstate->next.p; | |
466 | return true; | |
467 | } | |
468 | return false; // no match if we get to here... | |
469 | } | |
470 | ||
471 | template <class BidiIterator, class Allocator, class traits> | |
472 | bool perl_matcher<BidiIterator, Allocator, traits>::match_within_word() | |
473 | { | |
474 | bool b = !match_word_boundary(); | |
475 | if(b) | |
476 | pstate = pstate->next.p; | |
477 | return b; | |
478 | /* | |
479 | if(position == last) | |
480 | return false; | |
481 | // both prev and this character must be m_word_mask: | |
482 | bool prev = traits_inst.isctype(*position, m_word_mask); | |
483 | { | |
484 | bool b; | |
485 | if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) | |
486 | return false; | |
487 | else | |
488 | { | |
489 | --position; | |
490 | b = traits_inst.isctype(*position, m_word_mask); | |
491 | ++position; | |
492 | } | |
493 | if(b == prev) | |
494 | { | |
495 | pstate = pstate->next.p; | |
496 | return true; | |
497 | } | |
498 | } | |
499 | return false; | |
500 | */ | |
501 | } | |
502 | ||
503 | template <class BidiIterator, class Allocator, class traits> | |
504 | bool perl_matcher<BidiIterator, Allocator, traits>::match_word_start() | |
505 | { | |
506 | if(position == last) | |
507 | return false; // can't be starting a word if we're already at the end of input | |
508 | if(!traits_inst.isctype(*position, m_word_mask)) | |
509 | return false; // next character isn't a word character | |
510 | if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) | |
511 | { | |
512 | if(m_match_flags & match_not_bow) | |
513 | return false; // no previous input | |
514 | } | |
515 | else | |
516 | { | |
517 | // otherwise inside buffer: | |
518 | BidiIterator t(position); | |
519 | --t; | |
520 | if(traits_inst.isctype(*t, m_word_mask)) | |
521 | return false; // previous character not non-word | |
522 | } | |
523 | // OK we have a match: | |
524 | pstate = pstate->next.p; | |
525 | return true; | |
526 | } | |
527 | ||
528 | template <class BidiIterator, class Allocator, class traits> | |
529 | bool perl_matcher<BidiIterator, Allocator, traits>::match_word_end() | |
530 | { | |
531 | if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) | |
532 | return false; // start of buffer can't be end of word | |
533 | BidiIterator t(position); | |
534 | --t; | |
535 | if(traits_inst.isctype(*t, m_word_mask) == false) | |
536 | return false; // previous character wasn't a word character | |
537 | ||
538 | if(position == last) | |
539 | { | |
540 | if(m_match_flags & match_not_eow) | |
541 | return false; // end of buffer but not end of word | |
542 | } | |
543 | else | |
544 | { | |
545 | // otherwise inside buffer: | |
546 | if(traits_inst.isctype(*position, m_word_mask)) | |
547 | return false; // next character is a word character | |
548 | } | |
549 | pstate = pstate->next.p; | |
550 | return true; // if we fall through to here then we've succeeded | |
551 | } | |
552 | ||
553 | template <class BidiIterator, class Allocator, class traits> | |
554 | bool perl_matcher<BidiIterator, Allocator, traits>::match_buffer_start() | |
555 | { | |
556 | if((position != backstop) || (m_match_flags & match_not_bob)) | |
557 | return false; | |
558 | // OK match: | |
559 | pstate = pstate->next.p; | |
560 | return true; | |
561 | } | |
562 | ||
563 | template <class BidiIterator, class Allocator, class traits> | |
564 | bool perl_matcher<BidiIterator, Allocator, traits>::match_buffer_end() | |
565 | { | |
566 | if((position != last) || (m_match_flags & match_not_eob)) | |
567 | return false; | |
568 | // OK match: | |
569 | pstate = pstate->next.p; | |
570 | return true; | |
571 | } | |
572 | ||
573 | template <class BidiIterator, class Allocator, class traits> | |
574 | bool perl_matcher<BidiIterator, Allocator, traits>::match_backref() | |
575 | { | |
576 | // | |
577 | // Compare with what we previously matched. | |
578 | // Note that this succeeds if the backref did not partisipate | |
579 | // in the match, this is in line with ECMAScript, but not Perl | |
580 | // or PCRE. | |
581 | // | |
582 | int index = static_cast<const re_brace*>(pstate)->index; | |
583 | if(index >= hash_value_mask) | |
584 | { | |
585 | named_subexpressions::range_type r = re.get_data().equal_range(index); | |
586 | BOOST_REGEX_ASSERT(r.first != r.second); | |
587 | do | |
588 | { | |
589 | index = r.first->index; | |
590 | ++r.first; | |
591 | }while((r.first != r.second) && ((*m_presult)[index].matched != true)); | |
592 | } | |
593 | ||
594 | if((m_match_flags & match_perl) && !(*m_presult)[index].matched) | |
595 | return false; | |
596 | ||
597 | BidiIterator i = (*m_presult)[index].first; | |
598 | BidiIterator j = (*m_presult)[index].second; | |
599 | while(i != j) | |
600 | { | |
601 | if((position == last) || (traits_inst.translate(*position, icase) != traits_inst.translate(*i, icase))) | |
602 | return false; | |
603 | ++i; | |
604 | ++position; | |
605 | } | |
606 | pstate = pstate->next.p; | |
607 | return true; | |
608 | } | |
609 | ||
610 | template <class BidiIterator, class Allocator, class traits> | |
611 | bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set() | |
612 | { | |
613 | typedef typename traits::char_class_type char_class_type; | |
614 | // let the traits class do the work: | |
615 | if(position == last) | |
616 | return false; | |
617 | BidiIterator t = re_is_set_member(position, last, static_cast<const re_set_long<char_class_type>*>(pstate), re.get_data(), icase); | |
618 | if(t != position) | |
619 | { | |
620 | pstate = pstate->next.p; | |
621 | position = t; | |
622 | return true; | |
623 | } | |
624 | return false; | |
625 | } | |
626 | ||
627 | template <class BidiIterator, class Allocator, class traits> | |
628 | bool perl_matcher<BidiIterator, Allocator, traits>::match_set() | |
629 | { | |
630 | if(position == last) | |
631 | return false; | |
632 | if(static_cast<const re_set*>(pstate)->_map[static_cast<unsigned char>(traits_inst.translate(*position, icase))]) | |
633 | { | |
634 | pstate = pstate->next.p; | |
635 | ++position; | |
636 | return true; | |
637 | } | |
638 | return false; | |
639 | } | |
640 | ||
641 | template <class BidiIterator, class Allocator, class traits> | |
642 | bool perl_matcher<BidiIterator, Allocator, traits>::match_jump() | |
643 | { | |
644 | pstate = static_cast<const re_jump*>(pstate)->alt.p; | |
645 | return true; | |
646 | } | |
647 | ||
648 | template <class BidiIterator, class Allocator, class traits> | |
649 | bool perl_matcher<BidiIterator, Allocator, traits>::match_combining() | |
650 | { | |
651 | if(position == last) | |
652 | return false; | |
653 | if(is_combining(traits_inst.translate(*position, icase))) | |
654 | return false; | |
655 | ++position; | |
656 | while((position != last) && is_combining(traits_inst.translate(*position, icase))) | |
657 | ++position; | |
658 | pstate = pstate->next.p; | |
659 | return true; | |
660 | } | |
661 | ||
662 | template <class BidiIterator, class Allocator, class traits> | |
663 | bool perl_matcher<BidiIterator, Allocator, traits>::match_soft_buffer_end() | |
664 | { | |
665 | if(m_match_flags & match_not_eob) | |
666 | return false; | |
667 | BidiIterator p(position); | |
668 | while((p != last) && is_separator(traits_inst.translate(*p, icase)))++p; | |
669 | if(p != last) | |
670 | return false; | |
671 | pstate = pstate->next.p; | |
672 | return true; | |
673 | } | |
674 | ||
675 | template <class BidiIterator, class Allocator, class traits> | |
676 | bool perl_matcher<BidiIterator, Allocator, traits>::match_restart_continue() | |
677 | { | |
678 | if(position == search_base) | |
679 | { | |
680 | pstate = pstate->next.p; | |
681 | return true; | |
682 | } | |
683 | return false; | |
684 | } | |
685 | ||
686 | template <class BidiIterator, class Allocator, class traits> | |
687 | bool perl_matcher<BidiIterator, Allocator, traits>::match_backstep() | |
688 | { | |
689 | #ifdef BOOST_REGEX_MSVC | |
690 | #pragma warning(push) | |
691 | #pragma warning(disable:4127) | |
692 | #endif | |
693 | if( ::boost::is_random_access_iterator<BidiIterator>::value) | |
694 | { | |
695 | std::ptrdiff_t maxlen = std::distance(backstop, position); | |
696 | if(maxlen < static_cast<const re_brace*>(pstate)->index) | |
697 | return false; | |
698 | std::advance(position, -static_cast<const re_brace*>(pstate)->index); | |
699 | } | |
700 | else | |
701 | { | |
702 | int c = static_cast<const re_brace*>(pstate)->index; | |
703 | while(c--) | |
704 | { | |
705 | if(position == backstop) | |
706 | return false; | |
707 | --position; | |
708 | } | |
709 | } | |
710 | pstate = pstate->next.p; | |
711 | return true; | |
712 | #ifdef BOOST_REGEX_MSVC | |
713 | #pragma warning(pop) | |
714 | #endif | |
715 | } | |
716 | ||
717 | template <class BidiIterator, class Allocator, class traits> | |
718 | inline bool perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref() | |
719 | { | |
720 | // return true if marked sub-expression N has been matched: | |
721 | int index = static_cast<const re_brace*>(pstate)->index; | |
722 | bool result = false; | |
723 | if(index == 9999) | |
724 | { | |
725 | // Magic value for a (DEFINE) block: | |
726 | return false; | |
727 | } | |
728 | else if(index > 0) | |
729 | { | |
730 | // Have we matched subexpression "index"? | |
731 | // Check if index is a hash value: | |
732 | if(index >= hash_value_mask) | |
733 | { | |
734 | named_subexpressions::range_type r = re.get_data().equal_range(index); | |
735 | while(r.first != r.second) | |
736 | { | |
737 | if((*m_presult)[r.first->index].matched) | |
738 | { | |
739 | result = true; | |
740 | break; | |
741 | } | |
742 | ++r.first; | |
743 | } | |
744 | } | |
745 | else | |
746 | { | |
747 | result = (*m_presult)[index].matched; | |
748 | } | |
749 | pstate = pstate->next.p; | |
750 | } | |
751 | else | |
752 | { | |
753 | // Have we recursed into subexpression "index"? | |
754 | // If index == 0 then check for any recursion at all, otherwise for recursion to -index-1. | |
755 | int idx = -(index+1); | |
756 | if(idx >= hash_value_mask) | |
757 | { | |
758 | named_subexpressions::range_type r = re.get_data().equal_range(idx); | |
759 | int stack_index = recursion_stack.empty() ? -1 : recursion_stack.back().idx; | |
760 | while(r.first != r.second) | |
761 | { | |
762 | result |= (stack_index == r.first->index); | |
763 | if(result)break; | |
764 | ++r.first; | |
765 | } | |
766 | } | |
767 | else | |
768 | { | |
769 | result = !recursion_stack.empty() && ((recursion_stack.back().idx == idx) || (index == 0)); | |
770 | } | |
771 | pstate = pstate->next.p; | |
772 | } | |
773 | return result; | |
774 | } | |
775 | ||
776 | template <class BidiIterator, class Allocator, class traits> | |
777 | bool perl_matcher<BidiIterator, Allocator, traits>::match_fail() | |
778 | { | |
779 | // Just force a backtrack: | |
780 | return false; | |
781 | } | |
782 | ||
783 | template <class BidiIterator, class Allocator, class traits> | |
784 | bool perl_matcher<BidiIterator, Allocator, traits>::match_accept() | |
785 | { | |
786 | if(!recursion_stack.empty()) | |
787 | { | |
788 | return skip_until_paren(recursion_stack.back().idx); | |
789 | } | |
790 | else | |
791 | { | |
792 | return skip_until_paren(INT_MAX); | |
793 | } | |
794 | } | |
795 | ||
796 | template <class BidiIterator, class Allocator, class traits> | |
797 | bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_any() | |
798 | { | |
799 | #ifdef BOOST_REGEX_MSVC | |
800 | #pragma warning(push) | |
801 | #pragma warning(disable:4127) | |
802 | #endif | |
803 | const unsigned char* _map = re.get_map(); | |
804 | while(true) | |
805 | { | |
806 | // skip everything we can't match: | |
807 | while((position != last) && !can_start(*position, _map, (unsigned char)mask_any) ) | |
808 | ++position; | |
809 | if(position == last) | |
810 | { | |
811 | // run out of characters, try a null match if possible: | |
812 | if(re.can_be_null()) | |
813 | return match_prefix(); | |
814 | break; | |
815 | } | |
816 | // now try and obtain a match: | |
817 | if(match_prefix()) | |
818 | return true; | |
819 | if(position == last) | |
820 | return false; | |
821 | ++position; | |
822 | } | |
823 | return false; | |
824 | #ifdef BOOST_REGEX_MSVC | |
825 | #pragma warning(pop) | |
826 | #endif | |
827 | } | |
828 | ||
829 | template <class BidiIterator, class Allocator, class traits> | |
830 | bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_word() | |
831 | { | |
832 | #ifdef BOOST_REGEX_MSVC | |
833 | #pragma warning(push) | |
834 | #pragma warning(disable:4127) | |
835 | #endif | |
836 | // do search optimised for word starts: | |
837 | const unsigned char* _map = re.get_map(); | |
838 | if((m_match_flags & match_prev_avail) || (position != base)) | |
839 | --position; | |
840 | else if(match_prefix()) | |
841 | return true; | |
842 | do | |
843 | { | |
844 | while((position != last) && traits_inst.isctype(*position, m_word_mask)) | |
845 | ++position; | |
846 | while((position != last) && !traits_inst.isctype(*position, m_word_mask)) | |
847 | ++position; | |
848 | if(position == last) | |
849 | break; | |
850 | ||
851 | if(can_start(*position, _map, (unsigned char)mask_any) ) | |
852 | { | |
853 | if(match_prefix()) | |
854 | return true; | |
855 | } | |
856 | if(position == last) | |
857 | break; | |
858 | } while(true); | |
859 | return false; | |
860 | #ifdef BOOST_REGEX_MSVC | |
861 | #pragma warning(pop) | |
862 | #endif | |
863 | } | |
864 | ||
865 | template <class BidiIterator, class Allocator, class traits> | |
866 | bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_line() | |
867 | { | |
868 | // do search optimised for line starts: | |
869 | const unsigned char* _map = re.get_map(); | |
870 | if(match_prefix()) | |
871 | return true; | |
872 | while(position != last) | |
873 | { | |
874 | while((position != last) && !is_separator(*position)) | |
875 | ++position; | |
876 | if(position == last) | |
877 | return false; | |
878 | ++position; | |
879 | if(position == last) | |
880 | { | |
881 | if(re.can_be_null() && match_prefix()) | |
882 | return true; | |
883 | return false; | |
884 | } | |
885 | ||
886 | if( can_start(*position, _map, (unsigned char)mask_any) ) | |
887 | { | |
888 | if(match_prefix()) | |
889 | return true; | |
890 | } | |
891 | if(position == last) | |
892 | return false; | |
893 | //++position; | |
894 | } | |
895 | return false; | |
896 | } | |
897 | ||
898 | template <class BidiIterator, class Allocator, class traits> | |
899 | bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_buf() | |
900 | { | |
901 | if((position == base) && ((m_match_flags & match_not_bob) == 0)) | |
902 | return match_prefix(); | |
903 | return false; | |
904 | } | |
905 | ||
906 | template <class BidiIterator, class Allocator, class traits> | |
907 | bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit() | |
908 | { | |
909 | return false; | |
910 | } | |
911 | ||
912 | } // namespace BOOST_REGEX_DETAIL_NS | |
913 | ||
914 | } // namespace boost | |
915 | ||
916 | #ifdef BOOST_REGEX_MSVC | |
917 | # pragma warning(pop) | |
918 | #endif | |
919 | ||
920 | #endif | |
921 |