]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/boost/regex/v4/basic_regex_parser.hpp
update sources to v12.2.3
[ceph.git] / ceph / src / boost / boost / regex / v4 / basic_regex_parser.hpp
1 /*
2 *
3 * Copyright (c) 2004
4 * John Maddock
5 *
6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 *
10 */
11
12 /*
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE basic_regex_parser.hpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Declares template class basic_regex_parser.
17 */
18
19 #ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
20 #define BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
21
22 #ifdef BOOST_MSVC
23 #pragma warning(push)
24 #pragma warning(disable: 4103)
25 #endif
26 #ifdef BOOST_HAS_ABI_HEADERS
27 # include BOOST_ABI_PREFIX
28 #endif
29 #ifdef BOOST_MSVC
30 #pragma warning(pop)
31 #endif
32
33 namespace boost{
34 namespace BOOST_REGEX_DETAIL_NS{
35
36 #ifdef BOOST_MSVC
37 #pragma warning(push)
38 #pragma warning(disable:4244 4800)
39 #endif
40
41 inline boost::intmax_t umax(mpl::false_ const&)
42 {
43 // Get out clause here, just in case numeric_limits is unspecialized:
44 return std::numeric_limits<boost::intmax_t>::is_specialized ? (std::numeric_limits<boost::intmax_t>::max)() : INT_MAX;
45 }
46 inline boost::intmax_t umax(mpl::true_ const&)
47 {
48 return (std::numeric_limits<std::size_t>::max)();
49 }
50
51 inline boost::intmax_t umax()
52 {
53 return umax(mpl::bool_<std::numeric_limits<boost::intmax_t>::digits >= std::numeric_limits<std::size_t>::digits>());
54 }
55
56 template <class charT, class traits>
57 class basic_regex_parser : public basic_regex_creator<charT, traits>
58 {
59 public:
60 basic_regex_parser(regex_data<charT, traits>* data);
61 void parse(const charT* p1, const charT* p2, unsigned flags);
62 void fail(regex_constants::error_type error_code, std::ptrdiff_t position);
63 void fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos);
64 void fail(regex_constants::error_type error_code, std::ptrdiff_t position, const std::string& message)
65 {
66 fail(error_code, position, message, position);
67 }
68
69 bool parse_all();
70 bool parse_basic();
71 bool parse_extended();
72 bool parse_literal();
73 bool parse_open_paren();
74 bool parse_basic_escape();
75 bool parse_extended_escape();
76 bool parse_match_any();
77 bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
78 bool parse_repeat_range(bool isbasic);
79 bool parse_alt();
80 bool parse_set();
81 bool parse_backref();
82 void parse_set_literal(basic_char_set<charT, traits>& char_set);
83 bool parse_inner_set(basic_char_set<charT, traits>& char_set);
84 bool parse_QE();
85 bool parse_perl_extension();
86 bool parse_perl_verb();
87 bool match_verb(const char*);
88 bool add_emacs_code(bool negate);
89 bool unwind_alts(std::ptrdiff_t last_paren_start);
90 digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
91 charT unescape_character();
92 regex_constants::syntax_option_type parse_options();
93
94 private:
95 typedef bool (basic_regex_parser::*parser_proc_type)();
96 typedef typename traits::string_type string_type;
97 typedef typename traits::char_class_type char_class_type;
98 parser_proc_type m_parser_proc; // the main parser to use
99 const charT* m_base; // the start of the string being parsed
100 const charT* m_end; // the end of the string being parsed
101 const charT* m_position; // our current parser position
102 unsigned m_mark_count; // how many sub-expressions we have
103 int m_mark_reset; // used to indicate that we're inside a (?|...) block.
104 unsigned m_max_mark; // largest mark count seen inside a (?|...) block.
105 std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
106 std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
107 bool m_has_case_change; // true if somewhere in the current block the case has changed
108 unsigned m_recursion_count; // How many times we've called parse_all.
109 #if defined(BOOST_MSVC) && defined(_M_IX86)
110 // This is an ugly warning suppression workaround (for warnings *inside* std::vector
111 // that can not otherwise be suppressed)...
112 BOOST_STATIC_ASSERT(sizeof(long) >= sizeof(void*));
113 std::vector<long> m_alt_jumps; // list of alternative in the current scope.
114 #else
115 std::vector<std::ptrdiff_t> m_alt_jumps; // list of alternative in the current scope.
116 #endif
117
118 basic_regex_parser& operator=(const basic_regex_parser&);
119 basic_regex_parser(const basic_regex_parser&);
120 };
121
122 template <class charT, class traits>
123 basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
124 : basic_regex_creator<charT, traits>(data), m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false), m_recursion_count(0)
125 {
126 }
127
128 template <class charT, class traits>
129 void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned l_flags)
130 {
131 // pass l_flags on to base class:
132 this->init(l_flags);
133 // set up pointers:
134 m_position = m_base = p1;
135 m_end = p2;
136 // empty strings are errors:
137 if((p1 == p2) &&
138 (
139 ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group)
140 || (l_flags & regbase::no_empty_expressions)
141 )
142 )
143 {
144 fail(regex_constants::error_empty, 0);
145 return;
146 }
147 // select which parser to use:
148 switch(l_flags & regbase::main_option_type)
149 {
150 case regbase::perl_syntax_group:
151 {
152 m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
153 //
154 // Add a leading paren with index zero to give recursions a target:
155 //
156 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
157 br->index = 0;
158 br->icase = this->flags() & regbase::icase;
159 break;
160 }
161 case regbase::basic_syntax_group:
162 m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;
163 break;
164 case regbase::literal:
165 m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal;
166 break;
167 default:
168 // Ooops, someone has managed to set more than one of the main option flags,
169 // so this must be an error:
170 fail(regex_constants::error_unknown, 0, "An invalid combination of regular expression syntax flags was used.");
171 return;
172 }
173
174 // parse all our characters:
175 bool result = parse_all();
176 //
177 // Unwind our alternatives:
178 //
179 unwind_alts(-1);
180 // reset l_flags as a global scope (?imsx) may have altered them:
181 this->flags(l_flags);
182 // if we haven't gobbled up all the characters then we must
183 // have had an unexpected ')' :
184 if(!result)
185 {
186 fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Found a closing ) with no corresponding openening parenthesis.");
187 return;
188 }
189 // if an error has been set then give up now:
190 if(this->m_pdata->m_status)
191 return;
192 // fill in our sub-expression count:
193 this->m_pdata->m_mark_count = 1 + m_mark_count;
194 this->finalize(p1, p2);
195 }
196
197 template <class charT, class traits>
198 void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position)
199 {
200 // get the error message:
201 std::string message = this->m_pdata->m_ptraits->error_string(error_code);
202 fail(error_code, position, message);
203 }
204
205 template <class charT, class traits>
206 void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos)
207 {
208 if(0 == this->m_pdata->m_status) // update the error code if not already set
209 this->m_pdata->m_status = error_code;
210 m_position = m_end; // don't bother parsing anything else
211
212 #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
213 //
214 // Augment error message with the regular expression text:
215 //
216 if(start_pos == position)
217 start_pos = (std::max)(static_cast<std::ptrdiff_t>(0), position - static_cast<std::ptrdiff_t>(10));
218 std::ptrdiff_t end_pos = (std::min)(position + static_cast<std::ptrdiff_t>(10), static_cast<std::ptrdiff_t>(m_end - m_base));
219 if(error_code != regex_constants::error_empty)
220 {
221 if((start_pos != 0) || (end_pos != (m_end - m_base)))
222 message += " The error occurred while parsing the regular expression fragment: '";
223 else
224 message += " The error occurred while parsing the regular expression: '";
225 if(start_pos != end_pos)
226 {
227 message += std::string(m_base + start_pos, m_base + position);
228 message += ">>>HERE>>>";
229 message += std::string(m_base + position, m_base + end_pos);
230 }
231 message += "'.";
232 }
233 #endif
234
235 #ifndef BOOST_NO_EXCEPTIONS
236 if(0 == (this->flags() & regex_constants::no_except))
237 {
238 boost::regex_error e(message, error_code, position);
239 e.raise();
240 }
241 #else
242 (void)position; // suppress warnings.
243 #endif
244 }
245
246 template <class charT, class traits>
247 bool basic_regex_parser<charT, traits>::parse_all()
248 {
249 if (++m_recursion_count > 400)
250 {
251 // exceeded internal limits
252 fail(boost::regex_constants::error_complexity, m_position - m_base, "Exceeded nested brace limit.");
253 }
254 bool result = true;
255 while(result && (m_position != m_end))
256 {
257 result = (this->*m_parser_proc)();
258 }
259 --m_recursion_count;
260 return result;
261 }
262
263 #ifdef BOOST_MSVC
264 #pragma warning(push)
265 #pragma warning(disable:4702)
266 #endif
267 template <class charT, class traits>
268 bool basic_regex_parser<charT, traits>::parse_basic()
269 {
270 switch(this->m_traits.syntax_type(*m_position))
271 {
272 case regex_constants::syntax_escape:
273 return parse_basic_escape();
274 case regex_constants::syntax_dot:
275 return parse_match_any();
276 case regex_constants::syntax_caret:
277 ++m_position;
278 this->append_state(syntax_element_start_line);
279 break;
280 case regex_constants::syntax_dollar:
281 ++m_position;
282 this->append_state(syntax_element_end_line);
283 break;
284 case regex_constants::syntax_star:
285 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line))
286 return parse_literal();
287 else
288 {
289 ++m_position;
290 return parse_repeat();
291 }
292 case regex_constants::syntax_plus:
293 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
294 return parse_literal();
295 else
296 {
297 ++m_position;
298 return parse_repeat(1);
299 }
300 case regex_constants::syntax_question:
301 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
302 return parse_literal();
303 else
304 {
305 ++m_position;
306 return parse_repeat(0, 1);
307 }
308 case regex_constants::syntax_open_set:
309 return parse_set();
310 case regex_constants::syntax_newline:
311 if(this->flags() & regbase::newline_alt)
312 return parse_alt();
313 else
314 return parse_literal();
315 default:
316 return parse_literal();
317 }
318 return true;
319 }
320
321 template <class charT, class traits>
322 bool basic_regex_parser<charT, traits>::parse_extended()
323 {
324 bool result = true;
325 switch(this->m_traits.syntax_type(*m_position))
326 {
327 case regex_constants::syntax_open_mark:
328 return parse_open_paren();
329 case regex_constants::syntax_close_mark:
330 return false;
331 case regex_constants::syntax_escape:
332 return parse_extended_escape();
333 case regex_constants::syntax_dot:
334 return parse_match_any();
335 case regex_constants::syntax_caret:
336 ++m_position;
337 this->append_state(
338 (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line));
339 break;
340 case regex_constants::syntax_dollar:
341 ++m_position;
342 this->append_state(
343 (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line));
344 break;
345 case regex_constants::syntax_star:
346 if(m_position == this->m_base)
347 {
348 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"*\" cannot start a regular expression.");
349 return false;
350 }
351 ++m_position;
352 return parse_repeat();
353 case regex_constants::syntax_question:
354 if(m_position == this->m_base)
355 {
356 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"?\" cannot start a regular expression.");
357 return false;
358 }
359 ++m_position;
360 return parse_repeat(0,1);
361 case regex_constants::syntax_plus:
362 if(m_position == this->m_base)
363 {
364 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"+\" cannot start a regular expression.");
365 return false;
366 }
367 ++m_position;
368 return parse_repeat(1);
369 case regex_constants::syntax_open_brace:
370 ++m_position;
371 return parse_repeat_range(false);
372 case regex_constants::syntax_close_brace:
373 if((this->flags() & regbase::no_perl_ex) == regbase::no_perl_ex)
374 {
375 fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
376 return false;
377 }
378 result = parse_literal();
379 break;
380 case regex_constants::syntax_or:
381 return parse_alt();
382 case regex_constants::syntax_open_set:
383 return parse_set();
384 case regex_constants::syntax_newline:
385 if(this->flags() & regbase::newline_alt)
386 return parse_alt();
387 else
388 return parse_literal();
389 case regex_constants::syntax_hash:
390 //
391 // If we have a mod_x flag set, then skip until
392 // we get to a newline character:
393 //
394 if((this->flags()
395 & (regbase::no_perl_ex|regbase::mod_x))
396 == regbase::mod_x)
397 {
398 while((m_position != m_end) && !is_separator(*m_position++)){}
399 return true;
400 }
401 BOOST_FALLTHROUGH;
402 default:
403 result = parse_literal();
404 break;
405 }
406 return result;
407 }
408 #ifdef BOOST_MSVC
409 #pragma warning(pop)
410 #endif
411
412 template <class charT, class traits>
413 bool basic_regex_parser<charT, traits>::parse_literal()
414 {
415 // append this as a literal provided it's not a space character
416 // or the perl option regbase::mod_x is not set:
417 if(
418 ((this->flags()
419 & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex))
420 != regbase::mod_x)
421 || !this->m_traits.isctype(*m_position, this->m_mask_space))
422 this->append_literal(*m_position);
423 ++m_position;
424 return true;
425 }
426
//
// parse_open_paren:
// Entered with m_position at an opening mark.  Handles a (?...) or (*...)
// extension by delegating to parse_perl_extension / parse_perl_verb when the
// flags allow it; otherwise appends a startmark state, parses the group
// contents by calling parse_all, and appends the matching endmark state.
// The alternative insertion point, the flags, the case-change indicator and
// the branch-reset value are saved before the nested parse and put back
// afterwards.
// Returns true on success; false when an error was reported, when
// unwind_alts fails, or when the nested parse did not stop at a closing mark.
//
427 template <class charT, class traits>
428 bool basic_regex_parser<charT, traits>::parse_open_paren()
429 {
430 //
431 // skip the '(' and error check:
432 //
433 if(++m_position == m_end)
434 {
435 fail(regex_constants::error_paren, m_position - m_base);
436 return false;
437 }
438 //
439 // begin by checking for a perl-style (?...) extension:
440 //
// (enabled when the main-option and no_perl_ex bits are all clear, or when
// the basic syntax group is combined with emacs_ex)
441 if(
442 ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
443 || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
444 )
445 {
446 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
447 return parse_perl_extension();
448 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_star)
449 return parse_perl_verb();
450 }
451 //
452 // update our mark count, and append the required state:
453 //
// markid stays 0 when the nosubs flag is set.
454 unsigned markid = 0;
455 if(0 == (this->flags() & regbase::nosubs))
456 {
457 markid = ++m_mark_count;
// m_position is already one past the '(', hence the "- 1" below; the end
// of the pair is filled in once the closing mark has been found.
458 #ifndef BOOST_NO_STD_DISTANCE
459 if(this->flags() & regbase::save_subexpression_location)
460 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 1, 0));
461 #else
462 if(this->flags() & regbase::save_subexpression_location)
463 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 1, 0));
464 #endif
465 }
466 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
467 pb->index = markid;
468 pb->icase = this->flags() & regbase::icase;
469 std::ptrdiff_t last_paren_start = this->getoffset(pb);
470 // back up insertion point for alternations, and set new point:
471 std::ptrdiff_t last_alt_point = m_alt_insert_point;
472 this->m_pdata->m_data.align();
473 m_alt_insert_point = this->m_pdata->m_data.size();
474 //
475 // back up the current flags in case we have a nested (?imsx) group:
476 //
477 regex_constants::syntax_option_type opts = this->flags();
478 bool old_case_change = m_has_case_change;
479 m_has_case_change = false; // no changes to this scope as yet...
480 //
481 // Back up branch reset data in case we have a nested (?|...)
482 //
483 int mark_reset = m_mark_reset;
484 m_mark_reset = -1;
485 //
486 // now recursively add more states, this will terminate when we get to a
487 // matching ')' :
488 //
// The return value is not used here; the position checks further down
// decide whether the group was closed properly.
489 parse_all();
490 //
491 // Unwind pushed alternatives:
492 //
493 if(0 == unwind_alts(last_paren_start))
494 return false;
495 //
496 // restore flags:
497 //
498 if(m_has_case_change)
499 {
500 // the case has changed in one or more of the alternatives
501 // within the scoped (...) block: we have to add a state
502 // to reset the case sensitivity:
503 static_cast<re_case*>(
504 this->append_state(syntax_element_toggle_case, sizeof(re_case))
505 )->icase = opts & regbase::icase;
506 }
507 this->flags(opts);
508 m_has_case_change = old_case_change;
509 //
510 // restore branch reset:
511 //
512 m_mark_reset = mark_reset;
513 //
514 // we either have a ')' or we have run out of characters prematurely:
515 //
516 if(m_position == m_end)
517 {
518 this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
519 return false;
520 }
521 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
522 return false;
// Record where the sub-expression ends (the position of the closing mark):
523 #ifndef BOOST_NO_STD_DISTANCE
524 if(markid && (this->flags() & regbase::save_subexpression_location))
525 this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position);
526 #else
527 if(markid && (this->flags() & regbase::save_subexpression_location))
528 this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base);
529 #endif
530 ++m_position;
531 //
532 // append closing parenthesis state:
533 //
534 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
535 pb->index = markid;
536 pb->icase = this->flags() & regbase::icase;
537 this->m_paren_start = last_paren_start;
538 //
539 // restore the alternate insertion point:
540 //
541 this->m_alt_insert_point = last_alt_point;
542 //
543 // allow backrefs to this mark:
544 //
// m_backrefs is used as a bit mask here, so only mark ids smaller than the
// number of bits in an unsigned are recorded.
545 if((markid > 0) && (markid < sizeof(unsigned) * CHAR_BIT))
546 this->m_backrefs |= 1u << (markid - 1);
547
548 return true;
549 }
550
551 template <class charT, class traits>
552 bool basic_regex_parser<charT, traits>::parse_basic_escape()
553 {
554 if(++m_position == m_end)
555 {
556 fail(regex_constants::error_paren, m_position - m_base);
557 return false;
558 }
559 bool result = true;
560 switch(this->m_traits.escape_syntax_type(*m_position))
561 {
562 case regex_constants::syntax_open_mark:
563 return parse_open_paren();
564 case regex_constants::syntax_close_mark:
565 return false;
566 case regex_constants::syntax_plus:
567 if(this->flags() & regex_constants::bk_plus_qm)
568 {
569 ++m_position;
570 return parse_repeat(1);
571 }
572 else
573 return parse_literal();
574 case regex_constants::syntax_question:
575 if(this->flags() & regex_constants::bk_plus_qm)
576 {
577 ++m_position;
578 return parse_repeat(0, 1);
579 }
580 else
581 return parse_literal();
582 case regex_constants::syntax_open_brace:
583 if(this->flags() & regbase::no_intervals)
584 return parse_literal();
585 ++m_position;
586 return parse_repeat_range(true);
587 case regex_constants::syntax_close_brace:
588 if(this->flags() & regbase::no_intervals)
589 return parse_literal();
590 fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
591 return false;
592 case regex_constants::syntax_or:
593 if(this->flags() & regbase::bk_vbar)
594 return parse_alt();
595 else
596 result = parse_literal();
597 break;
598 case regex_constants::syntax_digit:
599 return parse_backref();
600 case regex_constants::escape_type_start_buffer:
601 if(this->flags() & regbase::emacs_ex)
602 {
603 ++m_position;
604 this->append_state(syntax_element_buffer_start);
605 }
606 else
607 result = parse_literal();
608 break;
609 case regex_constants::escape_type_end_buffer:
610 if(this->flags() & regbase::emacs_ex)
611 {
612 ++m_position;
613 this->append_state(syntax_element_buffer_end);
614 }
615 else
616 result = parse_literal();
617 break;
618 case regex_constants::escape_type_word_assert:
619 if(this->flags() & regbase::emacs_ex)
620 {
621 ++m_position;
622 this->append_state(syntax_element_word_boundary);
623 }
624 else
625 result = parse_literal();
626 break;
627 case regex_constants::escape_type_not_word_assert:
628 if(this->flags() & regbase::emacs_ex)
629 {
630 ++m_position;
631 this->append_state(syntax_element_within_word);
632 }
633 else
634 result = parse_literal();
635 break;
636 case regex_constants::escape_type_left_word:
637 if(this->flags() & regbase::emacs_ex)
638 {
639 ++m_position;
640 this->append_state(syntax_element_word_start);
641 }
642 else
643 result = parse_literal();
644 break;
645 case regex_constants::escape_type_right_word:
646 if(this->flags() & regbase::emacs_ex)
647 {
648 ++m_position;
649 this->append_state(syntax_element_word_end);
650 }
651 else
652 result = parse_literal();
653 break;
654 default:
655 if(this->flags() & regbase::emacs_ex)
656 {
657 bool negate = true;
658 switch(*m_position)
659 {
660 case 'w':
661 negate = false;
662 BOOST_FALLTHROUGH;
663 case 'W':
664 {
665 basic_char_set<charT, traits> char_set;
666 if(negate)
667 char_set.negate();
668 char_set.add_class(this->m_word_mask);
669 if(0 == this->append_set(char_set))
670 {
671 fail(regex_constants::error_ctype, m_position - m_base);
672 return false;
673 }
674 ++m_position;
675 return true;
676 }
677 case 's':
678 negate = false;
679 BOOST_FALLTHROUGH;
680 case 'S':
681 return add_emacs_code(negate);
682 case 'c':
683 case 'C':
684 // not supported yet:
685 fail(regex_constants::error_escape, m_position - m_base, "The \\c and \\C escape sequences are not supported by POSIX basic regular expressions: try the Perl syntax instead.");
686 return false;
687 default:
688 break;
689 }
690 }
691 result = parse_literal();
692 break;
693 }
694 return result;
695 }
696
//
// parse_extended_escape:
// Entered with m_position at an escape character while parsing with
// parse_extended.  Steps over the escape and dispatches on the escape syntax
// type of the following character: character classes, back references,
// assertions, quoting, properties and the Perl-only forms below.
// Several Perl-only cases jump to the escape_type_class_jump label when Perl
// extensions are not active, so that the character is then looked up as a
// single-letter class name or, failing that, appended via
// unescape_character().
// Returns false when an error was reported; otherwise true, or the result of
// the routine that was delegated to.
//
697 template <class charT, class traits>
698 bool basic_regex_parser<charT, traits>::parse_extended_escape()
699 {
700 ++m_position;
701 if(m_position == m_end)
702 {
703 fail(regex_constants::error_escape, m_position - m_base, "Incomplete escape sequence found.");
704 return false;
705 }
706 bool negate = false; // in case this is a character class escape: \w \d etc
707 switch(this->m_traits.escape_syntax_type(*m_position))
708 {
709 case regex_constants::escape_type_not_class:
710 negate = true;
711 BOOST_FALLTHROUGH;
712 case regex_constants::escape_type_class:
713 {
// Target of the goto statements further down; "negate" is still false when
// arriving here from one of them.
714 escape_type_class_jump:
715 typedef typename traits::char_class_type m_type;
// Look the single character at m_position up as a class name:
716 m_type m = this->m_traits.lookup_classname(m_position, m_position+1);
717 if(m != 0)
718 {
719 basic_char_set<charT, traits> char_set;
720 if(negate)
721 char_set.negate();
722 char_set.add_class(m);
723 if(0 == this->append_set(char_set))
724 {
725 fail(regex_constants::error_ctype, m_position - m_base);
726 return false;
727 }
728 ++m_position;
729 return true;
730 }
731 //
732 // not a class, just a regular unknown escape:
733 //
734 this->append_literal(unescape_character());
735 break;
736 }
737 case regex_constants::syntax_digit:
738 return parse_backref();
// The following cases each consume the character and append a single state:
739 case regex_constants::escape_type_left_word:
740 ++m_position;
741 this->append_state(syntax_element_word_start);
742 break;
743 case regex_constants::escape_type_right_word:
744 ++m_position;
745 this->append_state(syntax_element_word_end);
746 break;
747 case regex_constants::escape_type_start_buffer:
748 ++m_position;
749 this->append_state(syntax_element_buffer_start);
750 break;
751 case regex_constants::escape_type_end_buffer:
752 ++m_position;
753 this->append_state(syntax_element_buffer_end);
754 break;
755 case regex_constants::escape_type_word_assert:
756 ++m_position;
757 this->append_state(syntax_element_word_boundary);
758 break;
759 case regex_constants::escape_type_not_word_assert:
760 ++m_position;
761 this->append_state(syntax_element_within_word);
762 break;
763 case regex_constants::escape_type_Z:
764 ++m_position;
765 this->append_state(syntax_element_soft_buffer_end);
766 break;
767 case regex_constants::escape_type_Q:
768 return parse_QE();
769 case regex_constants::escape_type_C:
770 return parse_match_any();
771 case regex_constants::escape_type_X:
772 ++m_position;
773 this->append_state(syntax_element_combining);
774 break;
775 case regex_constants::escape_type_G:
776 ++m_position;
777 this->append_state(syntax_element_restart_continue);
778 break;
// Property escapes: the name is either a single character or enclosed in
// braces; the "not" form negates the resulting set.
779 case regex_constants::escape_type_not_property:
780 negate = true;
781 BOOST_FALLTHROUGH;
782 case regex_constants::escape_type_property:
783 {
784 ++m_position;
785 char_class_type m;
786 if(m_position == m_end)
787 {
788 fail(regex_constants::error_escape, m_position - m_base, "Incomplete property escape found.");
789 return false;
790 }
791 // maybe have \p{ddd}
792 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
793 {
794 const charT* base = m_position;
795 // skip forward until we find enclosing brace:
796 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
797 ++m_position;
798 if(m_position == m_end)
799 {
800 fail(regex_constants::error_escape, m_position - m_base, "Closing } missing from property escape sequence.");
801 return false;
802 }
// The name is the text between the braces: ++base steps over the opening
// brace, and m_position is left one past the closing brace.
803 m = this->m_traits.lookup_classname(++base, m_position++);
804 }
805 else
806 {
807 m = this->m_traits.lookup_classname(m_position, m_position+1);
808 ++m_position;
809 }
810 if(m != 0)
811 {
812 basic_char_set<charT, traits> char_set;
813 if(negate)
814 char_set.negate();
815 char_set.add_class(m);
816 if(0 == this->append_set(char_set))
817 {
818 fail(regex_constants::error_ctype, m_position - m_base);
819 return false;
820 }
821 return true;
822 }
823 fail(regex_constants::error_ctype, m_position - m_base, "Escape sequence was neither a valid property nor a valid character class name.");
824 return false;
825 }
// Reset-start-mark escape: only when the main-option and no_perl_ex bits are
// all clear; appended as a startmark state carrying the index -5.
826 case regex_constants::escape_type_reset_start_mark:
827 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
828 {
829 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
830 pb->index = -5;
831 pb->icase = this->flags() & regbase::icase;
832 this->m_pdata->m_data.align();
833 ++m_position;
834 return true;
835 }
836 goto escape_type_class_jump;
// Line-ending escape: the string returned by get_escape_R_string is parsed
// in place of the escape by temporarily pointing the parser at it; the
// original pointers are put back afterwards, one past the escape character.
837 case regex_constants::escape_type_line_ending:
838 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
839 {
840 const charT* e = get_escape_R_string<charT>();
841 const charT* old_position = m_position;
842 const charT* old_end = m_end;
843 const charT* old_base = m_base;
844 m_position = e;
845 m_base = e;
846 m_end = e + traits::length(e);
847 bool r = parse_all();
848 m_position = ++old_position;
849 m_end = old_end;
850 m_base = old_base;
851 return r;
852 }
853 goto escape_type_class_jump;
// Extended back reference (\g): a number or, inside delimiters, a name;
// a leading '-' makes the number relative to the current mark count.
854 case regex_constants::escape_type_extended_backref:
855 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
856 {
857 bool have_brace = false;
858 bool negative = false;
859 static const char* incomplete_message = "Incomplete \\g escape found.";
860 if(++m_position == m_end)
861 {
862 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
863 return false;
864 }
865 // maybe have \g{ddd}
866 regex_constants::syntax_type syn = this->m_traits.syntax_type(*m_position);
867 regex_constants::syntax_type syn_end = 0;
868 if((syn == regex_constants::syntax_open_brace)
869 || (syn == regex_constants::escape_type_left_word)
870 || (syn == regex_constants::escape_type_end_buffer))
871 {
872 if(++m_position == m_end)
873 {
874 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
875 return false;
876 }
877 have_brace = true;
// Work out which syntax type closes the delimiter that was just opened:
878 switch(syn)
879 {
880 case regex_constants::syntax_open_brace:
881 syn_end = regex_constants::syntax_close_brace;
882 break;
883 case regex_constants::escape_type_left_word:
884 syn_end = regex_constants::escape_type_right_word;
885 break;
886 default:
887 syn_end = regex_constants::escape_type_end_buffer;
888 break;
889 }
890 }
891 negative = (*m_position == static_cast<charT>('-'));
892 if((negative) && (++m_position == m_end))
893 {
894 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
895 return false;
896 }
// toi is called on the copy "pc", so m_position itself is not advanced here.
897 const charT* pc = m_position;
898 boost::intmax_t i = this->m_traits.toi(pc, m_end, 10);
899 if((i < 0) && syn_end)
900 {
901 // Check for a named capture, get the leftmost one if there is more than one:
902 const charT* base = m_position;
903 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != syn_end))
904 {
905 ++m_position;
906 }
907 i = hash_value_from_capture_name(base, m_position);
908 pc = m_position;
909 }
910 if(negative)
911 i = 1 + m_mark_count - i;
// Accept the reference only if the corresponding bit is set in m_backrefs:
// either directly for a small positive index, or - for values above 10000 -
// for the id returned by get_id(i).
912 if(((i > 0) && (i < std::numeric_limits<unsigned>::digits) && (i - 1 < static_cast<boost::intmax_t>(sizeof(unsigned) * CHAR_BIT)) && (this->m_backrefs & (1u << (i-1)))) || ((i > 10000) && (this->m_pdata->get_id(i) > 0) && (this->m_pdata->get_id(i)-1 < static_cast<boost::intmax_t>(sizeof(unsigned) * CHAR_BIT)) && (this->m_backrefs & (1u << (this->m_pdata->get_id(i)-1)))))
913 {
914 m_position = pc;
915 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
916 pb->index = i;
917 pb->icase = this->flags() & regbase::icase;
918 }
919 else
920 {
921 fail(regex_constants::error_backref, m_position - m_base);
922 return false;
923 }
924 m_position = pc;
// When a delimiter was opened, the matching closing one must follow:
925 if(have_brace)
926 {
927 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != syn_end))
928 {
929 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
930 return false;
931 }
932 ++m_position;
933 }
934 return true;
935 }
936 goto escape_type_class_jump;
// control_v: looked up as a class when the main-option and no_perl_ex bits
// are all clear, otherwise handled by the default case.
937 case regex_constants::escape_type_control_v:
938 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
939 goto escape_type_class_jump;
940 BOOST_FALLTHROUGH;
941 default:
942 this->append_literal(unescape_character());
943 break;
944 }
945 return true;
946 }
947
948 template <class charT, class traits>
949 bool basic_regex_parser<charT, traits>::parse_match_any()
950 {
951 //
952 // we have a '.' that can match any character:
953 //
954 ++m_position;
955 static_cast<re_dot*>(
956 this->append_state(syntax_element_wild, sizeof(re_dot))
957 )->mask = static_cast<unsigned char>(this->flags() & regbase::no_mod_s
958 ? BOOST_REGEX_DETAIL_NS::force_not_newline
959 : this->flags() & regbase::mod_s ?
960 BOOST_REGEX_DETAIL_NS::force_newline : BOOST_REGEX_DETAIL_NS::dont_care);
961 return true;
962 }
963
964 template <class charT, class traits>
965 bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high)
966 {
967 bool greedy = true;
968 bool pocessive = false;
969 std::size_t insert_point;
970 //
971 // when we get to here we may have a non-greedy ? mark still to come:
972 //
973 if((m_position != m_end)
974 && (
975 (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
976 || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex)))
977 )
978 )
979 {
980 // OK we have a perl or emacs regex, check for a '?':
981 if ((this->flags() & (regbase::main_option_type | regbase::mod_x | regbase::no_perl_ex)) == regbase::mod_x)
982 {
983 // whitespace skip:
984 while ((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
985 ++m_position;
986 }
987 if((m_position != m_end) && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question))
988 {
989 greedy = false;
990 ++m_position;
991 }
992 // for perl regexes only check for pocessive ++ repeats.
993 if((m_position != m_end)
994 && (0 == (this->flags() & regbase::main_option_type))
995 && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus))
996 {
997 pocessive = true;
998 ++m_position;
999 }
1000 }
1001 if(0 == this->m_last_state)
1002 {
1003 fail(regex_constants::error_badrepeat, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Nothing to repeat.");
1004 return false;
1005 }
1006 if(this->m_last_state->type == syntax_element_endmark)
1007 {
1008 // insert a repeat before the '(' matching the last ')':
1009 insert_point = this->m_paren_start;
1010 }
1011 else if((this->m_last_state->type == syntax_element_literal) && (static_cast<re_literal*>(this->m_last_state)->length > 1))
1012 {
1013 // the last state was a literal with more than one character, split it in two:
1014 re_literal* lit = static_cast<re_literal*>(this->m_last_state);
1015 charT c = (static_cast<charT*>(static_cast<void*>(lit+1)))[lit->length - 1];
1016 lit->length -= 1;
1017 // now append new state:
1018 lit = static_cast<re_literal*>(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
1019 lit->length = 1;
1020 (static_cast<charT*>(static_cast<void*>(lit+1)))[0] = c;
1021 insert_point = this->getoffset(this->m_last_state);
1022 }
1023 else
1024 {
1025 // repeat the last state whatever it was, need to add some error checking here:
1026 switch(this->m_last_state->type)
1027 {
1028 case syntax_element_start_line:
1029 case syntax_element_end_line:
1030 case syntax_element_word_boundary:
1031 case syntax_element_within_word:
1032 case syntax_element_word_start:
1033 case syntax_element_word_end:
1034 case syntax_element_buffer_start:
1035 case syntax_element_buffer_end:
1036 case syntax_element_alt:
1037 case syntax_element_soft_buffer_end:
1038 case syntax_element_restart_continue:
1039 case syntax_element_jump:
1040 case syntax_element_startmark:
1041 case syntax_element_backstep:
1042 // can't legally repeat any of the above:
1043 fail(regex_constants::error_badrepeat, m_position - m_base);
1044 return false;
1045 default:
1046 // do nothing...
1047 break;
1048 }
1049 insert_point = this->getoffset(this->m_last_state);
1050 }
1051 //
1052 // OK we now know what to repeat, so insert the repeat around it:
1053 //
1054 re_repeat* rep = static_cast<re_repeat*>(this->insert_state(insert_point, syntax_element_rep, re_repeater_size));
1055 rep->min = low;
1056 rep->max = high;
1057 rep->greedy = greedy;
1058 rep->leading = false;
1059 // store our repeater position for later:
1060 std::ptrdiff_t rep_off = this->getoffset(rep);
1061 // and append a back jump to the repeat:
1062 re_jump* jmp = static_cast<re_jump*>(this->append_state(syntax_element_jump, sizeof(re_jump)));
1063 jmp->alt.i = rep_off - this->getoffset(jmp);
1064 this->m_pdata->m_data.align();
1065 // now fill in the alt jump for the repeat:
1066 rep = static_cast<re_repeat*>(this->getaddress(rep_off));
1067 rep->alt.i = this->m_pdata->m_data.size() - rep_off;
1068 //
1069 // If the repeat is pocessive then bracket the repeat with a (?>...)
1070 // independent sub-expression construct:
1071 //
1072 if(pocessive)
1073 {
1074 if(m_position != m_end)
1075 {
1076 //
1077 // Check for illegal following quantifier, we have to do this here, because
1078 // the extra states we insert below circumvents our usual error checking :-(
1079 //
1080 bool contin = false;
1081 do
1082 {
1083 if ((this->flags() & (regbase::main_option_type | regbase::mod_x | regbase::no_perl_ex)) == regbase::mod_x)
1084 {
1085 // whitespace skip:
1086 while ((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1087 ++m_position;
1088 }
1089 if (m_position != m_end)
1090 {
1091 switch (this->m_traits.syntax_type(*m_position))
1092 {
1093 case regex_constants::syntax_star:
1094 case regex_constants::syntax_plus:
1095 case regex_constants::syntax_question:
1096 case regex_constants::syntax_open_brace:
1097 fail(regex_constants::error_badrepeat, m_position - m_base);
1098 return false;
1099 case regex_constants::syntax_open_mark:
1100 // Do we have a comment? If so we need to skip it here...
1101 if ((m_position + 2 < m_end) && this->m_traits.syntax_type(*(m_position + 1)) == regex_constants::syntax_question
1102 && this->m_traits.syntax_type(*(m_position + 2)) == regex_constants::syntax_hash)
1103 {
1104 while ((m_position != m_end)
1105 && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark)) {
1106 }
1107 contin = true;
1108 }
1109 else
1110 contin = false;
1111 }
1112 }
1113 else
1114 contin = false;
1115 } while (contin);
1116 }
1117 re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace)));
1118 pb->index = -3;
1119 pb->icase = this->flags() & regbase::icase;
1120 jmp = static_cast<re_jump*>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump)));
1121 this->m_pdata->m_data.align();
1122 jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
1123 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
1124 pb->index = -3;
1125 pb->icase = this->flags() & regbase::icase;
1126 }
1127 return true;
1128 }
1129
1130 template <class charT, class traits>
1131 bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
1132 {
1133 static const char* incomplete_message = "Missing } in quantified repetition.";
1134 //
1135 // parse a repeat-range:
1136 //
1137 std::size_t min, max;
1138 boost::intmax_t v;
1139 // skip whitespace:
1140 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1141 ++m_position;
1142 if(this->m_position == this->m_end)
1143 {
1144 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1145 {
1146 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1147 return false;
1148 }
1149 // Treat the opening '{' as a literal character, rewind to start of error:
1150 --m_position;
1151 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1152 return parse_literal();
1153 }
1154 // get min:
1155 v = this->m_traits.toi(m_position, m_end, 10);
1156 // skip whitespace:
1157 if((v < 0) || (v > umax()))
1158 {
1159 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1160 {
1161 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1162 return false;
1163 }
1164 // Treat the opening '{' as a literal character, rewind to start of error:
1165 --m_position;
1166 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1167 return parse_literal();
1168 }
1169 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1170 ++m_position;
1171 if(this->m_position == this->m_end)
1172 {
1173 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1174 {
1175 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1176 return false;
1177 }
1178 // Treat the opening '{' as a literal character, rewind to start of error:
1179 --m_position;
1180 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1181 return parse_literal();
1182 }
1183 min = static_cast<std::size_t>(v);
1184 // see if we have a comma:
1185 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
1186 {
1187 // move on and error check:
1188 ++m_position;
1189 // skip whitespace:
1190 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1191 ++m_position;
1192 if(this->m_position == this->m_end)
1193 {
1194 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1195 {
1196 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1197 return false;
1198 }
1199 // Treat the opening '{' as a literal character, rewind to start of error:
1200 --m_position;
1201 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1202 return parse_literal();
1203 }
1204 // get the value if any:
1205 v = this->m_traits.toi(m_position, m_end, 10);
1206 max = ((v >= 0) && (v < umax())) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
1207 }
1208 else
1209 {
1210 // no comma, max = min:
1211 max = min;
1212 }
1213 // skip whitespace:
1214 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1215 ++m_position;
1216 // OK now check trailing }:
1217 if(this->m_position == this->m_end)
1218 {
1219 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1220 {
1221 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1222 return false;
1223 }
1224 // Treat the opening '{' as a literal character, rewind to start of error:
1225 --m_position;
1226 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1227 return parse_literal();
1228 }
1229 if(isbasic)
1230 {
1231 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape)
1232 {
1233 ++m_position;
1234 if(this->m_position == this->m_end)
1235 {
1236 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1237 return false;
1238 }
1239 }
1240 else
1241 {
1242 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1243 return false;
1244 }
1245 }
1246 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace)
1247 ++m_position;
1248 else
1249 {
1250 // Treat the opening '{' as a literal character, rewind to start of error:
1251 --m_position;
1252 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1253 return parse_literal();
1254 }
1255 //
1256 // finally go and add the repeat, unless error:
1257 //
1258 if(min > max)
1259 {
1260 // Backtrack to error location:
1261 m_position -= 2;
1262 while(this->m_traits.isctype(*m_position, this->m_word_mask)) --m_position;
1263 ++m_position;
1264 fail(regex_constants::error_badbrace, m_position - m_base);
1265 return false;
1266 }
1267 return parse_repeat(min, max);
1268 }
1269
1270 template <class charT, class traits>
1271 bool basic_regex_parser<charT, traits>::parse_alt()
1272 {
1273 //
1274 // error check: if there have been no previous states,
1275 // or if the last state was a '(' then error:
1276 //
1277 if(
1278 ((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
1279 &&
1280 !(
1281 ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
1282 &&
1283 ((this->flags() & regbase::no_empty_expressions) == 0)
1284 )
1285 )
1286 {
1287 fail(regex_constants::error_empty, this->m_position - this->m_base, "A regular expression cannot start with the alternation operator |.");
1288 return false;
1289 }
1290 //
1291 // Reset mark count if required:
1292 //
1293 if(m_max_mark < m_mark_count)
1294 m_max_mark = m_mark_count;
1295 if(m_mark_reset >= 0)
1296 m_mark_count = m_mark_reset;
1297
1298 ++m_position;
1299 //
1300 // we need to append a trailing jump:
1301 //
1302 re_syntax_base* pj = this->append_state(BOOST_REGEX_DETAIL_NS::syntax_element_jump, sizeof(re_jump));
1303 std::ptrdiff_t jump_offset = this->getoffset(pj);
1304 //
1305 // now insert the alternative:
1306 //
1307 re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
1308 jump_offset += re_alt_size;
1309 this->m_pdata->m_data.align();
1310 palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
1311 //
1312 // update m_alt_insert_point so that the next alternate gets
1313 // inserted at the start of the second of the two we've just created:
1314 //
1315 this->m_alt_insert_point = this->m_pdata->m_data.size();
1316 //
1317 // the start of this alternative must have a case changes state
1318 // if the current block has messed around with case changes:
1319 //
1320 if(m_has_case_change)
1321 {
1322 static_cast<re_case*>(
1323 this->append_state(syntax_element_toggle_case, sizeof(re_case))
1324 )->icase = this->m_icase;
1325 }
1326 //
1327 // push the alternative onto our stack, a recursive
1328 // implementation here is easier to understand (and faster
1329 // as it happens), but causes all kinds of stack overflow problems
1330 // on programs with small stacks (COM+).
1331 //
1332 m_alt_jumps.push_back(jump_offset);
1333 return true;
1334 }
1335
1336 template <class charT, class traits>
1337 bool basic_regex_parser<charT, traits>::parse_set()
1338 {
1339 static const char* incomplete_message = "Character set declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
1340 ++m_position;
1341 if(m_position == m_end)
1342 {
1343 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1344 return false;
1345 }
1346 basic_char_set<charT, traits> char_set;
1347
1348 const charT* base = m_position; // where the '[' was
1349 const charT* item_base = m_position; // where the '[' or '^' was
1350
1351 while(m_position != m_end)
1352 {
1353 switch(this->m_traits.syntax_type(*m_position))
1354 {
1355 case regex_constants::syntax_caret:
1356 if(m_position == base)
1357 {
1358 char_set.negate();
1359 ++m_position;
1360 item_base = m_position;
1361 }
1362 else
1363 parse_set_literal(char_set);
1364 break;
1365 case regex_constants::syntax_close_set:
1366 if(m_position == item_base)
1367 {
1368 parse_set_literal(char_set);
1369 break;
1370 }
1371 else
1372 {
1373 ++m_position;
1374 if(0 == this->append_set(char_set))
1375 {
1376 fail(regex_constants::error_ctype, m_position - m_base);
1377 return false;
1378 }
1379 }
1380 return true;
1381 case regex_constants::syntax_open_set:
1382 if(parse_inner_set(char_set))
1383 break;
1384 return true;
1385 case regex_constants::syntax_escape:
1386 {
1387 //
1388 // look ahead and see if this is a character class shortcut
1389 // \d \w \s etc...
1390 //
1391 ++m_position;
1392 if(this->m_traits.escape_syntax_type(*m_position)
1393 == regex_constants::escape_type_class)
1394 {
1395 char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
1396 if(m != 0)
1397 {
1398 char_set.add_class(m);
1399 ++m_position;
1400 break;
1401 }
1402 }
1403 else if(this->m_traits.escape_syntax_type(*m_position)
1404 == regex_constants::escape_type_not_class)
1405 {
1406 // negated character class:
1407 char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
1408 if(m != 0)
1409 {
1410 char_set.add_negated_class(m);
1411 ++m_position;
1412 break;
1413 }
1414 }
1415 // not a character class, just a regular escape:
1416 --m_position;
1417 parse_set_literal(char_set);
1418 break;
1419 }
1420 default:
1421 parse_set_literal(char_set);
1422 break;
1423 }
1424 }
1425 return m_position != m_end;
1426 }
1427
1428 template <class charT, class traits>
1429 bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
1430 {
1431 static const char* incomplete_message = "Character class declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
1432 //
1433 // we have either a character class [:name:]
1434 // a collating element [.name.]
1435 // or an equivalence class [=name=]
1436 //
1437 if(m_end == ++m_position)
1438 {
1439 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1440 return false;
1441 }
1442 switch(this->m_traits.syntax_type(*m_position))
1443 {
1444 case regex_constants::syntax_dot:
1445 //
1446 // a collating element is treated as a literal:
1447 //
1448 --m_position;
1449 parse_set_literal(char_set);
1450 return true;
1451 case regex_constants::syntax_colon:
1452 {
1453 // check that character classes are actually enabled:
1454 if((this->flags() & (regbase::main_option_type | regbase::no_char_classes))
1455 == (regbase::basic_syntax_group | regbase::no_char_classes))
1456 {
1457 --m_position;
1458 parse_set_literal(char_set);
1459 return true;
1460 }
1461 // skip the ':'
1462 if(m_end == ++m_position)
1463 {
1464 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1465 return false;
1466 }
1467 const charT* name_first = m_position;
1468 // skip at least one character, then find the matching ':]'
1469 if(m_end == ++m_position)
1470 {
1471 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1472 return false;
1473 }
1474 while((m_position != m_end)
1475 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon))
1476 ++m_position;
1477 const charT* name_last = m_position;
1478 if(m_end == m_position)
1479 {
1480 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1481 return false;
1482 }
1483 if((m_end == ++m_position)
1484 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1485 {
1486 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1487 return false;
1488 }
1489 //
1490 // check for negated class:
1491 //
1492 bool negated = false;
1493 if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
1494 {
1495 ++name_first;
1496 negated = true;
1497 }
1498 typedef typename traits::char_class_type m_type;
1499 m_type m = this->m_traits.lookup_classname(name_first, name_last);
1500 if(m == 0)
1501 {
1502 if(char_set.empty() && (name_last - name_first == 1))
1503 {
1504 // maybe a special case:
1505 ++m_position;
1506 if( (m_position != m_end)
1507 && (this->m_traits.syntax_type(*m_position)
1508 == regex_constants::syntax_close_set))
1509 {
1510 if(this->m_traits.escape_syntax_type(*name_first)
1511 == regex_constants::escape_type_left_word)
1512 {
1513 ++m_position;
1514 this->append_state(syntax_element_word_start);
1515 return false;
1516 }
1517 if(this->m_traits.escape_syntax_type(*name_first)
1518 == regex_constants::escape_type_right_word)
1519 {
1520 ++m_position;
1521 this->append_state(syntax_element_word_end);
1522 return false;
1523 }
1524 }
1525 }
1526 fail(regex_constants::error_ctype, name_first - m_base);
1527 return false;
1528 }
1529 if(negated == false)
1530 char_set.add_class(m);
1531 else
1532 char_set.add_negated_class(m);
1533 ++m_position;
1534 break;
1535 }
1536 case regex_constants::syntax_equal:
1537 {
1538 // skip the '='
1539 if(m_end == ++m_position)
1540 {
1541 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1542 return false;
1543 }
1544 const charT* name_first = m_position;
1545 // skip at least one character, then find the matching '=]'
1546 if(m_end == ++m_position)
1547 {
1548 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1549 return false;
1550 }
1551 while((m_position != m_end)
1552 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal))
1553 ++m_position;
1554 const charT* name_last = m_position;
1555 if(m_end == m_position)
1556 {
1557 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1558 return false;
1559 }
1560 if((m_end == ++m_position)
1561 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1562 {
1563 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1564 return false;
1565 }
1566 string_type m = this->m_traits.lookup_collatename(name_first, name_last);
1567 if((0 == m.size()) || (m.size() > 2))
1568 {
1569 fail(regex_constants::error_collate, name_first - m_base);
1570 return false;
1571 }
1572 digraph<charT> d;
1573 d.first = m[0];
1574 if(m.size() > 1)
1575 d.second = m[1];
1576 else
1577 d.second = 0;
1578 char_set.add_equivalent(d);
1579 ++m_position;
1580 break;
1581 }
1582 default:
1583 --m_position;
1584 parse_set_literal(char_set);
1585 break;
1586 }
1587 return true;
1588 }
1589
1590 template <class charT, class traits>
1591 void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
1592 {
1593 digraph<charT> start_range(get_next_set_literal(char_set));
1594 if(m_end == m_position)
1595 {
1596 fail(regex_constants::error_brack, m_position - m_base);
1597 return;
1598 }
1599 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
1600 {
1601 // we have a range:
1602 if(m_end == ++m_position)
1603 {
1604 fail(regex_constants::error_brack, m_position - m_base);
1605 return;
1606 }
1607 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
1608 {
1609 digraph<charT> end_range = get_next_set_literal(char_set);
1610 char_set.add_range(start_range, end_range);
1611 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
1612 {
1613 if(m_end == ++m_position)
1614 {
1615 fail(regex_constants::error_brack, m_position - m_base);
1616 return;
1617 }
1618 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set)
1619 {
1620 // trailing - :
1621 --m_position;
1622 return;
1623 }
1624 fail(regex_constants::error_range, m_position - m_base);
1625 return;
1626 }
1627 return;
1628 }
1629 --m_position;
1630 }
1631 char_set.add_single(start_range);
1632 }
1633
1634 template <class charT, class traits>
1635 digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set)
1636 {
1637 digraph<charT> result;
1638 switch(this->m_traits.syntax_type(*m_position))
1639 {
1640 case regex_constants::syntax_dash:
1641 if(!char_set.empty())
1642 {
1643 // see if we are at the end of the set:
1644 if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1645 {
1646 fail(regex_constants::error_range, m_position - m_base);
1647 return result;
1648 }
1649 --m_position;
1650 }
1651 result.first = *m_position++;
1652 return result;
1653 case regex_constants::syntax_escape:
1654 // check to see if escapes are supported first:
1655 if(this->flags() & regex_constants::no_escape_in_lists)
1656 {
1657 result = *m_position++;
1658 break;
1659 }
1660 ++m_position;
1661 result = unescape_character();
1662 break;
1663 case regex_constants::syntax_open_set:
1664 {
1665 if(m_end == ++m_position)
1666 {
1667 fail(regex_constants::error_collate, m_position - m_base);
1668 return result;
1669 }
1670 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)
1671 {
1672 --m_position;
1673 result.first = *m_position;
1674 ++m_position;
1675 return result;
1676 }
1677 if(m_end == ++m_position)
1678 {
1679 fail(regex_constants::error_collate, m_position - m_base);
1680 return result;
1681 }
1682 const charT* name_first = m_position;
1683 // skip at least one character, then find the matching ':]'
1684 if(m_end == ++m_position)
1685 {
1686 fail(regex_constants::error_collate, name_first - m_base);
1687 return result;
1688 }
1689 while((m_position != m_end)
1690 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot))
1691 ++m_position;
1692 const charT* name_last = m_position;
1693 if(m_end == m_position)
1694 {
1695 fail(regex_constants::error_collate, name_first - m_base);
1696 return result;
1697 }
1698 if((m_end == ++m_position)
1699 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1700 {
1701 fail(regex_constants::error_collate, name_first - m_base);
1702 return result;
1703 }
1704 ++m_position;
1705 string_type s = this->m_traits.lookup_collatename(name_first, name_last);
1706 if(s.empty() || (s.size() > 2))
1707 {
1708 fail(regex_constants::error_collate, name_first - m_base);
1709 return result;
1710 }
1711 result.first = s[0];
1712 if(s.size() > 1)
1713 result.second = s[1];
1714 else
1715 result.second = 0;
1716 return result;
1717 }
1718 default:
1719 result = *m_position++;
1720 }
1721 return result;
1722 }
1723
1724 //
1725 // does a value fit in the specified charT type?
1726 //
1727 template <class charT>
1728 bool valid_value(charT, boost::intmax_t v, const mpl::true_&)
1729 {
1730 return (v >> (sizeof(charT) * CHAR_BIT)) == 0;
1731 }
1732 template <class charT>
1733 bool valid_value(charT, boost::intmax_t, const mpl::false_&)
1734 {
1735 return true; // v will alsways fit in a charT
1736 }
1737 template <class charT>
1738 bool valid_value(charT c, boost::intmax_t v)
1739 {
1740 return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(boost::intmax_t))>());
1741 }
1742
1743 template <class charT, class traits>
1744 charT basic_regex_parser<charT, traits>::unescape_character()
1745 {
1746 #ifdef BOOST_MSVC
1747 #pragma warning(push)
1748 #pragma warning(disable:4127)
1749 #endif
1750 charT result(0);
1751 if(m_position == m_end)
1752 {
1753 fail(regex_constants::error_escape, m_position - m_base, "Escape sequence terminated prematurely.");
1754 return false;
1755 }
1756 switch(this->m_traits.escape_syntax_type(*m_position))
1757 {
1758 case regex_constants::escape_type_control_a:
1759 result = charT('\a');
1760 break;
1761 case regex_constants::escape_type_e:
1762 result = charT(27);
1763 break;
1764 case regex_constants::escape_type_control_f:
1765 result = charT('\f');
1766 break;
1767 case regex_constants::escape_type_control_n:
1768 result = charT('\n');
1769 break;
1770 case regex_constants::escape_type_control_r:
1771 result = charT('\r');
1772 break;
1773 case regex_constants::escape_type_control_t:
1774 result = charT('\t');
1775 break;
1776 case regex_constants::escape_type_control_v:
1777 result = charT('\v');
1778 break;
1779 case regex_constants::escape_type_word_assert:
1780 result = charT('\b');
1781 break;
1782 case regex_constants::escape_type_ascii_control:
1783 ++m_position;
1784 if(m_position == m_end)
1785 {
1786 // Rewind to start of escape:
1787 --m_position;
1788 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1789 fail(regex_constants::error_escape, m_position - m_base, "ASCII escape sequence terminated prematurely.");
1790 return result;
1791 }
1792 result = static_cast<charT>(*m_position % 32);
1793 break;
1794 case regex_constants::escape_type_hex:
1795 ++m_position;
1796 if(m_position == m_end)
1797 {
1798 // Rewind to start of escape:
1799 --m_position;
1800 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1801 fail(regex_constants::error_escape, m_position - m_base, "Hexadecimal escape sequence terminated prematurely.");
1802 return result;
1803 }
1804 // maybe have \x{ddd}
1805 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
1806 {
1807 ++m_position;
1808 if(m_position == m_end)
1809 {
1810 // Rewind to start of escape:
1811 --m_position;
1812 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1813 fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence.");
1814 return result;
1815 }
1816 boost::intmax_t i = this->m_traits.toi(m_position, m_end, 16);
1817 if((m_position == m_end)
1818 || (i < 0)
1819 || ((std::numeric_limits<charT>::is_specialized) && (i > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
1820 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
1821 {
1822 // Rewind to start of escape:
1823 --m_position;
1824 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1825 fail(regex_constants::error_badbrace, m_position - m_base, "Hexadecimal escape sequence was invalid.");
1826 return result;
1827 }
1828 ++m_position;
1829 result = charT(i);
1830 }
1831 else
1832 {
1833 std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), static_cast<std::ptrdiff_t>(m_end - m_position));
1834 boost::intmax_t i = this->m_traits.toi(m_position, m_position + len, 16);
1835 if((i < 0)
1836 || !valid_value(charT(0), i))
1837 {
1838 // Rewind to start of escape:
1839 --m_position;
1840 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1841 fail(regex_constants::error_escape, m_position - m_base, "Escape sequence did not encode a valid character.");
1842 return result;
1843 }
1844 result = charT(i);
1845 }
1846 return result;
1847 case regex_constants::syntax_digit:
1848 {
1849 // an octal escape sequence, the first character must be a zero
1850 // followed by up to 3 octal digits:
1851 std::ptrdiff_t len = (std::min)(::boost::BOOST_REGEX_DETAIL_NS::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
1852 const charT* bp = m_position;
1853 boost::intmax_t val = this->m_traits.toi(bp, bp + 1, 8);
1854 if(val != 0)
1855 {
1856 // Rewind to start of escape:
1857 --m_position;
1858 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1859 // Oops not an octal escape after all:
1860 fail(regex_constants::error_escape, m_position - m_base, "Invalid octal escape sequence.");
1861 return result;
1862 }
1863 val = this->m_traits.toi(m_position, m_position + len, 8);
1864 if((val < 0) || (val > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
1865 {
1866 // Rewind to start of escape:
1867 --m_position;
1868 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1869 fail(regex_constants::error_escape, m_position - m_base, "Octal escape sequence is invalid.");
1870 return result;
1871 }
1872 return static_cast<charT>(val);
1873 }
1874 case regex_constants::escape_type_named_char:
1875 {
1876 ++m_position;
1877 if(m_position == m_end)
1878 {
1879 // Rewind to start of escape:
1880 --m_position;
1881 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1882 fail(regex_constants::error_escape, m_position - m_base);
1883 return false;
1884 }
1885 // maybe have \N{name}
1886 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
1887 {
1888 const charT* base = m_position;
1889 // skip forward until we find enclosing brace:
1890 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
1891 ++m_position;
1892 if(m_position == m_end)
1893 {
1894 // Rewind to start of escape:
1895 --m_position;
1896 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1897 fail(regex_constants::error_escape, m_position - m_base);
1898 return false;
1899 }
1900 string_type s = this->m_traits.lookup_collatename(++base, m_position++);
1901 if(s.empty())
1902 {
1903 // Rewind to start of escape:
1904 --m_position;
1905 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1906 fail(regex_constants::error_collate, m_position - m_base);
1907 return false;
1908 }
1909 if(s.size() == 1)
1910 {
1911 return s[0];
1912 }
1913 }
1914 // fall through is a failure:
1915 // Rewind to start of escape:
1916 --m_position;
1917 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1918 fail(regex_constants::error_escape, m_position - m_base);
1919 return false;
1920 }
1921 default:
1922 result = *m_position;
1923 break;
1924 }
1925 ++m_position;
1926 return result;
1927 #ifdef BOOST_MSVC
1928 #pragma warning(pop)
1929 #endif
1930 }
1931
1932 template <class charT, class traits>
1933 bool basic_regex_parser<charT, traits>::parse_backref()
1934 {
1935 BOOST_ASSERT(m_position != m_end);
1936 const charT* pc = m_position;
1937 boost::intmax_t i = this->m_traits.toi(pc, pc + 1, 10);
1938 if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
1939 {
1940 // not a backref at all but an octal escape sequence:
1941 charT c = unescape_character();
1942 this->append_literal(c);
1943 }
1944 else if((i > 0) && (this->m_backrefs & (1u << (i-1))))
1945 {
1946 m_position = pc;
1947 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
1948 pb->index = i;
1949 pb->icase = this->flags() & regbase::icase;
1950 }
1951 else
1952 {
1953 // Rewind to start of escape:
1954 --m_position;
1955 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1956 fail(regex_constants::error_backref, m_position - m_base);
1957 return false;
1958 }
1959 return true;
1960 }
1961
1962 template <class charT, class traits>
1963 bool basic_regex_parser<charT, traits>::parse_QE()
1964 {
1965 #ifdef BOOST_MSVC
1966 #pragma warning(push)
1967 #pragma warning(disable:4127)
1968 #endif
1969 //
1970 // parse a \Q...\E sequence:
1971 //
1972 ++m_position; // skip the Q
1973 const charT* start = m_position;
1974 const charT* end;
1975 do
1976 {
1977 while((m_position != m_end)
1978 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape))
1979 ++m_position;
1980 if(m_position == m_end)
1981 {
1982 // a \Q...\E sequence may terminate with the end of the expression:
1983 end = m_position;
1984 break;
1985 }
1986 if(++m_position == m_end) // skip the escape
1987 {
1988 fail(regex_constants::error_escape, m_position - m_base, "Unterminated \\Q...\\E sequence.");
1989 return false;
1990 }
1991 // check to see if it's a \E:
1992 if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E)
1993 {
1994 ++m_position;
1995 end = m_position - 2;
1996 break;
1997 }
1998 // otherwise go round again:
1999 }while(true);
2000 //
2001 // now add all the character between the two escapes as literals:
2002 //
2003 while(start != end)
2004 {
2005 this->append_literal(*start);
2006 ++start;
2007 }
2008 return true;
2009 #ifdef BOOST_MSVC
2010 #pragma warning(pop)
2011 #endif
2012 }
2013
2014 template <class charT, class traits>
2015 bool basic_regex_parser<charT, traits>::parse_perl_extension()
2016 {
2017 if(++m_position == m_end)
2018 {
2019 // Rewind to start of (? sequence:
2020 --m_position;
2021 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2022 fail(regex_constants::error_perl_extension, m_position - m_base);
2023 return false;
2024 }
2025 //
2026 // treat comments as a special case, as these
2027 // are the only ones that don't start with a leading
2028 // startmark state:
2029 //
2030 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash)
2031 {
2032 while((m_position != m_end)
2033 && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))
2034 {}
2035 return true;
2036 }
2037 //
2038 // backup some state, and prepare the way:
2039 //
2040 int markid = 0;
2041 std::ptrdiff_t jump_offset = 0;
2042 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
2043 pb->icase = this->flags() & regbase::icase;
2044 std::ptrdiff_t last_paren_start = this->getoffset(pb);
2045 // back up insertion point for alternations, and set new point:
2046 std::ptrdiff_t last_alt_point = m_alt_insert_point;
2047 this->m_pdata->m_data.align();
2048 m_alt_insert_point = this->m_pdata->m_data.size();
2049 std::ptrdiff_t expected_alt_point = m_alt_insert_point;
2050 bool restore_flags = true;
2051 regex_constants::syntax_option_type old_flags = this->flags();
2052 bool old_case_change = m_has_case_change;
2053 m_has_case_change = false;
2054 charT name_delim;
2055 int mark_reset = m_mark_reset;
2056 int max_mark = m_max_mark;
2057 m_mark_reset = -1;
2058 m_max_mark = m_mark_count;
2059 boost::intmax_t v;
2060 //
2061 // select the actual extension used:
2062 //
2063 switch(this->m_traits.syntax_type(*m_position))
2064 {
2065 case regex_constants::syntax_or:
2066 m_mark_reset = m_mark_count;
2067 BOOST_FALLTHROUGH;
2068 case regex_constants::syntax_colon:
2069 //
2070 // a non-capturing mark:
2071 //
2072 pb->index = markid = 0;
2073 ++m_position;
2074 break;
2075 case regex_constants::syntax_digit:
2076 {
2077 //
2078 // a recursive subexpression:
2079 //
2080 v = this->m_traits.toi(m_position, m_end, 10);
2081 if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2082 {
2083 // Rewind to start of (? sequence:
2084 --m_position;
2085 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2086 fail(regex_constants::error_perl_extension, m_position - m_base, "The recursive sub-expression refers to an invalid marking group, or is unterminated.");
2087 return false;
2088 }
2089 insert_recursion:
2090 pb->index = markid = 0;
2091 re_recurse* pr = static_cast<re_recurse*>(this->append_state(syntax_element_recurse, sizeof(re_recurse)));
2092 pr->alt.i = v;
2093 pr->state_id = 0;
2094 static_cast<re_case*>(
2095 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2096 )->icase = this->flags() & regbase::icase;
2097 break;
2098 }
2099 case regex_constants::syntax_plus:
2100 //
2101 // A forward-relative recursive subexpression:
2102 //
2103 ++m_position;
2104 v = this->m_traits.toi(m_position, m_end, 10);
2105 if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2106 {
2107 // Rewind to start of (? sequence:
2108 --m_position;
2109 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2110 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2111 return false;
2112 }
2113 if ((std::numeric_limits<boost::intmax_t>::max)() - m_mark_count < v)
2114 {
2115 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2116 return false;
2117 }
2118 v += m_mark_count;
2119 goto insert_recursion;
2120 case regex_constants::syntax_dash:
2121 //
2122 // Possibly a backward-relative recursive subexpression:
2123 //
2124 ++m_position;
2125 v = this->m_traits.toi(m_position, m_end, 10);
2126 if(v <= 0)
2127 {
2128 --m_position;
2129 // Oops not a relative recursion at all, but a (?-imsx) group:
2130 goto option_group_jump;
2131 }
2132 v = m_mark_count + 1 - v;
2133 if(v <= 0)
2134 {
2135 // Rewind to start of (? sequence:
2136 --m_position;
2137 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2138 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2139 return false;
2140 }
2141 goto insert_recursion;
2142 case regex_constants::syntax_equal:
2143 pb->index = markid = -1;
2144 ++m_position;
2145 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2146 this->m_pdata->m_data.align();
2147 m_alt_insert_point = this->m_pdata->m_data.size();
2148 break;
2149 case regex_constants::syntax_not:
2150 pb->index = markid = -2;
2151 ++m_position;
2152 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2153 this->m_pdata->m_data.align();
2154 m_alt_insert_point = this->m_pdata->m_data.size();
2155 break;
2156 case regex_constants::escape_type_left_word:
2157 {
2158 // a lookbehind assertion:
2159 if(++m_position == m_end)
2160 {
2161 // Rewind to start of (? sequence:
2162 --m_position;
2163 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2164 fail(regex_constants::error_perl_extension, m_position - m_base);
2165 return false;
2166 }
2167 regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position);
2168 if(t == regex_constants::syntax_not)
2169 pb->index = markid = -2;
2170 else if(t == regex_constants::syntax_equal)
2171 pb->index = markid = -1;
2172 else
2173 {
2174 // Probably a named capture which also starts (?< :
2175 name_delim = '>';
2176 --m_position;
2177 goto named_capture_jump;
2178 }
2179 ++m_position;
2180 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2181 this->append_state(syntax_element_backstep, sizeof(re_brace));
2182 this->m_pdata->m_data.align();
2183 m_alt_insert_point = this->m_pdata->m_data.size();
2184 break;
2185 }
2186 case regex_constants::escape_type_right_word:
2187 //
2188 // an independent sub-expression:
2189 //
2190 pb->index = markid = -3;
2191 ++m_position;
2192 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2193 this->m_pdata->m_data.align();
2194 m_alt_insert_point = this->m_pdata->m_data.size();
2195 break;
2196 case regex_constants::syntax_open_mark:
2197 {
2198 // a conditional expression:
2199 pb->index = markid = -4;
2200 if(++m_position == m_end)
2201 {
2202 // Rewind to start of (? sequence:
2203 --m_position;
2204 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2205 fail(regex_constants::error_perl_extension, m_position - m_base);
2206 return false;
2207 }
2208 v = this->m_traits.toi(m_position, m_end, 10);
2209 if(m_position == m_end)
2210 {
2211 // Rewind to start of (? sequence:
2212 --m_position;
2213 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2214 fail(regex_constants::error_perl_extension, m_position - m_base);
2215 return false;
2216 }
2217 if(*m_position == charT('R'))
2218 {
2219 if(++m_position == m_end)
2220 {
2221 // Rewind to start of (? sequence:
2222 --m_position;
2223 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2224 fail(regex_constants::error_perl_extension, m_position - m_base);
2225 return false;
2226 }
2227 if(*m_position == charT('&'))
2228 {
2229 const charT* base = ++m_position;
2230 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2231 ++m_position;
2232 if(m_position == m_end)
2233 {
2234 // Rewind to start of (? sequence:
2235 --m_position;
2236 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2237 fail(regex_constants::error_perl_extension, m_position - m_base);
2238 return false;
2239 }
2240 v = -static_cast<int>(hash_value_from_capture_name(base, m_position));
2241 }
2242 else
2243 {
2244 v = -this->m_traits.toi(m_position, m_end, 10);
2245 }
2246 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2247 br->index = v < 0 ? (v - 1) : 0;
2248 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2249 {
2250 // Rewind to start of (? sequence:
2251 --m_position;
2252 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2253 fail(regex_constants::error_perl_extension, m_position - m_base);
2254 return false;
2255 }
2256 if(++m_position == m_end)
2257 {
2258 // Rewind to start of (? sequence:
2259 --m_position;
2260 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2261 fail(regex_constants::error_perl_extension, m_position - m_base);
2262 return false;
2263 }
2264 }
2265 else if((*m_position == charT('\'')) || (*m_position == charT('<')))
2266 {
2267 const charT* base = ++m_position;
2268 while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\'')))
2269 ++m_position;
2270 if(m_position == m_end)
2271 {
2272 // Rewind to start of (? sequence:
2273 --m_position;
2274 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2275 fail(regex_constants::error_perl_extension, m_position - m_base);
2276 return false;
2277 }
2278 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2279 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2280 br->index = v;
2281 if(((*m_position != charT('>')) && (*m_position != charT('\''))) || (++m_position == m_end))
2282 {
2283 // Rewind to start of (? sequence:
2284 --m_position;
2285 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2286 fail(regex_constants::error_perl_extension, m_position - m_base, "Unterminated named capture.");
2287 return false;
2288 }
2289 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2290 {
2291 // Rewind to start of (? sequence:
2292 --m_position;
2293 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2294 fail(regex_constants::error_perl_extension, m_position - m_base);
2295 return false;
2296 }
2297 if(++m_position == m_end)
2298 {
2299 // Rewind to start of (? sequence:
2300 --m_position;
2301 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2302 fail(regex_constants::error_perl_extension, m_position - m_base);
2303 return false;
2304 }
2305 }
2306 else if(*m_position == charT('D'))
2307 {
2308 const char* def = "DEFINE";
2309 while(*def && (m_position != m_end) && (*m_position == charT(*def)))
2310 ++m_position, ++def;
2311 if((m_position == m_end) || *def)
2312 {
2313 // Rewind to start of (? sequence:
2314 --m_position;
2315 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2316 fail(regex_constants::error_perl_extension, m_position - m_base);
2317 return false;
2318 }
2319 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2320 br->index = 9999; // special magic value!
2321 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2322 {
2323 // Rewind to start of (? sequence:
2324 --m_position;
2325 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2326 fail(regex_constants::error_perl_extension, m_position - m_base);
2327 return false;
2328 }
2329 if(++m_position == m_end)
2330 {
2331 // Rewind to start of (? sequence:
2332 --m_position;
2333 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2334 fail(regex_constants::error_perl_extension, m_position - m_base);
2335 return false;
2336 }
2337 }
2338 else if(v > 0)
2339 {
2340 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2341 br->index = v;
2342 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2343 {
2344 // Rewind to start of (? sequence:
2345 --m_position;
2346 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2347 fail(regex_constants::error_perl_extension, m_position - m_base);
2348 return false;
2349 }
2350 if(++m_position == m_end)
2351 {
2352 // Rewind to start of (? sequence:
2353 --m_position;
2354 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2355 fail(regex_constants::error_perl_extension, m_position - m_base);
2356 return false;
2357 }
2358 }
2359 else
2360 {
2361 // verify that we have a lookahead or lookbehind assert:
2362 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
2363 {
2364 // Rewind to start of (? sequence:
2365 --m_position;
2366 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2367 fail(regex_constants::error_perl_extension, m_position - m_base);
2368 return false;
2369 }
2370 if(++m_position == m_end)
2371 {
2372 // Rewind to start of (? sequence:
2373 --m_position;
2374 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2375 fail(regex_constants::error_perl_extension, m_position - m_base);
2376 return false;
2377 }
2378 if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
2379 {
2380 if(++m_position == m_end)
2381 {
2382 // Rewind to start of (? sequence:
2383 --m_position;
2384 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2385 fail(regex_constants::error_perl_extension, m_position - m_base);
2386 return false;
2387 }
2388 if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
2389 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
2390 {
2391 // Rewind to start of (? sequence:
2392 --m_position;
2393 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2394 fail(regex_constants::error_perl_extension, m_position - m_base);
2395 return false;
2396 }
2397 m_position -= 3;
2398 }
2399 else
2400 {
2401 if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
2402 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
2403 {
2404 // Rewind to start of (? sequence:
2405 --m_position;
2406 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2407 fail(regex_constants::error_perl_extension, m_position - m_base);
2408 return false;
2409 }
2410 m_position -= 2;
2411 }
2412 }
2413 break;
2414 }
2415 case regex_constants::syntax_close_mark:
2416 // Rewind to start of (? sequence:
2417 --m_position;
2418 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2419 fail(regex_constants::error_perl_extension, m_position - m_base);
2420 return false;
2421 case regex_constants::escape_type_end_buffer:
2422 {
2423 name_delim = *m_position;
2424 named_capture_jump:
2425 markid = 0;
2426 if(0 == (this->flags() & regbase::nosubs))
2427 {
2428 markid = ++m_mark_count;
2429 #ifndef BOOST_NO_STD_DISTANCE
2430 if(this->flags() & regbase::save_subexpression_location)
2431 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 2, 0));
2432 #else
2433 if(this->flags() & regbase::save_subexpression_location)
2434 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 2, 0));
2435 #endif
2436 }
2437 pb->index = markid;
2438 const charT* base = ++m_position;
2439 if(m_position == m_end)
2440 {
2441 // Rewind to start of (? sequence:
2442 --m_position;
2443 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2444 fail(regex_constants::error_perl_extension, m_position - m_base);
2445 return false;
2446 }
2447 while((m_position != m_end) && (*m_position != name_delim))
2448 ++m_position;
2449 if(m_position == m_end)
2450 {
2451 // Rewind to start of (? sequence:
2452 --m_position;
2453 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2454 fail(regex_constants::error_perl_extension, m_position - m_base);
2455 return false;
2456 }
2457 this->m_pdata->set_name(base, m_position, markid);
2458 ++m_position;
2459 break;
2460 }
2461 default:
2462 if(*m_position == charT('R'))
2463 {
2464 ++m_position;
2465 v = 0;
2466 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2467 {
2468 // Rewind to start of (? sequence:
2469 --m_position;
2470 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2471 fail(regex_constants::error_perl_extension, m_position - m_base);
2472 return false;
2473 }
2474 goto insert_recursion;
2475 }
2476 if(*m_position == charT('&'))
2477 {
2478 ++m_position;
2479 const charT* base = m_position;
2480 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2481 ++m_position;
2482 if(m_position == m_end)
2483 {
2484 // Rewind to start of (? sequence:
2485 --m_position;
2486 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2487 fail(regex_constants::error_perl_extension, m_position - m_base);
2488 return false;
2489 }
2490 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2491 goto insert_recursion;
2492 }
2493 if(*m_position == charT('P'))
2494 {
2495 ++m_position;
2496 if(m_position == m_end)
2497 {
2498 // Rewind to start of (? sequence:
2499 --m_position;
2500 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2501 fail(regex_constants::error_perl_extension, m_position - m_base);
2502 return false;
2503 }
2504 if(*m_position == charT('>'))
2505 {
2506 ++m_position;
2507 const charT* base = m_position;
2508 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2509 ++m_position;
2510 if(m_position == m_end)
2511 {
2512 // Rewind to start of (? sequence:
2513 --m_position;
2514 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2515 fail(regex_constants::error_perl_extension, m_position - m_base);
2516 return false;
2517 }
2518 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2519 goto insert_recursion;
2520 }
2521 }
2522 //
2523 // lets assume that we have a (?imsx) group and try and parse it:
2524 //
2525 option_group_jump:
2526 regex_constants::syntax_option_type opts = parse_options();
2527 if(m_position == m_end)
2528 {
2529 // Rewind to start of (? sequence:
2530 --m_position;
2531 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2532 fail(regex_constants::error_perl_extension, m_position - m_base);
2533 return false;
2534 }
2535 // make a note of whether we have a case change:
2536 m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase));
2537 pb->index = markid = 0;
2538 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark)
2539 {
2540 // update flags and carry on as normal:
2541 this->flags(opts);
2542 restore_flags = false;
2543 old_case_change |= m_has_case_change; // defer end of scope by one ')'
2544 }
2545 else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon)
2546 {
2547 // update flags and carry on until the matching ')' is found:
2548 this->flags(opts);
2549 ++m_position;
2550 }
2551 else
2552 {
2553 // Rewind to start of (? sequence:
2554 --m_position;
2555 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2556 fail(regex_constants::error_perl_extension, m_position - m_base);
2557 return false;
2558 }
2559
2560 // finally append a case change state if we need it:
2561 if(m_has_case_change)
2562 {
2563 static_cast<re_case*>(
2564 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2565 )->icase = opts & regbase::icase;
2566 }
2567
2568 }
2569 //
2570 // now recursively add more states, this will terminate when we get to a
2571 // matching ')' :
2572 //
2573 parse_all();
2574 //
2575 // Unwind alternatives:
2576 //
2577 if(0 == unwind_alts(last_paren_start))
2578 {
2579 // Rewind to start of (? sequence:
2580 --m_position;
2581 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2582 fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid alternation operators within (?...) block.");
2583 return false;
2584 }
2585 //
2586 // we either have a ')' or we have run out of characters prematurely:
2587 //
2588 if(m_position == m_end)
2589 {
2590 // Rewind to start of (? sequence:
2591 --m_position;
2592 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2593 this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
2594 return false;
2595 }
2596 BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
2597 ++m_position;
2598 //
2599 // restore the flags:
2600 //
2601 if(restore_flags)
2602 {
2603 // append a case change state if we need it:
2604 if(m_has_case_change)
2605 {
2606 static_cast<re_case*>(
2607 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2608 )->icase = old_flags & regbase::icase;
2609 }
2610 this->flags(old_flags);
2611 }
2612 //
2613 // set up the jump pointer if we have one:
2614 //
2615 if(jump_offset)
2616 {
2617 this->m_pdata->m_data.align();
2618 re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
2619 jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
2620 if((this->m_last_state == jmp) && (markid != -2))
2621 {
2622 // Oops... we didn't have anything inside the assertion.
2623 // Note we don't get here for negated forward lookahead as (?!)
2624 // does have some uses.
2625 // Rewind to start of (? sequence:
2626 --m_position;
2627 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2628 fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid or empty zero width assertion.");
2629 return false;
2630 }
2631 }
2632 //
2633 // verify that if this is conditional expression, that we do have
2634 // an alternative, if not add one:
2635 //
2636 if(markid == -4)
2637 {
2638 re_syntax_base* b = this->getaddress(expected_alt_point);
2639 // Make sure we have exactly one alternative following this state:
2640 if(b->type != syntax_element_alt)
2641 {
2642 re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
2643 alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
2644 }
2645 else if(((std::ptrdiff_t)this->m_pdata->m_data.size() > (static_cast<re_alt*>(b)->alt.i + this->getoffset(b))) && (static_cast<re_alt*>(b)->alt.i > 0) && this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
2646 {
2647 // Can't have seen more than one alternative:
2648 // Rewind to start of (? sequence:
2649 --m_position;
2650 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2651 fail(regex_constants::error_bad_pattern, m_position - m_base, "More than one alternation operator | was encountered inside a conditional expression.");
2652 return false;
2653 }
2654 else
2655 {
2656 // We must *not* have seen an alternative inside a (DEFINE) block:
2657 b = this->getaddress(b->next.i, b);
2658 if((b->type == syntax_element_assert_backref) && (static_cast<re_brace*>(b)->index == 9999))
2659 {
2660 // Rewind to start of (? sequence:
2661 --m_position;
2662 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2663 fail(regex_constants::error_bad_pattern, m_position - m_base, "Alternation operators are not allowed inside a DEFINE block.");
2664 return false;
2665 }
2666 }
2667 // check for invalid repetition of next state:
2668 b = this->getaddress(expected_alt_point);
2669 b = this->getaddress(static_cast<re_alt*>(b)->next.i, b);
2670 if((b->type != syntax_element_assert_backref)
2671 && (b->type != syntax_element_startmark))
2672 {
2673 // Rewind to start of (? sequence:
2674 --m_position;
2675 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2676 fail(regex_constants::error_badrepeat, m_position - m_base, "A repetition operator cannot be applied to a zero-width assertion.");
2677 return false;
2678 }
2679 }
2680 //
2681 // append closing parenthesis state:
2682 //
2683 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
2684 pb->index = markid;
2685 pb->icase = this->flags() & regbase::icase;
2686 this->m_paren_start = last_paren_start;
2687 //
2688 // restore the alternate insertion point:
2689 //
2690 this->m_alt_insert_point = last_alt_point;
2691 //
2692 // and the case change data:
2693 //
2694 m_has_case_change = old_case_change;
2695 //
2696 // And the mark_reset data:
2697 //
2698 if(m_max_mark > m_mark_count)
2699 {
2700 m_mark_count = m_max_mark;
2701 }
2702 m_mark_reset = mark_reset;
2703 m_max_mark = max_mark;
2704
2705
2706 if(markid > 0)
2707 {
2708 #ifndef BOOST_NO_STD_DISTANCE
2709 if(this->flags() & regbase::save_subexpression_location)
2710 this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position) - 1;
2711 #else
2712 if(this->flags() & regbase::save_subexpression_location)
2713 this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base) - 1;
2714 #endif
2715 //
2716 // allow backrefs to this mark:
2717 //
2718 if(markid < (int)(sizeof(unsigned) * CHAR_BIT))
2719 this->m_backrefs |= 1u << (markid - 1);
2720 }
2721 return true;
2722 }
2723
2724 template <class charT, class traits>
2725 bool basic_regex_parser<charT, traits>::match_verb(const char* verb)
2726 {
2727 while(*verb)
2728 {
2729 if(static_cast<charT>(*verb) != *m_position)
2730 {
2731 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2732 fail(regex_constants::error_perl_extension, m_position - m_base);
2733 return false;
2734 }
2735 if(++m_position == m_end)
2736 {
2737 --m_position;
2738 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2739 fail(regex_constants::error_perl_extension, m_position - m_base);
2740 return false;
2741 }
2742 ++verb;
2743 }
2744 return true;
2745 }
2746
2747 template <class charT, class traits>
2748 bool basic_regex_parser<charT, traits>::parse_perl_verb()
2749 {
2750 if(++m_position == m_end)
2751 {
2752 // Rewind to start of (* sequence:
2753 --m_position;
2754 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2755 fail(regex_constants::error_perl_extension, m_position - m_base);
2756 return false;
2757 }
2758 switch(*m_position)
2759 {
2760 case 'F':
2761 if(++m_position == m_end)
2762 {
2763 // Rewind to start of (* sequence:
2764 --m_position;
2765 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2766 fail(regex_constants::error_perl_extension, m_position - m_base);
2767 return false;
2768 }
2769 if((this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) || match_verb("AIL"))
2770 {
2771 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2772 {
2773 // Rewind to start of (* sequence:
2774 --m_position;
2775 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2776 fail(regex_constants::error_perl_extension, m_position - m_base);
2777 return false;
2778 }
2779 ++m_position;
2780 this->append_state(syntax_element_fail);
2781 return true;
2782 }
2783 break;
2784 case 'A':
2785 if(++m_position == m_end)
2786 {
2787 // Rewind to start of (* sequence:
2788 --m_position;
2789 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2790 fail(regex_constants::error_perl_extension, m_position - m_base);
2791 return false;
2792 }
2793 if(match_verb("CCEPT"))
2794 {
2795 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2796 {
2797 // Rewind to start of (* sequence:
2798 --m_position;
2799 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2800 fail(regex_constants::error_perl_extension, m_position - m_base);
2801 return false;
2802 }
2803 ++m_position;
2804 this->append_state(syntax_element_accept);
2805 return true;
2806 }
2807 break;
2808 case 'C':
2809 if(++m_position == m_end)
2810 {
2811 // Rewind to start of (* sequence:
2812 --m_position;
2813 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2814 fail(regex_constants::error_perl_extension, m_position - m_base);
2815 return false;
2816 }
2817 if(match_verb("OMMIT"))
2818 {
2819 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2820 {
2821 // Rewind to start of (* sequence:
2822 --m_position;
2823 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2824 fail(regex_constants::error_perl_extension, m_position - m_base);
2825 return false;
2826 }
2827 ++m_position;
2828 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_commit;
2829 this->m_pdata->m_disable_match_any = true;
2830 return true;
2831 }
2832 break;
2833 case 'P':
2834 if(++m_position == m_end)
2835 {
2836 // Rewind to start of (* sequence:
2837 --m_position;
2838 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2839 fail(regex_constants::error_perl_extension, m_position - m_base);
2840 return false;
2841 }
2842 if(match_verb("RUNE"))
2843 {
2844 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2845 {
2846 // Rewind to start of (* sequence:
2847 --m_position;
2848 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2849 fail(regex_constants::error_perl_extension, m_position - m_base);
2850 return false;
2851 }
2852 ++m_position;
2853 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_prune;
2854 this->m_pdata->m_disable_match_any = true;
2855 return true;
2856 }
2857 break;
2858 case 'S':
2859 if(++m_position == m_end)
2860 {
2861 // Rewind to start of (* sequence:
2862 --m_position;
2863 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2864 fail(regex_constants::error_perl_extension, m_position - m_base);
2865 return false;
2866 }
2867 if(match_verb("KIP"))
2868 {
2869 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2870 {
2871 // Rewind to start of (* sequence:
2872 --m_position;
2873 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2874 fail(regex_constants::error_perl_extension, m_position - m_base);
2875 return false;
2876 }
2877 ++m_position;
2878 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_skip;
2879 this->m_pdata->m_disable_match_any = true;
2880 return true;
2881 }
2882 break;
2883 case 'T':
2884 if(++m_position == m_end)
2885 {
2886 // Rewind to start of (* sequence:
2887 --m_position;
2888 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2889 fail(regex_constants::error_perl_extension, m_position - m_base);
2890 return false;
2891 }
2892 if(match_verb("HEN"))
2893 {
2894 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2895 {
2896 // Rewind to start of (* sequence:
2897 --m_position;
2898 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2899 fail(regex_constants::error_perl_extension, m_position - m_base);
2900 return false;
2901 }
2902 ++m_position;
2903 this->append_state(syntax_element_then);
2904 this->m_pdata->m_disable_match_any = true;
2905 return true;
2906 }
2907 break;
2908 }
2909 // Rewind to start of (* sequence:
2910 --m_position;
2911 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2912 fail(regex_constants::error_perl_extension, m_position - m_base);
2913 return false;
2914 }
2915
2916 template <class charT, class traits>
2917 bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
2918 {
2919 //
2920 // parses an emacs style \sx or \Sx construct.
2921 //
2922 if(++m_position == m_end)
2923 {
2924 // Rewind to start of sequence:
2925 --m_position;
2926 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
2927 fail(regex_constants::error_escape, m_position - m_base);
2928 return false;
2929 }
2930 basic_char_set<charT, traits> char_set;
2931 if(negate)
2932 char_set.negate();
2933
2934 static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', };
2935
2936 switch(*m_position)
2937 {
2938 case 's':
2939 case ' ':
2940 char_set.add_class(this->m_mask_space);
2941 break;
2942 case 'w':
2943 char_set.add_class(this->m_word_mask);
2944 break;
2945 case '_':
2946 char_set.add_single(digraph<charT>(charT('$')));
2947 char_set.add_single(digraph<charT>(charT('&')));
2948 char_set.add_single(digraph<charT>(charT('*')));
2949 char_set.add_single(digraph<charT>(charT('+')));
2950 char_set.add_single(digraph<charT>(charT('-')));
2951 char_set.add_single(digraph<charT>(charT('_')));
2952 char_set.add_single(digraph<charT>(charT('<')));
2953 char_set.add_single(digraph<charT>(charT('>')));
2954 break;
2955 case '.':
2956 char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5));
2957 break;
2958 case '(':
2959 char_set.add_single(digraph<charT>(charT('(')));
2960 char_set.add_single(digraph<charT>(charT('[')));
2961 char_set.add_single(digraph<charT>(charT('{')));
2962 break;
2963 case ')':
2964 char_set.add_single(digraph<charT>(charT(')')));
2965 char_set.add_single(digraph<charT>(charT(']')));
2966 char_set.add_single(digraph<charT>(charT('}')));
2967 break;
2968 case '"':
2969 char_set.add_single(digraph<charT>(charT('"')));
2970 char_set.add_single(digraph<charT>(charT('\'')));
2971 char_set.add_single(digraph<charT>(charT('`')));
2972 break;
2973 case '\'':
2974 char_set.add_single(digraph<charT>(charT('\'')));
2975 char_set.add_single(digraph<charT>(charT(',')));
2976 char_set.add_single(digraph<charT>(charT('#')));
2977 break;
2978 case '<':
2979 char_set.add_single(digraph<charT>(charT(';')));
2980 break;
2981 case '>':
2982 char_set.add_single(digraph<charT>(charT('\n')));
2983 char_set.add_single(digraph<charT>(charT('\f')));
2984 break;
2985 default:
2986 fail(regex_constants::error_ctype, m_position - m_base);
2987 return false;
2988 }
2989 if(0 == this->append_set(char_set))
2990 {
2991 fail(regex_constants::error_ctype, m_position - m_base);
2992 return false;
2993 }
2994 ++m_position;
2995 return true;
2996 }
2997
2998 template <class charT, class traits>
2999 regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options()
3000 {
3001 // we have a (?imsx-imsx) group, convert it into a set of flags:
3002 regex_constants::syntax_option_type f = this->flags();
3003 bool breakout = false;
3004 do
3005 {
3006 switch(*m_position)
3007 {
3008 case 's':
3009 f |= regex_constants::mod_s;
3010 f &= ~regex_constants::no_mod_s;
3011 break;
3012 case 'm':
3013 f &= ~regex_constants::no_mod_m;
3014 break;
3015 case 'i':
3016 f |= regex_constants::icase;
3017 break;
3018 case 'x':
3019 f |= regex_constants::mod_x;
3020 break;
3021 default:
3022 breakout = true;
3023 continue;
3024 }
3025 if(++m_position == m_end)
3026 {
3027 // Rewind to start of (? sequence:
3028 --m_position;
3029 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
3030 fail(regex_constants::error_paren, m_position - m_base);
3031 return false;
3032 }
3033 }
3034 while(!breakout);
3035
3036 breakout = false;
3037
3038 if(*m_position == static_cast<charT>('-'))
3039 {
3040 if(++m_position == m_end)
3041 {
3042 // Rewind to start of (? sequence:
3043 --m_position;
3044 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
3045 fail(regex_constants::error_paren, m_position - m_base);
3046 return false;
3047 }
3048 do
3049 {
3050 switch(*m_position)
3051 {
3052 case 's':
3053 f &= ~regex_constants::mod_s;
3054 f |= regex_constants::no_mod_s;
3055 break;
3056 case 'm':
3057 f |= regex_constants::no_mod_m;
3058 break;
3059 case 'i':
3060 f &= ~regex_constants::icase;
3061 break;
3062 case 'x':
3063 f &= ~regex_constants::mod_x;
3064 break;
3065 default:
3066 breakout = true;
3067 continue;
3068 }
3069 if(++m_position == m_end)
3070 {
3071 // Rewind to start of (? sequence:
3072 --m_position;
3073 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
3074 fail(regex_constants::error_paren, m_position - m_base);
3075 return false;
3076 }
3077 }
3078 while(!breakout);
3079 }
3080 return f;
3081 }
3082
3083 template <class charT, class traits>
3084 bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_start)
3085 {
3086 //
3087 // If we didn't actually add any states after the last
3088 // alternative then that's an error:
3089 //
3090 if((this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))
3091 && m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start)
3092 &&
3093 !(
3094 ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
3095 &&
3096 ((this->flags() & regbase::no_empty_expressions) == 0)
3097 )
3098 )
3099 {
3100 fail(regex_constants::error_empty, this->m_position - this->m_base, "Can't terminate a sub-expression with an alternation operator |.");
3101 return false;
3102 }
3103 //
3104 // Fix up our alternatives:
3105 //
3106 while(m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start))
3107 {
3108 //
3109 // fix up the jump to point to the end of the states
3110 // that we've just added:
3111 //
3112 std::ptrdiff_t jump_offset = m_alt_jumps.back();
3113 m_alt_jumps.pop_back();
3114 this->m_pdata->m_data.align();
3115 re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
3116 BOOST_ASSERT(jmp->type == syntax_element_jump);
3117 jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
3118 }
3119 return true;
3120 }
3121
3122 #ifdef BOOST_MSVC
3123 #pragma warning(pop)
3124 #endif
3125
3126 } // namespace BOOST_REGEX_DETAIL_NS
3127 } // namespace boost
3128
3129 #ifdef BOOST_MSVC
3130 #pragma warning(push)
3131 #pragma warning(disable: 4103)
3132 #endif
3133 #ifdef BOOST_HAS_ABI_HEADERS
3134 # include BOOST_ABI_SUFFIX
3135 #endif
3136 #ifdef BOOST_MSVC
3137 #pragma warning(pop)
3138 #endif
3139
3140 #endif