]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/boost/regex/v4/basic_regex_parser.hpp
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / boost / boost / regex / v4 / basic_regex_parser.hpp
CommitLineData
7c673cae
FG
1/*
2 *
3 * Copyright (c) 2004
4 * John Maddock
5 *
6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 *
10 */
11
 /*
  *   LOCATION:    see http://www.boost.org for most recent version.
  *   FILE         basic_regex_parser.hpp
  *   VERSION      see <boost/version.hpp>
  *   DESCRIPTION: Declares template class basic_regex_parser.
  */
18
19#ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
20#define BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
21
22#ifdef BOOST_MSVC
23#pragma warning(push)
24#pragma warning(disable: 4103)
20effc67
TL
25#if BOOST_MSVC >= 1800
26#pragma warning(disable: 26812)
27#endif
7c673cae
FG
28#endif
29#ifdef BOOST_HAS_ABI_HEADERS
30# include BOOST_ABI_PREFIX
31#endif
32#ifdef BOOST_MSVC
33#pragma warning(pop)
34#endif
35
36namespace boost{
37namespace BOOST_REGEX_DETAIL_NS{
38
39#ifdef BOOST_MSVC
40#pragma warning(push)
92f5a8d4
TL
41#pragma warning(disable:4244)
42#if BOOST_MSVC < 1910
43#pragma warning(disable:4800)
44#endif
7c673cae
FG
45#endif
46
47inline boost::intmax_t umax(mpl::false_ const&)
48{
49 // Get out clause here, just in case numeric_limits is unspecialized:
50 return std::numeric_limits<boost::intmax_t>::is_specialized ? (std::numeric_limits<boost::intmax_t>::max)() : INT_MAX;
51}
52inline boost::intmax_t umax(mpl::true_ const&)
53{
54 return (std::numeric_limits<std::size_t>::max)();
55}
56
57inline boost::intmax_t umax()
58{
59 return umax(mpl::bool_<std::numeric_limits<boost::intmax_t>::digits >= std::numeric_limits<std::size_t>::digits>());
60}
61
62template <class charT, class traits>
63class basic_regex_parser : public basic_regex_creator<charT, traits>
64{
65public:
66 basic_regex_parser(regex_data<charT, traits>* data);
67 void parse(const charT* p1, const charT* p2, unsigned flags);
68 void fail(regex_constants::error_type error_code, std::ptrdiff_t position);
69 void fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos);
70 void fail(regex_constants::error_type error_code, std::ptrdiff_t position, const std::string& message)
71 {
72 fail(error_code, position, message, position);
73 }
74
75 bool parse_all();
76 bool parse_basic();
77 bool parse_extended();
78 bool parse_literal();
79 bool parse_open_paren();
80 bool parse_basic_escape();
81 bool parse_extended_escape();
82 bool parse_match_any();
83 bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
84 bool parse_repeat_range(bool isbasic);
85 bool parse_alt();
86 bool parse_set();
87 bool parse_backref();
88 void parse_set_literal(basic_char_set<charT, traits>& char_set);
89 bool parse_inner_set(basic_char_set<charT, traits>& char_set);
90 bool parse_QE();
91 bool parse_perl_extension();
92 bool parse_perl_verb();
93 bool match_verb(const char*);
94 bool add_emacs_code(bool negate);
95 bool unwind_alts(std::ptrdiff_t last_paren_start);
96 digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
97 charT unescape_character();
98 regex_constants::syntax_option_type parse_options();
99
100private:
101 typedef bool (basic_regex_parser::*parser_proc_type)();
102 typedef typename traits::string_type string_type;
103 typedef typename traits::char_class_type char_class_type;
104 parser_proc_type m_parser_proc; // the main parser to use
105 const charT* m_base; // the start of the string being parsed
106 const charT* m_end; // the end of the string being parsed
107 const charT* m_position; // our current parser position
108 unsigned m_mark_count; // how many sub-expressions we have
109 int m_mark_reset; // used to indicate that we're inside a (?|...) block.
110 unsigned m_max_mark; // largest mark count seen inside a (?|...) block.
111 std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
112 std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
113 bool m_has_case_change; // true if somewhere in the current block the case has changed
b32b8144 114 unsigned m_recursion_count; // How many times we've called parse_all.
7c673cae
FG
115#if defined(BOOST_MSVC) && defined(_M_IX86)
116 // This is an ugly warning suppression workaround (for warnings *inside* std::vector
117 // that can not otherwise be suppressed)...
118 BOOST_STATIC_ASSERT(sizeof(long) >= sizeof(void*));
119 std::vector<long> m_alt_jumps; // list of alternative in the current scope.
120#else
121 std::vector<std::ptrdiff_t> m_alt_jumps; // list of alternative in the current scope.
122#endif
123
124 basic_regex_parser& operator=(const basic_regex_parser&);
125 basic_regex_parser(const basic_regex_parser&);
126};
127
128template <class charT, class traits>
129basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
20effc67
TL
130 : basic_regex_creator<charT, traits>(data), m_parser_proc(), m_base(0), m_end(0), m_position(0),
131 m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false), m_recursion_count(0)
7c673cae
FG
132{
133}
134
135template <class charT, class traits>
136void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned l_flags)
137{
138 // pass l_flags on to base class:
139 this->init(l_flags);
140 // set up pointers:
141 m_position = m_base = p1;
142 m_end = p2;
143 // empty strings are errors:
144 if((p1 == p2) &&
145 (
146 ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group)
147 || (l_flags & regbase::no_empty_expressions)
148 )
149 )
150 {
151 fail(regex_constants::error_empty, 0);
152 return;
153 }
154 // select which parser to use:
155 switch(l_flags & regbase::main_option_type)
156 {
157 case regbase::perl_syntax_group:
158 {
159 m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
160 //
161 // Add a leading paren with index zero to give recursions a target:
162 //
163 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
164 br->index = 0;
165 br->icase = this->flags() & regbase::icase;
166 break;
167 }
168 case regbase::basic_syntax_group:
169 m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;
170 break;
171 case regbase::literal:
172 m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal;
173 break;
174 default:
1e59de90 175 // Oops, someone has managed to set more than one of the main option flags,
7c673cae
FG
176 // so this must be an error:
177 fail(regex_constants::error_unknown, 0, "An invalid combination of regular expression syntax flags was used.");
178 return;
179 }
180
181 // parse all our characters:
182 bool result = parse_all();
183 //
184 // Unwind our alternatives:
185 //
186 unwind_alts(-1);
187 // reset l_flags as a global scope (?imsx) may have altered them:
188 this->flags(l_flags);
189 // if we haven't gobbled up all the characters then we must
190 // have had an unexpected ')' :
191 if(!result)
192 {
92f5a8d4 193 fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Found a closing ) with no corresponding opening parenthesis.");
7c673cae
FG
194 return;
195 }
196 // if an error has been set then give up now:
197 if(this->m_pdata->m_status)
198 return;
199 // fill in our sub-expression count:
20effc67 200 this->m_pdata->m_mark_count = 1u + (std::size_t)m_mark_count;
7c673cae
FG
201 this->finalize(p1, p2);
202}
203
204template <class charT, class traits>
205void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position)
206{
207 // get the error message:
208 std::string message = this->m_pdata->m_ptraits->error_string(error_code);
209 fail(error_code, position, message);
210}
211
212template <class charT, class traits>
213void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos)
214{
215 if(0 == this->m_pdata->m_status) // update the error code if not already set
216 this->m_pdata->m_status = error_code;
217 m_position = m_end; // don't bother parsing anything else
218
219#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
220 //
221 // Augment error message with the regular expression text:
222 //
223 if(start_pos == position)
224 start_pos = (std::max)(static_cast<std::ptrdiff_t>(0), position - static_cast<std::ptrdiff_t>(10));
225 std::ptrdiff_t end_pos = (std::min)(position + static_cast<std::ptrdiff_t>(10), static_cast<std::ptrdiff_t>(m_end - m_base));
226 if(error_code != regex_constants::error_empty)
227 {
228 if((start_pos != 0) || (end_pos != (m_end - m_base)))
229 message += " The error occurred while parsing the regular expression fragment: '";
230 else
231 message += " The error occurred while parsing the regular expression: '";
232 if(start_pos != end_pos)
233 {
234 message += std::string(m_base + start_pos, m_base + position);
235 message += ">>>HERE>>>";
236 message += std::string(m_base + position, m_base + end_pos);
237 }
238 message += "'.";
239 }
240#endif
241
242#ifndef BOOST_NO_EXCEPTIONS
243 if(0 == (this->flags() & regex_constants::no_except))
244 {
245 boost::regex_error e(message, error_code, position);
246 e.raise();
247 }
248#else
249 (void)position; // suppress warnings.
250#endif
251}
252
253template <class charT, class traits>
254bool basic_regex_parser<charT, traits>::parse_all()
255{
b32b8144
FG
256 if (++m_recursion_count > 400)
257 {
258 // exceeded internal limits
259 fail(boost::regex_constants::error_complexity, m_position - m_base, "Exceeded nested brace limit.");
260 }
7c673cae
FG
261 bool result = true;
262 while(result && (m_position != m_end))
263 {
264 result = (this->*m_parser_proc)();
265 }
b32b8144 266 --m_recursion_count;
7c673cae
FG
267 return result;
268}
269
270#ifdef BOOST_MSVC
271#pragma warning(push)
272#pragma warning(disable:4702)
273#endif
274template <class charT, class traits>
275bool basic_regex_parser<charT, traits>::parse_basic()
276{
277 switch(this->m_traits.syntax_type(*m_position))
278 {
279 case regex_constants::syntax_escape:
280 return parse_basic_escape();
281 case regex_constants::syntax_dot:
282 return parse_match_any();
283 case regex_constants::syntax_caret:
284 ++m_position;
285 this->append_state(syntax_element_start_line);
286 break;
287 case regex_constants::syntax_dollar:
288 ++m_position;
289 this->append_state(syntax_element_end_line);
290 break;
291 case regex_constants::syntax_star:
292 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line))
293 return parse_literal();
294 else
295 {
296 ++m_position;
297 return parse_repeat();
298 }
299 case regex_constants::syntax_plus:
300 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
301 return parse_literal();
302 else
303 {
304 ++m_position;
305 return parse_repeat(1);
306 }
307 case regex_constants::syntax_question:
308 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
309 return parse_literal();
310 else
311 {
312 ++m_position;
313 return parse_repeat(0, 1);
314 }
315 case regex_constants::syntax_open_set:
316 return parse_set();
317 case regex_constants::syntax_newline:
318 if(this->flags() & regbase::newline_alt)
319 return parse_alt();
320 else
321 return parse_literal();
322 default:
323 return parse_literal();
324 }
325 return true;
326}
327
20effc67
TL
328#ifdef BOOST_MSVC
329# pragma warning(push)
330#if BOOST_MSVC >= 1800
331#pragma warning(disable:26812)
332#endif
333#endif
7c673cae
FG
334template <class charT, class traits>
335bool basic_regex_parser<charT, traits>::parse_extended()
336{
337 bool result = true;
338 switch(this->m_traits.syntax_type(*m_position))
339 {
340 case regex_constants::syntax_open_mark:
341 return parse_open_paren();
342 case regex_constants::syntax_close_mark:
343 return false;
344 case regex_constants::syntax_escape:
345 return parse_extended_escape();
346 case regex_constants::syntax_dot:
347 return parse_match_any();
348 case regex_constants::syntax_caret:
349 ++m_position;
350 this->append_state(
351 (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line));
352 break;
353 case regex_constants::syntax_dollar:
354 ++m_position;
355 this->append_state(
356 (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line));
357 break;
358 case regex_constants::syntax_star:
359 if(m_position == this->m_base)
360 {
361 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"*\" cannot start a regular expression.");
362 return false;
363 }
364 ++m_position;
365 return parse_repeat();
366 case regex_constants::syntax_question:
367 if(m_position == this->m_base)
368 {
369 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"?\" cannot start a regular expression.");
370 return false;
371 }
372 ++m_position;
373 return parse_repeat(0,1);
374 case regex_constants::syntax_plus:
375 if(m_position == this->m_base)
376 {
377 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"+\" cannot start a regular expression.");
378 return false;
379 }
380 ++m_position;
381 return parse_repeat(1);
382 case regex_constants::syntax_open_brace:
383 ++m_position;
384 return parse_repeat_range(false);
385 case regex_constants::syntax_close_brace:
386 if((this->flags() & regbase::no_perl_ex) == regbase::no_perl_ex)
387 {
388 fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
389 return false;
390 }
391 result = parse_literal();
392 break;
393 case regex_constants::syntax_or:
394 return parse_alt();
395 case regex_constants::syntax_open_set:
396 return parse_set();
397 case regex_constants::syntax_newline:
398 if(this->flags() & regbase::newline_alt)
399 return parse_alt();
400 else
401 return parse_literal();
402 case regex_constants::syntax_hash:
403 //
404 // If we have a mod_x flag set, then skip until
405 // we get to a newline character:
406 //
407 if((this->flags()
408 & (regbase::no_perl_ex|regbase::mod_x))
409 == regbase::mod_x)
410 {
411 while((m_position != m_end) && !is_separator(*m_position++)){}
412 return true;
413 }
414 BOOST_FALLTHROUGH;
415 default:
416 result = parse_literal();
417 break;
418 }
419 return result;
420}
421#ifdef BOOST_MSVC
20effc67
TL
422# pragma warning(pop)
423#endif
424#ifdef BOOST_MSVC
7c673cae
FG
425#pragma warning(pop)
426#endif
427
428template <class charT, class traits>
429bool basic_regex_parser<charT, traits>::parse_literal()
430{
431 // append this as a literal provided it's not a space character
432 // or the perl option regbase::mod_x is not set:
433 if(
434 ((this->flags()
435 & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex))
436 != regbase::mod_x)
437 || !this->m_traits.isctype(*m_position, this->m_mask_space))
438 this->append_literal(*m_position);
439 ++m_position;
440 return true;
441}
442
443template <class charT, class traits>
444bool basic_regex_parser<charT, traits>::parse_open_paren()
445{
446 //
447 // skip the '(' and error check:
448 //
449 if(++m_position == m_end)
450 {
451 fail(regex_constants::error_paren, m_position - m_base);
452 return false;
453 }
454 //
455 // begin by checking for a perl-style (?...) extension:
456 //
457 if(
458 ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
459 || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
460 )
461 {
462 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
463 return parse_perl_extension();
464 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_star)
465 return parse_perl_verb();
466 }
467 //
468 // update our mark count, and append the required state:
469 //
470 unsigned markid = 0;
471 if(0 == (this->flags() & regbase::nosubs))
472 {
473 markid = ++m_mark_count;
474#ifndef BOOST_NO_STD_DISTANCE
475 if(this->flags() & regbase::save_subexpression_location)
476 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 1, 0));
477#else
478 if(this->flags() & regbase::save_subexpression_location)
479 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 1, 0));
480#endif
481 }
482 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
483 pb->index = markid;
484 pb->icase = this->flags() & regbase::icase;
485 std::ptrdiff_t last_paren_start = this->getoffset(pb);
486 // back up insertion point for alternations, and set new point:
487 std::ptrdiff_t last_alt_point = m_alt_insert_point;
488 this->m_pdata->m_data.align();
489 m_alt_insert_point = this->m_pdata->m_data.size();
490 //
491 // back up the current flags in case we have a nested (?imsx) group:
492 //
493 regex_constants::syntax_option_type opts = this->flags();
494 bool old_case_change = m_has_case_change;
495 m_has_case_change = false; // no changes to this scope as yet...
496 //
497 // Back up branch reset data in case we have a nested (?|...)
498 //
499 int mark_reset = m_mark_reset;
500 m_mark_reset = -1;
501 //
502 // now recursively add more states, this will terminate when we get to a
503 // matching ')' :
504 //
505 parse_all();
506 //
507 // Unwind pushed alternatives:
508 //
509 if(0 == unwind_alts(last_paren_start))
510 return false;
511 //
512 // restore flags:
513 //
514 if(m_has_case_change)
515 {
516 // the case has changed in one or more of the alternatives
517 // within the scoped (...) block: we have to add a state
518 // to reset the case sensitivity:
519 static_cast<re_case*>(
520 this->append_state(syntax_element_toggle_case, sizeof(re_case))
521 )->icase = opts & regbase::icase;
522 }
523 this->flags(opts);
524 m_has_case_change = old_case_change;
525 //
526 // restore branch reset:
527 //
528 m_mark_reset = mark_reset;
529 //
530 // we either have a ')' or we have run out of characters prematurely:
531 //
532 if(m_position == m_end)
533 {
534 this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
535 return false;
536 }
b32b8144
FG
537 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
538 return false;
7c673cae
FG
539#ifndef BOOST_NO_STD_DISTANCE
540 if(markid && (this->flags() & regbase::save_subexpression_location))
541 this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position);
542#else
543 if(markid && (this->flags() & regbase::save_subexpression_location))
544 this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base);
545#endif
546 ++m_position;
547 //
548 // append closing parenthesis state:
549 //
550 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
551 pb->index = markid;
552 pb->icase = this->flags() & regbase::icase;
553 this->m_paren_start = last_paren_start;
554 //
555 // restore the alternate insertion point:
556 //
557 this->m_alt_insert_point = last_alt_point;
558 //
559 // allow backrefs to this mark:
560 //
20effc67
TL
561 if(markid > 0)
562 this->m_backrefs.set(markid);
7c673cae
FG
563
564 return true;
565}
566
567template <class charT, class traits>
568bool basic_regex_parser<charT, traits>::parse_basic_escape()
569{
570 if(++m_position == m_end)
571 {
572 fail(regex_constants::error_paren, m_position - m_base);
573 return false;
574 }
575 bool result = true;
576 switch(this->m_traits.escape_syntax_type(*m_position))
577 {
578 case regex_constants::syntax_open_mark:
579 return parse_open_paren();
580 case regex_constants::syntax_close_mark:
581 return false;
582 case regex_constants::syntax_plus:
583 if(this->flags() & regex_constants::bk_plus_qm)
584 {
585 ++m_position;
586 return parse_repeat(1);
587 }
588 else
589 return parse_literal();
590 case regex_constants::syntax_question:
591 if(this->flags() & regex_constants::bk_plus_qm)
592 {
593 ++m_position;
594 return parse_repeat(0, 1);
595 }
596 else
597 return parse_literal();
598 case regex_constants::syntax_open_brace:
599 if(this->flags() & regbase::no_intervals)
600 return parse_literal();
601 ++m_position;
602 return parse_repeat_range(true);
603 case regex_constants::syntax_close_brace:
604 if(this->flags() & regbase::no_intervals)
605 return parse_literal();
606 fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
607 return false;
608 case regex_constants::syntax_or:
609 if(this->flags() & regbase::bk_vbar)
610 return parse_alt();
611 else
612 result = parse_literal();
613 break;
614 case regex_constants::syntax_digit:
615 return parse_backref();
616 case regex_constants::escape_type_start_buffer:
617 if(this->flags() & regbase::emacs_ex)
618 {
619 ++m_position;
620 this->append_state(syntax_element_buffer_start);
621 }
622 else
623 result = parse_literal();
624 break;
625 case regex_constants::escape_type_end_buffer:
626 if(this->flags() & regbase::emacs_ex)
627 {
628 ++m_position;
629 this->append_state(syntax_element_buffer_end);
630 }
631 else
632 result = parse_literal();
633 break;
634 case regex_constants::escape_type_word_assert:
635 if(this->flags() & regbase::emacs_ex)
636 {
637 ++m_position;
638 this->append_state(syntax_element_word_boundary);
639 }
640 else
641 result = parse_literal();
642 break;
643 case regex_constants::escape_type_not_word_assert:
644 if(this->flags() & regbase::emacs_ex)
645 {
646 ++m_position;
647 this->append_state(syntax_element_within_word);
648 }
649 else
650 result = parse_literal();
651 break;
652 case regex_constants::escape_type_left_word:
653 if(this->flags() & regbase::emacs_ex)
654 {
655 ++m_position;
656 this->append_state(syntax_element_word_start);
657 }
658 else
659 result = parse_literal();
660 break;
661 case regex_constants::escape_type_right_word:
662 if(this->flags() & regbase::emacs_ex)
663 {
664 ++m_position;
665 this->append_state(syntax_element_word_end);
666 }
667 else
668 result = parse_literal();
669 break;
670 default:
671 if(this->flags() & regbase::emacs_ex)
672 {
673 bool negate = true;
674 switch(*m_position)
675 {
676 case 'w':
677 negate = false;
678 BOOST_FALLTHROUGH;
679 case 'W':
680 {
681 basic_char_set<charT, traits> char_set;
682 if(negate)
683 char_set.negate();
684 char_set.add_class(this->m_word_mask);
685 if(0 == this->append_set(char_set))
686 {
687 fail(regex_constants::error_ctype, m_position - m_base);
688 return false;
689 }
690 ++m_position;
691 return true;
692 }
693 case 's':
694 negate = false;
695 BOOST_FALLTHROUGH;
696 case 'S':
697 return add_emacs_code(negate);
698 case 'c':
699 case 'C':
700 // not supported yet:
701 fail(regex_constants::error_escape, m_position - m_base, "The \\c and \\C escape sequences are not supported by POSIX basic regular expressions: try the Perl syntax instead.");
702 return false;
703 default:
704 break;
705 }
706 }
707 result = parse_literal();
708 break;
709 }
710 return result;
711}
712
713template <class charT, class traits>
714bool basic_regex_parser<charT, traits>::parse_extended_escape()
715{
716 ++m_position;
717 if(m_position == m_end)
718 {
719 fail(regex_constants::error_escape, m_position - m_base, "Incomplete escape sequence found.");
720 return false;
721 }
722 bool negate = false; // in case this is a character class escape: \w \d etc
723 switch(this->m_traits.escape_syntax_type(*m_position))
724 {
725 case regex_constants::escape_type_not_class:
726 negate = true;
727 BOOST_FALLTHROUGH;
728 case regex_constants::escape_type_class:
729 {
730escape_type_class_jump:
731 typedef typename traits::char_class_type m_type;
732 m_type m = this->m_traits.lookup_classname(m_position, m_position+1);
733 if(m != 0)
734 {
735 basic_char_set<charT, traits> char_set;
736 if(negate)
737 char_set.negate();
738 char_set.add_class(m);
739 if(0 == this->append_set(char_set))
740 {
741 fail(regex_constants::error_ctype, m_position - m_base);
742 return false;
743 }
744 ++m_position;
745 return true;
746 }
747 //
748 // not a class, just a regular unknown escape:
749 //
750 this->append_literal(unescape_character());
751 break;
752 }
753 case regex_constants::syntax_digit:
754 return parse_backref();
755 case regex_constants::escape_type_left_word:
756 ++m_position;
757 this->append_state(syntax_element_word_start);
758 break;
759 case regex_constants::escape_type_right_word:
760 ++m_position;
761 this->append_state(syntax_element_word_end);
762 break;
763 case regex_constants::escape_type_start_buffer:
764 ++m_position;
765 this->append_state(syntax_element_buffer_start);
766 break;
767 case regex_constants::escape_type_end_buffer:
768 ++m_position;
769 this->append_state(syntax_element_buffer_end);
770 break;
771 case regex_constants::escape_type_word_assert:
772 ++m_position;
773 this->append_state(syntax_element_word_boundary);
774 break;
775 case regex_constants::escape_type_not_word_assert:
776 ++m_position;
777 this->append_state(syntax_element_within_word);
778 break;
779 case regex_constants::escape_type_Z:
780 ++m_position;
781 this->append_state(syntax_element_soft_buffer_end);
782 break;
783 case regex_constants::escape_type_Q:
784 return parse_QE();
785 case regex_constants::escape_type_C:
786 return parse_match_any();
787 case regex_constants::escape_type_X:
788 ++m_position;
789 this->append_state(syntax_element_combining);
790 break;
791 case regex_constants::escape_type_G:
792 ++m_position;
793 this->append_state(syntax_element_restart_continue);
794 break;
795 case regex_constants::escape_type_not_property:
796 negate = true;
797 BOOST_FALLTHROUGH;
798 case regex_constants::escape_type_property:
799 {
800 ++m_position;
801 char_class_type m;
802 if(m_position == m_end)
803 {
804 fail(regex_constants::error_escape, m_position - m_base, "Incomplete property escape found.");
805 return false;
806 }
807 // maybe have \p{ddd}
808 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
809 {
810 const charT* base = m_position;
811 // skip forward until we find enclosing brace:
812 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
813 ++m_position;
814 if(m_position == m_end)
815 {
816 fail(regex_constants::error_escape, m_position - m_base, "Closing } missing from property escape sequence.");
817 return false;
818 }
819 m = this->m_traits.lookup_classname(++base, m_position++);
820 }
821 else
822 {
823 m = this->m_traits.lookup_classname(m_position, m_position+1);
824 ++m_position;
825 }
826 if(m != 0)
827 {
828 basic_char_set<charT, traits> char_set;
829 if(negate)
830 char_set.negate();
831 char_set.add_class(m);
832 if(0 == this->append_set(char_set))
833 {
834 fail(regex_constants::error_ctype, m_position - m_base);
835 return false;
836 }
837 return true;
838 }
839 fail(regex_constants::error_ctype, m_position - m_base, "Escape sequence was neither a valid property nor a valid character class name.");
840 return false;
841 }
842 case regex_constants::escape_type_reset_start_mark:
843 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
844 {
845 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
846 pb->index = -5;
847 pb->icase = this->flags() & regbase::icase;
848 this->m_pdata->m_data.align();
849 ++m_position;
850 return true;
851 }
852 goto escape_type_class_jump;
853 case regex_constants::escape_type_line_ending:
854 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
855 {
856 const charT* e = get_escape_R_string<charT>();
857 const charT* old_position = m_position;
858 const charT* old_end = m_end;
859 const charT* old_base = m_base;
860 m_position = e;
861 m_base = e;
862 m_end = e + traits::length(e);
863 bool r = parse_all();
864 m_position = ++old_position;
865 m_end = old_end;
866 m_base = old_base;
867 return r;
868 }
869 goto escape_type_class_jump;
870 case regex_constants::escape_type_extended_backref:
871 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
872 {
873 bool have_brace = false;
874 bool negative = false;
20effc67 875 static const char incomplete_message[] = "Incomplete \\g escape found.";
7c673cae
FG
876 if(++m_position == m_end)
877 {
878 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
879 return false;
880 }
881 // maybe have \g{ddd}
882 regex_constants::syntax_type syn = this->m_traits.syntax_type(*m_position);
883 regex_constants::syntax_type syn_end = 0;
884 if((syn == regex_constants::syntax_open_brace)
885 || (syn == regex_constants::escape_type_left_word)
886 || (syn == regex_constants::escape_type_end_buffer))
887 {
888 if(++m_position == m_end)
889 {
890 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
891 return false;
892 }
893 have_brace = true;
894 switch(syn)
895 {
896 case regex_constants::syntax_open_brace:
897 syn_end = regex_constants::syntax_close_brace;
898 break;
899 case regex_constants::escape_type_left_word:
900 syn_end = regex_constants::escape_type_right_word;
901 break;
902 default:
903 syn_end = regex_constants::escape_type_end_buffer;
904 break;
905 }
906 }
907 negative = (*m_position == static_cast<charT>('-'));
908 if((negative) && (++m_position == m_end))
909 {
910 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
911 return false;
912 }
913 const charT* pc = m_position;
914 boost::intmax_t i = this->m_traits.toi(pc, m_end, 10);
915 if((i < 0) && syn_end)
916 {
917 // Check for a named capture, get the leftmost one if there is more than one:
918 const charT* base = m_position;
919 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != syn_end))
920 {
921 ++m_position;
922 }
923 i = hash_value_from_capture_name(base, m_position);
924 pc = m_position;
925 }
926 if(negative)
20effc67
TL
927 i = 1 + (static_cast<boost::intmax_t>(m_mark_count) - i);
928 if(((i < hash_value_mask) && (i > 0) && (this->m_backrefs.test(i))) || ((i >= hash_value_mask) && (this->m_pdata->get_id(i) > 0) && (this->m_backrefs.test(this->m_pdata->get_id(i)))))
7c673cae
FG
929 {
930 m_position = pc;
931 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
932 pb->index = i;
933 pb->icase = this->flags() & regbase::icase;
934 }
935 else
936 {
937 fail(regex_constants::error_backref, m_position - m_base);
938 return false;
939 }
940 m_position = pc;
941 if(have_brace)
942 {
943 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != syn_end))
944 {
945 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
946 return false;
947 }
948 ++m_position;
949 }
950 return true;
951 }
952 goto escape_type_class_jump;
953 case regex_constants::escape_type_control_v:
954 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
955 goto escape_type_class_jump;
956 BOOST_FALLTHROUGH;
957 default:
958 this->append_literal(unescape_character());
959 break;
960 }
961 return true;
962}
963
964template <class charT, class traits>
965bool basic_regex_parser<charT, traits>::parse_match_any()
966{
967 //
968 // we have a '.' that can match any character:
969 //
970 ++m_position;
971 static_cast<re_dot*>(
972 this->append_state(syntax_element_wild, sizeof(re_dot))
973 )->mask = static_cast<unsigned char>(this->flags() & regbase::no_mod_s
974 ? BOOST_REGEX_DETAIL_NS::force_not_newline
975 : this->flags() & regbase::mod_s ?
976 BOOST_REGEX_DETAIL_NS::force_newline : BOOST_REGEX_DETAIL_NS::dont_care);
977 return true;
978}
979
980template <class charT, class traits>
981bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high)
982{
983 bool greedy = true;
1e59de90 984 bool possessive = false;
7c673cae
FG
985 std::size_t insert_point;
986 //
987 // when we get to here we may have a non-greedy ? mark still to come:
988 //
989 if((m_position != m_end)
990 && (
991 (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
992 || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex)))
993 )
994 )
995 {
996 // OK we have a perl or emacs regex, check for a '?':
b32b8144
FG
997 if ((this->flags() & (regbase::main_option_type | regbase::mod_x | regbase::no_perl_ex)) == regbase::mod_x)
998 {
999 // whitespace skip:
1000 while ((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1001 ++m_position;
1002 }
1003 if((m_position != m_end) && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question))
7c673cae
FG
1004 {
1005 greedy = false;
1006 ++m_position;
1007 }
1e59de90 1008 // for perl regexes only check for possessive ++ repeats.
7c673cae
FG
1009 if((m_position != m_end)
1010 && (0 == (this->flags() & regbase::main_option_type))
1011 && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus))
1012 {
1e59de90 1013 possessive = true;
7c673cae
FG
1014 ++m_position;
1015 }
1016 }
1017 if(0 == this->m_last_state)
1018 {
1019 fail(regex_constants::error_badrepeat, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Nothing to repeat.");
1020 return false;
1021 }
1022 if(this->m_last_state->type == syntax_element_endmark)
1023 {
1024 // insert a repeat before the '(' matching the last ')':
1025 insert_point = this->m_paren_start;
1026 }
1027 else if((this->m_last_state->type == syntax_element_literal) && (static_cast<re_literal*>(this->m_last_state)->length > 1))
1028 {
1029 // the last state was a literal with more than one character, split it in two:
1030 re_literal* lit = static_cast<re_literal*>(this->m_last_state);
1031 charT c = (static_cast<charT*>(static_cast<void*>(lit+1)))[lit->length - 1];
1032 lit->length -= 1;
1033 // now append new state:
1034 lit = static_cast<re_literal*>(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
1035 lit->length = 1;
1036 (static_cast<charT*>(static_cast<void*>(lit+1)))[0] = c;
1037 insert_point = this->getoffset(this->m_last_state);
1038 }
1039 else
1040 {
1041 // repeat the last state whatever it was, need to add some error checking here:
1042 switch(this->m_last_state->type)
1043 {
1044 case syntax_element_start_line:
1045 case syntax_element_end_line:
1046 case syntax_element_word_boundary:
1047 case syntax_element_within_word:
1048 case syntax_element_word_start:
1049 case syntax_element_word_end:
1050 case syntax_element_buffer_start:
1051 case syntax_element_buffer_end:
1052 case syntax_element_alt:
1053 case syntax_element_soft_buffer_end:
1054 case syntax_element_restart_continue:
1055 case syntax_element_jump:
1056 case syntax_element_startmark:
1057 case syntax_element_backstep:
1e59de90 1058 case syntax_element_toggle_case:
7c673cae
FG
1059 // can't legally repeat any of the above:
1060 fail(regex_constants::error_badrepeat, m_position - m_base);
1061 return false;
1062 default:
1063 // do nothing...
1064 break;
1065 }
1066 insert_point = this->getoffset(this->m_last_state);
1067 }
1068 //
1069 // OK we now know what to repeat, so insert the repeat around it:
1070 //
1071 re_repeat* rep = static_cast<re_repeat*>(this->insert_state(insert_point, syntax_element_rep, re_repeater_size));
1072 rep->min = low;
1073 rep->max = high;
1074 rep->greedy = greedy;
1075 rep->leading = false;
1076 // store our repeater position for later:
1077 std::ptrdiff_t rep_off = this->getoffset(rep);
1078 // and append a back jump to the repeat:
1079 re_jump* jmp = static_cast<re_jump*>(this->append_state(syntax_element_jump, sizeof(re_jump)));
1080 jmp->alt.i = rep_off - this->getoffset(jmp);
1081 this->m_pdata->m_data.align();
1082 // now fill in the alt jump for the repeat:
1083 rep = static_cast<re_repeat*>(this->getaddress(rep_off));
1084 rep->alt.i = this->m_pdata->m_data.size() - rep_off;
1085 //
1e59de90 1086 // If the repeat is possessive then bracket the repeat with a (?>...)
7c673cae
FG
1087 // independent sub-expression construct:
1088 //
1e59de90 1089 if(possessive)
7c673cae
FG
1090 {
1091 if(m_position != m_end)
1092 {
1093 //
1094 // Check for illegal following quantifier, we have to do this here, because
1095 // the extra states we insert below circumvents our usual error checking :-(
1096 //
b32b8144
FG
1097 bool contin = false;
1098 do
7c673cae 1099 {
b32b8144
FG
1100 if ((this->flags() & (regbase::main_option_type | regbase::mod_x | regbase::no_perl_ex)) == regbase::mod_x)
1101 {
1102 // whitespace skip:
1103 while ((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1104 ++m_position;
1105 }
1106 if (m_position != m_end)
1107 {
1108 switch (this->m_traits.syntax_type(*m_position))
1109 {
1110 case regex_constants::syntax_star:
1111 case regex_constants::syntax_plus:
1112 case regex_constants::syntax_question:
1113 case regex_constants::syntax_open_brace:
1114 fail(regex_constants::error_badrepeat, m_position - m_base);
1115 return false;
1116 case regex_constants::syntax_open_mark:
1117 // Do we have a comment? If so we need to skip it here...
1118 if ((m_position + 2 < m_end) && this->m_traits.syntax_type(*(m_position + 1)) == regex_constants::syntax_question
1119 && this->m_traits.syntax_type(*(m_position + 2)) == regex_constants::syntax_hash)
1120 {
1121 while ((m_position != m_end)
1122 && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark)) {
1123 }
1124 contin = true;
1125 }
1126 else
1127 contin = false;
1e59de90
TL
1128 break;
1129 default:
1130 contin = false;
b32b8144
FG
1131 }
1132 }
1133 else
1134 contin = false;
1135 } while (contin);
7c673cae
FG
1136 }
1137 re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace)));
1138 pb->index = -3;
1139 pb->icase = this->flags() & regbase::icase;
1140 jmp = static_cast<re_jump*>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump)));
1141 this->m_pdata->m_data.align();
1142 jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
1143 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
1144 pb->index = -3;
1145 pb->icase = this->flags() & regbase::icase;
1146 }
1147 return true;
1148}
1149
1150template <class charT, class traits>
1151bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
1152{
20effc67 1153 static const char incomplete_message[] = "Missing } in quantified repetition.";
7c673cae
FG
1154 //
1155 // parse a repeat-range:
1156 //
1157 std::size_t min, max;
1158 boost::intmax_t v;
1159 // skip whitespace:
1160 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1161 ++m_position;
1162 if(this->m_position == this->m_end)
1163 {
1164 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1165 {
1166 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1167 return false;
1168 }
1169 // Treat the opening '{' as a literal character, rewind to start of error:
1170 --m_position;
1171 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1172 return parse_literal();
1173 }
1174 // get min:
1175 v = this->m_traits.toi(m_position, m_end, 10);
1176 // skip whitespace:
1177 if((v < 0) || (v > umax()))
1178 {
1179 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1180 {
1181 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1182 return false;
1183 }
1184 // Treat the opening '{' as a literal character, rewind to start of error:
1185 --m_position;
1186 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1187 return parse_literal();
1188 }
1189 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1190 ++m_position;
1191 if(this->m_position == this->m_end)
1192 {
1193 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1194 {
1195 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1196 return false;
1197 }
1198 // Treat the opening '{' as a literal character, rewind to start of error:
1199 --m_position;
1200 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1201 return parse_literal();
1202 }
1203 min = static_cast<std::size_t>(v);
1204 // see if we have a comma:
1205 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
1206 {
1207 // move on and error check:
1208 ++m_position;
1209 // skip whitespace:
1210 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1211 ++m_position;
1212 if(this->m_position == this->m_end)
1213 {
1214 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1215 {
1216 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1217 return false;
1218 }
1219 // Treat the opening '{' as a literal character, rewind to start of error:
1220 --m_position;
1221 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1222 return parse_literal();
1223 }
1224 // get the value if any:
1225 v = this->m_traits.toi(m_position, m_end, 10);
1226 max = ((v >= 0) && (v < umax())) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
1227 }
1228 else
1229 {
1230 // no comma, max = min:
1231 max = min;
1232 }
1233 // skip whitespace:
1234 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1235 ++m_position;
1236 // OK now check trailing }:
1237 if(this->m_position == this->m_end)
1238 {
1239 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1240 {
1241 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1242 return false;
1243 }
1244 // Treat the opening '{' as a literal character, rewind to start of error:
1245 --m_position;
1246 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1247 return parse_literal();
1248 }
1249 if(isbasic)
1250 {
1251 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape)
1252 {
1253 ++m_position;
1254 if(this->m_position == this->m_end)
1255 {
1256 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1257 return false;
1258 }
1259 }
1260 else
1261 {
1262 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1263 return false;
1264 }
1265 }
1266 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace)
1267 ++m_position;
1268 else
1269 {
1270 // Treat the opening '{' as a literal character, rewind to start of error:
1271 --m_position;
1272 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1273 return parse_literal();
1274 }
1275 //
1276 // finally go and add the repeat, unless error:
1277 //
1278 if(min > max)
1279 {
1280 // Backtrack to error location:
1281 m_position -= 2;
1282 while(this->m_traits.isctype(*m_position, this->m_word_mask)) --m_position;
1283 ++m_position;
1284 fail(regex_constants::error_badbrace, m_position - m_base);
1285 return false;
1286 }
1287 return parse_repeat(min, max);
1288}
1289
1290template <class charT, class traits>
1291bool basic_regex_parser<charT, traits>::parse_alt()
1292{
1293 //
1294 // error check: if there have been no previous states,
1295 // or if the last state was a '(' then error:
1296 //
1297 if(
1298 ((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
1299 &&
1300 !(
1301 ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
1302 &&
1303 ((this->flags() & regbase::no_empty_expressions) == 0)
1304 )
1305 )
1306 {
1307 fail(regex_constants::error_empty, this->m_position - this->m_base, "A regular expression cannot start with the alternation operator |.");
1308 return false;
1309 }
1310 //
1311 // Reset mark count if required:
1312 //
1313 if(m_max_mark < m_mark_count)
1314 m_max_mark = m_mark_count;
1315 if(m_mark_reset >= 0)
1316 m_mark_count = m_mark_reset;
1317
1318 ++m_position;
1319 //
1320 // we need to append a trailing jump:
1321 //
1322 re_syntax_base* pj = this->append_state(BOOST_REGEX_DETAIL_NS::syntax_element_jump, sizeof(re_jump));
1323 std::ptrdiff_t jump_offset = this->getoffset(pj);
1324 //
1325 // now insert the alternative:
1326 //
1327 re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
1328 jump_offset += re_alt_size;
1329 this->m_pdata->m_data.align();
1330 palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
1331 //
1332 // update m_alt_insert_point so that the next alternate gets
1333 // inserted at the start of the second of the two we've just created:
1334 //
1335 this->m_alt_insert_point = this->m_pdata->m_data.size();
1336 //
1337 // the start of this alternative must have a case changes state
1338 // if the current block has messed around with case changes:
1339 //
1340 if(m_has_case_change)
1341 {
1342 static_cast<re_case*>(
1343 this->append_state(syntax_element_toggle_case, sizeof(re_case))
1344 )->icase = this->m_icase;
1345 }
1346 //
1347 // push the alternative onto our stack, a recursive
1348 // implementation here is easier to understand (and faster
1349 // as it happens), but causes all kinds of stack overflow problems
1350 // on programs with small stacks (COM+).
1351 //
1352 m_alt_jumps.push_back(jump_offset);
1353 return true;
1354}
1355
1356template <class charT, class traits>
1357bool basic_regex_parser<charT, traits>::parse_set()
1358{
20effc67 1359 static const char incomplete_message[] = "Character set declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
7c673cae
FG
1360 ++m_position;
1361 if(m_position == m_end)
1362 {
1363 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1364 return false;
1365 }
1366 basic_char_set<charT, traits> char_set;
1367
1368 const charT* base = m_position; // where the '[' was
1369 const charT* item_base = m_position; // where the '[' or '^' was
1370
1371 while(m_position != m_end)
1372 {
1373 switch(this->m_traits.syntax_type(*m_position))
1374 {
1375 case regex_constants::syntax_caret:
1376 if(m_position == base)
1377 {
1378 char_set.negate();
1379 ++m_position;
1380 item_base = m_position;
1381 }
1382 else
1383 parse_set_literal(char_set);
1384 break;
1385 case regex_constants::syntax_close_set:
1386 if(m_position == item_base)
1387 {
1388 parse_set_literal(char_set);
1389 break;
1390 }
1391 else
1392 {
1393 ++m_position;
1394 if(0 == this->append_set(char_set))
1395 {
1396 fail(regex_constants::error_ctype, m_position - m_base);
1397 return false;
1398 }
1399 }
1400 return true;
1401 case regex_constants::syntax_open_set:
1402 if(parse_inner_set(char_set))
1403 break;
1404 return true;
1405 case regex_constants::syntax_escape:
1406 {
1407 //
1408 // look ahead and see if this is a character class shortcut
1409 // \d \w \s etc...
1410 //
1411 ++m_position;
1412 if(this->m_traits.escape_syntax_type(*m_position)
1413 == regex_constants::escape_type_class)
1414 {
1415 char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
1416 if(m != 0)
1417 {
1418 char_set.add_class(m);
1419 ++m_position;
1420 break;
1421 }
1422 }
1423 else if(this->m_traits.escape_syntax_type(*m_position)
1424 == regex_constants::escape_type_not_class)
1425 {
1426 // negated character class:
1427 char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
1428 if(m != 0)
1429 {
1430 char_set.add_negated_class(m);
1431 ++m_position;
1432 break;
1433 }
1434 }
1435 // not a character class, just a regular escape:
1436 --m_position;
1437 parse_set_literal(char_set);
1438 break;
1439 }
1440 default:
1441 parse_set_literal(char_set);
1442 break;
1443 }
1444 }
1445 return m_position != m_end;
1446}
1447
1448template <class charT, class traits>
1449bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
1450{
20effc67 1451 static const char incomplete_message[] = "Character class declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
7c673cae
FG
1452 //
1453 // we have either a character class [:name:]
1454 // a collating element [.name.]
1455 // or an equivalence class [=name=]
1456 //
1457 if(m_end == ++m_position)
1458 {
1459 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1460 return false;
1461 }
1462 switch(this->m_traits.syntax_type(*m_position))
1463 {
1464 case regex_constants::syntax_dot:
1465 //
1466 // a collating element is treated as a literal:
1467 //
1468 --m_position;
1469 parse_set_literal(char_set);
1470 return true;
1471 case regex_constants::syntax_colon:
1472 {
1473 // check that character classes are actually enabled:
1474 if((this->flags() & (regbase::main_option_type | regbase::no_char_classes))
1475 == (regbase::basic_syntax_group | regbase::no_char_classes))
1476 {
1477 --m_position;
1478 parse_set_literal(char_set);
1479 return true;
1480 }
1481 // skip the ':'
1482 if(m_end == ++m_position)
1483 {
1484 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1485 return false;
1486 }
1487 const charT* name_first = m_position;
1488 // skip at least one character, then find the matching ':]'
1489 if(m_end == ++m_position)
1490 {
1491 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1492 return false;
1493 }
1494 while((m_position != m_end)
1495 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon))
1496 ++m_position;
1497 const charT* name_last = m_position;
1498 if(m_end == m_position)
1499 {
1500 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1501 return false;
1502 }
1503 if((m_end == ++m_position)
1504 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1505 {
1506 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1507 return false;
1508 }
1509 //
1510 // check for negated class:
1511 //
1512 bool negated = false;
1513 if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
1514 {
1515 ++name_first;
1516 negated = true;
1517 }
1518 typedef typename traits::char_class_type m_type;
1519 m_type m = this->m_traits.lookup_classname(name_first, name_last);
1520 if(m == 0)
1521 {
1522 if(char_set.empty() && (name_last - name_first == 1))
1523 {
1524 // maybe a special case:
1525 ++m_position;
1526 if( (m_position != m_end)
1527 && (this->m_traits.syntax_type(*m_position)
1528 == regex_constants::syntax_close_set))
1529 {
1530 if(this->m_traits.escape_syntax_type(*name_first)
1531 == regex_constants::escape_type_left_word)
1532 {
1533 ++m_position;
1534 this->append_state(syntax_element_word_start);
1535 return false;
1536 }
1537 if(this->m_traits.escape_syntax_type(*name_first)
1538 == regex_constants::escape_type_right_word)
1539 {
1540 ++m_position;
1541 this->append_state(syntax_element_word_end);
1542 return false;
1543 }
1544 }
1545 }
1546 fail(regex_constants::error_ctype, name_first - m_base);
1547 return false;
1548 }
1e59de90 1549 if(!negated)
7c673cae
FG
1550 char_set.add_class(m);
1551 else
1552 char_set.add_negated_class(m);
1553 ++m_position;
1554 break;
1555 }
1556 case regex_constants::syntax_equal:
1557 {
1558 // skip the '='
1559 if(m_end == ++m_position)
1560 {
1561 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1562 return false;
1563 }
1564 const charT* name_first = m_position;
1565 // skip at least one character, then find the matching '=]'
1566 if(m_end == ++m_position)
1567 {
1568 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1569 return false;
1570 }
1571 while((m_position != m_end)
1572 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal))
1573 ++m_position;
1574 const charT* name_last = m_position;
1575 if(m_end == m_position)
1576 {
1577 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1578 return false;
1579 }
1580 if((m_end == ++m_position)
1581 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1582 {
1583 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1584 return false;
1585 }
1586 string_type m = this->m_traits.lookup_collatename(name_first, name_last);
1e59de90 1587 if(m.empty() || (m.size() > 2))
7c673cae
FG
1588 {
1589 fail(regex_constants::error_collate, name_first - m_base);
1590 return false;
1591 }
1592 digraph<charT> d;
1593 d.first = m[0];
1594 if(m.size() > 1)
1595 d.second = m[1];
1596 else
1597 d.second = 0;
1598 char_set.add_equivalent(d);
1599 ++m_position;
1600 break;
1601 }
1602 default:
1603 --m_position;
1604 parse_set_literal(char_set);
1605 break;
1606 }
1607 return true;
1608}
1609
1610template <class charT, class traits>
1611void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
1612{
1613 digraph<charT> start_range(get_next_set_literal(char_set));
1614 if(m_end == m_position)
1615 {
1616 fail(regex_constants::error_brack, m_position - m_base);
1617 return;
1618 }
1619 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
1620 {
1621 // we have a range:
1622 if(m_end == ++m_position)
1623 {
1624 fail(regex_constants::error_brack, m_position - m_base);
1625 return;
1626 }
1627 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
1628 {
1629 digraph<charT> end_range = get_next_set_literal(char_set);
1630 char_set.add_range(start_range, end_range);
1631 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
1632 {
1633 if(m_end == ++m_position)
1634 {
1635 fail(regex_constants::error_brack, m_position - m_base);
1636 return;
1637 }
1638 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set)
1639 {
1640 // trailing - :
1641 --m_position;
1642 return;
1643 }
1644 fail(regex_constants::error_range, m_position - m_base);
1645 return;
1646 }
1647 return;
1648 }
1649 --m_position;
1650 }
1651 char_set.add_single(start_range);
1652}
1653
1654template <class charT, class traits>
1655digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set)
1656{
1657 digraph<charT> result;
1658 switch(this->m_traits.syntax_type(*m_position))
1659 {
1660 case regex_constants::syntax_dash:
1661 if(!char_set.empty())
1662 {
1663 // see if we are at the end of the set:
1664 if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1665 {
1666 fail(regex_constants::error_range, m_position - m_base);
1667 return result;
1668 }
1669 --m_position;
1670 }
1671 result.first = *m_position++;
1672 return result;
1673 case regex_constants::syntax_escape:
1674 // check to see if escapes are supported first:
1675 if(this->flags() & regex_constants::no_escape_in_lists)
1676 {
1677 result = *m_position++;
1678 break;
1679 }
1680 ++m_position;
1681 result = unescape_character();
1682 break;
1683 case regex_constants::syntax_open_set:
1684 {
1685 if(m_end == ++m_position)
1686 {
1687 fail(regex_constants::error_collate, m_position - m_base);
1688 return result;
1689 }
1690 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)
1691 {
1692 --m_position;
1693 result.first = *m_position;
1694 ++m_position;
1695 return result;
1696 }
1697 if(m_end == ++m_position)
1698 {
1699 fail(regex_constants::error_collate, m_position - m_base);
1700 return result;
1701 }
1702 const charT* name_first = m_position;
1703 // skip at least one character, then find the matching ':]'
1704 if(m_end == ++m_position)
1705 {
1706 fail(regex_constants::error_collate, name_first - m_base);
1707 return result;
1708 }
1709 while((m_position != m_end)
1710 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot))
1711 ++m_position;
1712 const charT* name_last = m_position;
1713 if(m_end == m_position)
1714 {
1715 fail(regex_constants::error_collate, name_first - m_base);
1716 return result;
1717 }
1718 if((m_end == ++m_position)
1719 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1720 {
1721 fail(regex_constants::error_collate, name_first - m_base);
1722 return result;
1723 }
1724 ++m_position;
1725 string_type s = this->m_traits.lookup_collatename(name_first, name_last);
1726 if(s.empty() || (s.size() > 2))
1727 {
1728 fail(regex_constants::error_collate, name_first - m_base);
1729 return result;
1730 }
1731 result.first = s[0];
1732 if(s.size() > 1)
1733 result.second = s[1];
1734 else
1735 result.second = 0;
1736 return result;
1737 }
1738 default:
1739 result = *m_position++;
1740 }
1741 return result;
1742}
1743
1744//
1745// does a value fit in the specified charT type?
1746//
1747template <class charT>
1748bool valid_value(charT, boost::intmax_t v, const mpl::true_&)
1749{
1750 return (v >> (sizeof(charT) * CHAR_BIT)) == 0;
1751}
1752template <class charT>
1753bool valid_value(charT, boost::intmax_t, const mpl::false_&)
1754{
1755 return true; // v will alsways fit in a charT
1756}
1757template <class charT>
1758bool valid_value(charT c, boost::intmax_t v)
1759{
1760 return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(boost::intmax_t))>());
1761}
1762
1763template <class charT, class traits>
1764charT basic_regex_parser<charT, traits>::unescape_character()
1765{
1766#ifdef BOOST_MSVC
1767#pragma warning(push)
1768#pragma warning(disable:4127)
1769#endif
1770 charT result(0);
1771 if(m_position == m_end)
1772 {
1773 fail(regex_constants::error_escape, m_position - m_base, "Escape sequence terminated prematurely.");
1774 return false;
1775 }
1776 switch(this->m_traits.escape_syntax_type(*m_position))
1777 {
1778 case regex_constants::escape_type_control_a:
1779 result = charT('\a');
1780 break;
1781 case regex_constants::escape_type_e:
1782 result = charT(27);
1783 break;
1784 case regex_constants::escape_type_control_f:
1785 result = charT('\f');
1786 break;
1787 case regex_constants::escape_type_control_n:
1788 result = charT('\n');
1789 break;
1790 case regex_constants::escape_type_control_r:
1791 result = charT('\r');
1792 break;
1793 case regex_constants::escape_type_control_t:
1794 result = charT('\t');
1795 break;
1796 case regex_constants::escape_type_control_v:
1797 result = charT('\v');
1798 break;
1799 case regex_constants::escape_type_word_assert:
1800 result = charT('\b');
1801 break;
1802 case regex_constants::escape_type_ascii_control:
1803 ++m_position;
1804 if(m_position == m_end)
1805 {
1806 // Rewind to start of escape:
1807 --m_position;
1808 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1809 fail(regex_constants::error_escape, m_position - m_base, "ASCII escape sequence terminated prematurely.");
1810 return result;
1811 }
1812 result = static_cast<charT>(*m_position % 32);
1813 break;
1814 case regex_constants::escape_type_hex:
1815 ++m_position;
1816 if(m_position == m_end)
1817 {
1818 // Rewind to start of escape:
1819 --m_position;
1820 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1821 fail(regex_constants::error_escape, m_position - m_base, "Hexadecimal escape sequence terminated prematurely.");
1822 return result;
1823 }
1824 // maybe have \x{ddd}
1825 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
1826 {
1827 ++m_position;
1828 if(m_position == m_end)
1829 {
1830 // Rewind to start of escape:
1831 --m_position;
1832 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1833 fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence.");
1834 return result;
1835 }
1836 boost::intmax_t i = this->m_traits.toi(m_position, m_end, 16);
1837 if((m_position == m_end)
1838 || (i < 0)
1839 || ((std::numeric_limits<charT>::is_specialized) && (i > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
1840 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
1841 {
1842 // Rewind to start of escape:
1843 --m_position;
1844 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1845 fail(regex_constants::error_badbrace, m_position - m_base, "Hexadecimal escape sequence was invalid.");
1846 return result;
1847 }
1848 ++m_position;
1849 result = charT(i);
1850 }
1851 else
1852 {
1853 std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), static_cast<std::ptrdiff_t>(m_end - m_position));
1854 boost::intmax_t i = this->m_traits.toi(m_position, m_position + len, 16);
1855 if((i < 0)
1856 || !valid_value(charT(0), i))
1857 {
1858 // Rewind to start of escape:
1859 --m_position;
1860 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1861 fail(regex_constants::error_escape, m_position - m_base, "Escape sequence did not encode a valid character.");
1862 return result;
1863 }
1864 result = charT(i);
1865 }
1866 return result;
1867 case regex_constants::syntax_digit:
1868 {
1869 // an octal escape sequence, the first character must be a zero
1870 // followed by up to 3 octal digits:
1871 std::ptrdiff_t len = (std::min)(::boost::BOOST_REGEX_DETAIL_NS::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
1872 const charT* bp = m_position;
1873 boost::intmax_t val = this->m_traits.toi(bp, bp + 1, 8);
1874 if(val != 0)
1875 {
1876 // Rewind to start of escape:
1877 --m_position;
1878 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1879 // Oops not an octal escape after all:
1880 fail(regex_constants::error_escape, m_position - m_base, "Invalid octal escape sequence.");
1881 return result;
1882 }
1883 val = this->m_traits.toi(m_position, m_position + len, 8);
1884 if((val < 0) || (val > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
1885 {
1886 // Rewind to start of escape:
1887 --m_position;
1888 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1889 fail(regex_constants::error_escape, m_position - m_base, "Octal escape sequence is invalid.");
1890 return result;
1891 }
1892 return static_cast<charT>(val);
1893 }
1894 case regex_constants::escape_type_named_char:
1895 {
1896 ++m_position;
1897 if(m_position == m_end)
1898 {
1899 // Rewind to start of escape:
1900 --m_position;
1901 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1902 fail(regex_constants::error_escape, m_position - m_base);
1903 return false;
1904 }
1905 // maybe have \N{name}
1906 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
1907 {
1908 const charT* base = m_position;
1909 // skip forward until we find enclosing brace:
1910 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
1911 ++m_position;
1912 if(m_position == m_end)
1913 {
1914 // Rewind to start of escape:
1915 --m_position;
1916 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1917 fail(regex_constants::error_escape, m_position - m_base);
1918 return false;
1919 }
1920 string_type s = this->m_traits.lookup_collatename(++base, m_position++);
1921 if(s.empty())
1922 {
1923 // Rewind to start of escape:
1924 --m_position;
1925 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1926 fail(regex_constants::error_collate, m_position - m_base);
1927 return false;
1928 }
1929 if(s.size() == 1)
1930 {
1931 return s[0];
1932 }
1933 }
1934 // fall through is a failure:
1935 // Rewind to start of escape:
1936 --m_position;
1937 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1938 fail(regex_constants::error_escape, m_position - m_base);
1939 return false;
1940 }
1941 default:
1942 result = *m_position;
1943 break;
1944 }
1945 ++m_position;
1946 return result;
1947#ifdef BOOST_MSVC
1948#pragma warning(pop)
1949#endif
1950}
1951
1952template <class charT, class traits>
1953bool basic_regex_parser<charT, traits>::parse_backref()
1954{
1e59de90 1955 BOOST_REGEX_ASSERT(m_position != m_end);
7c673cae
FG
1956 const charT* pc = m_position;
1957 boost::intmax_t i = this->m_traits.toi(pc, pc + 1, 10);
1958 if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
1959 {
1960 // not a backref at all but an octal escape sequence:
1961 charT c = unescape_character();
1962 this->append_literal(c);
1963 }
20effc67 1964 else if((i > 0) && (this->m_backrefs.test(i)))
7c673cae
FG
1965 {
1966 m_position = pc;
1967 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
1968 pb->index = i;
1969 pb->icase = this->flags() & regbase::icase;
1970 }
1971 else
1972 {
1973 // Rewind to start of escape:
1974 --m_position;
1975 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1976 fail(regex_constants::error_backref, m_position - m_base);
1977 return false;
1978 }
1979 return true;
1980}
1981
1982template <class charT, class traits>
1983bool basic_regex_parser<charT, traits>::parse_QE()
1984{
1985#ifdef BOOST_MSVC
1986#pragma warning(push)
1987#pragma warning(disable:4127)
1988#endif
1989 //
1990 // parse a \Q...\E sequence:
1991 //
1992 ++m_position; // skip the Q
1993 const charT* start = m_position;
1994 const charT* end;
1995 do
1996 {
1997 while((m_position != m_end)
1998 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape))
1999 ++m_position;
2000 if(m_position == m_end)
2001 {
2002 // a \Q...\E sequence may terminate with the end of the expression:
2003 end = m_position;
2004 break;
2005 }
2006 if(++m_position == m_end) // skip the escape
2007 {
2008 fail(regex_constants::error_escape, m_position - m_base, "Unterminated \\Q...\\E sequence.");
2009 return false;
2010 }
2011 // check to see if it's a \E:
2012 if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E)
2013 {
2014 ++m_position;
2015 end = m_position - 2;
2016 break;
2017 }
2018 // otherwise go round again:
2019 }while(true);
2020 //
2021 // now add all the character between the two escapes as literals:
2022 //
2023 while(start != end)
2024 {
2025 this->append_literal(*start);
2026 ++start;
2027 }
2028 return true;
2029#ifdef BOOST_MSVC
2030#pragma warning(pop)
2031#endif
2032}
2033
2034template <class charT, class traits>
2035bool basic_regex_parser<charT, traits>::parse_perl_extension()
2036{
2037 if(++m_position == m_end)
2038 {
2039 // Rewind to start of (? sequence:
2040 --m_position;
2041 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2042 fail(regex_constants::error_perl_extension, m_position - m_base);
2043 return false;
2044 }
2045 //
2046 // treat comments as a special case, as these
2047 // are the only ones that don't start with a leading
2048 // startmark state:
2049 //
2050 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash)
2051 {
2052 while((m_position != m_end)
2053 && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))
b32b8144 2054 {}
7c673cae
FG
2055 return true;
2056 }
2057 //
2058 // backup some state, and prepare the way:
2059 //
2060 int markid = 0;
2061 std::ptrdiff_t jump_offset = 0;
2062 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
2063 pb->icase = this->flags() & regbase::icase;
2064 std::ptrdiff_t last_paren_start = this->getoffset(pb);
2065 // back up insertion point for alternations, and set new point:
2066 std::ptrdiff_t last_alt_point = m_alt_insert_point;
2067 this->m_pdata->m_data.align();
2068 m_alt_insert_point = this->m_pdata->m_data.size();
2069 std::ptrdiff_t expected_alt_point = m_alt_insert_point;
2070 bool restore_flags = true;
2071 regex_constants::syntax_option_type old_flags = this->flags();
2072 bool old_case_change = m_has_case_change;
2073 m_has_case_change = false;
2074 charT name_delim;
2075 int mark_reset = m_mark_reset;
2076 int max_mark = m_max_mark;
2077 m_mark_reset = -1;
2078 m_max_mark = m_mark_count;
2079 boost::intmax_t v;
2080 //
2081 // select the actual extension used:
2082 //
2083 switch(this->m_traits.syntax_type(*m_position))
2084 {
2085 case regex_constants::syntax_or:
2086 m_mark_reset = m_mark_count;
2087 BOOST_FALLTHROUGH;
2088 case regex_constants::syntax_colon:
2089 //
2090 // a non-capturing mark:
2091 //
2092 pb->index = markid = 0;
2093 ++m_position;
2094 break;
2095 case regex_constants::syntax_digit:
2096 {
2097 //
2098 // a recursive subexpression:
2099 //
2100 v = this->m_traits.toi(m_position, m_end, 10);
2101 if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2102 {
2103 // Rewind to start of (? sequence:
2104 --m_position;
2105 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2106 fail(regex_constants::error_perl_extension, m_position - m_base, "The recursive sub-expression refers to an invalid marking group, or is unterminated.");
2107 return false;
2108 }
2109insert_recursion:
2110 pb->index = markid = 0;
2111 re_recurse* pr = static_cast<re_recurse*>(this->append_state(syntax_element_recurse, sizeof(re_recurse)));
2112 pr->alt.i = v;
2113 pr->state_id = 0;
2114 static_cast<re_case*>(
2115 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2116 )->icase = this->flags() & regbase::icase;
2117 break;
2118 }
2119 case regex_constants::syntax_plus:
2120 //
2121 // A forward-relative recursive subexpression:
2122 //
2123 ++m_position;
2124 v = this->m_traits.toi(m_position, m_end, 10);
2125 if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2126 {
2127 // Rewind to start of (? sequence:
2128 --m_position;
2129 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2130 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2131 return false;
2132 }
b32b8144
FG
2133 if ((std::numeric_limits<boost::intmax_t>::max)() - m_mark_count < v)
2134 {
2135 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2136 return false;
2137 }
7c673cae
FG
2138 v += m_mark_count;
2139 goto insert_recursion;
2140 case regex_constants::syntax_dash:
2141 //
2142 // Possibly a backward-relative recursive subexpression:
2143 //
2144 ++m_position;
2145 v = this->m_traits.toi(m_position, m_end, 10);
2146 if(v <= 0)
2147 {
2148 --m_position;
2149 // Oops not a relative recursion at all, but a (?-imsx) group:
2150 goto option_group_jump;
2151 }
20effc67 2152 v = static_cast<boost::intmax_t>(m_mark_count) + 1 - v;
7c673cae
FG
2153 if(v <= 0)
2154 {
2155 // Rewind to start of (? sequence:
2156 --m_position;
2157 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2158 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2159 return false;
2160 }
2161 goto insert_recursion;
2162 case regex_constants::syntax_equal:
2163 pb->index = markid = -1;
2164 ++m_position;
2165 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2166 this->m_pdata->m_data.align();
2167 m_alt_insert_point = this->m_pdata->m_data.size();
2168 break;
2169 case regex_constants::syntax_not:
2170 pb->index = markid = -2;
2171 ++m_position;
2172 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2173 this->m_pdata->m_data.align();
2174 m_alt_insert_point = this->m_pdata->m_data.size();
2175 break;
2176 case regex_constants::escape_type_left_word:
2177 {
2178 // a lookbehind assertion:
2179 if(++m_position == m_end)
2180 {
2181 // Rewind to start of (? sequence:
2182 --m_position;
2183 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2184 fail(regex_constants::error_perl_extension, m_position - m_base);
2185 return false;
2186 }
2187 regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position);
2188 if(t == regex_constants::syntax_not)
2189 pb->index = markid = -2;
2190 else if(t == regex_constants::syntax_equal)
2191 pb->index = markid = -1;
2192 else
2193 {
2194 // Probably a named capture which also starts (?< :
2195 name_delim = '>';
2196 --m_position;
2197 goto named_capture_jump;
2198 }
2199 ++m_position;
2200 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2201 this->append_state(syntax_element_backstep, sizeof(re_brace));
2202 this->m_pdata->m_data.align();
2203 m_alt_insert_point = this->m_pdata->m_data.size();
2204 break;
2205 }
2206 case regex_constants::escape_type_right_word:
2207 //
2208 // an independent sub-expression:
2209 //
2210 pb->index = markid = -3;
2211 ++m_position;
2212 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2213 this->m_pdata->m_data.align();
2214 m_alt_insert_point = this->m_pdata->m_data.size();
2215 break;
2216 case regex_constants::syntax_open_mark:
2217 {
2218 // a conditional expression:
2219 pb->index = markid = -4;
2220 if(++m_position == m_end)
2221 {
2222 // Rewind to start of (? sequence:
2223 --m_position;
2224 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2225 fail(regex_constants::error_perl_extension, m_position - m_base);
2226 return false;
2227 }
2228 v = this->m_traits.toi(m_position, m_end, 10);
2229 if(m_position == m_end)
2230 {
2231 // Rewind to start of (? sequence:
2232 --m_position;
2233 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2234 fail(regex_constants::error_perl_extension, m_position - m_base);
2235 return false;
2236 }
2237 if(*m_position == charT('R'))
2238 {
2239 if(++m_position == m_end)
2240 {
2241 // Rewind to start of (? sequence:
2242 --m_position;
2243 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2244 fail(regex_constants::error_perl_extension, m_position - m_base);
2245 return false;
2246 }
2247 if(*m_position == charT('&'))
2248 {
2249 const charT* base = ++m_position;
2250 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2251 ++m_position;
2252 if(m_position == m_end)
2253 {
2254 // Rewind to start of (? sequence:
2255 --m_position;
2256 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2257 fail(regex_constants::error_perl_extension, m_position - m_base);
2258 return false;
2259 }
2260 v = -static_cast<int>(hash_value_from_capture_name(base, m_position));
2261 }
2262 else
2263 {
2264 v = -this->m_traits.toi(m_position, m_end, 10);
2265 }
2266 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2267 br->index = v < 0 ? (v - 1) : 0;
2268 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2269 {
2270 // Rewind to start of (? sequence:
2271 --m_position;
2272 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2273 fail(regex_constants::error_perl_extension, m_position - m_base);
2274 return false;
2275 }
2276 if(++m_position == m_end)
2277 {
2278 // Rewind to start of (? sequence:
2279 --m_position;
2280 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2281 fail(regex_constants::error_perl_extension, m_position - m_base);
2282 return false;
2283 }
2284 }
2285 else if((*m_position == charT('\'')) || (*m_position == charT('<')))
2286 {
2287 const charT* base = ++m_position;
2288 while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\'')))
2289 ++m_position;
2290 if(m_position == m_end)
2291 {
2292 // Rewind to start of (? sequence:
2293 --m_position;
2294 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2295 fail(regex_constants::error_perl_extension, m_position - m_base);
2296 return false;
2297 }
2298 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2299 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2300 br->index = v;
2301 if(((*m_position != charT('>')) && (*m_position != charT('\''))) || (++m_position == m_end))
2302 {
2303 // Rewind to start of (? sequence:
2304 --m_position;
2305 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2306 fail(regex_constants::error_perl_extension, m_position - m_base, "Unterminated named capture.");
2307 return false;
2308 }
2309 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2310 {
2311 // Rewind to start of (? sequence:
2312 --m_position;
2313 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2314 fail(regex_constants::error_perl_extension, m_position - m_base);
2315 return false;
2316 }
2317 if(++m_position == m_end)
2318 {
2319 // Rewind to start of (? sequence:
2320 --m_position;
2321 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2322 fail(regex_constants::error_perl_extension, m_position - m_base);
2323 return false;
2324 }
2325 }
2326 else if(*m_position == charT('D'))
2327 {
2328 const char* def = "DEFINE";
2329 while(*def && (m_position != m_end) && (*m_position == charT(*def)))
2330 ++m_position, ++def;
2331 if((m_position == m_end) || *def)
2332 {
2333 // Rewind to start of (? sequence:
2334 --m_position;
2335 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2336 fail(regex_constants::error_perl_extension, m_position - m_base);
2337 return false;
2338 }
2339 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2340 br->index = 9999; // special magic value!
2341 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2342 {
2343 // Rewind to start of (? sequence:
2344 --m_position;
2345 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2346 fail(regex_constants::error_perl_extension, m_position - m_base);
2347 return false;
2348 }
2349 if(++m_position == m_end)
2350 {
2351 // Rewind to start of (? sequence:
2352 --m_position;
2353 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2354 fail(regex_constants::error_perl_extension, m_position - m_base);
2355 return false;
2356 }
2357 }
2358 else if(v > 0)
2359 {
2360 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2361 br->index = v;
2362 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2363 {
2364 // Rewind to start of (? sequence:
2365 --m_position;
2366 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2367 fail(regex_constants::error_perl_extension, m_position - m_base);
2368 return false;
2369 }
2370 if(++m_position == m_end)
2371 {
2372 // Rewind to start of (? sequence:
2373 --m_position;
2374 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2375 fail(regex_constants::error_perl_extension, m_position - m_base);
2376 return false;
2377 }
2378 }
2379 else
2380 {
2381 // verify that we have a lookahead or lookbehind assert:
2382 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
2383 {
2384 // Rewind to start of (? sequence:
2385 --m_position;
2386 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2387 fail(regex_constants::error_perl_extension, m_position - m_base);
2388 return false;
2389 }
2390 if(++m_position == m_end)
2391 {
2392 // Rewind to start of (? sequence:
2393 --m_position;
2394 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2395 fail(regex_constants::error_perl_extension, m_position - m_base);
2396 return false;
2397 }
2398 if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
2399 {
2400 if(++m_position == m_end)
2401 {
2402 // Rewind to start of (? sequence:
2403 --m_position;
2404 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2405 fail(regex_constants::error_perl_extension, m_position - m_base);
2406 return false;
2407 }
2408 if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
2409 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
2410 {
2411 // Rewind to start of (? sequence:
2412 --m_position;
2413 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2414 fail(regex_constants::error_perl_extension, m_position - m_base);
2415 return false;
2416 }
2417 m_position -= 3;
2418 }
2419 else
2420 {
2421 if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
2422 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
2423 {
2424 // Rewind to start of (? sequence:
2425 --m_position;
2426 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2427 fail(regex_constants::error_perl_extension, m_position - m_base);
2428 return false;
2429 }
2430 m_position -= 2;
2431 }
2432 }
2433 break;
2434 }
2435 case regex_constants::syntax_close_mark:
2436 // Rewind to start of (? sequence:
2437 --m_position;
2438 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2439 fail(regex_constants::error_perl_extension, m_position - m_base);
2440 return false;
2441 case regex_constants::escape_type_end_buffer:
2442 {
2443 name_delim = *m_position;
2444named_capture_jump:
2445 markid = 0;
2446 if(0 == (this->flags() & regbase::nosubs))
2447 {
2448 markid = ++m_mark_count;
2449 #ifndef BOOST_NO_STD_DISTANCE
2450 if(this->flags() & regbase::save_subexpression_location)
2451 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 2, 0));
2452 #else
2453 if(this->flags() & regbase::save_subexpression_location)
2454 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 2, 0));
2455 #endif
2456 }
2457 pb->index = markid;
2458 const charT* base = ++m_position;
2459 if(m_position == m_end)
2460 {
2461 // Rewind to start of (? sequence:
2462 --m_position;
2463 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2464 fail(regex_constants::error_perl_extension, m_position - m_base);
2465 return false;
2466 }
2467 while((m_position != m_end) && (*m_position != name_delim))
2468 ++m_position;
2469 if(m_position == m_end)
2470 {
2471 // Rewind to start of (? sequence:
2472 --m_position;
2473 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2474 fail(regex_constants::error_perl_extension, m_position - m_base);
2475 return false;
2476 }
2477 this->m_pdata->set_name(base, m_position, markid);
2478 ++m_position;
2479 break;
2480 }
2481 default:
2482 if(*m_position == charT('R'))
2483 {
2484 ++m_position;
2485 v = 0;
2486 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2487 {
2488 // Rewind to start of (? sequence:
2489 --m_position;
2490 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2491 fail(regex_constants::error_perl_extension, m_position - m_base);
2492 return false;
2493 }
2494 goto insert_recursion;
2495 }
2496 if(*m_position == charT('&'))
2497 {
2498 ++m_position;
2499 const charT* base = m_position;
2500 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2501 ++m_position;
2502 if(m_position == m_end)
2503 {
2504 // Rewind to start of (? sequence:
2505 --m_position;
2506 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2507 fail(regex_constants::error_perl_extension, m_position - m_base);
2508 return false;
2509 }
2510 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2511 goto insert_recursion;
2512 }
2513 if(*m_position == charT('P'))
2514 {
2515 ++m_position;
2516 if(m_position == m_end)
2517 {
2518 // Rewind to start of (? sequence:
2519 --m_position;
2520 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2521 fail(regex_constants::error_perl_extension, m_position - m_base);
2522 return false;
2523 }
2524 if(*m_position == charT('>'))
2525 {
2526 ++m_position;
2527 const charT* base = m_position;
2528 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2529 ++m_position;
2530 if(m_position == m_end)
2531 {
2532 // Rewind to start of (? sequence:
2533 --m_position;
2534 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2535 fail(regex_constants::error_perl_extension, m_position - m_base);
2536 return false;
2537 }
2538 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2539 goto insert_recursion;
2540 }
2541 }
2542 //
2543 // lets assume that we have a (?imsx) group and try and parse it:
2544 //
2545option_group_jump:
2546 regex_constants::syntax_option_type opts = parse_options();
2547 if(m_position == m_end)
2548 {
2549 // Rewind to start of (? sequence:
2550 --m_position;
2551 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2552 fail(regex_constants::error_perl_extension, m_position - m_base);
2553 return false;
2554 }
2555 // make a note of whether we have a case change:
2556 m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase));
2557 pb->index = markid = 0;
2558 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark)
2559 {
2560 // update flags and carry on as normal:
2561 this->flags(opts);
2562 restore_flags = false;
2563 old_case_change |= m_has_case_change; // defer end of scope by one ')'
2564 }
2565 else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon)
2566 {
2567 // update flags and carry on until the matching ')' is found:
2568 this->flags(opts);
2569 ++m_position;
2570 }
2571 else
2572 {
2573 // Rewind to start of (? sequence:
2574 --m_position;
2575 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2576 fail(regex_constants::error_perl_extension, m_position - m_base);
2577 return false;
2578 }
2579
2580 // finally append a case change state if we need it:
2581 if(m_has_case_change)
2582 {
2583 static_cast<re_case*>(
2584 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2585 )->icase = opts & regbase::icase;
2586 }
2587
2588 }
2589 //
2590 // now recursively add more states, this will terminate when we get to a
2591 // matching ')' :
2592 //
2593 parse_all();
2594 //
2595 // Unwind alternatives:
2596 //
2597 if(0 == unwind_alts(last_paren_start))
2598 {
2599 // Rewind to start of (? sequence:
2600 --m_position;
2601 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2602 fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid alternation operators within (?...) block.");
2603 return false;
2604 }
2605 //
2606 // we either have a ')' or we have run out of characters prematurely:
2607 //
2608 if(m_position == m_end)
2609 {
2610 // Rewind to start of (? sequence:
2611 --m_position;
2612 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2613 this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
2614 return false;
2615 }
1e59de90 2616 BOOST_REGEX_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
7c673cae
FG
2617 ++m_position;
2618 //
2619 // restore the flags:
2620 //
2621 if(restore_flags)
2622 {
2623 // append a case change state if we need it:
2624 if(m_has_case_change)
2625 {
2626 static_cast<re_case*>(
2627 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2628 )->icase = old_flags & regbase::icase;
2629 }
2630 this->flags(old_flags);
2631 }
2632 //
2633 // set up the jump pointer if we have one:
2634 //
2635 if(jump_offset)
2636 {
2637 this->m_pdata->m_data.align();
2638 re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
2639 jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
2640 if((this->m_last_state == jmp) && (markid != -2))
2641 {
2642 // Oops... we didn't have anything inside the assertion.
2643 // Note we don't get here for negated forward lookahead as (?!)
2644 // does have some uses.
2645 // Rewind to start of (? sequence:
2646 --m_position;
2647 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2648 fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid or empty zero width assertion.");
2649 return false;
2650 }
2651 }
2652 //
2653 // verify that if this is conditional expression, that we do have
2654 // an alternative, if not add one:
2655 //
2656 if(markid == -4)
2657 {
2658 re_syntax_base* b = this->getaddress(expected_alt_point);
2659 // Make sure we have exactly one alternative following this state:
2660 if(b->type != syntax_element_alt)
2661 {
2662 re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
2663 alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
2664 }
b32b8144 2665 else if(((std::ptrdiff_t)this->m_pdata->m_data.size() > (static_cast<re_alt*>(b)->alt.i + this->getoffset(b))) && (static_cast<re_alt*>(b)->alt.i > 0) && this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
7c673cae
FG
2666 {
2667 // Can't have seen more than one alternative:
2668 // Rewind to start of (? sequence:
2669 --m_position;
2670 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2671 fail(regex_constants::error_bad_pattern, m_position - m_base, "More than one alternation operator | was encountered inside a conditional expression.");
2672 return false;
2673 }
2674 else
2675 {
2676 // We must *not* have seen an alternative inside a (DEFINE) block:
2677 b = this->getaddress(b->next.i, b);
2678 if((b->type == syntax_element_assert_backref) && (static_cast<re_brace*>(b)->index == 9999))
2679 {
2680 // Rewind to start of (? sequence:
2681 --m_position;
2682 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2683 fail(regex_constants::error_bad_pattern, m_position - m_base, "Alternation operators are not allowed inside a DEFINE block.");
2684 return false;
2685 }
2686 }
2687 // check for invalid repetition of next state:
2688 b = this->getaddress(expected_alt_point);
2689 b = this->getaddress(static_cast<re_alt*>(b)->next.i, b);
2690 if((b->type != syntax_element_assert_backref)
2691 && (b->type != syntax_element_startmark))
2692 {
2693 // Rewind to start of (? sequence:
2694 --m_position;
2695 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2696 fail(regex_constants::error_badrepeat, m_position - m_base, "A repetition operator cannot be applied to a zero-width assertion.");
2697 return false;
2698 }
2699 }
2700 //
2701 // append closing parenthesis state:
2702 //
2703 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
2704 pb->index = markid;
2705 pb->icase = this->flags() & regbase::icase;
2706 this->m_paren_start = last_paren_start;
2707 //
2708 // restore the alternate insertion point:
2709 //
2710 this->m_alt_insert_point = last_alt_point;
2711 //
2712 // and the case change data:
2713 //
2714 m_has_case_change = old_case_change;
2715 //
2716 // And the mark_reset data:
2717 //
2718 if(m_max_mark > m_mark_count)
2719 {
2720 m_mark_count = m_max_mark;
2721 }
2722 m_mark_reset = mark_reset;
2723 m_max_mark = max_mark;
2724
2725
2726 if(markid > 0)
2727 {
2728#ifndef BOOST_NO_STD_DISTANCE
2729 if(this->flags() & regbase::save_subexpression_location)
20effc67 2730 this->m_pdata->m_subs.at((std::size_t)markid - 1).second = std::distance(m_base, m_position) - 1;
7c673cae
FG
2731#else
2732 if(this->flags() & regbase::save_subexpression_location)
2733 this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base) - 1;
2734#endif
2735 //
2736 // allow backrefs to this mark:
2737 //
20effc67 2738 this->m_backrefs.set(markid);
7c673cae
FG
2739 }
2740 return true;
2741}
2742
2743template <class charT, class traits>
2744bool basic_regex_parser<charT, traits>::match_verb(const char* verb)
2745{
2746 while(*verb)
2747 {
2748 if(static_cast<charT>(*verb) != *m_position)
2749 {
2750 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2751 fail(regex_constants::error_perl_extension, m_position - m_base);
2752 return false;
2753 }
2754 if(++m_position == m_end)
2755 {
2756 --m_position;
2757 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2758 fail(regex_constants::error_perl_extension, m_position - m_base);
2759 return false;
2760 }
2761 ++verb;
2762 }
2763 return true;
2764}
2765
20effc67
TL
2766#ifdef BOOST_MSVC
2767# pragma warning(push)
2768#if BOOST_MSVC >= 1800
2769#pragma warning(disable:26812)
2770#endif
2771#endif
7c673cae
FG
2772template <class charT, class traits>
2773bool basic_regex_parser<charT, traits>::parse_perl_verb()
2774{
2775 if(++m_position == m_end)
2776 {
2777 // Rewind to start of (* sequence:
2778 --m_position;
2779 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2780 fail(regex_constants::error_perl_extension, m_position - m_base);
2781 return false;
2782 }
2783 switch(*m_position)
2784 {
2785 case 'F':
2786 if(++m_position == m_end)
2787 {
2788 // Rewind to start of (* sequence:
2789 --m_position;
2790 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2791 fail(regex_constants::error_perl_extension, m_position - m_base);
2792 return false;
2793 }
2794 if((this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) || match_verb("AIL"))
2795 {
2796 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2797 {
2798 // Rewind to start of (* sequence:
2799 --m_position;
2800 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2801 fail(regex_constants::error_perl_extension, m_position - m_base);
2802 return false;
2803 }
2804 ++m_position;
2805 this->append_state(syntax_element_fail);
2806 return true;
2807 }
2808 break;
2809 case 'A':
2810 if(++m_position == m_end)
2811 {
2812 // Rewind to start of (* sequence:
2813 --m_position;
2814 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2815 fail(regex_constants::error_perl_extension, m_position - m_base);
2816 return false;
2817 }
2818 if(match_verb("CCEPT"))
2819 {
2820 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2821 {
2822 // Rewind to start of (* sequence:
2823 --m_position;
2824 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2825 fail(regex_constants::error_perl_extension, m_position - m_base);
2826 return false;
2827 }
2828 ++m_position;
2829 this->append_state(syntax_element_accept);
2830 return true;
2831 }
2832 break;
2833 case 'C':
2834 if(++m_position == m_end)
2835 {
2836 // Rewind to start of (* sequence:
2837 --m_position;
2838 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2839 fail(regex_constants::error_perl_extension, m_position - m_base);
2840 return false;
2841 }
2842 if(match_verb("OMMIT"))
2843 {
2844 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2845 {
2846 // Rewind to start of (* sequence:
2847 --m_position;
2848 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2849 fail(regex_constants::error_perl_extension, m_position - m_base);
2850 return false;
2851 }
2852 ++m_position;
2853 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_commit;
2854 this->m_pdata->m_disable_match_any = true;
2855 return true;
2856 }
2857 break;
2858 case 'P':
2859 if(++m_position == m_end)
2860 {
2861 // Rewind to start of (* sequence:
2862 --m_position;
2863 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2864 fail(regex_constants::error_perl_extension, m_position - m_base);
2865 return false;
2866 }
2867 if(match_verb("RUNE"))
2868 {
2869 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2870 {
2871 // Rewind to start of (* sequence:
2872 --m_position;
2873 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2874 fail(regex_constants::error_perl_extension, m_position - m_base);
2875 return false;
2876 }
2877 ++m_position;
2878 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_prune;
2879 this->m_pdata->m_disable_match_any = true;
2880 return true;
2881 }
2882 break;
2883 case 'S':
2884 if(++m_position == m_end)
2885 {
2886 // Rewind to start of (* sequence:
2887 --m_position;
2888 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2889 fail(regex_constants::error_perl_extension, m_position - m_base);
2890 return false;
2891 }
2892 if(match_verb("KIP"))
2893 {
2894 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2895 {
2896 // Rewind to start of (* sequence:
2897 --m_position;
2898 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2899 fail(regex_constants::error_perl_extension, m_position - m_base);
2900 return false;
2901 }
2902 ++m_position;
2903 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_skip;
2904 this->m_pdata->m_disable_match_any = true;
2905 return true;
2906 }
2907 break;
2908 case 'T':
2909 if(++m_position == m_end)
2910 {
2911 // Rewind to start of (* sequence:
2912 --m_position;
2913 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2914 fail(regex_constants::error_perl_extension, m_position - m_base);
2915 return false;
2916 }
2917 if(match_verb("HEN"))
2918 {
2919 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2920 {
2921 // Rewind to start of (* sequence:
2922 --m_position;
2923 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2924 fail(regex_constants::error_perl_extension, m_position - m_base);
2925 return false;
2926 }
2927 ++m_position;
2928 this->append_state(syntax_element_then);
2929 this->m_pdata->m_disable_match_any = true;
2930 return true;
2931 }
2932 break;
2933 }
b32b8144
FG
2934 // Rewind to start of (* sequence:
2935 --m_position;
2936 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2937 fail(regex_constants::error_perl_extension, m_position - m_base);
7c673cae
FG
2938 return false;
2939}
20effc67
TL
2940#ifdef BOOST_MSVC
2941# pragma warning(pop)
2942#endif
7c673cae
FG
2943
2944template <class charT, class traits>
2945bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
2946{
2947 //
2948 // parses an emacs style \sx or \Sx construct.
2949 //
2950 if(++m_position == m_end)
2951 {
2952 // Rewind to start of sequence:
2953 --m_position;
2954 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
2955 fail(regex_constants::error_escape, m_position - m_base);
2956 return false;
2957 }
2958 basic_char_set<charT, traits> char_set;
2959 if(negate)
2960 char_set.negate();
2961
2962 static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', };
2963
2964 switch(*m_position)
2965 {
2966 case 's':
2967 case ' ':
2968 char_set.add_class(this->m_mask_space);
2969 break;
2970 case 'w':
2971 char_set.add_class(this->m_word_mask);
2972 break;
2973 case '_':
2974 char_set.add_single(digraph<charT>(charT('$')));
2975 char_set.add_single(digraph<charT>(charT('&')));
2976 char_set.add_single(digraph<charT>(charT('*')));
2977 char_set.add_single(digraph<charT>(charT('+')));
2978 char_set.add_single(digraph<charT>(charT('-')));
2979 char_set.add_single(digraph<charT>(charT('_')));
2980 char_set.add_single(digraph<charT>(charT('<')));
2981 char_set.add_single(digraph<charT>(charT('>')));
2982 break;
2983 case '.':
2984 char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5));
2985 break;
2986 case '(':
2987 char_set.add_single(digraph<charT>(charT('(')));
2988 char_set.add_single(digraph<charT>(charT('[')));
2989 char_set.add_single(digraph<charT>(charT('{')));
2990 break;
2991 case ')':
2992 char_set.add_single(digraph<charT>(charT(')')));
2993 char_set.add_single(digraph<charT>(charT(']')));
2994 char_set.add_single(digraph<charT>(charT('}')));
2995 break;
2996 case '"':
2997 char_set.add_single(digraph<charT>(charT('"')));
2998 char_set.add_single(digraph<charT>(charT('\'')));
2999 char_set.add_single(digraph<charT>(charT('`')));
3000 break;
3001 case '\'':
3002 char_set.add_single(digraph<charT>(charT('\'')));
3003 char_set.add_single(digraph<charT>(charT(',')));
3004 char_set.add_single(digraph<charT>(charT('#')));
3005 break;
3006 case '<':
3007 char_set.add_single(digraph<charT>(charT(';')));
3008 break;
3009 case '>':
3010 char_set.add_single(digraph<charT>(charT('\n')));
3011 char_set.add_single(digraph<charT>(charT('\f')));
3012 break;
3013 default:
3014 fail(regex_constants::error_ctype, m_position - m_base);
3015 return false;
3016 }
3017 if(0 == this->append_set(char_set))
3018 {
3019 fail(regex_constants::error_ctype, m_position - m_base);
3020 return false;
3021 }
3022 ++m_position;
3023 return true;
3024}
3025
3026template <class charT, class traits>
3027regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options()
3028{
3029 // we have a (?imsx-imsx) group, convert it into a set of flags:
3030 regex_constants::syntax_option_type f = this->flags();
3031 bool breakout = false;
3032 do
3033 {
3034 switch(*m_position)
3035 {
3036 case 's':
3037 f |= regex_constants::mod_s;
3038 f &= ~regex_constants::no_mod_s;
3039 break;
3040 case 'm':
3041 f &= ~regex_constants::no_mod_m;
3042 break;
3043 case 'i':
3044 f |= regex_constants::icase;
3045 break;
3046 case 'x':
3047 f |= regex_constants::mod_x;
3048 break;
3049 default:
3050 breakout = true;
3051 continue;
3052 }
3053 if(++m_position == m_end)
3054 {
3055 // Rewind to start of (? sequence:
3056 --m_position;
3057 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
3058 fail(regex_constants::error_paren, m_position - m_base);
3059 return false;
3060 }
3061 }
3062 while(!breakout);
3063
3064 breakout = false;
3065
3066 if(*m_position == static_cast<charT>('-'))
3067 {
3068 if(++m_position == m_end)
3069 {
3070 // Rewind to start of (? sequence:
3071 --m_position;
3072 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
3073 fail(regex_constants::error_paren, m_position - m_base);
3074 return false;
3075 }
3076 do
3077 {
3078 switch(*m_position)
3079 {
3080 case 's':
3081 f &= ~regex_constants::mod_s;
3082 f |= regex_constants::no_mod_s;
3083 break;
3084 case 'm':
3085 f |= regex_constants::no_mod_m;
3086 break;
3087 case 'i':
3088 f &= ~regex_constants::icase;
3089 break;
3090 case 'x':
3091 f &= ~regex_constants::mod_x;
3092 break;
3093 default:
3094 breakout = true;
3095 continue;
3096 }
3097 if(++m_position == m_end)
3098 {
3099 // Rewind to start of (? sequence:
3100 --m_position;
3101 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
3102 fail(regex_constants::error_paren, m_position - m_base);
3103 return false;
3104 }
3105 }
3106 while(!breakout);
3107 }
3108 return f;
3109}
3110
3111template <class charT, class traits>
3112bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_start)
3113{
3114 //
3115 // If we didn't actually add any states after the last
3116 // alternative then that's an error:
3117 //
3118 if((this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))
1e59de90 3119 && (!m_alt_jumps.empty()) && (m_alt_jumps.back() > last_paren_start)
7c673cae
FG
3120 &&
3121 !(
3122 ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
3123 &&
3124 ((this->flags() & regbase::no_empty_expressions) == 0)
3125 )
3126 )
3127 {
3128 fail(regex_constants::error_empty, this->m_position - this->m_base, "Can't terminate a sub-expression with an alternation operator |.");
3129 return false;
3130 }
3131 //
3132 // Fix up our alternatives:
3133 //
1e59de90 3134 while((!m_alt_jumps.empty()) && (m_alt_jumps.back() > last_paren_start))
7c673cae
FG
3135 {
3136 //
3137 // fix up the jump to point to the end of the states
3138 // that we've just added:
3139 //
3140 std::ptrdiff_t jump_offset = m_alt_jumps.back();
3141 m_alt_jumps.pop_back();
3142 this->m_pdata->m_data.align();
3143 re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
1e59de90
TL
3144 if (jmp->type != syntax_element_jump)
3145 {
3146 // Something really bad happened, this used to be an assert,
3147 // but we'll make it an error just in case we should ever get here.
3148 fail(regex_constants::error_unknown, this->m_position - this->m_base, "Internal logic failed while compiling the expression, probably you added a repeat to something non-repeatable!");
3149 return false;
3150 }
7c673cae
FG
3151 jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
3152 }
3153 return true;
3154}
3155
3156#ifdef BOOST_MSVC
3157#pragma warning(pop)
3158#endif
3159
3160} // namespace BOOST_REGEX_DETAIL_NS
3161} // namespace boost
3162
3163#ifdef BOOST_MSVC
3164#pragma warning(push)
3165#pragma warning(disable: 4103)
3166#endif
3167#ifdef BOOST_HAS_ABI_HEADERS
3168# include BOOST_ABI_SUFFIX
3169#endif
3170#ifdef BOOST_MSVC
3171#pragma warning(pop)
3172#endif
3173
3174#endif