]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/libs/regex/include/boost/regex/v4/basic_regex_parser.hpp
bump version to 12.2.2-pve1
[ceph.git] / ceph / src / boost / libs / regex / include / boost / regex / v4 / basic_regex_parser.hpp
CommitLineData
7c673cae
FG
1/*
2 *
3 * Copyright (c) 2004
4 * John Maddock
5 *
6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 *
10 */
11
12 /*
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE basic_regex_parser.hpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Declares template class basic_regex_parser.
17 */
18
19#ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
20#define BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
21
22#ifdef BOOST_MSVC
23#pragma warning(push)
24#pragma warning(disable: 4103)
25#endif
26#ifdef BOOST_HAS_ABI_HEADERS
27# include BOOST_ABI_PREFIX
28#endif
29#ifdef BOOST_MSVC
30#pragma warning(pop)
31#endif
32
33namespace boost{
34namespace BOOST_REGEX_DETAIL_NS{
35
36#ifdef BOOST_MSVC
37#pragma warning(push)
38#pragma warning(disable:4244 4800)
39#endif
40
41inline boost::intmax_t umax(mpl::false_ const&)
42{
43 // Get out clause here, just in case numeric_limits is unspecialized:
44 return std::numeric_limits<boost::intmax_t>::is_specialized ? (std::numeric_limits<boost::intmax_t>::max)() : INT_MAX;
45}
46inline boost::intmax_t umax(mpl::true_ const&)
47{
48 return (std::numeric_limits<std::size_t>::max)();
49}
50
51inline boost::intmax_t umax()
52{
53 return umax(mpl::bool_<std::numeric_limits<boost::intmax_t>::digits >= std::numeric_limits<std::size_t>::digits>());
54}
55
56template <class charT, class traits>
57class basic_regex_parser : public basic_regex_creator<charT, traits>
58{
59public:
60 basic_regex_parser(regex_data<charT, traits>* data);
61 void parse(const charT* p1, const charT* p2, unsigned flags);
62 void fail(regex_constants::error_type error_code, std::ptrdiff_t position);
63 void fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos);
64 void fail(regex_constants::error_type error_code, std::ptrdiff_t position, const std::string& message)
65 {
66 fail(error_code, position, message, position);
67 }
68
69 bool parse_all();
70 bool parse_basic();
71 bool parse_extended();
72 bool parse_literal();
73 bool parse_open_paren();
74 bool parse_basic_escape();
75 bool parse_extended_escape();
76 bool parse_match_any();
77 bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
78 bool parse_repeat_range(bool isbasic);
79 bool parse_alt();
80 bool parse_set();
81 bool parse_backref();
82 void parse_set_literal(basic_char_set<charT, traits>& char_set);
83 bool parse_inner_set(basic_char_set<charT, traits>& char_set);
84 bool parse_QE();
85 bool parse_perl_extension();
86 bool parse_perl_verb();
87 bool match_verb(const char*);
88 bool add_emacs_code(bool negate);
89 bool unwind_alts(std::ptrdiff_t last_paren_start);
90 digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
91 charT unescape_character();
92 regex_constants::syntax_option_type parse_options();
93
94private:
95 typedef bool (basic_regex_parser::*parser_proc_type)();
96 typedef typename traits::string_type string_type;
97 typedef typename traits::char_class_type char_class_type;
98 parser_proc_type m_parser_proc; // the main parser to use
99 const charT* m_base; // the start of the string being parsed
100 const charT* m_end; // the end of the string being parsed
101 const charT* m_position; // our current parser position
102 unsigned m_mark_count; // how many sub-expressions we have
103 int m_mark_reset; // used to indicate that we're inside a (?|...) block.
104 unsigned m_max_mark; // largest mark count seen inside a (?|...) block.
105 std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
106 std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
107 bool m_has_case_change; // true if somewhere in the current block the case has changed
108#if defined(BOOST_MSVC) && defined(_M_IX86)
109 // This is an ugly warning suppression workaround (for warnings *inside* std::vector
110 // that can not otherwise be suppressed)...
111 BOOST_STATIC_ASSERT(sizeof(long) >= sizeof(void*));
112 std::vector<long> m_alt_jumps; // list of alternative in the current scope.
113#else
114 std::vector<std::ptrdiff_t> m_alt_jumps; // list of alternative in the current scope.
115#endif
116
117 basic_regex_parser& operator=(const basic_regex_parser&);
118 basic_regex_parser(const basic_regex_parser&);
119};
120
121template <class charT, class traits>
122basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
123 : basic_regex_creator<charT, traits>(data), m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false)
124{
125}
126
127template <class charT, class traits>
128void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned l_flags)
129{
130 // pass l_flags on to base class:
131 this->init(l_flags);
132 // set up pointers:
133 m_position = m_base = p1;
134 m_end = p2;
135 // empty strings are errors:
136 if((p1 == p2) &&
137 (
138 ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group)
139 || (l_flags & regbase::no_empty_expressions)
140 )
141 )
142 {
143 fail(regex_constants::error_empty, 0);
144 return;
145 }
146 // select which parser to use:
147 switch(l_flags & regbase::main_option_type)
148 {
149 case regbase::perl_syntax_group:
150 {
151 m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
152 //
153 // Add a leading paren with index zero to give recursions a target:
154 //
155 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
156 br->index = 0;
157 br->icase = this->flags() & regbase::icase;
158 break;
159 }
160 case regbase::basic_syntax_group:
161 m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;
162 break;
163 case regbase::literal:
164 m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal;
165 break;
166 default:
167 // Ooops, someone has managed to set more than one of the main option flags,
168 // so this must be an error:
169 fail(regex_constants::error_unknown, 0, "An invalid combination of regular expression syntax flags was used.");
170 return;
171 }
172
173 // parse all our characters:
174 bool result = parse_all();
175 //
176 // Unwind our alternatives:
177 //
178 unwind_alts(-1);
179 // reset l_flags as a global scope (?imsx) may have altered them:
180 this->flags(l_flags);
181 // if we haven't gobbled up all the characters then we must
182 // have had an unexpected ')' :
183 if(!result)
184 {
185 fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Found a closing ) with no corresponding openening parenthesis.");
186 return;
187 }
188 // if an error has been set then give up now:
189 if(this->m_pdata->m_status)
190 return;
191 // fill in our sub-expression count:
192 this->m_pdata->m_mark_count = 1 + m_mark_count;
193 this->finalize(p1, p2);
194}
195
196template <class charT, class traits>
197void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position)
198{
199 // get the error message:
200 std::string message = this->m_pdata->m_ptraits->error_string(error_code);
201 fail(error_code, position, message);
202}
203
204template <class charT, class traits>
205void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos)
206{
207 if(0 == this->m_pdata->m_status) // update the error code if not already set
208 this->m_pdata->m_status = error_code;
209 m_position = m_end; // don't bother parsing anything else
210
211#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
212 //
213 // Augment error message with the regular expression text:
214 //
215 if(start_pos == position)
216 start_pos = (std::max)(static_cast<std::ptrdiff_t>(0), position - static_cast<std::ptrdiff_t>(10));
217 std::ptrdiff_t end_pos = (std::min)(position + static_cast<std::ptrdiff_t>(10), static_cast<std::ptrdiff_t>(m_end - m_base));
218 if(error_code != regex_constants::error_empty)
219 {
220 if((start_pos != 0) || (end_pos != (m_end - m_base)))
221 message += " The error occurred while parsing the regular expression fragment: '";
222 else
223 message += " The error occurred while parsing the regular expression: '";
224 if(start_pos != end_pos)
225 {
226 message += std::string(m_base + start_pos, m_base + position);
227 message += ">>>HERE>>>";
228 message += std::string(m_base + position, m_base + end_pos);
229 }
230 message += "'.";
231 }
232#endif
233
234#ifndef BOOST_NO_EXCEPTIONS
235 if(0 == (this->flags() & regex_constants::no_except))
236 {
237 boost::regex_error e(message, error_code, position);
238 e.raise();
239 }
240#else
241 (void)position; // suppress warnings.
242#endif
243}
244
245template <class charT, class traits>
246bool basic_regex_parser<charT, traits>::parse_all()
247{
248 bool result = true;
249 while(result && (m_position != m_end))
250 {
251 result = (this->*m_parser_proc)();
252 }
253 return result;
254}
255
256#ifdef BOOST_MSVC
257#pragma warning(push)
258#pragma warning(disable:4702)
259#endif
260template <class charT, class traits>
261bool basic_regex_parser<charT, traits>::parse_basic()
262{
263 switch(this->m_traits.syntax_type(*m_position))
264 {
265 case regex_constants::syntax_escape:
266 return parse_basic_escape();
267 case regex_constants::syntax_dot:
268 return parse_match_any();
269 case regex_constants::syntax_caret:
270 ++m_position;
271 this->append_state(syntax_element_start_line);
272 break;
273 case regex_constants::syntax_dollar:
274 ++m_position;
275 this->append_state(syntax_element_end_line);
276 break;
277 case regex_constants::syntax_star:
278 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line))
279 return parse_literal();
280 else
281 {
282 ++m_position;
283 return parse_repeat();
284 }
285 case regex_constants::syntax_plus:
286 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
287 return parse_literal();
288 else
289 {
290 ++m_position;
291 return parse_repeat(1);
292 }
293 case regex_constants::syntax_question:
294 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
295 return parse_literal();
296 else
297 {
298 ++m_position;
299 return parse_repeat(0, 1);
300 }
301 case regex_constants::syntax_open_set:
302 return parse_set();
303 case regex_constants::syntax_newline:
304 if(this->flags() & regbase::newline_alt)
305 return parse_alt();
306 else
307 return parse_literal();
308 default:
309 return parse_literal();
310 }
311 return true;
312}
313
314template <class charT, class traits>
315bool basic_regex_parser<charT, traits>::parse_extended()
316{
317 bool result = true;
318 switch(this->m_traits.syntax_type(*m_position))
319 {
320 case regex_constants::syntax_open_mark:
321 return parse_open_paren();
322 case regex_constants::syntax_close_mark:
323 return false;
324 case regex_constants::syntax_escape:
325 return parse_extended_escape();
326 case regex_constants::syntax_dot:
327 return parse_match_any();
328 case regex_constants::syntax_caret:
329 ++m_position;
330 this->append_state(
331 (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line));
332 break;
333 case regex_constants::syntax_dollar:
334 ++m_position;
335 this->append_state(
336 (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line));
337 break;
338 case regex_constants::syntax_star:
339 if(m_position == this->m_base)
340 {
341 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"*\" cannot start a regular expression.");
342 return false;
343 }
344 ++m_position;
345 return parse_repeat();
346 case regex_constants::syntax_question:
347 if(m_position == this->m_base)
348 {
349 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"?\" cannot start a regular expression.");
350 return false;
351 }
352 ++m_position;
353 return parse_repeat(0,1);
354 case regex_constants::syntax_plus:
355 if(m_position == this->m_base)
356 {
357 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"+\" cannot start a regular expression.");
358 return false;
359 }
360 ++m_position;
361 return parse_repeat(1);
362 case regex_constants::syntax_open_brace:
363 ++m_position;
364 return parse_repeat_range(false);
365 case regex_constants::syntax_close_brace:
366 if((this->flags() & regbase::no_perl_ex) == regbase::no_perl_ex)
367 {
368 fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
369 return false;
370 }
371 result = parse_literal();
372 break;
373 case regex_constants::syntax_or:
374 return parse_alt();
375 case regex_constants::syntax_open_set:
376 return parse_set();
377 case regex_constants::syntax_newline:
378 if(this->flags() & regbase::newline_alt)
379 return parse_alt();
380 else
381 return parse_literal();
382 case regex_constants::syntax_hash:
383 //
384 // If we have a mod_x flag set, then skip until
385 // we get to a newline character:
386 //
387 if((this->flags()
388 & (regbase::no_perl_ex|regbase::mod_x))
389 == regbase::mod_x)
390 {
391 while((m_position != m_end) && !is_separator(*m_position++)){}
392 return true;
393 }
394 BOOST_FALLTHROUGH;
395 default:
396 result = parse_literal();
397 break;
398 }
399 return result;
400}
401#ifdef BOOST_MSVC
402#pragma warning(pop)
403#endif
404
405template <class charT, class traits>
406bool basic_regex_parser<charT, traits>::parse_literal()
407{
408 // append this as a literal provided it's not a space character
409 // or the perl option regbase::mod_x is not set:
410 if(
411 ((this->flags()
412 & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex))
413 != regbase::mod_x)
414 || !this->m_traits.isctype(*m_position, this->m_mask_space))
415 this->append_literal(*m_position);
416 ++m_position;
417 return true;
418}
419
420template <class charT, class traits>
421bool basic_regex_parser<charT, traits>::parse_open_paren()
422{
423 //
424 // skip the '(' and error check:
425 //
426 if(++m_position == m_end)
427 {
428 fail(regex_constants::error_paren, m_position - m_base);
429 return false;
430 }
431 //
432 // begin by checking for a perl-style (?...) extension:
433 //
434 if(
435 ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
436 || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
437 )
438 {
439 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
440 return parse_perl_extension();
441 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_star)
442 return parse_perl_verb();
443 }
444 //
445 // update our mark count, and append the required state:
446 //
447 unsigned markid = 0;
448 if(0 == (this->flags() & regbase::nosubs))
449 {
450 markid = ++m_mark_count;
451#ifndef BOOST_NO_STD_DISTANCE
452 if(this->flags() & regbase::save_subexpression_location)
453 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 1, 0));
454#else
455 if(this->flags() & regbase::save_subexpression_location)
456 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 1, 0));
457#endif
458 }
459 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
460 pb->index = markid;
461 pb->icase = this->flags() & regbase::icase;
462 std::ptrdiff_t last_paren_start = this->getoffset(pb);
463 // back up insertion point for alternations, and set new point:
464 std::ptrdiff_t last_alt_point = m_alt_insert_point;
465 this->m_pdata->m_data.align();
466 m_alt_insert_point = this->m_pdata->m_data.size();
467 //
468 // back up the current flags in case we have a nested (?imsx) group:
469 //
470 regex_constants::syntax_option_type opts = this->flags();
471 bool old_case_change = m_has_case_change;
472 m_has_case_change = false; // no changes to this scope as yet...
473 //
474 // Back up branch reset data in case we have a nested (?|...)
475 //
476 int mark_reset = m_mark_reset;
477 m_mark_reset = -1;
478 //
479 // now recursively add more states, this will terminate when we get to a
480 // matching ')' :
481 //
482 parse_all();
483 //
484 // Unwind pushed alternatives:
485 //
486 if(0 == unwind_alts(last_paren_start))
487 return false;
488 //
489 // restore flags:
490 //
491 if(m_has_case_change)
492 {
493 // the case has changed in one or more of the alternatives
494 // within the scoped (...) block: we have to add a state
495 // to reset the case sensitivity:
496 static_cast<re_case*>(
497 this->append_state(syntax_element_toggle_case, sizeof(re_case))
498 )->icase = opts & regbase::icase;
499 }
500 this->flags(opts);
501 m_has_case_change = old_case_change;
502 //
503 // restore branch reset:
504 //
505 m_mark_reset = mark_reset;
506 //
507 // we either have a ')' or we have run out of characters prematurely:
508 //
509 if(m_position == m_end)
510 {
511 this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
512 return false;
513 }
514 BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
515#ifndef BOOST_NO_STD_DISTANCE
516 if(markid && (this->flags() & regbase::save_subexpression_location))
517 this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position);
518#else
519 if(markid && (this->flags() & regbase::save_subexpression_location))
520 this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base);
521#endif
522 ++m_position;
523 //
524 // append closing parenthesis state:
525 //
526 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
527 pb->index = markid;
528 pb->icase = this->flags() & regbase::icase;
529 this->m_paren_start = last_paren_start;
530 //
531 // restore the alternate insertion point:
532 //
533 this->m_alt_insert_point = last_alt_point;
534 //
535 // allow backrefs to this mark:
536 //
537 if((markid > 0) && (markid < sizeof(unsigned) * CHAR_BIT))
538 this->m_backrefs |= 1u << (markid - 1);
539
540 return true;
541}
542
543template <class charT, class traits>
544bool basic_regex_parser<charT, traits>::parse_basic_escape()
545{
546 if(++m_position == m_end)
547 {
548 fail(regex_constants::error_paren, m_position - m_base);
549 return false;
550 }
551 bool result = true;
552 switch(this->m_traits.escape_syntax_type(*m_position))
553 {
554 case regex_constants::syntax_open_mark:
555 return parse_open_paren();
556 case regex_constants::syntax_close_mark:
557 return false;
558 case regex_constants::syntax_plus:
559 if(this->flags() & regex_constants::bk_plus_qm)
560 {
561 ++m_position;
562 return parse_repeat(1);
563 }
564 else
565 return parse_literal();
566 case regex_constants::syntax_question:
567 if(this->flags() & regex_constants::bk_plus_qm)
568 {
569 ++m_position;
570 return parse_repeat(0, 1);
571 }
572 else
573 return parse_literal();
574 case regex_constants::syntax_open_brace:
575 if(this->flags() & regbase::no_intervals)
576 return parse_literal();
577 ++m_position;
578 return parse_repeat_range(true);
579 case regex_constants::syntax_close_brace:
580 if(this->flags() & regbase::no_intervals)
581 return parse_literal();
582 fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
583 return false;
584 case regex_constants::syntax_or:
585 if(this->flags() & regbase::bk_vbar)
586 return parse_alt();
587 else
588 result = parse_literal();
589 break;
590 case regex_constants::syntax_digit:
591 return parse_backref();
592 case regex_constants::escape_type_start_buffer:
593 if(this->flags() & regbase::emacs_ex)
594 {
595 ++m_position;
596 this->append_state(syntax_element_buffer_start);
597 }
598 else
599 result = parse_literal();
600 break;
601 case regex_constants::escape_type_end_buffer:
602 if(this->flags() & regbase::emacs_ex)
603 {
604 ++m_position;
605 this->append_state(syntax_element_buffer_end);
606 }
607 else
608 result = parse_literal();
609 break;
610 case regex_constants::escape_type_word_assert:
611 if(this->flags() & regbase::emacs_ex)
612 {
613 ++m_position;
614 this->append_state(syntax_element_word_boundary);
615 }
616 else
617 result = parse_literal();
618 break;
619 case regex_constants::escape_type_not_word_assert:
620 if(this->flags() & regbase::emacs_ex)
621 {
622 ++m_position;
623 this->append_state(syntax_element_within_word);
624 }
625 else
626 result = parse_literal();
627 break;
628 case regex_constants::escape_type_left_word:
629 if(this->flags() & regbase::emacs_ex)
630 {
631 ++m_position;
632 this->append_state(syntax_element_word_start);
633 }
634 else
635 result = parse_literal();
636 break;
637 case regex_constants::escape_type_right_word:
638 if(this->flags() & regbase::emacs_ex)
639 {
640 ++m_position;
641 this->append_state(syntax_element_word_end);
642 }
643 else
644 result = parse_literal();
645 break;
646 default:
647 if(this->flags() & regbase::emacs_ex)
648 {
649 bool negate = true;
650 switch(*m_position)
651 {
652 case 'w':
653 negate = false;
654 BOOST_FALLTHROUGH;
655 case 'W':
656 {
657 basic_char_set<charT, traits> char_set;
658 if(negate)
659 char_set.negate();
660 char_set.add_class(this->m_word_mask);
661 if(0 == this->append_set(char_set))
662 {
663 fail(regex_constants::error_ctype, m_position - m_base);
664 return false;
665 }
666 ++m_position;
667 return true;
668 }
669 case 's':
670 negate = false;
671 BOOST_FALLTHROUGH;
672 case 'S':
673 return add_emacs_code(negate);
674 case 'c':
675 case 'C':
676 // not supported yet:
677 fail(regex_constants::error_escape, m_position - m_base, "The \\c and \\C escape sequences are not supported by POSIX basic regular expressions: try the Perl syntax instead.");
678 return false;
679 default:
680 break;
681 }
682 }
683 result = parse_literal();
684 break;
685 }
686 return result;
687}
688
689template <class charT, class traits>
690bool basic_regex_parser<charT, traits>::parse_extended_escape()
691{
692 ++m_position;
693 if(m_position == m_end)
694 {
695 fail(regex_constants::error_escape, m_position - m_base, "Incomplete escape sequence found.");
696 return false;
697 }
698 bool negate = false; // in case this is a character class escape: \w \d etc
699 switch(this->m_traits.escape_syntax_type(*m_position))
700 {
701 case regex_constants::escape_type_not_class:
702 negate = true;
703 BOOST_FALLTHROUGH;
704 case regex_constants::escape_type_class:
705 {
706escape_type_class_jump:
707 typedef typename traits::char_class_type m_type;
708 m_type m = this->m_traits.lookup_classname(m_position, m_position+1);
709 if(m != 0)
710 {
711 basic_char_set<charT, traits> char_set;
712 if(negate)
713 char_set.negate();
714 char_set.add_class(m);
715 if(0 == this->append_set(char_set))
716 {
717 fail(regex_constants::error_ctype, m_position - m_base);
718 return false;
719 }
720 ++m_position;
721 return true;
722 }
723 //
724 // not a class, just a regular unknown escape:
725 //
726 this->append_literal(unescape_character());
727 break;
728 }
729 case regex_constants::syntax_digit:
730 return parse_backref();
731 case regex_constants::escape_type_left_word:
732 ++m_position;
733 this->append_state(syntax_element_word_start);
734 break;
735 case regex_constants::escape_type_right_word:
736 ++m_position;
737 this->append_state(syntax_element_word_end);
738 break;
739 case regex_constants::escape_type_start_buffer:
740 ++m_position;
741 this->append_state(syntax_element_buffer_start);
742 break;
743 case regex_constants::escape_type_end_buffer:
744 ++m_position;
745 this->append_state(syntax_element_buffer_end);
746 break;
747 case regex_constants::escape_type_word_assert:
748 ++m_position;
749 this->append_state(syntax_element_word_boundary);
750 break;
751 case regex_constants::escape_type_not_word_assert:
752 ++m_position;
753 this->append_state(syntax_element_within_word);
754 break;
755 case regex_constants::escape_type_Z:
756 ++m_position;
757 this->append_state(syntax_element_soft_buffer_end);
758 break;
759 case regex_constants::escape_type_Q:
760 return parse_QE();
761 case regex_constants::escape_type_C:
762 return parse_match_any();
763 case regex_constants::escape_type_X:
764 ++m_position;
765 this->append_state(syntax_element_combining);
766 break;
767 case regex_constants::escape_type_G:
768 ++m_position;
769 this->append_state(syntax_element_restart_continue);
770 break;
771 case regex_constants::escape_type_not_property:
772 negate = true;
773 BOOST_FALLTHROUGH;
774 case regex_constants::escape_type_property:
775 {
776 ++m_position;
777 char_class_type m;
778 if(m_position == m_end)
779 {
780 fail(regex_constants::error_escape, m_position - m_base, "Incomplete property escape found.");
781 return false;
782 }
783 // maybe have \p{ddd}
784 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
785 {
786 const charT* base = m_position;
787 // skip forward until we find enclosing brace:
788 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
789 ++m_position;
790 if(m_position == m_end)
791 {
792 fail(regex_constants::error_escape, m_position - m_base, "Closing } missing from property escape sequence.");
793 return false;
794 }
795 m = this->m_traits.lookup_classname(++base, m_position++);
796 }
797 else
798 {
799 m = this->m_traits.lookup_classname(m_position, m_position+1);
800 ++m_position;
801 }
802 if(m != 0)
803 {
804 basic_char_set<charT, traits> char_set;
805 if(negate)
806 char_set.negate();
807 char_set.add_class(m);
808 if(0 == this->append_set(char_set))
809 {
810 fail(regex_constants::error_ctype, m_position - m_base);
811 return false;
812 }
813 return true;
814 }
815 fail(regex_constants::error_ctype, m_position - m_base, "Escape sequence was neither a valid property nor a valid character class name.");
816 return false;
817 }
818 case regex_constants::escape_type_reset_start_mark:
819 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
820 {
821 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
822 pb->index = -5;
823 pb->icase = this->flags() & regbase::icase;
824 this->m_pdata->m_data.align();
825 ++m_position;
826 return true;
827 }
828 goto escape_type_class_jump;
829 case regex_constants::escape_type_line_ending:
830 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
831 {
832 const charT* e = get_escape_R_string<charT>();
833 const charT* old_position = m_position;
834 const charT* old_end = m_end;
835 const charT* old_base = m_base;
836 m_position = e;
837 m_base = e;
838 m_end = e + traits::length(e);
839 bool r = parse_all();
840 m_position = ++old_position;
841 m_end = old_end;
842 m_base = old_base;
843 return r;
844 }
845 goto escape_type_class_jump;
846 case regex_constants::escape_type_extended_backref:
847 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
848 {
849 bool have_brace = false;
850 bool negative = false;
851 static const char* incomplete_message = "Incomplete \\g escape found.";
852 if(++m_position == m_end)
853 {
854 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
855 return false;
856 }
857 // maybe have \g{ddd}
858 regex_constants::syntax_type syn = this->m_traits.syntax_type(*m_position);
859 regex_constants::syntax_type syn_end = 0;
860 if((syn == regex_constants::syntax_open_brace)
861 || (syn == regex_constants::escape_type_left_word)
862 || (syn == regex_constants::escape_type_end_buffer))
863 {
864 if(++m_position == m_end)
865 {
866 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
867 return false;
868 }
869 have_brace = true;
870 switch(syn)
871 {
872 case regex_constants::syntax_open_brace:
873 syn_end = regex_constants::syntax_close_brace;
874 break;
875 case regex_constants::escape_type_left_word:
876 syn_end = regex_constants::escape_type_right_word;
877 break;
878 default:
879 syn_end = regex_constants::escape_type_end_buffer;
880 break;
881 }
882 }
883 negative = (*m_position == static_cast<charT>('-'));
884 if((negative) && (++m_position == m_end))
885 {
886 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
887 return false;
888 }
889 const charT* pc = m_position;
890 boost::intmax_t i = this->m_traits.toi(pc, m_end, 10);
891 if((i < 0) && syn_end)
892 {
893 // Check for a named capture, get the leftmost one if there is more than one:
894 const charT* base = m_position;
895 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != syn_end))
896 {
897 ++m_position;
898 }
899 i = hash_value_from_capture_name(base, m_position);
900 pc = m_position;
901 }
902 if(negative)
903 i = 1 + m_mark_count - i;
904 if(((i > 0) && (this->m_backrefs & (1u << (i-1)))) || ((i > 10000) && (this->m_pdata->get_id(i) > 0) && (this->m_backrefs & (1u << (this->m_pdata->get_id(i)-1)))))
905 {
906 m_position = pc;
907 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
908 pb->index = i;
909 pb->icase = this->flags() & regbase::icase;
910 }
911 else
912 {
913 fail(regex_constants::error_backref, m_position - m_base);
914 return false;
915 }
916 m_position = pc;
917 if(have_brace)
918 {
919 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != syn_end))
920 {
921 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
922 return false;
923 }
924 ++m_position;
925 }
926 return true;
927 }
928 goto escape_type_class_jump;
929 case regex_constants::escape_type_control_v:
930 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
931 goto escape_type_class_jump;
932 BOOST_FALLTHROUGH;
933 default:
934 this->append_literal(unescape_character());
935 break;
936 }
937 return true;
938}
939
940template <class charT, class traits>
941bool basic_regex_parser<charT, traits>::parse_match_any()
942{
943 //
944 // we have a '.' that can match any character:
945 //
946 ++m_position;
947 static_cast<re_dot*>(
948 this->append_state(syntax_element_wild, sizeof(re_dot))
949 )->mask = static_cast<unsigned char>(this->flags() & regbase::no_mod_s
950 ? BOOST_REGEX_DETAIL_NS::force_not_newline
951 : this->flags() & regbase::mod_s ?
952 BOOST_REGEX_DETAIL_NS::force_newline : BOOST_REGEX_DETAIL_NS::dont_care);
953 return true;
954}
955
956template <class charT, class traits>
957bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high)
958{
959 bool greedy = true;
960 bool pocessive = false;
961 std::size_t insert_point;
962 //
963 // when we get to here we may have a non-greedy ? mark still to come:
964 //
965 if((m_position != m_end)
966 && (
967 (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
968 || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex)))
969 )
970 )
971 {
972 // OK we have a perl or emacs regex, check for a '?':
973 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
974 {
975 greedy = false;
976 ++m_position;
977 }
978 // for perl regexes only check for pocessive ++ repeats.
979 if((m_position != m_end)
980 && (0 == (this->flags() & regbase::main_option_type))
981 && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus))
982 {
983 pocessive = true;
984 ++m_position;
985 }
986 }
987 if(0 == this->m_last_state)
988 {
989 fail(regex_constants::error_badrepeat, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Nothing to repeat.");
990 return false;
991 }
992 if(this->m_last_state->type == syntax_element_endmark)
993 {
994 // insert a repeat before the '(' matching the last ')':
995 insert_point = this->m_paren_start;
996 }
997 else if((this->m_last_state->type == syntax_element_literal) && (static_cast<re_literal*>(this->m_last_state)->length > 1))
998 {
999 // the last state was a literal with more than one character, split it in two:
1000 re_literal* lit = static_cast<re_literal*>(this->m_last_state);
1001 charT c = (static_cast<charT*>(static_cast<void*>(lit+1)))[lit->length - 1];
1002 lit->length -= 1;
1003 // now append new state:
1004 lit = static_cast<re_literal*>(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
1005 lit->length = 1;
1006 (static_cast<charT*>(static_cast<void*>(lit+1)))[0] = c;
1007 insert_point = this->getoffset(this->m_last_state);
1008 }
1009 else
1010 {
1011 // repeat the last state whatever it was, need to add some error checking here:
1012 switch(this->m_last_state->type)
1013 {
1014 case syntax_element_start_line:
1015 case syntax_element_end_line:
1016 case syntax_element_word_boundary:
1017 case syntax_element_within_word:
1018 case syntax_element_word_start:
1019 case syntax_element_word_end:
1020 case syntax_element_buffer_start:
1021 case syntax_element_buffer_end:
1022 case syntax_element_alt:
1023 case syntax_element_soft_buffer_end:
1024 case syntax_element_restart_continue:
1025 case syntax_element_jump:
1026 case syntax_element_startmark:
1027 case syntax_element_backstep:
1028 // can't legally repeat any of the above:
1029 fail(regex_constants::error_badrepeat, m_position - m_base);
1030 return false;
1031 default:
1032 // do nothing...
1033 break;
1034 }
1035 insert_point = this->getoffset(this->m_last_state);
1036 }
1037 //
1038 // OK we now know what to repeat, so insert the repeat around it:
1039 //
1040 re_repeat* rep = static_cast<re_repeat*>(this->insert_state(insert_point, syntax_element_rep, re_repeater_size));
1041 rep->min = low;
1042 rep->max = high;
1043 rep->greedy = greedy;
1044 rep->leading = false;
1045 // store our repeater position for later:
1046 std::ptrdiff_t rep_off = this->getoffset(rep);
1047 // and append a back jump to the repeat:
1048 re_jump* jmp = static_cast<re_jump*>(this->append_state(syntax_element_jump, sizeof(re_jump)));
1049 jmp->alt.i = rep_off - this->getoffset(jmp);
1050 this->m_pdata->m_data.align();
1051 // now fill in the alt jump for the repeat:
1052 rep = static_cast<re_repeat*>(this->getaddress(rep_off));
1053 rep->alt.i = this->m_pdata->m_data.size() - rep_off;
1054 //
1055 // If the repeat is pocessive then bracket the repeat with a (?>...)
1056 // independent sub-expression construct:
1057 //
1058 if(pocessive)
1059 {
1060 if(m_position != m_end)
1061 {
1062 //
1063 // Check for illegal following quantifier, we have to do this here, because
1064 // the extra states we insert below circumvents our usual error checking :-(
1065 //
1066 switch(this->m_traits.syntax_type(*m_position))
1067 {
1068 case regex_constants::syntax_star:
1069 case regex_constants::syntax_plus:
1070 case regex_constants::syntax_question:
1071 case regex_constants::syntax_open_brace:
1072 fail(regex_constants::error_badrepeat, m_position - m_base);
1073 return false;
1074 }
1075 }
1076 re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace)));
1077 pb->index = -3;
1078 pb->icase = this->flags() & regbase::icase;
1079 jmp = static_cast<re_jump*>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump)));
1080 this->m_pdata->m_data.align();
1081 jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
1082 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
1083 pb->index = -3;
1084 pb->icase = this->flags() & regbase::icase;
1085 }
1086 return true;
1087}
1088
1089template <class charT, class traits>
1090bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
1091{
1092 static const char* incomplete_message = "Missing } in quantified repetition.";
1093 //
1094 // parse a repeat-range:
1095 //
1096 std::size_t min, max;
1097 boost::intmax_t v;
1098 // skip whitespace:
1099 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1100 ++m_position;
1101 if(this->m_position == this->m_end)
1102 {
1103 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1104 {
1105 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1106 return false;
1107 }
1108 // Treat the opening '{' as a literal character, rewind to start of error:
1109 --m_position;
1110 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1111 return parse_literal();
1112 }
1113 // get min:
1114 v = this->m_traits.toi(m_position, m_end, 10);
1115 // skip whitespace:
1116 if((v < 0) || (v > umax()))
1117 {
1118 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1119 {
1120 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1121 return false;
1122 }
1123 // Treat the opening '{' as a literal character, rewind to start of error:
1124 --m_position;
1125 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1126 return parse_literal();
1127 }
1128 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1129 ++m_position;
1130 if(this->m_position == this->m_end)
1131 {
1132 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1133 {
1134 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1135 return false;
1136 }
1137 // Treat the opening '{' as a literal character, rewind to start of error:
1138 --m_position;
1139 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1140 return parse_literal();
1141 }
1142 min = static_cast<std::size_t>(v);
1143 // see if we have a comma:
1144 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
1145 {
1146 // move on and error check:
1147 ++m_position;
1148 // skip whitespace:
1149 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1150 ++m_position;
1151 if(this->m_position == this->m_end)
1152 {
1153 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1154 {
1155 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1156 return false;
1157 }
1158 // Treat the opening '{' as a literal character, rewind to start of error:
1159 --m_position;
1160 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1161 return parse_literal();
1162 }
1163 // get the value if any:
1164 v = this->m_traits.toi(m_position, m_end, 10);
1165 max = ((v >= 0) && (v < umax())) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
1166 }
1167 else
1168 {
1169 // no comma, max = min:
1170 max = min;
1171 }
1172 // skip whitespace:
1173 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1174 ++m_position;
1175 // OK now check trailing }:
1176 if(this->m_position == this->m_end)
1177 {
1178 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1179 {
1180 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1181 return false;
1182 }
1183 // Treat the opening '{' as a literal character, rewind to start of error:
1184 --m_position;
1185 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1186 return parse_literal();
1187 }
1188 if(isbasic)
1189 {
1190 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape)
1191 {
1192 ++m_position;
1193 if(this->m_position == this->m_end)
1194 {
1195 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1196 return false;
1197 }
1198 }
1199 else
1200 {
1201 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1202 return false;
1203 }
1204 }
1205 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace)
1206 ++m_position;
1207 else
1208 {
1209 // Treat the opening '{' as a literal character, rewind to start of error:
1210 --m_position;
1211 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1212 return parse_literal();
1213 }
1214 //
1215 // finally go and add the repeat, unless error:
1216 //
1217 if(min > max)
1218 {
1219 // Backtrack to error location:
1220 m_position -= 2;
1221 while(this->m_traits.isctype(*m_position, this->m_word_mask)) --m_position;
1222 ++m_position;
1223 fail(regex_constants::error_badbrace, m_position - m_base);
1224 return false;
1225 }
1226 return parse_repeat(min, max);
1227}
1228
1229template <class charT, class traits>
1230bool basic_regex_parser<charT, traits>::parse_alt()
1231{
1232 //
1233 // error check: if there have been no previous states,
1234 // or if the last state was a '(' then error:
1235 //
1236 if(
1237 ((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
1238 &&
1239 !(
1240 ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
1241 &&
1242 ((this->flags() & regbase::no_empty_expressions) == 0)
1243 )
1244 )
1245 {
1246 fail(regex_constants::error_empty, this->m_position - this->m_base, "A regular expression cannot start with the alternation operator |.");
1247 return false;
1248 }
1249 //
1250 // Reset mark count if required:
1251 //
1252 if(m_max_mark < m_mark_count)
1253 m_max_mark = m_mark_count;
1254 if(m_mark_reset >= 0)
1255 m_mark_count = m_mark_reset;
1256
1257 ++m_position;
1258 //
1259 // we need to append a trailing jump:
1260 //
1261 re_syntax_base* pj = this->append_state(BOOST_REGEX_DETAIL_NS::syntax_element_jump, sizeof(re_jump));
1262 std::ptrdiff_t jump_offset = this->getoffset(pj);
1263 //
1264 // now insert the alternative:
1265 //
1266 re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
1267 jump_offset += re_alt_size;
1268 this->m_pdata->m_data.align();
1269 palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
1270 //
1271 // update m_alt_insert_point so that the next alternate gets
1272 // inserted at the start of the second of the two we've just created:
1273 //
1274 this->m_alt_insert_point = this->m_pdata->m_data.size();
1275 //
1276 // the start of this alternative must have a case changes state
1277 // if the current block has messed around with case changes:
1278 //
1279 if(m_has_case_change)
1280 {
1281 static_cast<re_case*>(
1282 this->append_state(syntax_element_toggle_case, sizeof(re_case))
1283 )->icase = this->m_icase;
1284 }
1285 //
1286 // push the alternative onto our stack, a recursive
1287 // implementation here is easier to understand (and faster
1288 // as it happens), but causes all kinds of stack overflow problems
1289 // on programs with small stacks (COM+).
1290 //
1291 m_alt_jumps.push_back(jump_offset);
1292 return true;
1293}
1294
1295template <class charT, class traits>
1296bool basic_regex_parser<charT, traits>::parse_set()
1297{
1298 static const char* incomplete_message = "Character set declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
1299 ++m_position;
1300 if(m_position == m_end)
1301 {
1302 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1303 return false;
1304 }
1305 basic_char_set<charT, traits> char_set;
1306
1307 const charT* base = m_position; // where the '[' was
1308 const charT* item_base = m_position; // where the '[' or '^' was
1309
1310 while(m_position != m_end)
1311 {
1312 switch(this->m_traits.syntax_type(*m_position))
1313 {
1314 case regex_constants::syntax_caret:
1315 if(m_position == base)
1316 {
1317 char_set.negate();
1318 ++m_position;
1319 item_base = m_position;
1320 }
1321 else
1322 parse_set_literal(char_set);
1323 break;
1324 case regex_constants::syntax_close_set:
1325 if(m_position == item_base)
1326 {
1327 parse_set_literal(char_set);
1328 break;
1329 }
1330 else
1331 {
1332 ++m_position;
1333 if(0 == this->append_set(char_set))
1334 {
1335 fail(regex_constants::error_ctype, m_position - m_base);
1336 return false;
1337 }
1338 }
1339 return true;
1340 case regex_constants::syntax_open_set:
1341 if(parse_inner_set(char_set))
1342 break;
1343 return true;
1344 case regex_constants::syntax_escape:
1345 {
1346 //
1347 // look ahead and see if this is a character class shortcut
1348 // \d \w \s etc...
1349 //
1350 ++m_position;
1351 if(this->m_traits.escape_syntax_type(*m_position)
1352 == regex_constants::escape_type_class)
1353 {
1354 char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
1355 if(m != 0)
1356 {
1357 char_set.add_class(m);
1358 ++m_position;
1359 break;
1360 }
1361 }
1362 else if(this->m_traits.escape_syntax_type(*m_position)
1363 == regex_constants::escape_type_not_class)
1364 {
1365 // negated character class:
1366 char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
1367 if(m != 0)
1368 {
1369 char_set.add_negated_class(m);
1370 ++m_position;
1371 break;
1372 }
1373 }
1374 // not a character class, just a regular escape:
1375 --m_position;
1376 parse_set_literal(char_set);
1377 break;
1378 }
1379 default:
1380 parse_set_literal(char_set);
1381 break;
1382 }
1383 }
1384 return m_position != m_end;
1385}
1386
1387template <class charT, class traits>
1388bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
1389{
1390 static const char* incomplete_message = "Character class declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
1391 //
1392 // we have either a character class [:name:]
1393 // a collating element [.name.]
1394 // or an equivalence class [=name=]
1395 //
1396 if(m_end == ++m_position)
1397 {
1398 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1399 return false;
1400 }
1401 switch(this->m_traits.syntax_type(*m_position))
1402 {
1403 case regex_constants::syntax_dot:
1404 //
1405 // a collating element is treated as a literal:
1406 //
1407 --m_position;
1408 parse_set_literal(char_set);
1409 return true;
1410 case regex_constants::syntax_colon:
1411 {
1412 // check that character classes are actually enabled:
1413 if((this->flags() & (regbase::main_option_type | regbase::no_char_classes))
1414 == (regbase::basic_syntax_group | regbase::no_char_classes))
1415 {
1416 --m_position;
1417 parse_set_literal(char_set);
1418 return true;
1419 }
1420 // skip the ':'
1421 if(m_end == ++m_position)
1422 {
1423 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1424 return false;
1425 }
1426 const charT* name_first = m_position;
1427 // skip at least one character, then find the matching ':]'
1428 if(m_end == ++m_position)
1429 {
1430 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1431 return false;
1432 }
1433 while((m_position != m_end)
1434 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon))
1435 ++m_position;
1436 const charT* name_last = m_position;
1437 if(m_end == m_position)
1438 {
1439 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1440 return false;
1441 }
1442 if((m_end == ++m_position)
1443 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1444 {
1445 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1446 return false;
1447 }
1448 //
1449 // check for negated class:
1450 //
1451 bool negated = false;
1452 if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
1453 {
1454 ++name_first;
1455 negated = true;
1456 }
1457 typedef typename traits::char_class_type m_type;
1458 m_type m = this->m_traits.lookup_classname(name_first, name_last);
1459 if(m == 0)
1460 {
1461 if(char_set.empty() && (name_last - name_first == 1))
1462 {
1463 // maybe a special case:
1464 ++m_position;
1465 if( (m_position != m_end)
1466 && (this->m_traits.syntax_type(*m_position)
1467 == regex_constants::syntax_close_set))
1468 {
1469 if(this->m_traits.escape_syntax_type(*name_first)
1470 == regex_constants::escape_type_left_word)
1471 {
1472 ++m_position;
1473 this->append_state(syntax_element_word_start);
1474 return false;
1475 }
1476 if(this->m_traits.escape_syntax_type(*name_first)
1477 == regex_constants::escape_type_right_word)
1478 {
1479 ++m_position;
1480 this->append_state(syntax_element_word_end);
1481 return false;
1482 }
1483 }
1484 }
1485 fail(regex_constants::error_ctype, name_first - m_base);
1486 return false;
1487 }
1488 if(negated == false)
1489 char_set.add_class(m);
1490 else
1491 char_set.add_negated_class(m);
1492 ++m_position;
1493 break;
1494 }
1495 case regex_constants::syntax_equal:
1496 {
1497 // skip the '='
1498 if(m_end == ++m_position)
1499 {
1500 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1501 return false;
1502 }
1503 const charT* name_first = m_position;
1504 // skip at least one character, then find the matching '=]'
1505 if(m_end == ++m_position)
1506 {
1507 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1508 return false;
1509 }
1510 while((m_position != m_end)
1511 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal))
1512 ++m_position;
1513 const charT* name_last = m_position;
1514 if(m_end == m_position)
1515 {
1516 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1517 return false;
1518 }
1519 if((m_end == ++m_position)
1520 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1521 {
1522 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1523 return false;
1524 }
1525 string_type m = this->m_traits.lookup_collatename(name_first, name_last);
1526 if((0 == m.size()) || (m.size() > 2))
1527 {
1528 fail(regex_constants::error_collate, name_first - m_base);
1529 return false;
1530 }
1531 digraph<charT> d;
1532 d.first = m[0];
1533 if(m.size() > 1)
1534 d.second = m[1];
1535 else
1536 d.second = 0;
1537 char_set.add_equivalent(d);
1538 ++m_position;
1539 break;
1540 }
1541 default:
1542 --m_position;
1543 parse_set_literal(char_set);
1544 break;
1545 }
1546 return true;
1547}
1548
1549template <class charT, class traits>
1550void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
1551{
1552 digraph<charT> start_range(get_next_set_literal(char_set));
1553 if(m_end == m_position)
1554 {
1555 fail(regex_constants::error_brack, m_position - m_base);
1556 return;
1557 }
1558 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
1559 {
1560 // we have a range:
1561 if(m_end == ++m_position)
1562 {
1563 fail(regex_constants::error_brack, m_position - m_base);
1564 return;
1565 }
1566 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
1567 {
1568 digraph<charT> end_range = get_next_set_literal(char_set);
1569 char_set.add_range(start_range, end_range);
1570 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
1571 {
1572 if(m_end == ++m_position)
1573 {
1574 fail(regex_constants::error_brack, m_position - m_base);
1575 return;
1576 }
1577 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set)
1578 {
1579 // trailing - :
1580 --m_position;
1581 return;
1582 }
1583 fail(regex_constants::error_range, m_position - m_base);
1584 return;
1585 }
1586 return;
1587 }
1588 --m_position;
1589 }
1590 char_set.add_single(start_range);
1591}
1592
1593template <class charT, class traits>
1594digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set)
1595{
1596 digraph<charT> result;
1597 switch(this->m_traits.syntax_type(*m_position))
1598 {
1599 case regex_constants::syntax_dash:
1600 if(!char_set.empty())
1601 {
1602 // see if we are at the end of the set:
1603 if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1604 {
1605 fail(regex_constants::error_range, m_position - m_base);
1606 return result;
1607 }
1608 --m_position;
1609 }
1610 result.first = *m_position++;
1611 return result;
1612 case regex_constants::syntax_escape:
1613 // check to see if escapes are supported first:
1614 if(this->flags() & regex_constants::no_escape_in_lists)
1615 {
1616 result = *m_position++;
1617 break;
1618 }
1619 ++m_position;
1620 result = unescape_character();
1621 break;
1622 case regex_constants::syntax_open_set:
1623 {
1624 if(m_end == ++m_position)
1625 {
1626 fail(regex_constants::error_collate, m_position - m_base);
1627 return result;
1628 }
1629 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)
1630 {
1631 --m_position;
1632 result.first = *m_position;
1633 ++m_position;
1634 return result;
1635 }
1636 if(m_end == ++m_position)
1637 {
1638 fail(regex_constants::error_collate, m_position - m_base);
1639 return result;
1640 }
1641 const charT* name_first = m_position;
1642 // skip at least one character, then find the matching ':]'
1643 if(m_end == ++m_position)
1644 {
1645 fail(regex_constants::error_collate, name_first - m_base);
1646 return result;
1647 }
1648 while((m_position != m_end)
1649 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot))
1650 ++m_position;
1651 const charT* name_last = m_position;
1652 if(m_end == m_position)
1653 {
1654 fail(regex_constants::error_collate, name_first - m_base);
1655 return result;
1656 }
1657 if((m_end == ++m_position)
1658 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1659 {
1660 fail(regex_constants::error_collate, name_first - m_base);
1661 return result;
1662 }
1663 ++m_position;
1664 string_type s = this->m_traits.lookup_collatename(name_first, name_last);
1665 if(s.empty() || (s.size() > 2))
1666 {
1667 fail(regex_constants::error_collate, name_first - m_base);
1668 return result;
1669 }
1670 result.first = s[0];
1671 if(s.size() > 1)
1672 result.second = s[1];
1673 else
1674 result.second = 0;
1675 return result;
1676 }
1677 default:
1678 result = *m_position++;
1679 }
1680 return result;
1681}
1682
1683//
1684// does a value fit in the specified charT type?
1685//
1686template <class charT>
1687bool valid_value(charT, boost::intmax_t v, const mpl::true_&)
1688{
1689 return (v >> (sizeof(charT) * CHAR_BIT)) == 0;
1690}
1691template <class charT>
1692bool valid_value(charT, boost::intmax_t, const mpl::false_&)
1693{
1694 return true; // v will alsways fit in a charT
1695}
1696template <class charT>
1697bool valid_value(charT c, boost::intmax_t v)
1698{
1699 return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(boost::intmax_t))>());
1700}
1701
1702template <class charT, class traits>
1703charT basic_regex_parser<charT, traits>::unescape_character()
1704{
1705#ifdef BOOST_MSVC
1706#pragma warning(push)
1707#pragma warning(disable:4127)
1708#endif
1709 charT result(0);
1710 if(m_position == m_end)
1711 {
1712 fail(regex_constants::error_escape, m_position - m_base, "Escape sequence terminated prematurely.");
1713 return false;
1714 }
1715 switch(this->m_traits.escape_syntax_type(*m_position))
1716 {
1717 case regex_constants::escape_type_control_a:
1718 result = charT('\a');
1719 break;
1720 case regex_constants::escape_type_e:
1721 result = charT(27);
1722 break;
1723 case regex_constants::escape_type_control_f:
1724 result = charT('\f');
1725 break;
1726 case regex_constants::escape_type_control_n:
1727 result = charT('\n');
1728 break;
1729 case regex_constants::escape_type_control_r:
1730 result = charT('\r');
1731 break;
1732 case regex_constants::escape_type_control_t:
1733 result = charT('\t');
1734 break;
1735 case regex_constants::escape_type_control_v:
1736 result = charT('\v');
1737 break;
1738 case regex_constants::escape_type_word_assert:
1739 result = charT('\b');
1740 break;
1741 case regex_constants::escape_type_ascii_control:
1742 ++m_position;
1743 if(m_position == m_end)
1744 {
1745 // Rewind to start of escape:
1746 --m_position;
1747 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1748 fail(regex_constants::error_escape, m_position - m_base, "ASCII escape sequence terminated prematurely.");
1749 return result;
1750 }
1751 result = static_cast<charT>(*m_position % 32);
1752 break;
1753 case regex_constants::escape_type_hex:
1754 ++m_position;
1755 if(m_position == m_end)
1756 {
1757 // Rewind to start of escape:
1758 --m_position;
1759 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1760 fail(regex_constants::error_escape, m_position - m_base, "Hexadecimal escape sequence terminated prematurely.");
1761 return result;
1762 }
1763 // maybe have \x{ddd}
1764 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
1765 {
1766 ++m_position;
1767 if(m_position == m_end)
1768 {
1769 // Rewind to start of escape:
1770 --m_position;
1771 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1772 fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence.");
1773 return result;
1774 }
1775 boost::intmax_t i = this->m_traits.toi(m_position, m_end, 16);
1776 if((m_position == m_end)
1777 || (i < 0)
1778 || ((std::numeric_limits<charT>::is_specialized) && (i > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
1779 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
1780 {
1781 // Rewind to start of escape:
1782 --m_position;
1783 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1784 fail(regex_constants::error_badbrace, m_position - m_base, "Hexadecimal escape sequence was invalid.");
1785 return result;
1786 }
1787 ++m_position;
1788 result = charT(i);
1789 }
1790 else
1791 {
1792 std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), static_cast<std::ptrdiff_t>(m_end - m_position));
1793 boost::intmax_t i = this->m_traits.toi(m_position, m_position + len, 16);
1794 if((i < 0)
1795 || !valid_value(charT(0), i))
1796 {
1797 // Rewind to start of escape:
1798 --m_position;
1799 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1800 fail(regex_constants::error_escape, m_position - m_base, "Escape sequence did not encode a valid character.");
1801 return result;
1802 }
1803 result = charT(i);
1804 }
1805 return result;
1806 case regex_constants::syntax_digit:
1807 {
1808 // an octal escape sequence, the first character must be a zero
1809 // followed by up to 3 octal digits:
1810 std::ptrdiff_t len = (std::min)(::boost::BOOST_REGEX_DETAIL_NS::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
1811 const charT* bp = m_position;
1812 boost::intmax_t val = this->m_traits.toi(bp, bp + 1, 8);
1813 if(val != 0)
1814 {
1815 // Rewind to start of escape:
1816 --m_position;
1817 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1818 // Oops not an octal escape after all:
1819 fail(regex_constants::error_escape, m_position - m_base, "Invalid octal escape sequence.");
1820 return result;
1821 }
1822 val = this->m_traits.toi(m_position, m_position + len, 8);
1823 if((val < 0) || (val > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
1824 {
1825 // Rewind to start of escape:
1826 --m_position;
1827 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1828 fail(regex_constants::error_escape, m_position - m_base, "Octal escape sequence is invalid.");
1829 return result;
1830 }
1831 return static_cast<charT>(val);
1832 }
1833 case regex_constants::escape_type_named_char:
1834 {
1835 ++m_position;
1836 if(m_position == m_end)
1837 {
1838 // Rewind to start of escape:
1839 --m_position;
1840 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1841 fail(regex_constants::error_escape, m_position - m_base);
1842 return false;
1843 }
1844 // maybe have \N{name}
1845 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
1846 {
1847 const charT* base = m_position;
1848 // skip forward until we find enclosing brace:
1849 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
1850 ++m_position;
1851 if(m_position == m_end)
1852 {
1853 // Rewind to start of escape:
1854 --m_position;
1855 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1856 fail(regex_constants::error_escape, m_position - m_base);
1857 return false;
1858 }
1859 string_type s = this->m_traits.lookup_collatename(++base, m_position++);
1860 if(s.empty())
1861 {
1862 // Rewind to start of escape:
1863 --m_position;
1864 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1865 fail(regex_constants::error_collate, m_position - m_base);
1866 return false;
1867 }
1868 if(s.size() == 1)
1869 {
1870 return s[0];
1871 }
1872 }
1873 // fall through is a failure:
1874 // Rewind to start of escape:
1875 --m_position;
1876 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1877 fail(regex_constants::error_escape, m_position - m_base);
1878 return false;
1879 }
1880 default:
1881 result = *m_position;
1882 break;
1883 }
1884 ++m_position;
1885 return result;
1886#ifdef BOOST_MSVC
1887#pragma warning(pop)
1888#endif
1889}
1890
1891template <class charT, class traits>
1892bool basic_regex_parser<charT, traits>::parse_backref()
1893{
1894 BOOST_ASSERT(m_position != m_end);
1895 const charT* pc = m_position;
1896 boost::intmax_t i = this->m_traits.toi(pc, pc + 1, 10);
1897 if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
1898 {
1899 // not a backref at all but an octal escape sequence:
1900 charT c = unescape_character();
1901 this->append_literal(c);
1902 }
1903 else if((i > 0) && (this->m_backrefs & (1u << (i-1))))
1904 {
1905 m_position = pc;
1906 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
1907 pb->index = i;
1908 pb->icase = this->flags() & regbase::icase;
1909 }
1910 else
1911 {
1912 // Rewind to start of escape:
1913 --m_position;
1914 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1915 fail(regex_constants::error_backref, m_position - m_base);
1916 return false;
1917 }
1918 return true;
1919}
1920
1921template <class charT, class traits>
1922bool basic_regex_parser<charT, traits>::parse_QE()
1923{
1924#ifdef BOOST_MSVC
1925#pragma warning(push)
1926#pragma warning(disable:4127)
1927#endif
1928 //
1929 // parse a \Q...\E sequence:
1930 //
1931 ++m_position; // skip the Q
1932 const charT* start = m_position;
1933 const charT* end;
1934 do
1935 {
1936 while((m_position != m_end)
1937 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape))
1938 ++m_position;
1939 if(m_position == m_end)
1940 {
1941 // a \Q...\E sequence may terminate with the end of the expression:
1942 end = m_position;
1943 break;
1944 }
1945 if(++m_position == m_end) // skip the escape
1946 {
1947 fail(regex_constants::error_escape, m_position - m_base, "Unterminated \\Q...\\E sequence.");
1948 return false;
1949 }
1950 // check to see if it's a \E:
1951 if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E)
1952 {
1953 ++m_position;
1954 end = m_position - 2;
1955 break;
1956 }
1957 // otherwise go round again:
1958 }while(true);
1959 //
1960 // now add all the character between the two escapes as literals:
1961 //
1962 while(start != end)
1963 {
1964 this->append_literal(*start);
1965 ++start;
1966 }
1967 return true;
1968#ifdef BOOST_MSVC
1969#pragma warning(pop)
1970#endif
1971}
1972
1973template <class charT, class traits>
1974bool basic_regex_parser<charT, traits>::parse_perl_extension()
1975{
1976 if(++m_position == m_end)
1977 {
1978 // Rewind to start of (? sequence:
1979 --m_position;
1980 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
1981 fail(regex_constants::error_perl_extension, m_position - m_base);
1982 return false;
1983 }
1984 //
1985 // treat comments as a special case, as these
1986 // are the only ones that don't start with a leading
1987 // startmark state:
1988 //
1989 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash)
1990 {
1991 while((m_position != m_end)
1992 && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))
1993 {}
1994 return true;
1995 }
1996 //
1997 // backup some state, and prepare the way:
1998 //
1999 int markid = 0;
2000 std::ptrdiff_t jump_offset = 0;
2001 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
2002 pb->icase = this->flags() & regbase::icase;
2003 std::ptrdiff_t last_paren_start = this->getoffset(pb);
2004 // back up insertion point for alternations, and set new point:
2005 std::ptrdiff_t last_alt_point = m_alt_insert_point;
2006 this->m_pdata->m_data.align();
2007 m_alt_insert_point = this->m_pdata->m_data.size();
2008 std::ptrdiff_t expected_alt_point = m_alt_insert_point;
2009 bool restore_flags = true;
2010 regex_constants::syntax_option_type old_flags = this->flags();
2011 bool old_case_change = m_has_case_change;
2012 m_has_case_change = false;
2013 charT name_delim;
2014 int mark_reset = m_mark_reset;
2015 int max_mark = m_max_mark;
2016 m_mark_reset = -1;
2017 m_max_mark = m_mark_count;
2018 boost::intmax_t v;
2019 //
2020 // select the actual extension used:
2021 //
2022 switch(this->m_traits.syntax_type(*m_position))
2023 {
2024 case regex_constants::syntax_or:
2025 m_mark_reset = m_mark_count;
2026 BOOST_FALLTHROUGH;
2027 case regex_constants::syntax_colon:
2028 //
2029 // a non-capturing mark:
2030 //
2031 pb->index = markid = 0;
2032 ++m_position;
2033 break;
2034 case regex_constants::syntax_digit:
2035 {
2036 //
2037 // a recursive subexpression:
2038 //
2039 v = this->m_traits.toi(m_position, m_end, 10);
2040 if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2041 {
2042 // Rewind to start of (? sequence:
2043 --m_position;
2044 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2045 fail(regex_constants::error_perl_extension, m_position - m_base, "The recursive sub-expression refers to an invalid marking group, or is unterminated.");
2046 return false;
2047 }
2048insert_recursion:
2049 pb->index = markid = 0;
2050 re_recurse* pr = static_cast<re_recurse*>(this->append_state(syntax_element_recurse, sizeof(re_recurse)));
2051 pr->alt.i = v;
2052 pr->state_id = 0;
2053 static_cast<re_case*>(
2054 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2055 )->icase = this->flags() & regbase::icase;
2056 break;
2057 }
2058 case regex_constants::syntax_plus:
2059 //
2060 // A forward-relative recursive subexpression:
2061 //
2062 ++m_position;
2063 v = this->m_traits.toi(m_position, m_end, 10);
2064 if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2065 {
2066 // Rewind to start of (? sequence:
2067 --m_position;
2068 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2069 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2070 return false;
2071 }
2072 v += m_mark_count;
2073 goto insert_recursion;
2074 case regex_constants::syntax_dash:
2075 //
2076 // Possibly a backward-relative recursive subexpression:
2077 //
2078 ++m_position;
2079 v = this->m_traits.toi(m_position, m_end, 10);
2080 if(v <= 0)
2081 {
2082 --m_position;
2083 // Oops not a relative recursion at all, but a (?-imsx) group:
2084 goto option_group_jump;
2085 }
2086 v = m_mark_count + 1 - v;
2087 if(v <= 0)
2088 {
2089 // Rewind to start of (? sequence:
2090 --m_position;
2091 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2092 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2093 return false;
2094 }
2095 goto insert_recursion;
2096 case regex_constants::syntax_equal:
2097 pb->index = markid = -1;
2098 ++m_position;
2099 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2100 this->m_pdata->m_data.align();
2101 m_alt_insert_point = this->m_pdata->m_data.size();
2102 break;
2103 case regex_constants::syntax_not:
2104 pb->index = markid = -2;
2105 ++m_position;
2106 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2107 this->m_pdata->m_data.align();
2108 m_alt_insert_point = this->m_pdata->m_data.size();
2109 break;
2110 case regex_constants::escape_type_left_word:
2111 {
2112 // a lookbehind assertion:
2113 if(++m_position == m_end)
2114 {
2115 // Rewind to start of (? sequence:
2116 --m_position;
2117 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2118 fail(regex_constants::error_perl_extension, m_position - m_base);
2119 return false;
2120 }
2121 regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position);
2122 if(t == regex_constants::syntax_not)
2123 pb->index = markid = -2;
2124 else if(t == regex_constants::syntax_equal)
2125 pb->index = markid = -1;
2126 else
2127 {
2128 // Probably a named capture which also starts (?< :
2129 name_delim = '>';
2130 --m_position;
2131 goto named_capture_jump;
2132 }
2133 ++m_position;
2134 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2135 this->append_state(syntax_element_backstep, sizeof(re_brace));
2136 this->m_pdata->m_data.align();
2137 m_alt_insert_point = this->m_pdata->m_data.size();
2138 break;
2139 }
2140 case regex_constants::escape_type_right_word:
2141 //
2142 // an independent sub-expression:
2143 //
2144 pb->index = markid = -3;
2145 ++m_position;
2146 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2147 this->m_pdata->m_data.align();
2148 m_alt_insert_point = this->m_pdata->m_data.size();
2149 break;
2150 case regex_constants::syntax_open_mark:
2151 {
2152 // a conditional expression:
2153 pb->index = markid = -4;
2154 if(++m_position == m_end)
2155 {
2156 // Rewind to start of (? sequence:
2157 --m_position;
2158 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2159 fail(regex_constants::error_perl_extension, m_position - m_base);
2160 return false;
2161 }
2162 v = this->m_traits.toi(m_position, m_end, 10);
2163 if(m_position == m_end)
2164 {
2165 // Rewind to start of (? sequence:
2166 --m_position;
2167 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2168 fail(regex_constants::error_perl_extension, m_position - m_base);
2169 return false;
2170 }
2171 if(*m_position == charT('R'))
2172 {
2173 if(++m_position == m_end)
2174 {
2175 // Rewind to start of (? sequence:
2176 --m_position;
2177 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2178 fail(regex_constants::error_perl_extension, m_position - m_base);
2179 return false;
2180 }
2181 if(*m_position == charT('&'))
2182 {
2183 const charT* base = ++m_position;
2184 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2185 ++m_position;
2186 if(m_position == m_end)
2187 {
2188 // Rewind to start of (? sequence:
2189 --m_position;
2190 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2191 fail(regex_constants::error_perl_extension, m_position - m_base);
2192 return false;
2193 }
2194 v = -static_cast<int>(hash_value_from_capture_name(base, m_position));
2195 }
2196 else
2197 {
2198 v = -this->m_traits.toi(m_position, m_end, 10);
2199 }
2200 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2201 br->index = v < 0 ? (v - 1) : 0;
2202 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2203 {
2204 // Rewind to start of (? sequence:
2205 --m_position;
2206 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2207 fail(regex_constants::error_perl_extension, m_position - m_base);
2208 return false;
2209 }
2210 if(++m_position == m_end)
2211 {
2212 // Rewind to start of (? sequence:
2213 --m_position;
2214 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2215 fail(regex_constants::error_perl_extension, m_position - m_base);
2216 return false;
2217 }
2218 }
2219 else if((*m_position == charT('\'')) || (*m_position == charT('<')))
2220 {
2221 const charT* base = ++m_position;
2222 while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\'')))
2223 ++m_position;
2224 if(m_position == m_end)
2225 {
2226 // Rewind to start of (? sequence:
2227 --m_position;
2228 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2229 fail(regex_constants::error_perl_extension, m_position - m_base);
2230 return false;
2231 }
2232 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2233 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2234 br->index = v;
2235 if(((*m_position != charT('>')) && (*m_position != charT('\''))) || (++m_position == m_end))
2236 {
2237 // Rewind to start of (? sequence:
2238 --m_position;
2239 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2240 fail(regex_constants::error_perl_extension, m_position - m_base, "Unterminated named capture.");
2241 return false;
2242 }
2243 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2244 {
2245 // Rewind to start of (? sequence:
2246 --m_position;
2247 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2248 fail(regex_constants::error_perl_extension, m_position - m_base);
2249 return false;
2250 }
2251 if(++m_position == m_end)
2252 {
2253 // Rewind to start of (? sequence:
2254 --m_position;
2255 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2256 fail(regex_constants::error_perl_extension, m_position - m_base);
2257 return false;
2258 }
2259 }
2260 else if(*m_position == charT('D'))
2261 {
2262 const char* def = "DEFINE";
2263 while(*def && (m_position != m_end) && (*m_position == charT(*def)))
2264 ++m_position, ++def;
2265 if((m_position == m_end) || *def)
2266 {
2267 // Rewind to start of (? sequence:
2268 --m_position;
2269 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2270 fail(regex_constants::error_perl_extension, m_position - m_base);
2271 return false;
2272 }
2273 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2274 br->index = 9999; // special magic value!
2275 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2276 {
2277 // Rewind to start of (? sequence:
2278 --m_position;
2279 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2280 fail(regex_constants::error_perl_extension, m_position - m_base);
2281 return false;
2282 }
2283 if(++m_position == m_end)
2284 {
2285 // Rewind to start of (? sequence:
2286 --m_position;
2287 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2288 fail(regex_constants::error_perl_extension, m_position - m_base);
2289 return false;
2290 }
2291 }
2292 else if(v > 0)
2293 {
2294 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2295 br->index = v;
2296 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2297 {
2298 // Rewind to start of (? sequence:
2299 --m_position;
2300 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2301 fail(regex_constants::error_perl_extension, m_position - m_base);
2302 return false;
2303 }
2304 if(++m_position == m_end)
2305 {
2306 // Rewind to start of (? sequence:
2307 --m_position;
2308 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2309 fail(regex_constants::error_perl_extension, m_position - m_base);
2310 return false;
2311 }
2312 }
2313 else
2314 {
2315 // verify that we have a lookahead or lookbehind assert:
2316 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
2317 {
2318 // Rewind to start of (? sequence:
2319 --m_position;
2320 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2321 fail(regex_constants::error_perl_extension, m_position - m_base);
2322 return false;
2323 }
2324 if(++m_position == m_end)
2325 {
2326 // Rewind to start of (? sequence:
2327 --m_position;
2328 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2329 fail(regex_constants::error_perl_extension, m_position - m_base);
2330 return false;
2331 }
2332 if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
2333 {
2334 if(++m_position == m_end)
2335 {
2336 // Rewind to start of (? sequence:
2337 --m_position;
2338 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2339 fail(regex_constants::error_perl_extension, m_position - m_base);
2340 return false;
2341 }
2342 if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
2343 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
2344 {
2345 // Rewind to start of (? sequence:
2346 --m_position;
2347 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2348 fail(regex_constants::error_perl_extension, m_position - m_base);
2349 return false;
2350 }
2351 m_position -= 3;
2352 }
2353 else
2354 {
2355 if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
2356 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
2357 {
2358 // Rewind to start of (? sequence:
2359 --m_position;
2360 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2361 fail(regex_constants::error_perl_extension, m_position - m_base);
2362 return false;
2363 }
2364 m_position -= 2;
2365 }
2366 }
2367 break;
2368 }
2369 case regex_constants::syntax_close_mark:
2370 // Rewind to start of (? sequence:
2371 --m_position;
2372 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2373 fail(regex_constants::error_perl_extension, m_position - m_base);
2374 return false;
2375 case regex_constants::escape_type_end_buffer:
2376 {
2377 name_delim = *m_position;
2378named_capture_jump:
2379 markid = 0;
2380 if(0 == (this->flags() & regbase::nosubs))
2381 {
2382 markid = ++m_mark_count;
2383 #ifndef BOOST_NO_STD_DISTANCE
2384 if(this->flags() & regbase::save_subexpression_location)
2385 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 2, 0));
2386 #else
2387 if(this->flags() & regbase::save_subexpression_location)
2388 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 2, 0));
2389 #endif
2390 }
2391 pb->index = markid;
2392 const charT* base = ++m_position;
2393 if(m_position == m_end)
2394 {
2395 // Rewind to start of (? sequence:
2396 --m_position;
2397 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2398 fail(regex_constants::error_perl_extension, m_position - m_base);
2399 return false;
2400 }
2401 while((m_position != m_end) && (*m_position != name_delim))
2402 ++m_position;
2403 if(m_position == m_end)
2404 {
2405 // Rewind to start of (? sequence:
2406 --m_position;
2407 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2408 fail(regex_constants::error_perl_extension, m_position - m_base);
2409 return false;
2410 }
2411 this->m_pdata->set_name(base, m_position, markid);
2412 ++m_position;
2413 break;
2414 }
2415 default:
2416 if(*m_position == charT('R'))
2417 {
2418 ++m_position;
2419 v = 0;
2420 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2421 {
2422 // Rewind to start of (? sequence:
2423 --m_position;
2424 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2425 fail(regex_constants::error_perl_extension, m_position - m_base);
2426 return false;
2427 }
2428 goto insert_recursion;
2429 }
2430 if(*m_position == charT('&'))
2431 {
2432 ++m_position;
2433 const charT* base = m_position;
2434 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2435 ++m_position;
2436 if(m_position == m_end)
2437 {
2438 // Rewind to start of (? sequence:
2439 --m_position;
2440 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2441 fail(regex_constants::error_perl_extension, m_position - m_base);
2442 return false;
2443 }
2444 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2445 goto insert_recursion;
2446 }
2447 if(*m_position == charT('P'))
2448 {
2449 ++m_position;
2450 if(m_position == m_end)
2451 {
2452 // Rewind to start of (? sequence:
2453 --m_position;
2454 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2455 fail(regex_constants::error_perl_extension, m_position - m_base);
2456 return false;
2457 }
2458 if(*m_position == charT('>'))
2459 {
2460 ++m_position;
2461 const charT* base = m_position;
2462 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2463 ++m_position;
2464 if(m_position == m_end)
2465 {
2466 // Rewind to start of (? sequence:
2467 --m_position;
2468 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2469 fail(regex_constants::error_perl_extension, m_position - m_base);
2470 return false;
2471 }
2472 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2473 goto insert_recursion;
2474 }
2475 }
2476 //
2477 // lets assume that we have a (?imsx) group and try and parse it:
2478 //
2479option_group_jump:
2480 regex_constants::syntax_option_type opts = parse_options();
2481 if(m_position == m_end)
2482 {
2483 // Rewind to start of (? sequence:
2484 --m_position;
2485 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2486 fail(regex_constants::error_perl_extension, m_position - m_base);
2487 return false;
2488 }
2489 // make a note of whether we have a case change:
2490 m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase));
2491 pb->index = markid = 0;
2492 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark)
2493 {
2494 // update flags and carry on as normal:
2495 this->flags(opts);
2496 restore_flags = false;
2497 old_case_change |= m_has_case_change; // defer end of scope by one ')'
2498 }
2499 else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon)
2500 {
2501 // update flags and carry on until the matching ')' is found:
2502 this->flags(opts);
2503 ++m_position;
2504 }
2505 else
2506 {
2507 // Rewind to start of (? sequence:
2508 --m_position;
2509 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2510 fail(regex_constants::error_perl_extension, m_position - m_base);
2511 return false;
2512 }
2513
2514 // finally append a case change state if we need it:
2515 if(m_has_case_change)
2516 {
2517 static_cast<re_case*>(
2518 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2519 )->icase = opts & regbase::icase;
2520 }
2521
2522 }
2523 //
2524 // now recursively add more states, this will terminate when we get to a
2525 // matching ')' :
2526 //
2527 parse_all();
2528 //
2529 // Unwind alternatives:
2530 //
2531 if(0 == unwind_alts(last_paren_start))
2532 {
2533 // Rewind to start of (? sequence:
2534 --m_position;
2535 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2536 fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid alternation operators within (?...) block.");
2537 return false;
2538 }
2539 //
2540 // we either have a ')' or we have run out of characters prematurely:
2541 //
2542 if(m_position == m_end)
2543 {
2544 // Rewind to start of (? sequence:
2545 --m_position;
2546 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2547 this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
2548 return false;
2549 }
2550 BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
2551 ++m_position;
2552 //
2553 // restore the flags:
2554 //
2555 if(restore_flags)
2556 {
2557 // append a case change state if we need it:
2558 if(m_has_case_change)
2559 {
2560 static_cast<re_case*>(
2561 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2562 )->icase = old_flags & regbase::icase;
2563 }
2564 this->flags(old_flags);
2565 }
2566 //
2567 // set up the jump pointer if we have one:
2568 //
2569 if(jump_offset)
2570 {
2571 this->m_pdata->m_data.align();
2572 re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
2573 jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
2574 if((this->m_last_state == jmp) && (markid != -2))
2575 {
2576 // Oops... we didn't have anything inside the assertion.
2577 // Note we don't get here for negated forward lookahead as (?!)
2578 // does have some uses.
2579 // Rewind to start of (? sequence:
2580 --m_position;
2581 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2582 fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid or empty zero width assertion.");
2583 return false;
2584 }
2585 }
2586 //
2587 // verify that if this is conditional expression, that we do have
2588 // an alternative, if not add one:
2589 //
2590 if(markid == -4)
2591 {
2592 re_syntax_base* b = this->getaddress(expected_alt_point);
2593 // Make sure we have exactly one alternative following this state:
2594 if(b->type != syntax_element_alt)
2595 {
2596 re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
2597 alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
2598 }
2599 else if(this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
2600 {
2601 // Can't have seen more than one alternative:
2602 // Rewind to start of (? sequence:
2603 --m_position;
2604 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2605 fail(regex_constants::error_bad_pattern, m_position - m_base, "More than one alternation operator | was encountered inside a conditional expression.");
2606 return false;
2607 }
2608 else
2609 {
2610 // We must *not* have seen an alternative inside a (DEFINE) block:
2611 b = this->getaddress(b->next.i, b);
2612 if((b->type == syntax_element_assert_backref) && (static_cast<re_brace*>(b)->index == 9999))
2613 {
2614 // Rewind to start of (? sequence:
2615 --m_position;
2616 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2617 fail(regex_constants::error_bad_pattern, m_position - m_base, "Alternation operators are not allowed inside a DEFINE block.");
2618 return false;
2619 }
2620 }
2621 // check for invalid repetition of next state:
2622 b = this->getaddress(expected_alt_point);
2623 b = this->getaddress(static_cast<re_alt*>(b)->next.i, b);
2624 if((b->type != syntax_element_assert_backref)
2625 && (b->type != syntax_element_startmark))
2626 {
2627 // Rewind to start of (? sequence:
2628 --m_position;
2629 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2630 fail(regex_constants::error_badrepeat, m_position - m_base, "A repetition operator cannot be applied to a zero-width assertion.");
2631 return false;
2632 }
2633 }
2634 //
2635 // append closing parenthesis state:
2636 //
2637 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
2638 pb->index = markid;
2639 pb->icase = this->flags() & regbase::icase;
2640 this->m_paren_start = last_paren_start;
2641 //
2642 // restore the alternate insertion point:
2643 //
2644 this->m_alt_insert_point = last_alt_point;
2645 //
2646 // and the case change data:
2647 //
2648 m_has_case_change = old_case_change;
2649 //
2650 // And the mark_reset data:
2651 //
2652 if(m_max_mark > m_mark_count)
2653 {
2654 m_mark_count = m_max_mark;
2655 }
2656 m_mark_reset = mark_reset;
2657 m_max_mark = max_mark;
2658
2659
2660 if(markid > 0)
2661 {
2662#ifndef BOOST_NO_STD_DISTANCE
2663 if(this->flags() & regbase::save_subexpression_location)
2664 this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position) - 1;
2665#else
2666 if(this->flags() & regbase::save_subexpression_location)
2667 this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base) - 1;
2668#endif
2669 //
2670 // allow backrefs to this mark:
2671 //
2672 if(markid < (int)(sizeof(unsigned) * CHAR_BIT))
2673 this->m_backrefs |= 1u << (markid - 1);
2674 }
2675 return true;
2676}
2677
2678template <class charT, class traits>
2679bool basic_regex_parser<charT, traits>::match_verb(const char* verb)
2680{
2681 while(*verb)
2682 {
2683 if(static_cast<charT>(*verb) != *m_position)
2684 {
2685 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2686 fail(regex_constants::error_perl_extension, m_position - m_base);
2687 return false;
2688 }
2689 if(++m_position == m_end)
2690 {
2691 --m_position;
2692 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2693 fail(regex_constants::error_perl_extension, m_position - m_base);
2694 return false;
2695 }
2696 ++verb;
2697 }
2698 return true;
2699}
2700
2701template <class charT, class traits>
2702bool basic_regex_parser<charT, traits>::parse_perl_verb()
2703{
2704 if(++m_position == m_end)
2705 {
2706 // Rewind to start of (* sequence:
2707 --m_position;
2708 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2709 fail(regex_constants::error_perl_extension, m_position - m_base);
2710 return false;
2711 }
2712 switch(*m_position)
2713 {
2714 case 'F':
2715 if(++m_position == m_end)
2716 {
2717 // Rewind to start of (* sequence:
2718 --m_position;
2719 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2720 fail(regex_constants::error_perl_extension, m_position - m_base);
2721 return false;
2722 }
2723 if((this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) || match_verb("AIL"))
2724 {
2725 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2726 {
2727 // Rewind to start of (* sequence:
2728 --m_position;
2729 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2730 fail(regex_constants::error_perl_extension, m_position - m_base);
2731 return false;
2732 }
2733 ++m_position;
2734 this->append_state(syntax_element_fail);
2735 return true;
2736 }
2737 break;
2738 case 'A':
2739 if(++m_position == m_end)
2740 {
2741 // Rewind to start of (* sequence:
2742 --m_position;
2743 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2744 fail(regex_constants::error_perl_extension, m_position - m_base);
2745 return false;
2746 }
2747 if(match_verb("CCEPT"))
2748 {
2749 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2750 {
2751 // Rewind to start of (* sequence:
2752 --m_position;
2753 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2754 fail(regex_constants::error_perl_extension, m_position - m_base);
2755 return false;
2756 }
2757 ++m_position;
2758 this->append_state(syntax_element_accept);
2759 return true;
2760 }
2761 break;
2762 case 'C':
2763 if(++m_position == m_end)
2764 {
2765 // Rewind to start of (* sequence:
2766 --m_position;
2767 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2768 fail(regex_constants::error_perl_extension, m_position - m_base);
2769 return false;
2770 }
2771 if(match_verb("OMMIT"))
2772 {
2773 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2774 {
2775 // Rewind to start of (* sequence:
2776 --m_position;
2777 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2778 fail(regex_constants::error_perl_extension, m_position - m_base);
2779 return false;
2780 }
2781 ++m_position;
2782 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_commit;
2783 this->m_pdata->m_disable_match_any = true;
2784 return true;
2785 }
2786 break;
2787 case 'P':
2788 if(++m_position == m_end)
2789 {
2790 // Rewind to start of (* sequence:
2791 --m_position;
2792 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2793 fail(regex_constants::error_perl_extension, m_position - m_base);
2794 return false;
2795 }
2796 if(match_verb("RUNE"))
2797 {
2798 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2799 {
2800 // Rewind to start of (* sequence:
2801 --m_position;
2802 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2803 fail(regex_constants::error_perl_extension, m_position - m_base);
2804 return false;
2805 }
2806 ++m_position;
2807 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_prune;
2808 this->m_pdata->m_disable_match_any = true;
2809 return true;
2810 }
2811 break;
2812 case 'S':
2813 if(++m_position == m_end)
2814 {
2815 // Rewind to start of (* sequence:
2816 --m_position;
2817 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2818 fail(regex_constants::error_perl_extension, m_position - m_base);
2819 return false;
2820 }
2821 if(match_verb("KIP"))
2822 {
2823 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2824 {
2825 // Rewind to start of (* sequence:
2826 --m_position;
2827 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2828 fail(regex_constants::error_perl_extension, m_position - m_base);
2829 return false;
2830 }
2831 ++m_position;
2832 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_skip;
2833 this->m_pdata->m_disable_match_any = true;
2834 return true;
2835 }
2836 break;
2837 case 'T':
2838 if(++m_position == m_end)
2839 {
2840 // Rewind to start of (* sequence:
2841 --m_position;
2842 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2843 fail(regex_constants::error_perl_extension, m_position - m_base);
2844 return false;
2845 }
2846 if(match_verb("HEN"))
2847 {
2848 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2849 {
2850 // Rewind to start of (* sequence:
2851 --m_position;
2852 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2853 fail(regex_constants::error_perl_extension, m_position - m_base);
2854 return false;
2855 }
2856 ++m_position;
2857 this->append_state(syntax_element_then);
2858 this->m_pdata->m_disable_match_any = true;
2859 return true;
2860 }
2861 break;
2862 }
2863 return false;
2864}
2865
2866template <class charT, class traits>
2867bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
2868{
2869 //
2870 // parses an emacs style \sx or \Sx construct.
2871 //
2872 if(++m_position == m_end)
2873 {
2874 // Rewind to start of sequence:
2875 --m_position;
2876 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
2877 fail(regex_constants::error_escape, m_position - m_base);
2878 return false;
2879 }
2880 basic_char_set<charT, traits> char_set;
2881 if(negate)
2882 char_set.negate();
2883
2884 static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', };
2885
2886 switch(*m_position)
2887 {
2888 case 's':
2889 case ' ':
2890 char_set.add_class(this->m_mask_space);
2891 break;
2892 case 'w':
2893 char_set.add_class(this->m_word_mask);
2894 break;
2895 case '_':
2896 char_set.add_single(digraph<charT>(charT('$')));
2897 char_set.add_single(digraph<charT>(charT('&')));
2898 char_set.add_single(digraph<charT>(charT('*')));
2899 char_set.add_single(digraph<charT>(charT('+')));
2900 char_set.add_single(digraph<charT>(charT('-')));
2901 char_set.add_single(digraph<charT>(charT('_')));
2902 char_set.add_single(digraph<charT>(charT('<')));
2903 char_set.add_single(digraph<charT>(charT('>')));
2904 break;
2905 case '.':
2906 char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5));
2907 break;
2908 case '(':
2909 char_set.add_single(digraph<charT>(charT('(')));
2910 char_set.add_single(digraph<charT>(charT('[')));
2911 char_set.add_single(digraph<charT>(charT('{')));
2912 break;
2913 case ')':
2914 char_set.add_single(digraph<charT>(charT(')')));
2915 char_set.add_single(digraph<charT>(charT(']')));
2916 char_set.add_single(digraph<charT>(charT('}')));
2917 break;
2918 case '"':
2919 char_set.add_single(digraph<charT>(charT('"')));
2920 char_set.add_single(digraph<charT>(charT('\'')));
2921 char_set.add_single(digraph<charT>(charT('`')));
2922 break;
2923 case '\'':
2924 char_set.add_single(digraph<charT>(charT('\'')));
2925 char_set.add_single(digraph<charT>(charT(',')));
2926 char_set.add_single(digraph<charT>(charT('#')));
2927 break;
2928 case '<':
2929 char_set.add_single(digraph<charT>(charT(';')));
2930 break;
2931 case '>':
2932 char_set.add_single(digraph<charT>(charT('\n')));
2933 char_set.add_single(digraph<charT>(charT('\f')));
2934 break;
2935 default:
2936 fail(regex_constants::error_ctype, m_position - m_base);
2937 return false;
2938 }
2939 if(0 == this->append_set(char_set))
2940 {
2941 fail(regex_constants::error_ctype, m_position - m_base);
2942 return false;
2943 }
2944 ++m_position;
2945 return true;
2946}
2947
2948template <class charT, class traits>
2949regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options()
2950{
2951 // we have a (?imsx-imsx) group, convert it into a set of flags:
2952 regex_constants::syntax_option_type f = this->flags();
2953 bool breakout = false;
2954 do
2955 {
2956 switch(*m_position)
2957 {
2958 case 's':
2959 f |= regex_constants::mod_s;
2960 f &= ~regex_constants::no_mod_s;
2961 break;
2962 case 'm':
2963 f &= ~regex_constants::no_mod_m;
2964 break;
2965 case 'i':
2966 f |= regex_constants::icase;
2967 break;
2968 case 'x':
2969 f |= regex_constants::mod_x;
2970 break;
2971 default:
2972 breakout = true;
2973 continue;
2974 }
2975 if(++m_position == m_end)
2976 {
2977 // Rewind to start of (? sequence:
2978 --m_position;
2979 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2980 fail(regex_constants::error_paren, m_position - m_base);
2981 return false;
2982 }
2983 }
2984 while(!breakout);
2985
2986 breakout = false;
2987
2988 if(*m_position == static_cast<charT>('-'))
2989 {
2990 if(++m_position == m_end)
2991 {
2992 // Rewind to start of (? sequence:
2993 --m_position;
2994 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2995 fail(regex_constants::error_paren, m_position - m_base);
2996 return false;
2997 }
2998 do
2999 {
3000 switch(*m_position)
3001 {
3002 case 's':
3003 f &= ~regex_constants::mod_s;
3004 f |= regex_constants::no_mod_s;
3005 break;
3006 case 'm':
3007 f |= regex_constants::no_mod_m;
3008 break;
3009 case 'i':
3010 f &= ~regex_constants::icase;
3011 break;
3012 case 'x':
3013 f &= ~regex_constants::mod_x;
3014 break;
3015 default:
3016 breakout = true;
3017 continue;
3018 }
3019 if(++m_position == m_end)
3020 {
3021 // Rewind to start of (? sequence:
3022 --m_position;
3023 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
3024 fail(regex_constants::error_paren, m_position - m_base);
3025 return false;
3026 }
3027 }
3028 while(!breakout);
3029 }
3030 return f;
3031}
3032
3033template <class charT, class traits>
3034bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_start)
3035{
3036 //
3037 // If we didn't actually add any states after the last
3038 // alternative then that's an error:
3039 //
3040 if((this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))
3041 && m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start)
3042 &&
3043 !(
3044 ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
3045 &&
3046 ((this->flags() & regbase::no_empty_expressions) == 0)
3047 )
3048 )
3049 {
3050 fail(regex_constants::error_empty, this->m_position - this->m_base, "Can't terminate a sub-expression with an alternation operator |.");
3051 return false;
3052 }
3053 //
3054 // Fix up our alternatives:
3055 //
3056 while(m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start))
3057 {
3058 //
3059 // fix up the jump to point to the end of the states
3060 // that we've just added:
3061 //
3062 std::ptrdiff_t jump_offset = m_alt_jumps.back();
3063 m_alt_jumps.pop_back();
3064 this->m_pdata->m_data.align();
3065 re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
3066 BOOST_ASSERT(jmp->type == syntax_element_jump);
3067 jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
3068 }
3069 return true;
3070}
3071
3072#ifdef BOOST_MSVC
3073#pragma warning(pop)
3074#endif
3075
3076} // namespace BOOST_REGEX_DETAIL_NS
3077} // namespace boost
3078
3079#ifdef BOOST_MSVC
3080#pragma warning(push)
3081#pragma warning(disable: 4103)
3082#endif
3083#ifdef BOOST_HAS_ABI_HEADERS
3084# include BOOST_ABI_SUFFIX
3085#endif
3086#ifdef BOOST_MSVC
3087#pragma warning(pop)
3088#endif
3089
3090#endif