]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/boost/regex/v4/perl_matcher.hpp
import new upstream nautilus stable release 14.2.8
[ceph.git] / ceph / src / boost / boost / regex / v4 / perl_matcher.hpp
CommitLineData
7c673cae
FG
1/*
2 *
3 * Copyright (c) 2002
4 * John Maddock
5 *
6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 *
10 */
11
12#ifndef BOOST_REGEX_MATCHER_HPP
13#define BOOST_REGEX_MATCHER_HPP
14
15#include <boost/regex/v4/iterator_category.hpp>
16
17#ifdef BOOST_MSVC
18#pragma warning(push)
19#pragma warning(disable: 4103)
20#endif
21#ifdef BOOST_HAS_ABI_HEADERS
22# include BOOST_ABI_PREFIX
23#endif
24#ifdef BOOST_MSVC
25#pragma warning(pop)
26#endif
27
28#ifdef BOOST_MSVC
29# pragma warning(push)
92f5a8d4
TL
30#if BOOST_MSVC < 1910
31#pragma warning(disable:4800)
32#endif
7c673cae
FG
33#endif
34
35namespace boost{
36namespace BOOST_REGEX_DETAIL_NS{
37
38//
39// error checking API:
40//
41BOOST_REGEX_DECL void BOOST_REGEX_CALL verify_options(boost::regex_constants::syntax_option_type ef, match_flag_type mf);
42//
43// function can_start:
44//
//
// Generic version: reports whether the character c may begin a match,
// according to the bits selected by mask in the entry map[c].
// map is only indexed for values in [0, 1 << CHAR_BIT); any value outside
// that interval is always reported as a possible starting character.
//
template <class charT>
inline bool can_start(charT c, const unsigned char* map, unsigned char mask)
{
   if(c < static_cast<charT>(0))
      return true;   // negative values have no map entry
   if(c >= static_cast<charT>(1 << CHAR_BIT))
      return true;   // too large to be a map index
   return (map[c] & mask) != 0;
}
// Overload for plain char: the value is converted to unsigned char first so
// that it is always usable as an index into map, whatever the sign of char.
inline bool can_start(char c, const unsigned char* map, unsigned char mask)
{
   const unsigned char index = static_cast<unsigned char>(c);
   return (map[index] & mask) != 0;
}
// Overload for signed char: negative values are mapped onto the upper half
// of map by converting to unsigned char before indexing.
inline bool can_start(signed char c, const unsigned char* map, unsigned char mask)
{
   const unsigned char index = static_cast<unsigned char>(c);
   return (map[index] & mask) != 0;
}
// Overload for unsigned char: every value is a valid index, so the map
// entry is tested directly against mask.
inline bool can_start(unsigned char c, const unsigned char* map, unsigned char mask)
{
   const unsigned char entry = map[c];
   return (entry & mask) != 0;
}
// Overload for unsigned short: values of (1 << CHAR_BIT) and above have no
// entry in map and are always treated as possible starting characters.
inline bool can_start(unsigned short c, const unsigned char* map, unsigned char mask)
{
   if(c >= (1 << CHAR_BIT))
      return true;
   return (map[c] & mask) != 0;
}
66#if !defined(__hpux) && !defined(__WINSCW__)// WCHAR_MIN not usable in pp-directives.
67#if defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T)
// Overload for wchar_t (only compiled when WCHAR_MIN == 0): values of
// (1u << CHAR_BIT) and above have no entry in map and always succeed.
inline bool can_start(wchar_t c, const unsigned char* map, unsigned char mask)
{
   if(c >= static_cast<wchar_t>(1u << CHAR_BIT))
      return true;
   return (map[c] & mask) != 0;
}
72#endif
73#endif
74#if !defined(BOOST_NO_INTRINSIC_WCHAR_T)
// Overload for unsigned int: values of (1u << CHAR_BIT) and above have no
// entry in map and are always treated as possible starting characters.
inline bool can_start(unsigned int c, const unsigned char* map, unsigned char mask)
{
   if(c >= static_cast<unsigned int>(1u << CHAR_BIT))
      return true;
   return (map[c] & mask) != 0;
}
79#endif
80
81
82//
83// Unfortunately Rogue Wave's standard library appears to have a bug
84// in std::basic_string::compare that results in erroneous answers
85// in some cases (tested with Borland C++ 5.1, Rogue Wave lib version
86// 0x020101) the test case was:
87// {39135,0} < {0xff,0}
88// which succeeds when it should not.
89//
90#ifndef _RWSTD_VER
//
// Compares the string s with the null-terminated sequence p and returns
// <0, 0 or >0 as std::basic_string::compare does.  When p is empty, an s
// that is either empty or consists of exactly one null character is
// reported as equal.
//
template <class C, class T, class A>
inline int string_compare(const std::basic_string<C,T,A>& s, const C* p)
{
   const bool pattern_is_empty = (0 == *p);
   if(pattern_is_empty)
   {
      if(s.empty())
         return 0;
      if((s.size() == 1) && (s[0] == 0))
         return 0;
   }
   return s.compare(p);
}
101#else
//
// Compares the string s with the null-terminated sequence p and returns
// <0, 0 or >0 as std::basic_string::compare does.  When p is empty, an s
// that is either empty or consists of exactly one null character is
// reported as equal.
//
template <class C, class T, class A>
inline int string_compare(const std::basic_string<C,T,A>& s, const C* p)
{
   const bool pattern_is_empty = (0 == *p);
   if(pattern_is_empty)
   {
      if(s.empty())
         return 0;
      if((s.size() == 1) && (s[0] == 0))
         return 0;
   }
   return s.compare(p);
}
// Narrow-string overload: compares s with p as C strings via std::strcmp
// rather than std::basic_string::compare.
inline int string_compare(const std::string& s, const char* p)
{
   const char* const lhs = s.c_str();
   return std::strcmp(lhs, p);
}
114# ifndef BOOST_NO_WREGEX
// Wide-string overload: compares s with p as C strings via std::wcscmp
// rather than std::basic_string::compare.
inline int string_compare(const std::wstring& s, const wchar_t* p)
{
   const wchar_t* const lhs = s.c_str();
   return std::wcscmp(lhs, p);
}
117#endif
118#endif
//
// Fallback for sequence types other than std::basic_string: compares the
// elements of s with those of p one by one.
// Returns the difference (as int) of the first pair of elements that
// differ; if every element of s matched, returns minus the element of p
// at index s.size() (so 0 when p ends there too).  p must therefore be
// readable up to and including index s.size().
//
template <class Seq, class C>
inline int string_compare(const Seq& s, const C* p)
{
   const std::size_t len = s.size();
   std::size_t pos = 0;
   for(; pos < len; ++pos)
   {
      if(!(p[pos] == s[pos]))
         break;
   }
   if(pos == len)
      return -int(p[pos]);
   return int(s[pos]) - int(p[pos]);
}
129# define STR_COMP(s,p) string_compare(s,p)
130
// Returns a pointer to the element immediately after the first null
// element found at or after p.
template<class charT>
inline const charT* re_skip_past_null(const charT* p)
{
   while(*p != static_cast<charT>(0))
   {
      ++p;
   }
   return p + 1;
}
137
138template <class iterator, class charT, class traits_type, class char_classT>
139iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
140 iterator last,
141 const re_set_long<char_classT>* set_,
142 const regex_data<charT, traits_type>& e, bool icase)
143{
144 const charT* p = reinterpret_cast<const charT*>(set_+1);
145 iterator ptr;
146 unsigned int i;
147 //bool icase = e.m_flags & regex_constants::icase;
148
149 if(next == last) return next;
150
151 typedef typename traits_type::string_type traits_string_type;
152 const ::boost::regex_traits_wrapper<traits_type>& traits_inst = *(e.m_ptraits);
153
154 // dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never
155 // referenced
156 (void)traits_inst;
157
158 // try and match a single character, could be a multi-character
159 // collating element...
160 for(i = 0; i < set_->csingles; ++i)
161 {
162 ptr = next;
163 if(*p == static_cast<charT>(0))
164 {
165 // treat null string as special case:
b32b8144 166 if(traits_inst.translate(*ptr, icase))
7c673cae 167 {
b32b8144 168 ++p;
7c673cae
FG
169 continue;
170 }
171 return set_->isnot ? next : (ptr == next) ? ++next : ptr;
172 }
173 else
174 {
175 while(*p && (ptr != last))
176 {
177 if(traits_inst.translate(*ptr, icase) != *p)
178 break;
179 ++p;
180 ++ptr;
181 }
182
183 if(*p == static_cast<charT>(0)) // if null we've matched
184 return set_->isnot ? next : (ptr == next) ? ++next : ptr;
185
186 p = re_skip_past_null(p); // skip null
187 }
188 }
189
190 charT col = traits_inst.translate(*next, icase);
191
192
193 if(set_->cranges || set_->cequivalents)
194 {
195 traits_string_type s1;
196 //
197 // try and match a range, NB only a single character can match
198 if(set_->cranges)
199 {
200 if((e.m_flags & regex_constants::collate) == 0)
201 s1.assign(1, col);
202 else
203 {
204 charT a[2] = { col, charT(0), };
205 s1 = traits_inst.transform(a, a + 1);
206 }
207 for(i = 0; i < set_->cranges; ++i)
208 {
209 if(STR_COMP(s1, p) >= 0)
210 {
211 do{ ++p; }while(*p);
212 ++p;
213 if(STR_COMP(s1, p) <= 0)
214 return set_->isnot ? next : ++next;
215 }
216 else
217 {
218 // skip first string
219 do{ ++p; }while(*p);
220 ++p;
221 }
222 // skip second string
223 do{ ++p; }while(*p);
224 ++p;
225 }
226 }
227 //
228 // try and match an equivalence class, NB only a single character can match
229 if(set_->cequivalents)
230 {
231 charT a[2] = { col, charT(0), };
232 s1 = traits_inst.transform_primary(a, a +1);
233 for(i = 0; i < set_->cequivalents; ++i)
234 {
235 if(STR_COMP(s1, p) == 0)
236 return set_->isnot ? next : ++next;
237 // skip string
238 do{ ++p; }while(*p);
239 ++p;
240 }
241 }
242 }
243 if(traits_inst.isctype(col, set_->cclasses) == true)
244 return set_->isnot ? next : ++next;
245 if((set_->cnclasses != 0) && (traits_inst.isctype(col, set_->cnclasses) == false))
246 return set_->isnot ? next : ++next;
247 return set_->isnot ? ++next : next;
248}
249
//
// repeater_count:
// One entry in a singly linked stack of repeat counters.  Constructing a
// counter with the four-argument constructor pushes it on the stack whose
// top pointer is *s; destroying it restores the previous top.  A counter
// built with the one-argument constructor has no predecessor and the
// state id -1; it is not pushed and its destructor leaves the stack alone.
//
template <class BidiIterator>
class repeater_count
{
   repeater_count** stack;   // address of the pointer to the top of the stack
   repeater_count* next;     // the entry that was on top when this one was pushed
   int state_id;             // id of the repeat this entry counts (negative ids are special)
   std::size_t count;        // the number of iterations so far
   BidiIterator start_pos;   // where the last repeat started

   //
   // Walks down the stack from p looking for an entry whose state id is n.
   // Returns that entry, or 0 if the bottom of the stack is reached or an
   // entry with the id (-2 - current_recursion_id) is met first.
   //
   repeater_count* unwind_until(int n, repeater_count* p, int current_recursion_id)
   {
      while(p && (p->state_id != n))
      {
         if(-2 - current_recursion_id == p->state_id)
            return 0;
         p = p->next;
         if(p && (p->state_id < 0))
         {
            p = unwind_until(p->state_id, p, current_recursion_id);
            if(!p)
               return p;
            p = p->next;
         }
      }
      return p;
   }
public:
   // Creates an entry with no predecessor, state id -1 and a zero count.
   repeater_count(repeater_count** s) : stack(s), next(0), state_id(-1), count(0), start_pos() {}

   //
   // Pushes a new entry for state i on the stack *s.  *s must not be null.
   // The count starts at zero when the previous top has a non-negative id
   // smaller than i; otherwise the count and start position are inherited
   // from the nearest entry found by unwind_until() for the same id, or the
   // count is zero if there is none.
   //
   repeater_count(int i, repeater_count** s, BidiIterator start, int current_recursion_id)
      : stack(s), next(*s), state_id(i), count(0), start_pos(start)
   {
      *stack = this;
      if((state_id > next->state_id) && (next->state_id >= 0))
         return;   // a fresh repeat: keep the zero count
      repeater_count* p = unwind_until(state_id, next, current_recursion_id);
      if(p)
      {
         count = p->count;
         start_pos = p->start_pos;
      }
   }
   // Pops this entry again (entries without a predecessor were never pushed).
   ~repeater_count()
   {
      if(next)
         *stack = next;
   }
   // Observers are const so they can be used through const references.
   std::size_t get_count() const { return count; }
   int get_id() const { return state_id; }
   // Increments the count and returns the new value.
   std::size_t operator++() { return ++count; }
   bool check_null_repeat(const BidiIterator& pos, std::size_t max)
   {
      // this is called when we are about to start a new repeat,
      // if the last one was NULL move our count to max,
      // otherwise save the current position.
      const bool result = (count == 0) ? false : (pos == start_pos);
      if(result)
         count = max;
      else
         start_pos = pos;
      return result;
   }
};
322
323struct saved_state;
324
//
// Numeric tags for the different kinds of saved state.
// The values are consecutive starting at zero; saved_state_count is not a
// tag itself but the number of tags that precede it.
//
enum saved_state_type
{
   saved_type_end                     = 0,
   saved_type_paren                   = 1,
   saved_type_recurse                 = 2,
   saved_type_assertion               = 3,
   saved_state_alt                    = 4,
   saved_state_repeater_count         = 5,
   saved_state_extra_block            = 6,
   saved_state_greedy_single_repeat   = 7,
   saved_state_rep_slow_dot           = 8,
   saved_state_rep_fast_dot           = 9,
   saved_state_rep_char               = 10,
   saved_state_rep_short_set          = 11,
   saved_state_rep_long_set           = 12,
   saved_state_non_greedy_long_repeat = 13,
   saved_state_count                  = 14   // number of tags above
};
343
//
// recursion_info:
// Plain record parameterised on a results type; perl_matcher keeps a
// std::vector of these in its recursion_stack member.
// NOTE(review): the meaning of the individual fields is not visible in this
// header - the notes below are inferred from the names only and should be
// confirmed against the perl_matcher implementation files.
//
344template <class Results>
345struct recursion_info
346{
347 typedef typename Results::value_type value_type;
348 typedef typename value_type::iterator iterator;
 // presumably identifies the sub-expression being recursed into - TODO confirm
349 int idx;
 // presumably the state to continue from once the recursion ends - TODO confirm
350 const re_syntax_base* preturn_address;
 // a complete Results object held by value
351 Results results;
 // pointer to a repeater_count over the same iterator type as Results
352 repeater_count<iterator>* repeater_stack;
 // presumably the input position at which the recursion began - TODO confirm
b32b8144 353 iterator location_of_start;
7c673cae
FG
354};
355
356#ifdef BOOST_MSVC
357#pragma warning(push)
92f5a8d4
TL
358#pragma warning(disable : 4251)
359#if BOOST_MSVC < 1700
360# pragma warning(disable : 4231)
361#endif
7c673cae
FG
362# if BOOST_MSVC < 1600
363# pragma warning(disable : 4660)
364# endif
365#endif
366
//
// perl_matcher:
// Holds all the state needed to match one basic_regex against the
// iterator range handed to the constructor, writing the outcome into the
// match_results object supplied by the caller.
// Only the class layout and a few trivial inline members are defined here;
// the remaining member functions are defined in the perl_matcher_*.hpp
// headers included at the end of this file.
//
367template <class BidiIterator, class Allocator, class traits>
368class perl_matcher
369{
370public:
371 typedef typename traits::char_type char_type;
372 typedef perl_matcher<BidiIterator, Allocator, traits> self_type;
 // pointer to one of the parameterless match_xxx()/find_xxx() members:
373 typedef bool (self_type::*matcher_proc_type)(void);
374 typedef std::size_t traits_size_type;
375 typedef typename is_byte<char_type>::width_type width_type;
376 typedef typename regex_iterator_traits<BidiIterator>::difference_type difference_type;
377 typedef match_results<BidiIterator, Allocator> results_type;
378
 //
 // first/end: the sequence to examine (stored as base and last, with
 //            position starting at first)
 // what:      receives the result (kept by reference as m_result)
 // e:         the expression to match (kept by reference as re)
 // f:         match flags, forwarded to construct_init()
 // l_base:    stored as backstop, the limit for lookbehind
 // The objects referred to by what and e are not copied, so they must
 // remain valid for as long as the matcher is used.
 //
379 perl_matcher(BidiIterator first, BidiIterator end,
380 match_results<BidiIterator, Allocator>& what,
381 const basic_regex<char_type, traits>& e,
382 match_flag_type f,
383 BidiIterator l_base)
384 : m_result(what), base(first), last(end),
385 position(first), backstop(l_base), re(e), traits_inst(e.get_traits()),
386 m_independent(false), next_count(&rep_obj), rep_obj(&next_count)
b32b8144
FG
387#ifdef BOOST_REGEX_NON_RECURSIVE
388 , m_recursions(0)
389#endif
7c673cae
FG
390 {
391 construct_init(e, f);
392 }
393
 // entry points (defined in the implementation headers):
394 bool match();
395 bool find();
396
 // add the bits in f to / remove them from the match flags in use:
397 void setf(match_flag_type f)
398 { m_match_flags |= f; }
399 void unsetf(match_flag_type f)
400 { m_match_flags &= ~f; }
401
402private:
403 void construct_init(const basic_regex<char_type, traits>& e, match_flag_type f);
404
405 bool find_imp();
406 bool match_imp();
407#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
408 typedef bool (perl_matcher::*protected_proc_type)();
409 bool protected_call(protected_proc_type);
410#endif
411 void estimate_max_state_count(std::random_access_iterator_tag*);
412 void estimate_max_state_count(void*);
413 bool match_prefix();
414 bool match_all_states();
415
416 // match procs, stored in s_match_vtable:
417 bool match_startmark();
418 bool match_endmark();
419 bool match_literal();
420 bool match_start_line();
421 bool match_end_line();
422 bool match_wild();
423 bool match_match();
424 bool match_word_boundary();
425 bool match_within_word();
426 bool match_word_start();
427 bool match_word_end();
428 bool match_buffer_start();
429 bool match_buffer_end();
430 bool match_backref();
431 bool match_long_set();
432 bool match_set();
433 bool match_jump();
434 bool match_alt();
435 bool match_rep();
436 bool match_combining();
437 bool match_soft_buffer_end();
438 bool match_restart_continue();
439 bool match_long_set_repeat();
440 bool match_set_repeat();
441 bool match_char_repeat();
442 bool match_dot_repeat_fast();
443 bool match_dot_repeat_slow();
 // uses the fast version when BidiIterator is a random access iterator,
 // the slow version otherwise:
444 bool match_dot_repeat_dispatch()
445 {
446 return ::boost::is_random_access_iterator<BidiIterator>::value ? match_dot_repeat_fast() : match_dot_repeat_slow();
447 }
448 bool match_backstep();
449 bool match_assert_backref();
450 bool match_toggle_case();
451#ifdef BOOST_REGEX_RECURSIVE
452 bool backtrack_till_match(std::size_t count);
453#endif
454 bool match_recursion();
455 bool match_fail();
456 bool match_accept();
457 bool match_commit();
458 bool match_then();
459 bool skip_until_paren(int index, bool match = true);
460
461 // find procs stored in s_find_vtable:
462 bool find_restart_any();
463 bool find_restart_word();
464 bool find_restart_line();
465 bool find_restart_buf();
466 bool find_restart_lit();
467
468private:
 // NB: the constructors' initializer lists depend on the order in which
 // the data members below are declared, do not reorder them.
469 // final result structure to be filled in:
470 match_results<BidiIterator, Allocator>& m_result;
471 // temporary result for POSIX matches:
472 scoped_ptr<match_results<BidiIterator, Allocator> > m_temp_match;
473 // pointer to actual result structure to fill in:
474 match_results<BidiIterator, Allocator>* m_presult;
475 // start of sequence being searched:
476 BidiIterator base;
477 // end of sequence being searched:
478 BidiIterator last;
479 // current character being examined:
480 BidiIterator position;
481 // where to restart next search after failed match attempt:
482 BidiIterator restart;
483 // where the current search started from, acts as base for $` during grep:
484 BidiIterator search_base;
485 // how far we can go back when matching lookbehind:
486 BidiIterator backstop;
487 // the expression being examined:
488 const basic_regex<char_type, traits>& re;
489 // the expression's traits class:
490 const ::boost::regex_traits_wrapper<traits>& traits_inst;
491 // the next state in the machine being matched:
492 const re_syntax_base* pstate;
493 // matching flags in use:
494 match_flag_type m_match_flags;
495 // how many states we have examined so far:
496 std::ptrdiff_t state_count;
497 // max number of states to examine before giving up:
498 std::ptrdiff_t max_state_count;
499 // whether we should ignore case or not:
500 bool icase;
501 // set to true when (position == last), indicates that we may have a partial match:
502 bool m_has_partial_match;
503 // set to true whenever we get a match:
504 bool m_has_found_match;
505 // set to true whenever we're inside an independent sub-expression:
506 bool m_independent;
507 // the current repeat being examined:
508 repeater_count<BidiIterator>* next_count;
509 // the first repeat being examined (top of linked list):
510 repeater_count<BidiIterator> rep_obj;
511 // the mask to pass when matching word boundaries:
512 typename traits::char_class_type m_word_mask;
513 // the bitmask to use when determining whether a match_any matches a newline or not:
514 unsigned char match_any_mask;
515 // recursion information:
516 std::vector<recursion_info<results_type> > recursion_stack;
517#ifdef BOOST_REGEX_RECURSIVE
518 // Set to false by a (*COMMIT):
519 bool m_can_backtrack;
520 bool m_have_accept;
521 bool m_have_then;
522#endif
523#ifdef BOOST_REGEX_NON_RECURSIVE
524 //
525 // additional members for non-recursive version:
526 //
 // pointer to one of the unwind_xxx(bool) members below:
527 typedef bool (self_type::*unwind_proc_type)(bool);
528
529 void extend_stack();
530 bool unwind(bool);
531 bool unwind_end(bool);
532 bool unwind_paren(bool);
533 bool unwind_recursion_stopper(bool);
534 bool unwind_assertion(bool);
535 bool unwind_alt(bool);
536 bool unwind_repeater_counter(bool);
537 bool unwind_extra_block(bool);
538 bool unwind_greedy_single_repeat(bool);
539 bool unwind_slow_dot_repeat(bool);
540 bool unwind_fast_dot_repeat(bool);
541 bool unwind_char_repeat(bool);
542 bool unwind_short_set_repeat(bool);
543 bool unwind_long_set_repeat(bool);
544 bool unwind_non_greedy_repeat(bool);
545 bool unwind_recursion(bool);
546 bool unwind_recursion_pop(bool);
547 bool unwind_commit(bool);
548 bool unwind_then(bool);
549 bool unwind_case(bool);
550 void destroy_single_repeat();
551 void push_matched_paren(int index, const sub_match<BidiIterator>& sub);
552 void push_recursion_stopper();
553 void push_assertion(const re_syntax_base* ps, bool positive);
554 void push_alt(const re_syntax_base* ps);
555 void push_repeater_count(int i, repeater_count<BidiIterator>** s);
556 void push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int state_id);
557 void push_non_greedy_repeat(const re_syntax_base* ps);
b32b8144 558 void push_recursion(int idx, const re_syntax_base* p, results_type* presults, results_type* presults2);
7c673cae
FG
559 void push_recursion_pop();
560 void push_case_change(bool);
561
562 // pointer to base of stack:
563 saved_state* m_stack_base;
564 // pointer to current stack position:
565 saved_state* m_backup_state;
566 // how many memory blocks have we used up?:
567 unsigned used_block_count;
568 // determines what value to return when unwinding from recursion,
569 // allows for mixed recursive/non-recursive algorithm:
570 bool m_recursive_result;
571 // We have unwound to a lookahead/lookbehind, used by COMMIT/PRUNE/SKIP:
572 bool m_unwound_lookahead;
573 // We have unwound to an alternative, used by THEN:
574 bool m_unwound_alt;
575 // We are unwinding a commit - used by independent subs to determine whether to stop there or carry on unwinding:
576 //bool m_unwind_commit;
b32b8144
FG
577 // Recursion limit:
578 unsigned m_recursions;
7c673cae
FG
579#endif
580
581 // these operations aren't allowed, so are declared private,
582 // bodies are provided to keep explicit-instantiation requests happy:
 // (the assignment copies nothing, and the copy constructor only binds
 // the three reference members and builds rep_obj from a null stack
 // pointer - every other member is left uninitialised)
583 perl_matcher& operator=(const perl_matcher&)
584 {
585 return *this;
586 }
587 perl_matcher(const perl_matcher& that)
588 : m_result(that.m_result), re(that.re), traits_inst(that.traits_inst), rep_obj(0) {}
589};
590
591#ifdef BOOST_MSVC
592#pragma warning(pop)
593#endif
594
595} // namespace BOOST_REGEX_DETAIL_NS
596
597#ifdef BOOST_MSVC
598#pragma warning(push)
599#pragma warning(disable: 4103)
600#endif
601#ifdef BOOST_HAS_ABI_HEADERS
602# include BOOST_ABI_SUFFIX
603#endif
604#ifdef BOOST_MSVC
605#pragma warning(pop)
606#endif
607
608} // namespace boost
609
610#ifdef BOOST_MSVC
611# pragma warning(pop)
612#endif
613
614//
615// include the implementation of perl_matcher:
616//
617#ifdef BOOST_REGEX_RECURSIVE
618#include <boost/regex/v4/perl_matcher_recursive.hpp>
619#else
620#include <boost/regex/v4/perl_matcher_non_recursive.hpp>
621#endif
622// this one has to be last:
623#include <boost/regex/v4/perl_matcher_common.hpp>
624
625#endif
626