]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/boost/xpressive/detail/dynamic/parser_traits.hpp
update sources to v12.2.3
[ceph.git] / ceph / src / boost / boost / xpressive / detail / dynamic / parser_traits.hpp
1 ///////////////////////////////////////////////////////////////////////////////
2 // detail/dynamic/parser_traits.hpp
3 //
4 // Copyright 2008 Eric Niebler. Distributed under the Boost
5 // Software License, Version 1.0. (See accompanying file
6 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7
8 #ifndef BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSER_TRAITS_HPP_EAN_10_04_2005
9 #define BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSER_TRAITS_HPP_EAN_10_04_2005
10
11 // MS compatible compilers support #pragma once
12 #if defined(_MSC_VER)
13 # pragma once
14 #endif
15
16 #include <string>
17 #include <climits>
18 #include <boost/config.hpp>
19 #include <boost/assert.hpp>
20 #include <boost/throw_exception.hpp>
21 #include <boost/xpressive/regex_error.hpp>
22 #include <boost/xpressive/regex_traits.hpp>
23 #include <boost/xpressive/detail/detail_fwd.hpp>
24 #include <boost/xpressive/detail/dynamic/matchable.hpp>
25 #include <boost/xpressive/detail/dynamic/parser_enum.hpp>
26 #include <boost/xpressive/detail/utility/literals.hpp>
27 #include <boost/xpressive/detail/utility/algorithm.hpp>
28
29 namespace boost { namespace xpressive
30 {
31
32 ///////////////////////////////////////////////////////////////////////////////
33 // compiler_traits
34 // this works for char and wchar_t. it must be specialized for anything else.
35 //
36 template<typename RegexTraits>
37 struct compiler_traits
38 {
39 typedef RegexTraits regex_traits;
40 typedef typename regex_traits::char_type char_type;
41 typedef typename regex_traits::string_type string_type;
42 typedef typename regex_traits::locale_type locale_type;
43
44 ///////////////////////////////////////////////////////////////////////////////
45 // constructor
46 explicit compiler_traits(RegexTraits const &traits = RegexTraits())
47 : traits_(traits)
48 , flags_(regex_constants::ECMAScript)
49 , space_(lookup_classname(traits_, "space"))
50 , alnum_(lookup_classname(traits_, "alnum"))
51 {
52 }
53
54 ///////////////////////////////////////////////////////////////////////////////
55 // flags
56 regex_constants::syntax_option_type flags() const
57 {
58 return this->flags_;
59 }
60
61 ///////////////////////////////////////////////////////////////////////////////
62 // flags
63 void flags(regex_constants::syntax_option_type flags)
64 {
65 this->flags_ = flags;
66 }
67
68 ///////////////////////////////////////////////////////////////////////////////
69 // traits
70 regex_traits &traits()
71 {
72 return this->traits_;
73 }
74
75 regex_traits const &traits() const
76 {
77 return this->traits_;
78 }
79
80 ///////////////////////////////////////////////////////////////////////////////
81 // imbue
82 locale_type imbue(locale_type const &loc)
83 {
84 locale_type oldloc = this->traits().imbue(loc);
85 this->space_ = lookup_classname(this->traits(), "space");
86 this->alnum_ = lookup_classname(this->traits(), "alnum");
87 return oldloc;
88 }
89
90 ///////////////////////////////////////////////////////////////////////////////
91 // getloc
92 locale_type getloc() const
93 {
94 return this->traits().getloc();
95 }
96
97 ///////////////////////////////////////////////////////////////////////////////
98 // get_token
99 // get a token and advance the iterator
100 template<typename FwdIter>
101 regex_constants::compiler_token_type get_token(FwdIter &begin, FwdIter end)
102 {
103 using namespace regex_constants;
104 if(this->eat_ws_(begin, end) == end)
105 {
106 return regex_constants::token_end_of_pattern;
107 }
108
109 switch(*begin)
110 {
111 case BOOST_XPR_CHAR_(char_type, '\\'): return this->get_escape_token(++begin, end);
112 case BOOST_XPR_CHAR_(char_type, '.'): ++begin; return token_any;
113 case BOOST_XPR_CHAR_(char_type, '^'): ++begin; return token_assert_begin_line;
114 case BOOST_XPR_CHAR_(char_type, '$'): ++begin; return token_assert_end_line;
115 case BOOST_XPR_CHAR_(char_type, '('): ++begin; return token_group_begin;
116 case BOOST_XPR_CHAR_(char_type, ')'): ++begin; return token_group_end;
117 case BOOST_XPR_CHAR_(char_type, '|'): ++begin; return token_alternate;
118 case BOOST_XPR_CHAR_(char_type, '['): ++begin; return token_charset_begin;
119
120 case BOOST_XPR_CHAR_(char_type, '*'):
121 case BOOST_XPR_CHAR_(char_type, '+'):
122 case BOOST_XPR_CHAR_(char_type, '?'):
123 return token_invalid_quantifier;
124
125 case BOOST_XPR_CHAR_(char_type, ']'):
126 case BOOST_XPR_CHAR_(char_type, '{'):
127 default:
128 return token_literal;
129 }
130 }
131
132 ///////////////////////////////////////////////////////////////////////////////
133 // get_quant_spec
134 template<typename FwdIter>
135 bool get_quant_spec(FwdIter &begin, FwdIter end, detail::quant_spec &spec)
136 {
137 using namespace regex_constants;
138 FwdIter old_begin;
139
140 if(this->eat_ws_(begin, end) == end)
141 {
142 return false;
143 }
144
145 switch(*begin)
146 {
147 case BOOST_XPR_CHAR_(char_type, '*'):
148 spec.min_ = 0;
149 spec.max_ = (std::numeric_limits<unsigned int>::max)();
150 break;
151
152 case BOOST_XPR_CHAR_(char_type, '+'):
153 spec.min_ = 1;
154 spec.max_ = (std::numeric_limits<unsigned int>::max)();
155 break;
156
157 case BOOST_XPR_CHAR_(char_type, '?'):
158 spec.min_ = 0;
159 spec.max_ = 1;
160 break;
161
162 case BOOST_XPR_CHAR_(char_type, '{'):
163 old_begin = this->eat_ws_(++begin, end);
164 spec.min_ = spec.max_ = detail::toi(begin, end, this->traits());
165 BOOST_XPR_ENSURE_
166 (
167 begin != old_begin && begin != end, error_brace, "invalid quantifier"
168 );
169
170 if(*begin == BOOST_XPR_CHAR_(char_type, ','))
171 {
172 old_begin = this->eat_ws_(++begin, end);
173 spec.max_ = detail::toi(begin, end, this->traits());
174 BOOST_XPR_ENSURE_
175 (
176 begin != end && BOOST_XPR_CHAR_(char_type, '}') == *begin
177 , error_brace, "invalid quantifier"
178 );
179
180 if(begin == old_begin)
181 {
182 spec.max_ = (std::numeric_limits<unsigned int>::max)();
183 }
184 else
185 {
186 BOOST_XPR_ENSURE_
187 (
188 spec.min_ <= spec.max_, error_badbrace, "invalid quantification range"
189 );
190 }
191 }
192 else
193 {
194 BOOST_XPR_ENSURE_
195 (
196 BOOST_XPR_CHAR_(char_type, '}') == *begin, error_brace, "invalid quantifier"
197 );
198 }
199 break;
200
201 default:
202 return false;
203 }
204
205 spec.greedy_ = true;
206 if(this->eat_ws_(++begin, end) != end && BOOST_XPR_CHAR_(char_type, '?') == *begin)
207 {
208 ++begin;
209 spec.greedy_ = false;
210 }
211
212 return true;
213 }
214
215 ///////////////////////////////////////////////////////////////////////////
216 // get_group_type
217 template<typename FwdIter>
218 regex_constants::compiler_token_type get_group_type(FwdIter &begin, FwdIter end, string_type &name)
219 {
220 using namespace regex_constants;
221 if(this->eat_ws_(begin, end) != end && BOOST_XPR_CHAR_(char_type, '?') == *begin)
222 {
223 this->eat_ws_(++begin, end);
224 BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
225
226 switch(*begin)
227 {
228 case BOOST_XPR_CHAR_(char_type, ':'): ++begin; return token_no_mark;
229 case BOOST_XPR_CHAR_(char_type, '>'): ++begin; return token_independent_sub_expression;
230 case BOOST_XPR_CHAR_(char_type, '#'): ++begin; return token_comment;
231 case BOOST_XPR_CHAR_(char_type, '='): ++begin; return token_positive_lookahead;
232 case BOOST_XPR_CHAR_(char_type, '!'): ++begin; return token_negative_lookahead;
233 case BOOST_XPR_CHAR_(char_type, 'R'): ++begin; return token_recurse;
234 case BOOST_XPR_CHAR_(char_type, '$'):
235 this->get_name_(++begin, end, name);
236 BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
237 if(BOOST_XPR_CHAR_(char_type, '=') == *begin)
238 {
239 ++begin;
240 return token_rule_assign;
241 }
242 return token_rule_ref;
243
244 case BOOST_XPR_CHAR_(char_type, '<'):
245 this->eat_ws_(++begin, end);
246 BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
247 switch(*begin)
248 {
249 case BOOST_XPR_CHAR_(char_type, '='): ++begin; return token_positive_lookbehind;
250 case BOOST_XPR_CHAR_(char_type, '!'): ++begin; return token_negative_lookbehind;
251 default:
252 BOOST_THROW_EXCEPTION(regex_error(error_badbrace, "unrecognized extension"));
253 }
254
255 case BOOST_XPR_CHAR_(char_type, 'P'):
256 this->eat_ws_(++begin, end);
257 BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
258 switch(*begin)
259 {
260 case BOOST_XPR_CHAR_(char_type, '<'):
261 this->get_name_(++begin, end, name);
262 BOOST_XPR_ENSURE_(begin != end && BOOST_XPR_CHAR_(char_type, '>') == *begin++, error_paren, "incomplete extension");
263 return token_named_mark;
264 case BOOST_XPR_CHAR_(char_type, '='):
265 this->get_name_(++begin, end, name);
266 BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
267 return token_named_mark_ref;
268 default:
269 BOOST_THROW_EXCEPTION(regex_error(error_badbrace, "unrecognized extension"));
270 }
271
272 case BOOST_XPR_CHAR_(char_type, 'i'):
273 case BOOST_XPR_CHAR_(char_type, 'm'):
274 case BOOST_XPR_CHAR_(char_type, 's'):
275 case BOOST_XPR_CHAR_(char_type, 'x'):
276 case BOOST_XPR_CHAR_(char_type, '-'):
277 return this->parse_mods_(begin, end);
278
279 default:
280 BOOST_THROW_EXCEPTION(regex_error(error_badbrace, "unrecognized extension"));
281 }
282 }
283
284 return token_literal;
285 }
286
287 //////////////////////////////////////////////////////////////////////////
288 // get_charset_token
289 // NOTE: white-space is *never* ignored in a charset.
290 template<typename FwdIter>
291 regex_constants::compiler_token_type get_charset_token(FwdIter &begin, FwdIter end)
292 {
293 using namespace regex_constants;
294 BOOST_ASSERT(begin != end);
295 switch(*begin)
296 {
297 case BOOST_XPR_CHAR_(char_type, '^'): ++begin; return token_charset_invert;
298 case BOOST_XPR_CHAR_(char_type, '-'): ++begin; return token_charset_hyphen;
299 case BOOST_XPR_CHAR_(char_type, ']'): ++begin; return token_charset_end;
300 case BOOST_XPR_CHAR_(char_type, '['):
301 {
302 FwdIter next = begin; ++next;
303 if(next != end)
304 {
305 BOOST_XPR_ENSURE_(
306 *next != BOOST_XPR_CHAR_(char_type, '=')
307 , error_collate
308 , "equivalence classes are not yet supported"
309 );
310
311 BOOST_XPR_ENSURE_(
312 *next != BOOST_XPR_CHAR_(char_type, '.')
313 , error_collate
314 , "collation sequences are not yet supported"
315 );
316
317 if(*next == BOOST_XPR_CHAR_(char_type, ':'))
318 {
319 begin = ++next;
320 return token_posix_charset_begin;
321 }
322 }
323 }
324 break;
325 case BOOST_XPR_CHAR_(char_type, ':'):
326 {
327 FwdIter next = begin; ++next;
328 if(next != end && *next == BOOST_XPR_CHAR_(char_type, ']'))
329 {
330 begin = ++next;
331 return token_posix_charset_end;
332 }
333 }
334 break;
335 case BOOST_XPR_CHAR_(char_type, '\\'):
336 if(++begin != end)
337 {
338 switch(*begin)
339 {
340 case BOOST_XPR_CHAR_(char_type, 'b'): ++begin; return token_charset_backspace;
341 default:;
342 }
343 }
344 return token_escape;
345 default:;
346 }
347 return token_literal;
348 }
349
350 //////////////////////////////////////////////////////////////////////////
351 // get_escape_token
352 template<typename FwdIter>
353 regex_constants::compiler_token_type get_escape_token(FwdIter &begin, FwdIter end)
354 {
355 using namespace regex_constants;
356 if(begin != end)
357 {
358 switch(*begin)
359 {
360 //case BOOST_XPR_CHAR_(char_type, 'a'): ++begin; return token_escape_bell;
361 //case BOOST_XPR_CHAR_(char_type, 'c'): ++begin; return token_escape_control;
362 //case BOOST_XPR_CHAR_(char_type, 'e'): ++begin; return token_escape_escape;
363 //case BOOST_XPR_CHAR_(char_type, 'f'): ++begin; return token_escape_formfeed;
364 //case BOOST_XPR_CHAR_(char_type, 'n'): ++begin; return token_escape_newline;
365 //case BOOST_XPR_CHAR_(char_type, 't'): ++begin; return token_escape_horizontal_tab;
366 //case BOOST_XPR_CHAR_(char_type, 'v'): ++begin; return token_escape_vertical_tab;
367 case BOOST_XPR_CHAR_(char_type, 'A'): ++begin; return token_assert_begin_sequence;
368 case BOOST_XPR_CHAR_(char_type, 'b'): ++begin; return token_assert_word_boundary;
369 case BOOST_XPR_CHAR_(char_type, 'B'): ++begin; return token_assert_not_word_boundary;
370 case BOOST_XPR_CHAR_(char_type, 'E'): ++begin; return token_quote_meta_end;
371 case BOOST_XPR_CHAR_(char_type, 'Q'): ++begin; return token_quote_meta_begin;
372 case BOOST_XPR_CHAR_(char_type, 'Z'): ++begin; return token_assert_end_sequence;
373 // Non-standard extension to ECMAScript syntax
374 case BOOST_XPR_CHAR_(char_type, '<'): ++begin; return token_assert_word_begin;
375 case BOOST_XPR_CHAR_(char_type, '>'): ++begin; return token_assert_word_end;
376 default:; // fall-through
377 }
378 }
379
380 return token_escape;
381 }
382
383 private:
384
385 //////////////////////////////////////////////////////////////////////////
386 // parse_mods_
387 template<typename FwdIter>
388 regex_constants::compiler_token_type parse_mods_(FwdIter &begin, FwdIter end)
389 {
390 using namespace regex_constants;
391 bool set = true;
392 do switch(*begin)
393 {
394 case BOOST_XPR_CHAR_(char_type, 'i'): this->flag_(set, icase_); break;
395 case BOOST_XPR_CHAR_(char_type, 'm'): this->flag_(!set, single_line); break;
396 case BOOST_XPR_CHAR_(char_type, 's'): this->flag_(!set, not_dot_newline); break;
397 case BOOST_XPR_CHAR_(char_type, 'x'): this->flag_(set, ignore_white_space); break;
398 case BOOST_XPR_CHAR_(char_type, ':'): ++begin; BOOST_FALLTHROUGH;
399 case BOOST_XPR_CHAR_(char_type, ')'): return token_no_mark;
400 case BOOST_XPR_CHAR_(char_type, '-'): if(false == (set = !set)) break; BOOST_FALLTHROUGH;
401 default: BOOST_THROW_EXCEPTION(regex_error(error_paren, "unknown pattern modifier"));
402 }
403 while(BOOST_XPR_ENSURE_(++begin != end, error_paren, "incomplete extension"));
404 // this return is technically unreachable, but this must
405 // be here to work around a bug in gcc 4.0
406 return token_no_mark;
407 }
408
409 ///////////////////////////////////////////////////////////////////////////////
410 // flag_
411 void flag_(bool set, regex_constants::syntax_option_type flag)
412 {
413 this->flags_ = set ? (this->flags_ | flag) : (this->flags_ & ~flag);
414 }
415
416 ///////////////////////////////////////////////////////////////////////////
417 // is_space_
418 bool is_space_(char_type ch) const
419 {
420 return 0 != this->space_ && this->traits().isctype(ch, this->space_);
421 }
422
423 ///////////////////////////////////////////////////////////////////////////
424 // is_alnum_
425 bool is_alnum_(char_type ch) const
426 {
427 return 0 != this->alnum_ && this->traits().isctype(ch, this->alnum_);
428 }
429
430 ///////////////////////////////////////////////////////////////////////////
431 // get_name_
432 template<typename FwdIter>
433 void get_name_(FwdIter &begin, FwdIter end, string_type &name)
434 {
435 this->eat_ws_(begin, end);
436 for(name.clear(); begin != end && this->is_alnum_(*begin); ++begin)
437 {
438 name.push_back(*begin);
439 }
440 this->eat_ws_(begin, end);
441 BOOST_XPR_ENSURE_(!name.empty(), regex_constants::error_paren, "incomplete extension");
442 }
443
444 ///////////////////////////////////////////////////////////////////////////////
445 // eat_ws_
446 template<typename FwdIter>
447 FwdIter &eat_ws_(FwdIter &begin, FwdIter end)
448 {
449 if(0 != (regex_constants::ignore_white_space & this->flags()))
450 {
451 while(end != begin && (BOOST_XPR_CHAR_(char_type, '#') == *begin || this->is_space_(*begin)))
452 {
453 if(BOOST_XPR_CHAR_(char_type, '#') == *begin++)
454 {
455 while(end != begin && BOOST_XPR_CHAR_(char_type, '\n') != *begin++) {}
456 }
457 else
458 {
459 for(; end != begin && this->is_space_(*begin); ++begin) {}
460 }
461 }
462 }
463
464 return begin;
465 }
466
467 regex_traits traits_;
468 regex_constants::syntax_option_type flags_;
469 typename regex_traits::char_class_type space_;
470 typename regex_traits::char_class_type alnum_;
471 };
472
473 }} // namespace boost::xpressive
474
475 #endif