]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/libs/spirit/include/boost/spirit/home/support/detail/lexer/parser/parser.hpp
bump version to 12.2.2-pve1
[ceph.git] / ceph / src / boost / libs / spirit / include / boost / spirit / home / support / detail / lexer / parser / parser.hpp
1 // parser.hpp
2 // Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 #ifndef BOOST_LEXER_PARSER_HPP
7 #define BOOST_LEXER_PARSER_HPP
8
9 #include <boost/assert.hpp>
10 #include "tree/end_node.hpp"
11 #include "tree/iteration_node.hpp"
12 #include "tree/leaf_node.hpp"
13 #include "../runtime_error.hpp"
14 #include "tree/selection_node.hpp"
15 #include "tree/sequence_node.hpp"
16 #include "../size_t.hpp"
17 #include "tokeniser/re_tokeniser.hpp"
18
19 namespace boost
20 {
21 namespace lexer
22 {
23 namespace detail
24 {
25 template<typename CharT>
26 class basic_parser
27 {
28 public:
29 typedef basic_re_tokeniser<CharT> tokeniser;
30 typedef typename tokeniser::string string;
31 typedef std::map<string, const node *> macro_map;
32 typedef node::node_ptr_vector node_ptr_vector;
33 typedef typename tokeniser::num_token token;
34
35 /*
36 General principles of regex parsing:
37 - Every regex is a sequence of sub-regexes.
38 - Regexes consist of operands and operators
39 - All operators decompose to sequence, selection ('|') and iteration ('*')
40 - Regex tokens are stored on the stack.
41 - When a complete sequence of regex tokens is on the stack it is processed.
42
43 Grammar:
44
45 <REGEX> -> <OREXP>
46 <OREXP> -> <SEQUENCE> | <OREXP>'|'<SEQUENCE>
47 <SEQUENCE> -> <SUB>
48 <SUB> -> <EXPRESSION> | <SUB><EXPRESSION>
49 <EXPRESSION> -> <REPEAT>
50 <REPEAT> -> charset | macro | '('<REGEX>')' | <REPEAT><DUPLICATE>
51 <DUPLICATE> -> '?' | '*' | '+' | '{n[,[m]]}'
52 */
53 static node *parse (const CharT *start_, const CharT * const end_,
54 const std::size_t id_, const std::size_t unique_id_,
55 const std::size_t dfa_state_, const regex_flags flags_,
56 const std::locale &locale_, node_ptr_vector &node_ptr_vector_,
57 const macro_map &macromap_, typename tokeniser::token_map &map_,
58 bool &seen_BOL_assertion_, bool &seen_EOL_assertion_)
59 {
60 node *root_ = 0;
61 state state_ (start_, end_, flags_, locale_);
62 token lhs_token_;
63 token rhs_token_;
64 token_stack token_stack_;
65 tree_node_stack tree_node_stack_;
66 char action_ = 0;
67
68 token_stack_.push (rhs_token_);
69 tokeniser::next (state_, map_, rhs_token_);
70
71 do
72 {
73 lhs_token_ = token_stack_.top ();
74 action_ = lhs_token_.precedence (rhs_token_._type);
75
76 switch (action_)
77 {
78 case '<':
79 case '=':
80 token_stack_.push (rhs_token_);
81 tokeniser::next (state_, map_, rhs_token_);
82 break;
83 case '>':
84 reduce (token_stack_, macromap_, node_ptr_vector_,
85 tree_node_stack_);
86 break;
87 default:
88 std::ostringstream ss_;
89
90 ss_ << "A syntax error occurred: '" <<
91 lhs_token_.precedence_string () <<
92 "' against '" << rhs_token_.precedence_string () <<
93 "' at index " << state_.index () << ".";
94 throw runtime_error (ss_.str ().c_str ());
95 break;
96 }
97 } while (!token_stack_.empty ());
98
99 if (tree_node_stack_.empty ())
100 {
101 throw runtime_error ("Empty rules are not allowed.");
102 }
103
104 BOOST_ASSERT(tree_node_stack_.size () == 1);
105
106 node *lhs_node_ = tree_node_stack_.top ();
107
108 tree_node_stack_.pop ();
109
110 if (id_ == 0)
111 {
112 // Macros have no end state...
113 root_ = lhs_node_;
114 }
115 else
116 {
117 node_ptr_vector_->push_back (static_cast<end_node *>(0));
118
119 node *rhs_node_ = new end_node (id_, unique_id_, dfa_state_);
120
121 node_ptr_vector_->back () = rhs_node_;
122 node_ptr_vector_->push_back (static_cast<sequence_node *>(0));
123 node_ptr_vector_->back () = new sequence_node
124 (lhs_node_, rhs_node_);
125 root_ = node_ptr_vector_->back ();
126 }
127
128 // Done this way as bug in VC++ 6 prevents |= operator working
129 // properly!
130 if (state_._seen_BOL_assertion) seen_BOL_assertion_ = true;
131
132 if (state_._seen_EOL_assertion) seen_EOL_assertion_ = true;
133
134 return root_;
135 }
136
137 private:
138 typedef typename tokeniser::state state;
139 typedef std::stack<token> token_stack;
140 typedef node::node_stack tree_node_stack;
141
142 static void reduce (token_stack &token_stack_,
143 const macro_map &macromap_, node_ptr_vector &node_vector_ptr_,
144 tree_node_stack &tree_node_stack_)
145 {
146 typename tokeniser::num_token lhs_;
147 typename tokeniser::num_token rhs_;
148 token_stack handle_;
149 char action_ = 0;
150
151 do
152 {
153 rhs_ = token_stack_.top ();
154 token_stack_.pop ();
155 handle_.push (rhs_);
156
157 if (!token_stack_.empty ())
158 {
159 lhs_ = token_stack_.top ();
160 action_ = lhs_.precedence (rhs_._type);
161 }
162 } while (!token_stack_.empty () && action_ == '=');
163
164 BOOST_ASSERT(token_stack_.empty () || action_ == '<');
165
166 switch (rhs_._type)
167 {
168 case token::BEGIN:
169 // finished processing so exit
170 break;
171 case token::REGEX:
172 // finished parsing, nothing to do
173 break;
174 case token::OREXP:
175 orexp (handle_, token_stack_, node_vector_ptr_, tree_node_stack_);
176 break;
177 case token::SEQUENCE:
178 token_stack_.push (token::OREXP);
179 break;
180 case token::SUB:
181 sub (handle_, token_stack_, node_vector_ptr_, tree_node_stack_);
182 break;
183 case token::EXPRESSION:
184 token_stack_.push (token::SUB);
185 break;
186 case token::REPEAT:
187 repeat (handle_, token_stack_);
188 break;
189 case token::CHARSET:
190 charset (handle_, token_stack_, node_vector_ptr_,
191 tree_node_stack_);
192 break;
193 case token::MACRO:
194 macro (handle_, token_stack_, macromap_, node_vector_ptr_,
195 tree_node_stack_);
196 break;
197 case token::OPENPAREN:
198 openparen (handle_, token_stack_);
199 break;
200 case token::OPT:
201 case token::AOPT:
202 optional (rhs_._type == token::OPT, node_vector_ptr_,
203 tree_node_stack_);
204 token_stack_.push (token::DUP);
205 break;
206 case token::ZEROORMORE:
207 case token::AZEROORMORE:
208 zero_or_more (rhs_._type == token::ZEROORMORE, node_vector_ptr_,
209 tree_node_stack_);
210 token_stack_.push (token::DUP);
211 break;
212 case token::ONEORMORE:
213 case token::AONEORMORE:
214 one_or_more (rhs_._type == token::ONEORMORE, node_vector_ptr_,
215 tree_node_stack_);
216 token_stack_.push (token::DUP);
217 break;
218 case token::REPEATN:
219 case token::AREPEATN:
220 repeatn (rhs_._type == token::REPEATN, handle_.top (),
221 node_vector_ptr_, tree_node_stack_);
222 token_stack_.push (token::DUP);
223 break;
224 default:
225 throw runtime_error
226 ("Internal error regex_parser::reduce");
227 break;
228 }
229 }
230
231 static void orexp (token_stack &handle_, token_stack &token_stack_,
232 node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
233 {
234 BOOST_ASSERT(handle_.top ()._type == token::OREXP &&
235 (handle_.size () == 1 || handle_.size () == 3));
236
237 if (handle_.size () == 1)
238 {
239 token_stack_.push (token::REGEX);
240 }
241 else
242 {
243 handle_.pop ();
244 BOOST_ASSERT(handle_.top ()._type == token::OR);
245 handle_.pop ();
246 BOOST_ASSERT(handle_.top ()._type == token::SEQUENCE);
247 perform_or (node_ptr_vector_, tree_node_stack_);
248 token_stack_.push (token::OREXP);
249 }
250 }
251
252 static void sub (token_stack &handle_, token_stack &token_stack_,
253 node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
254 {
255 BOOST_ASSERT(handle_.top ()._type == token::SUB &&
256 (handle_.size () == 1 || handle_.size () == 2));
257
258 if (handle_.size () == 1)
259 {
260 token_stack_.push (token::SEQUENCE);
261 }
262 else
263 {
264 handle_.pop ();
265 BOOST_ASSERT(handle_.top ()._type == token::EXPRESSION);
266 // perform join
267 sequence (node_ptr_vector_, tree_node_stack_);
268 token_stack_.push (token::SUB);
269 }
270 }
271
272 static void repeat (token_stack &handle_, token_stack &token_stack_)
273 {
274 BOOST_ASSERT(handle_.top ()._type == token::REPEAT &&
275 handle_.size () >= 1 && handle_.size () <= 3);
276
277 if (handle_.size () == 1)
278 {
279 token_stack_.push (token::EXPRESSION);
280 }
281 else
282 {
283 handle_.pop ();
284 BOOST_ASSERT(handle_.top ()._type == token::DUP);
285 token_stack_.push (token::REPEAT);
286 }
287 }
288
289 static void charset (token_stack &handle_, token_stack &token_stack_,
290 node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
291 {
292 BOOST_ASSERT(handle_.top ()._type == token::CHARSET &&
293 handle_.size () == 1);
294 // store charset
295 node_ptr_vector_->push_back (static_cast<leaf_node *>(0));
296
297 const size_t id_ = handle_.top ()._id;
298
299 node_ptr_vector_->back () = new leaf_node (id_, true);
300 tree_node_stack_.push (node_ptr_vector_->back ());
301 token_stack_.push (token::REPEAT);
302 }
303
304 static void macro (token_stack &handle_, token_stack &token_stack_,
305 const macro_map &macromap_, node_ptr_vector &node_ptr_vector_,
306 tree_node_stack &tree_node_stack_)
307 {
308 token &top_ = handle_.top ();
309
310 BOOST_ASSERT(top_._type == token::MACRO && handle_.size () == 1);
311
312 typename macro_map::const_iterator iter_ =
313 macromap_.find (top_._macro);
314
315 if (iter_ == macromap_.end ())
316 {
317 const CharT *name_ = top_._macro;
318 std::basic_stringstream<CharT> ss_;
319 std::ostringstream os_;
320
321 os_ << "Unknown MACRO name '";
322
323 while (*name_)
324 {
325 os_ << ss_.narrow (*name_++, ' ');
326 }
327
328 os_ << "'.";
329 throw runtime_error (os_.str ());
330 }
331
332 tree_node_stack_.push (iter_->second->copy (node_ptr_vector_));
333 token_stack_.push (token::REPEAT);
334 }
335
336 static void openparen (token_stack &handle_, token_stack &token_stack_)
337 {
338 BOOST_ASSERT(handle_.top ()._type == token::OPENPAREN &&
339 handle_.size () == 3);
340 handle_.pop ();
341 BOOST_ASSERT(handle_.top ()._type == token::REGEX);
342 handle_.pop ();
343 BOOST_ASSERT(handle_.top ()._type == token::CLOSEPAREN);
344 token_stack_.push (token::REPEAT);
345 }
346
347 static void perform_or (node_ptr_vector &node_ptr_vector_,
348 tree_node_stack &tree_node_stack_)
349 {
350 // perform or
351 node *rhs_ = tree_node_stack_.top ();
352
353 tree_node_stack_.pop ();
354
355 node *lhs_ = tree_node_stack_.top ();
356
357 node_ptr_vector_->push_back (static_cast<selection_node *>(0));
358 node_ptr_vector_->back () = new selection_node (lhs_, rhs_);
359 tree_node_stack_.top () = node_ptr_vector_->back ();
360 }
361
362 static void sequence (node_ptr_vector &node_ptr_vector_,
363 tree_node_stack &tree_node_stack_)
364 {
365 node *rhs_ = tree_node_stack_.top ();
366
367 tree_node_stack_.pop ();
368
369 node *lhs_ = tree_node_stack_.top ();
370
371 node_ptr_vector_->push_back (static_cast<sequence_node *>(0));
372 node_ptr_vector_->back () = new sequence_node (lhs_, rhs_);
373 tree_node_stack_.top () = node_ptr_vector_->back ();
374 }
375
376 static void optional (const bool greedy_,
377 node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
378 {
379 // perform ?
380 node *lhs_ = tree_node_stack_.top ();
381 // You don't know if lhs_ is a leaf_node, so get firstpos.
382 node::node_vector &firstpos_ = lhs_->firstpos ();
383
384 for (node::node_vector::iterator iter_ = firstpos_.begin (),
385 end_ = firstpos_.end (); iter_ != end_; ++iter_)
386 {
387 // These are leaf_nodes!
388 (*iter_)->greedy (greedy_);
389 }
390
391 node_ptr_vector_->push_back (static_cast<leaf_node *>(0));
392
393 node *rhs_ = new leaf_node (null_token, greedy_);
394
395 node_ptr_vector_->back () = rhs_;
396 node_ptr_vector_->push_back (static_cast<selection_node *>(0));
397 node_ptr_vector_->back () = new selection_node (lhs_, rhs_);
398 tree_node_stack_.top () = node_ptr_vector_->back ();
399 }
400
401 static void zero_or_more (const bool greedy_,
402 node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
403 {
404 // perform *
405 node *ptr_ = tree_node_stack_.top ();
406
407 node_ptr_vector_->push_back (static_cast<iteration_node *>(0));
408 node_ptr_vector_->back () = new iteration_node (ptr_, greedy_);
409 tree_node_stack_.top () = node_ptr_vector_->back ();
410 }
411
412 static void one_or_more (const bool greedy_,
413 node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
414 {
415 // perform +
416 node *lhs_ = tree_node_stack_.top ();
417 node *copy_ = lhs_->copy (node_ptr_vector_);
418
419 node_ptr_vector_->push_back (static_cast<iteration_node *>(0));
420
421 node *rhs_ = new iteration_node (copy_, greedy_);
422
423 node_ptr_vector_->back () = rhs_;
424 node_ptr_vector_->push_back (static_cast<sequence_node *>(0));
425 node_ptr_vector_->back () = new sequence_node (lhs_, rhs_);
426 tree_node_stack_.top () = node_ptr_vector_->back ();
427 }
428
429 // This is one of the most mind bending routines in this code...
430 static void repeatn (const bool greedy_, const token &token_,
431 node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
432 {
433 // perform {n[,[m]]}
434 // Semantic checks have already been performed.
435 // {0,} = *
436 // {0,1} = ?
437 // {1,} = +
438 // therefore we do not check for these cases.
439 if (!(token_._min == 1 && !token_._comma))
440 {
441 const std::size_t top_ = token_._min > 0 ?
442 token_._min : token_._max;
443
444 if (token_._min == 0)
445 {
446 optional (greedy_, node_ptr_vector_, tree_node_stack_);
447 }
448
449 node *prev_ = tree_node_stack_.top ()->copy (node_ptr_vector_);
450 node *curr_ = 0;
451
452 for (std::size_t i_ = 2; i_ < top_; ++i_)
453 {
454 curr_ = prev_->copy (node_ptr_vector_);
455 tree_node_stack_.push (static_cast<node *>(0));
456 tree_node_stack_.top () = prev_;
457 sequence (node_ptr_vector_, tree_node_stack_);
458 prev_ = curr_;
459 }
460
461 if (token_._comma && token_._min > 0)
462 {
463 if (token_._min > 1)
464 {
465 curr_ = prev_->copy (node_ptr_vector_);
466 tree_node_stack_.push (static_cast<node *>(0));
467 tree_node_stack_.top () = prev_;
468 sequence (node_ptr_vector_, tree_node_stack_);
469 prev_ = curr_;
470 }
471
472 if (token_._comma && token_._max)
473 {
474 tree_node_stack_.push (static_cast<node *>(0));
475 tree_node_stack_.top () = prev_;
476 optional (greedy_, node_ptr_vector_, tree_node_stack_);
477 prev_ = tree_node_stack_.top ();
478 tree_node_stack_.pop ();
479
480 const std::size_t count_ = token_._max - token_._min;
481
482 for (std::size_t i_ = 1; i_ < count_; ++i_)
483 {
484 curr_ = prev_->copy (node_ptr_vector_);
485 tree_node_stack_.push (static_cast<node *>(0));
486 tree_node_stack_.top () = prev_;
487 sequence (node_ptr_vector_, tree_node_stack_);
488 prev_ = curr_;
489 }
490 }
491 else
492 {
493 tree_node_stack_.push (static_cast<node *>(0));
494 tree_node_stack_.top () = prev_;
495 zero_or_more (greedy_, node_ptr_vector_, tree_node_stack_);
496 prev_ = tree_node_stack_.top ();
497 tree_node_stack_.pop ();
498 }
499 }
500
501 tree_node_stack_.push (static_cast<node *>(0));
502 tree_node_stack_.top () = prev_;
503 sequence (node_ptr_vector_, tree_node_stack_);
504 }
505 }
506 };
507 }
508 }
509 }
510
511 #endif