]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/boost/spirit/home/support/detail/lexer/rules.hpp
update sources to v12.2.3
[ceph.git] / ceph / src / boost / boost / spirit / home / support / detail / lexer / rules.hpp
1 // rules.hpp
2 // Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 #ifndef BOOST_LEXER_RULES_HPP
7 #define BOOST_LEXER_RULES_HPP
8
9 #include "consts.hpp"
10 #include <deque>
11 #include <locale>
12 #include <map>
13 #include "runtime_error.hpp"
14 #include <set>
15 #include "size_t.hpp"
16 #include <sstream>
17 #include <string>
18 #include <vector>
19
20 namespace boost
21 {
22 namespace lexer
23 {
24 namespace detail
25 {
26 // return name of initial state
27 template <typename CharT>
28 struct strings;
29
30 template <>
31 struct strings<char>
32 {
33 static const char *initial ()
34 {
35 return "INITIAL";
36 }
37
38 static const char *dot ()
39 {
40 return ".";
41 }
42
43 static const char *all_states ()
44 {
45 return "*";
46 }
47
48 static const char *char_name ()
49 {
50 return "char";
51 }
52
53 static const char *char_prefix ()
54 {
55 return "";
56 }
57 };
58
59 template <>
60 struct strings<wchar_t>
61 {
62 static const wchar_t *initial ()
63 {
64 return L"INITIAL";
65 }
66
67 static const wchar_t *dot ()
68 {
69 return L".";
70 }
71
72 static const wchar_t *all_states ()
73 {
74 return L"*";
75 }
76
77 static const char *char_name ()
78 {
79 return "wchar_t";
80 }
81
82 static const char *char_prefix ()
83 {
84 return "L";
85 }
86 };
87 }
88
89 template<typename CharT>
90 class basic_rules
91 {
92 public:
93 typedef std::vector<std::size_t> id_vector;
94 typedef std::deque<id_vector> id_vector_deque;
95 typedef std::basic_string<CharT> string;
96 typedef std::deque<string> string_deque;
97 typedef std::deque<string_deque> string_deque_deque;
98 typedef std::set<string> string_set;
99 typedef std::pair<string, string> string_pair;
100 typedef std::deque<string_pair> string_pair_deque;
101 typedef std::map<string, std::size_t> string_size_t_map;
102 typedef std::pair<string, std::size_t> string_size_t_pair;
103
104 basic_rules (const regex_flags flags_ = dot_not_newline,
105 std::size_t (*counter_ptr_) () = 0) :
106 _flags (flags_),
107 _counter (0),
108 _counter_ptr (counter_ptr_)
109 {
110 add_state (initial ());
111 }
112
113 void clear ()
114 {
115 _statemap.clear ();
116 _macrodeque.clear ();
117 _macroset.clear ();
118 _regexes.clear ();
119 _ids.clear ();
120 _unique_ids.clear ();
121 _states.clear ();
122 _flags = dot_not_newline;
123 _locale = std::locale ();
124 add_state (initial ());
125 }
126
127 void clear (const CharT *state_name_)
128 {
129 std::size_t state_ = state (state_name_);
130
131 if (state_ != npos)
132 {
133 _regexes[state_].clear ();
134 _ids[state_].clear ();
135 _unique_ids[state_].clear ();
136 _states[state_].clear ();
137 }
138 }
139
140 void flags (const regex_flags flags_)
141 {
142 _flags = flags_;
143 }
144
145 regex_flags flags () const
146 {
147 return _flags;
148 }
149
150 std::size_t next_unique_id ()
151 {
152 return _counter_ptr ? _counter_ptr () : _counter++;
153 }
154
155 std::locale imbue (std::locale &locale_)
156 {
157 std::locale loc_ = _locale;
158
159 _locale = locale_;
160 return loc_;
161 }
162
163 const std::locale &locale () const
164 {
165 return _locale;
166 }
167
168 std::size_t state (const CharT *name_) const
169 {
170 std::size_t state_ = npos;
171 typename string_size_t_map::const_iterator iter_ =
172 _statemap.find (name_);
173
174 if (iter_ != _statemap.end ())
175 {
176 state_ = iter_->second;
177 }
178
179 return state_;
180 }
181
182 const CharT *state (const std::size_t index_) const
183 {
184 if (index_ == 0)
185 {
186 return initial ();
187 }
188 else
189 {
190 const std::size_t vec_index_ = index_ - 1;
191
192 if (vec_index_ > _lexer_state_names.size () - 1)
193 {
194 return 0;
195 }
196 else
197 {
198 return _lexer_state_names[vec_index_].c_str ();
199 }
200 }
201 }
202
203 std::size_t add_state (const CharT *name_)
204 {
205 validate (name_);
206
207 if (_statemap.insert (string_size_t_pair (name_,
208 _statemap.size ())).second)
209 {
210 _regexes.push_back (string_deque ());
211 _ids.push_back (id_vector ());
212 _unique_ids.push_back (id_vector ());
213 _states.push_back (id_vector ());
214
215 if (string (name_) != initial ())
216 {
217 _lexer_state_names.push_back (name_);
218 }
219 }
220
221 // Initial is not stored, so no need to - 1.
222 return _lexer_state_names.size ();
223 }
224
225 void add_macro (const CharT *name_, const CharT *regex_)
226 {
227 add_macro (name_, string (regex_));
228 }
229
230 void add_macro (const CharT *name_, const CharT *regex_start_,
231 const CharT *regex_end_)
232 {
233 add_macro (name_, string (regex_start_, regex_end_));
234 }
235
236 void add_macro (const CharT *name_, const string &regex_)
237 {
238 validate (name_);
239
240 typename string_set::const_iterator iter_ = _macroset.find (name_);
241
242 if (iter_ == _macroset.end ())
243 {
244 _macrodeque.push_back (string_pair (name_, regex_));
245 _macroset.insert (name_);
246 }
247 else
248 {
249 std::basic_stringstream<CharT> ss_;
250 std::ostringstream os_;
251
252 os_ << "Attempt to redefine MACRO '";
253
254 while (*name_)
255 {
256 os_ << ss_.narrow (*name_++, static_cast<CharT> (' '));
257 }
258
259 os_ << "'.";
260 throw runtime_error (os_.str ());
261 }
262 }
263
264 void add_macros (const basic_rules<CharT> &rules_)
265 {
266 const string_pair_deque &macros_ = rules_.macrodeque ();
267 typename string_pair_deque::const_iterator macro_iter_ =
268 macros_.begin ();
269 typename string_pair_deque::const_iterator macro_end_ =
270 macros_.end ();
271
272 for (; macro_iter_ != macro_end_; ++macro_iter_)
273 {
274 add_macro (macro_iter_->first.c_str (),
275 macro_iter_->second.c_str ());
276 }
277 }
278
279 void merge_macros (const basic_rules<CharT> &rules_)
280 {
281 const string_pair_deque &macros_ = rules_.macrodeque ();
282 typename string_pair_deque::const_iterator macro_iter_ =
283 macros_.begin ();
284 typename string_pair_deque::const_iterator macro_end_ =
285 macros_.end ();
286 typename string_set::const_iterator macro_dest_iter_;
287 typename string_set::const_iterator macro_dest_end_ = _macroset.end ();
288
289 for (; macro_iter_ != macro_end_; ++macro_iter_)
290 {
291 macro_dest_iter_ = _macroset.find (macro_iter_->first);
292
293 if (macro_dest_iter_ == macro_dest_end_)
294 {
295 add_macro (macro_iter_->first.c_str (),
296 macro_iter_->second.c_str ());
297 }
298 }
299 }
300
301 std::size_t add (const CharT *regex_, const std::size_t id_)
302 {
303 return add (string (regex_), id_);
304 }
305
306 std::size_t add (const CharT *regex_start_, const CharT *regex_end_,
307 const std::size_t id_)
308 {
309 return add (string (regex_start_, regex_end_), id_);
310 }
311
312 std::size_t add (const string &regex_, const std::size_t id_)
313 {
314 const std::size_t counter_ = next_unique_id ();
315
316 check_for_invalid_id (id_);
317 _regexes.front ().push_back (regex_);
318 _ids.front ().push_back (id_);
319 _unique_ids.front ().push_back (counter_);
320 _states.front ().push_back (0);
321 return counter_;
322 }
323
324 std::size_t add (const CharT *curr_state_, const CharT *regex_,
325 const CharT *new_state_)
326 {
327 return add (curr_state_, string (regex_), new_state_);
328 }
329
330 std::size_t add (const CharT *curr_state_, const CharT *regex_start_,
331 const CharT *regex_end_, const CharT *new_state_)
332 {
333 return add (curr_state_, string (regex_start_, regex_end_),
334 new_state_);
335 }
336
337 std::size_t add (const CharT *curr_state_, const string &regex_,
338 const CharT *new_state_)
339 {
340 return add (curr_state_, regex_, 0, new_state_, false);
341 }
342
343 std::size_t add (const CharT *curr_state_, const CharT *regex_,
344 const std::size_t id_, const CharT *new_state_)
345 {
346 return add (curr_state_, string (regex_), id_, new_state_);
347 }
348
349 std::size_t add (const CharT *curr_state_, const CharT *regex_start_,
350 const CharT *regex_end_, const std::size_t id_,
351 const CharT *new_state_)
352 {
353 return add (curr_state_, string (regex_start_, regex_end_), id_,
354 new_state_);
355 }
356
357 std::size_t add (const CharT *curr_state_, const string &regex_,
358 const std::size_t id_, const CharT *new_state_)
359 {
360 return add (curr_state_, regex_, id_, new_state_, true);
361 }
362
363 void add (const CharT *source_, const basic_rules<CharT> &rules_,
364 const CharT *dest_, const CharT *to_ = detail::strings<CharT>::dot ())
365 {
366 const bool star_ = *source_ == '*' && *(source_ + 1) == 0;
367 const bool dest_dot_ = *dest_ == '.' && *(dest_ + 1) == 0;
368 const bool to_dot_ = *to_ == '.' && *(to_ + 1) == 0;
369 std::size_t state_ = 0;
370 const string_deque_deque &all_regexes_ = rules_.regexes ();
371 const id_vector_deque &all_ids_ = rules_.ids ();
372 const id_vector_deque &all_unique_ids_ = rules_.unique_ids ();
373 const id_vector_deque &all_states_ = rules_.states ();
374 typename string_deque::const_iterator regex_iter_;
375 typename string_deque::const_iterator regex_end_;
376 typename id_vector::const_iterator id_iter_;
377 typename id_vector::const_iterator uid_iter_;
378 typename id_vector::const_iterator state_iter_;
379
380 if (star_)
381 {
382 typename string_deque_deque::const_iterator all_regexes_iter_ =
383 all_regexes_.begin ();
384 typename string_deque_deque::const_iterator all_regexes_end_ =
385 all_regexes_.end ();
386 typename id_vector_deque::const_iterator all_ids_iter_ =
387 all_ids_.begin ();
388 typename id_vector_deque::const_iterator all_uids_iter_ =
389 all_unique_ids_.begin ();
390 typename id_vector_deque::const_iterator all_states_iter_ =
391 all_states_.begin ();
392
393 for (; all_regexes_iter_ != all_regexes_end_;
394 ++state_, ++all_regexes_iter_, ++all_ids_iter_,
395 ++all_uids_iter_, ++all_states_iter_)
396 {
397 regex_iter_ = all_regexes_iter_->begin ();
398 regex_end_ = all_regexes_iter_->end ();
399 id_iter_ = all_ids_iter_->begin ();
400 uid_iter_ = all_uids_iter_->begin ();
401 state_iter_ = all_states_iter_->begin ();
402
403 for (; regex_iter_ != regex_end_; ++regex_iter_, ++id_iter_,
404 ++uid_iter_, ++state_iter_)
405 {
406 // If ..._dot_ then lookup state name from rules_; otherwise
407 // pass name through.
408 add (dest_dot_ ? rules_.state (state_) : dest_, *regex_iter_,
409 *id_iter_, to_dot_ ? rules_.state (*state_iter_) : to_, true,
410 *uid_iter_);
411 }
412 }
413 }
414 else
415 {
416 const CharT *start_ = source_;
417 string state_name_;
418
419 while (*source_)
420 {
421 while (*source_ && *source_ != ',')
422 {
423 ++source_;
424 }
425
426 state_name_.assign (start_, source_);
427
428 if (*source_)
429 {
430 ++source_;
431 start_ = source_;
432 }
433
434 state_ = rules_.state (state_name_.c_str ());
435
436 if (state_ == npos)
437 {
438 std::basic_stringstream<CharT> ss_;
439 std::ostringstream os_;
440
441 os_ << "Unknown state name '";
442 source_ = state_name_.c_str ();
443
444 while (*source_)
445 {
446 os_ << ss_.narrow (*source_++, ' ');
447 }
448
449 os_ << "'.";
450 throw runtime_error (os_.str ());
451 }
452
453 regex_iter_ = all_regexes_[state_].begin ();
454 regex_end_ = all_regexes_[state_].end ();
455 id_iter_ = all_ids_[state_].begin ();
456 uid_iter_ = all_unique_ids_[state_].begin ();
457 state_iter_ = all_states_[state_].begin ();
458
459 for (; regex_iter_ != regex_end_; ++regex_iter_, ++id_iter_,
460 ++uid_iter_, ++state_iter_)
461 {
462 // If ..._dot_ then lookup state name from rules_; otherwise
463 // pass name through.
464 add (dest_dot_ ? state_name_.c_str () : dest_, *regex_iter_,
465 *id_iter_, to_dot_ ? rules_.state (*state_iter_) : to_, true,
466 *uid_iter_);
467 }
468 }
469 }
470 }
471 /*
472 void add (const CharT *curr_state_, const basic_rules<CharT> &rules_)
473 {
474 const string_deque_deque &regexes_ = rules_.regexes ();
475 const id_vector_deque &ids_ = rules_.ids ();
476 const id_vector_deque &unique_ids_ = rules_.unique_ids ();
477 typename string_deque_deque::const_iterator state_regex_iter_ =
478 regexes_.begin ();
479 typename string_deque_deque::const_iterator state_regex_end_ =
480 regexes_.end ();
481 typename id_vector_deque::const_iterator state_id_iter_ =
482 ids_.begin ();
483 typename id_vector_deque::const_iterator state_uid_iter_ =
484 unique_ids_.begin ();
485 typename string_deque::const_iterator regex_iter_;
486 typename string_deque::const_iterator regex_end_;
487 typename id_vector::const_iterator id_iter_;
488 typename id_vector::const_iterator uid_iter_;
489
490 for (; state_regex_iter_ != state_regex_end_; ++state_regex_iter_)
491 {
492 regex_iter_ = state_regex_iter_->begin ();
493 regex_end_ = state_regex_iter_->end ();
494 id_iter_ = state_id_iter_->begin ();
495 uid_iter_ = state_uid_iter_->begin ();
496
497 for (; regex_iter_ != regex_end_; ++regex_iter_, ++id_iter_,
498 ++uid_iter_)
499 {
500 add (curr_state_, *regex_iter_, *id_iter_, curr_state_, true,
501 *uid_iter_);
502 }
503 }
504 }
505 */
506 const string_size_t_map &statemap () const
507 {
508 return _statemap;
509 }
510
511 const string_pair_deque &macrodeque () const
512 {
513 return _macrodeque;
514 }
515
516 const string_deque_deque &regexes () const
517 {
518 return _regexes;
519 }
520
521 const id_vector_deque &ids () const
522 {
523 return _ids;
524 }
525
526 const id_vector_deque &unique_ids () const
527 {
528 return _unique_ids;
529 }
530
531 const id_vector_deque &states () const
532 {
533 return _states;
534 }
535
536 bool empty () const
537 {
538 typename string_deque_deque::const_iterator iter_ = _regexes.begin ();
539 typename string_deque_deque::const_iterator end_ = _regexes.end ();
540 bool empty_ = true;
541
542 for (; iter_ != end_; ++iter_)
543 {
544 if (!iter_->empty ())
545 {
546 empty_ = false;
547 break;
548 }
549 }
550
551 return empty_;
552 }
553
554 static const CharT *initial ()
555 {
556 return detail::strings<CharT>::initial ();
557 }
558
559 static const CharT *all_states ()
560 {
561 return detail::strings<CharT>::all_states ();
562 }
563
564 static const CharT *dot ()
565 {
566 return detail::strings<CharT>::dot ();
567 }
568
569 private:
570 string_size_t_map _statemap;
571 string_pair_deque _macrodeque;
572 string_set _macroset;
573 string_deque_deque _regexes;
574 id_vector_deque _ids;
575 id_vector_deque _unique_ids;
576 id_vector_deque _states;
577 regex_flags _flags;
578 std::size_t _counter;
579 std::size_t (*_counter_ptr) ();
580 std::locale _locale;
581 string_deque _lexer_state_names;
582
583 std::size_t add (const CharT *curr_state_, const string &regex_,
584 const std::size_t id_, const CharT *new_state_, const bool check_,
585 const std::size_t uid_ = npos)
586 {
587 const bool star_ = *curr_state_ == '*' && *(curr_state_ + 1) == 0;
588 const bool dot_ = *new_state_ == '.' && *(new_state_ + 1) == 0;
589
590 if (check_)
591 {
592 check_for_invalid_id (id_);
593 }
594
595 if (!dot_)
596 {
597 validate (new_state_);
598 }
599
600 std::size_t new_ = string::npos;
601 typename string_size_t_map::const_iterator iter_;
602 typename string_size_t_map::const_iterator end_ = _statemap.end ();
603 id_vector states_;
604
605 if (!dot_)
606 {
607 iter_ = _statemap.find (new_state_);
608
609 if (iter_ == end_)
610 {
611 std::basic_stringstream<CharT> ss_;
612 std::ostringstream os_;
613
614 os_ << "Unknown state name '";
615
616 while (*new_state_)
617 {
618 os_ << ss_.narrow (*new_state_++, ' ');
619 }
620
621 os_ << "'.";
622 throw runtime_error (os_.str ());
623 }
624
625 new_ = iter_->second;
626 }
627
628 if (star_)
629 {
630 const std::size_t size_ = _statemap.size ();
631
632 for (std::size_t i_ = 0; i_ < size_; ++i_)
633 {
634 states_.push_back (i_);
635 }
636 }
637 else
638 {
639 const CharT *start_ = curr_state_;
640 string state_;
641
642 while (*curr_state_)
643 {
644 while (*curr_state_ && *curr_state_ != ',')
645 {
646 ++curr_state_;
647 }
648
649 state_.assign (start_, curr_state_);
650
651 if (*curr_state_)
652 {
653 ++curr_state_;
654 start_ = curr_state_;
655 }
656
657 validate (state_.c_str ());
658 iter_ = _statemap.find (state_.c_str ());
659
660 if (iter_ == end_)
661 {
662 std::basic_stringstream<CharT> ss_;
663 std::ostringstream os_;
664
665 os_ << "Unknown state name '";
666 curr_state_ = state_.c_str ();
667
668 while (*curr_state_)
669 {
670 os_ << ss_.narrow (*curr_state_++, ' ');
671 }
672
673 os_ << "'.";
674 throw runtime_error (os_.str ());
675 }
676
677 states_.push_back (iter_->second);
678 }
679 }
680
681 std::size_t first_counter_ = npos;
682
683 for (std::size_t i_ = 0, size_ = states_.size (); i_ < size_; ++i_)
684 {
685 const std::size_t curr_ = states_[i_];
686
687 _regexes[curr_].push_back (regex_);
688 _ids[curr_].push_back (id_);
689
690 if (uid_ == npos)
691 {
692 const std::size_t counter_ = next_unique_id ();
693
694 if (first_counter_ == npos)
695 {
696 first_counter_ = counter_;
697 }
698
699 _unique_ids[curr_].push_back (counter_);
700 }
701 else
702 {
703 if (first_counter_ == npos)
704 {
705 first_counter_ = uid_;
706 }
707
708 _unique_ids[curr_].push_back (uid_);
709 }
710
711 _states[curr_].push_back (dot_ ? curr_ : new_);
712 }
713
714 return first_counter_;
715 }
716
717 void validate (const CharT *name_) const
718 {
719 const CharT *start_ = name_;
720
721 if (*name_ != '_' && !(*name_ >= 'A' && *name_ <= 'Z') &&
722 !(*name_ >= 'a' && *name_ <= 'z'))
723 {
724 std::basic_stringstream<CharT> ss_;
725 std::ostringstream os_;
726
727 os_ << "Invalid name '";
728
729 while (*name_)
730 {
731 os_ << ss_.narrow (*name_++, ' ');
732 }
733
734 os_ << "'.";
735 throw runtime_error (os_.str ());
736 }
737 else if (*name_)
738 {
739 ++name_;
740 }
741
742 while (*name_)
743 {
744 if (*name_ != '_' && *name_ != '-' &&
745 !(*name_ >= 'A' && *name_ <= 'Z') &&
746 !(*name_ >= 'a' && *name_ <= 'z') &&
747 !(*name_ >= '0' && *name_ <= '9'))
748 {
749 std::basic_stringstream<CharT> ss_;
750 std::ostringstream os_;
751
752 os_ << "Invalid name '";
753 name_ = start_;
754
755 while (*name_)
756 {
757 os_ << ss_.narrow (*name_++, ' ');
758 }
759
760 os_ << "'.";
761 throw runtime_error (os_.str ());
762 }
763
764 ++name_;
765 }
766
767 if (name_ - start_ > static_cast<std::ptrdiff_t>(max_macro_len))
768 {
769 std::basic_stringstream<CharT> ss_;
770 std::ostringstream os_;
771
772 os_ << "Name '";
773 name_ = start_;
774
775 while (*name_)
776 {
777 os_ << ss_.narrow (*name_++, ' ');
778 }
779
780 os_ << "' too long.";
781 throw runtime_error (os_.str ());
782 }
783 }
784
785 void check_for_invalid_id (const std::size_t id_) const
786 {
787 switch (id_)
788 {
789 case 0:
790 throw runtime_error ("id 0 is reserved for EOF.");
791 case npos:
792 throw runtime_error ("id npos is reserved for the "
793 "UNKNOWN token.");
794 default:
795 // OK
796 break;
797 }
798 }
799 };
800
801 typedef basic_rules<char> rules;
802 typedef basic_rules<wchar_t> wrules;
803 }
804 }
805
806 #endif