2 // Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 #ifndef BOOST_LEXER_RULES_HPP
7 #define BOOST_LEXER_RULES_HPP
13 #include "runtime_error.hpp"
26 // return name of initial state
// Template header of the detail::strings string table. The accessors that
// follow all return narrow (const char *) literals.
// NOTE(review): the struct header lines for these members are not visible in
// this view - presumably they belong to the char specialisation; confirm.
27 template <typename CharT>
// Name of the initial lexer state.
33 static const char *initial ()
// Marker string for "current state"; callers in basic_rules compare state
// arguments against '.' - presumably this returns ".".
38 static const char *dot ()
// Marker string for "every state"; callers in basic_rules compare state
// arguments against '*' - presumably this returns "*".
43 static const char *all_states ()
// Textual name of the character type (body not visible in this view).
48 static const char *char_name ()
// Literal prefix associated with the character type (body not visible in
// this view).
53 static const char *char_prefix ()
// Wide-character specialisation of the string table: the state-name
// accessors return const wchar_t * instead of const char *.
60 struct strings<wchar_t>
// Name of the initial lexer state.
62 static const wchar_t *initial ()
// Marker string for "current state" (compared against '.' by callers).
67 static const wchar_t *dot ()
// Marker string for "every state" (compared against '*' by callers).
72 static const wchar_t *all_states ()
// Note: the two accessors below return narrow const char * even in this
// wide specialisation (bodies not visible in this view).
77 static const char *char_name ()
82 static const char *char_prefix ()
// Template header and public typedefs of basic_rules, parameterised on the
// character type used for state names, macro names and regex text.
// NOTE(review): the class header line itself is not visible in this view.
89 template<typename CharT>
// Flat list of numeric values (rule ids, unique ids or target-state indices).
93 typedef std::vector<std::size_t> id_vector;
// One id_vector per lexer state.
94 typedef std::deque<id_vector> id_vector_deque;
// String type matching CharT.
95 typedef std::basic_string<CharT> string;
// Regexes belonging to a single state.
96 typedef std::deque<string> string_deque;
// One string_deque per lexer state.
97 typedef std::deque<string_deque> string_deque_deque;
// Set of strings (used for the known macro names).
98 typedef std::set<string> string_set;
// (macro name, macro regex) pair and the ordered list of such pairs.
99 typedef std::pair<string, string> string_pair;
100 typedef std::deque<string_pair> string_pair_deque;
// State name -> state index map and its value type.
101 typedef std::map<string, std::size_t> string_size_t_map;
102 typedef std::pair<string, std::size_t> string_size_t_pair;
// Constructs a rule set.
//   flags_       - regex flags; defaults to dot_not_newline.
//   counter_ptr_ - optional function that produces unique ids; when null,
//                  next_unique_id () uses the internal _counter instead.
// NOTE(review): part of the member-initialiser list is not visible in this
// view.
104 basic_rules (const regex_flags flags_ = dot_not_newline,
105 std::size_t (*counter_ptr_) () = 0) :
108 _counter_ptr (counter_ptr_)
// Every rule set starts with the initial state registered.
110 add_state (initial ());
// Fragment of a reset routine. NOTE(review): its signature and several of its
// statements are not visible in this view - presumably this is the
// no-argument clear (); confirm against the full file.
// NOTE(review): no visible statement here resets _lexer_state_names, whose
// size add_state () returns - verify that it is cleared as well.
116 _macrodeque.clear ();
120 _unique_ids.clear ();
// Restore the default flags and a default-constructed locale.
122 _flags = dot_not_newline;
123 _locale = std::locale ();
// Re-register the initial state.
124 add_state (initial ());
// Empties the per-state rule containers of one named state.
//   state_name_ - name of the state, resolved to an index via state ().
// NOTE(review): the guard between the lookup and the clears is not visible
// in this view - presumably unknown names (npos) are skipped; confirm.
127 void clear (const CharT *state_name_)
129 std::size_t state_ = state (state_name_);
// Drop the regexes, ids, unique ids and target states stored for that state.
133 _regexes[state_].clear ();
134 _ids[state_].clear ();
135 _unique_ids[state_].clear ();
136 _states[state_].clear ();
// Setter overload: takes the regex_flags value to use (body not visible in
// this view).
140 void flags (const regex_flags flags_)
// Getter overload: const accessor returning a regex_flags value (body not
// visible in this view - presumably the stored _flags).
145 regex_flags flags () const
// Produces the next unique id: delegates to the user-supplied counter
// function when one was given, otherwise returns the internal _counter and
// then increments it.
150 std::size_t next_unique_id ()
152 return _counter_ptr ? _counter_ptr () : _counter++;
// Takes a locale and returns a std::locale by value.
//   locale_ - locale passed by non-const reference.
// NOTE(review): the remaining statements are not visible in this view -
// presumably locale_ is stored and the saved previous locale is returned.
155 std::locale imbue (std::locale &locale_)
// Keep a copy of the current locale before anything else happens.
157 std::locale loc_ = _locale;
// Const accessor returning a locale by reference (body not visible in this
// view - presumably the stored _locale).
163 const std::locale &locale () const
// Looks a state name up in _statemap.
//   name_ - state name to resolve.
// state_ starts as npos and is replaced by the mapped index on a hit.
// NOTE(review): the return statement is not visible in this view -
// presumably state_ is returned, i.e. npos for an unknown name.
168 std::size_t state (const CharT *name_) const
170 std::size_t state_ = npos;
171 typename string_size_t_map::const_iterator iter_ =
172 _statemap.find (name_);
174 if (iter_ != _statemap.end ())
176 state_ = iter_->second;
// Maps a state index back to its name.
//   index_ - state index; names other than the initial one are kept in
//            _lexer_state_names at position index_ - 1.
// NOTE(review): the branches around these statements are not visible in this
// view - presumably index 0 yields initial () and an out-of-range index
// yields a null pointer; confirm.
182 const CharT *state (const std::size_t index_) const
190 const std::size_t vec_index_ = index_ - 1;
// Compare against size () directly. The former `size () - 1` wrapped round to
// the largest std::size_t value when no additional states were registered,
// so the range check never fired and the lookup below indexed an empty
// container.
192 if (vec_index_ >= _lexer_state_names.size ())
198 return _lexer_state_names[vec_index_].c_str ();
// Registers a lexer state.
//   name_ - state name; it is mapped to the current size of _statemap.
// Returns the size of _lexer_state_names after the call.
// NOTE(review): some statements of this function are not visible in this
// view.
203 std::size_t add_state (const CharT *name_)
// insert () reports whether the name was new; only then are the per-state
// containers extended.
207 if (_statemap.insert (string_size_t_pair (name_,
208 _statemap.size ())).second)
// One empty slot per parallel container for the new state.
210 _regexes.push_back (string_deque ());
211 _ids.push_back (id_vector ());
212 _unique_ids.push_back (id_vector ());
213 _states.push_back (id_vector ());
// The initial state's name is deliberately not kept in _lexer_state_names.
215 if (string (name_) != initial ())
217 _lexer_state_names.push_back (name_);
221 // Initial is not stored, so no need to - 1.
222 return _lexer_state_names.size ();
// Convenience overload: null-terminated regex text. Forwards to the string
// overload.
225 void add_macro (const CharT *name_, const CharT *regex_)
227 add_macro (name_, string (regex_));
// Convenience overload: regex text given as the range
// [regex_start_, regex_end_). Forwards to the string overload.
230 void add_macro (const CharT *name_, const CharT *regex_start_,
231 const CharT *regex_end_)
233 add_macro (name_, string (regex_start_, regex_end_));
// Registers a macro (string overload; the pointer overloads forward here).
//   name_  - macro name.
//   regex_ - regex text the macro expands to.
// Throws runtime_error ("Attempt to redefine MACRO '...") - going by the
// lookup below, when name_ is already present in _macroset.
// NOTE(review): some statements of this function are not visible in this
// view.
236 void add_macro (const CharT *name_, const string &regex_)
240 typename string_set::const_iterator iter_ = _macroset.find (name_);
// Unknown name: remember the definition in order and record the name.
242 if (iter_ == _macroset.end ())
244 _macrodeque.push_back (string_pair (name_, regex_));
245 _macroset.insert (name_);
// ss_ is only used for narrow (), which converts the CharT name for the
// char-based error message.
249 std::basic_stringstream<CharT> ss_;
250 std::ostringstream os_;
252 os_ << "Attempt to redefine MACRO '";
// The fallback argument of narrow () is a plain char, so pass ' ' exactly as
// the other error paths in this class do.
256 os_ << ss_.narrow (*name_++, ' ');
260 throw runtime_error (os_.str ());
// Copies every macro of rules_ into this object through add_macro (), so a
// name that already exists here is reported by add_macro ().
//   rules_ - rule set whose macros are copied.
// NOTE(review): the iterator initialisers are not visible in this view.
264 void add_macros (const basic_rules<CharT> &rules_)
// Bind by reference to the source macro list (no copy).
266 const string_pair_deque &macros_ = rules_.macrodeque ();
267 typename string_pair_deque::const_iterator macro_iter_ =
269 typename string_pair_deque::const_iterator macro_end_ =
272 for (; macro_iter_ != macro_end_; ++macro_iter_)
274 add_macro (macro_iter_->first.c_str (),
275 macro_iter_->second.c_str ());
// Copies only those macros of rules_ whose names are not yet known here;
// existing definitions are left untouched.
//   rules_ - rule set whose macros are merged in.
// NOTE(review): the iterator initialisers are not visible in this view.
279 void merge_macros (const basic_rules<CharT> &rules_)
// Bind by reference to the source macro list (no copy).
281 const string_pair_deque &macros_ = rules_.macrodeque ();
282 typename string_pair_deque::const_iterator macro_iter_ =
284 typename string_pair_deque::const_iterator macro_end_ =
286 typename string_set::const_iterator macro_dest_iter_;
287 typename string_set::const_iterator macro_dest_end_ = _macroset.end ();
289 for (; macro_iter_ != macro_end_; ++macro_iter_)
// Skip names that are already defined in this object.
291 macro_dest_iter_ = _macroset.find (macro_iter_->first);
293 if (macro_dest_iter_ == macro_dest_end_)
295 add_macro (macro_iter_->first.c_str (),
296 macro_iter_->second.c_str ());
// Adds a rule given as null-terminated regex text. Forwards to the string
// overload and returns its result.
301 std::size_t add (const CharT *regex_, const std::size_t id_)
303 return add (string (regex_), id_);
// Adds a rule given as the range [regex_start_, regex_end_). Forwards to the
// string overload and returns its result.
306 std::size_t add (const CharT *regex_start_, const CharT *regex_end_,
307 const std::size_t id_)
309 return add (string (regex_start_, regex_end_), id_);
// Adds a rule to the first state (index 0).
//   regex_ - regex text of the rule.
//   id_    - id of the rule; checked by check_for_invalid_id ().
// NOTE(review): the return statement is not visible in this view -
// presumably the freshly drawn unique id (counter_) is returned.
312 std::size_t add (const string &regex_, const std::size_t id_)
// The unique id is drawn before the id is validated.
314 const std::size_t counter_ = next_unique_id ();
316 check_for_invalid_id (id_);
// Append to the four parallel per-state containers of state 0; the stored
// target state is 0 as well.
317 _regexes.front ().push_back (regex_);
318 _ids.front ().push_back (id_);
319 _unique_ids.front ().push_back (counter_);
320 _states.front ().push_back (0);
// Adds a rule that carries no id of its own, given as null-terminated regex
// text. Forwards to the string overload and returns its result.
324 std::size_t add (const CharT *curr_state_, const CharT *regex_,
325 const CharT *new_state_)
327 return add (curr_state_, string (regex_), new_state_);
// Same, with the regex given as the range [regex_start_, regex_end_).
// NOTE(review): the tail of the forwarding call is not visible in this view.
330 std::size_t add (const CharT *curr_state_, const CharT *regex_start_,
331 const CharT *regex_end_, const CharT *new_state_)
333 return add (curr_state_, string (regex_start_, regex_end_),
// Adds a rule that carries no id of its own.
//   curr_state_ - state(s) the rule belongs to.
//   regex_      - regex text of the rule.
//   new_state_  - state to switch to when the rule matches.
// Calls the private add () with id 0 and check_ == false.
337 std::size_t add (const CharT *curr_state_, const string &regex_,
338 const CharT *new_state_)
340 return add (curr_state_, regex_, 0, new_state_, false);
// Adds a rule with an explicit id, given as null-terminated regex text.
// Forwards to the string overload and returns its result.
343 std::size_t add (const CharT *curr_state_, const CharT *regex_,
344 const std::size_t id_, const CharT *new_state_)
346 return add (curr_state_, string (regex_), id_, new_state_);
// Same, with the regex given as the range [regex_start_, regex_end_).
// NOTE(review): the tail of the forwarding call is not visible in this view.
349 std::size_t add (const CharT *curr_state_, const CharT *regex_start_,
350 const CharT *regex_end_, const std::size_t id_,
351 const CharT *new_state_)
353 return add (curr_state_, string (regex_start_, regex_end_), id_,
// Adds a rule with an explicit id.
//   curr_state_ - state(s) the rule belongs to.
//   regex_      - regex text of the rule.
//   id_         - id of the rule.
//   new_state_  - state to switch to when the rule matches.
// Calls the private add () with check_ == true.
357 std::size_t add (const CharT *curr_state_, const string &regex_,
358 const std::size_t id_, const CharT *new_state_)
360 return add (curr_state_, regex_, id_, new_state_, true);
// Imports rules from another rule set.
//   source_ - exactly "*" to walk every state of rules_; otherwise state
//             names of rules_ are read from it up to each ','.
//   rules_  - rule set the rules are copied from.
//   dest_   - state of this object that receives the rules; exactly "."
//             means "use the name of the source state".
//   to_     - state the copied rules switch to; exactly "." (the default)
//             means "look the name up in rules_ from the target-state index
//             stored with each rule".
// Throws runtime_error ("Unknown state name '...") on the error path of the
// name-list branch.
// NOTE(review): several statements (branch heads, loop heads and the tails
// of the inner add () calls) are not visible in this view.
363 void add (const CharT *source_, const basic_rules<CharT> &rules_,
364 const CharT *dest_, const CharT *to_ = detail::strings<CharT>::dot ())
// Each flag is true only for the exact one-character marker string.
366 const bool star_ = *source_ == '*' && *(source_ + 1) == 0;
367 const bool dest_dot_ = *dest_ == '.' && *(dest_ + 1) == 0;
368 const bool to_dot_ = *to_ == '.' && *(to_ + 1) == 0;
369 std::size_t state_ = 0;
// References to the four parallel per-state containers of the source.
370 const string_deque_deque &all_regexes_ = rules_.regexes ();
371 const id_vector_deque &all_ids_ = rules_.ids ();
372 const id_vector_deque &all_unique_ids_ = rules_.unique_ids ();
373 const id_vector_deque &all_states_ = rules_.states ();
374 typename string_deque::const_iterator regex_iter_;
375 typename string_deque::const_iterator regex_end_;
376 typename id_vector::const_iterator id_iter_;
377 typename id_vector::const_iterator uid_iter_;
378 typename id_vector::const_iterator state_iter_;
382 typename string_deque_deque::const_iterator all_regexes_iter_ =
383 all_regexes_.begin ();
384 typename string_deque_deque::const_iterator all_regexes_end_ =
386 typename id_vector_deque::const_iterator all_ids_iter_ =
388 typename id_vector_deque::const_iterator all_uids_iter_ =
389 all_unique_ids_.begin ();
390 typename id_vector_deque::const_iterator all_states_iter_ =
391 all_states_.begin ();
// Walk all source states; state_ and the four outer iterators advance in
// lock-step so they always refer to the same state.
393 for (; all_regexes_iter_ != all_regexes_end_;
394 ++state_, ++all_regexes_iter_, ++all_ids_iter_,
395 ++all_uids_iter_, ++all_states_iter_)
397 regex_iter_ = all_regexes_iter_->begin ();
398 regex_end_ = all_regexes_iter_->end ();
399 id_iter_ = all_ids_iter_->begin ();
400 uid_iter_ = all_uids_iter_->begin ();
401 state_iter_ = all_states_iter_->begin ();
// Copy every rule of the current source state.
403 for (; regex_iter_ != regex_end_; ++regex_iter_, ++id_iter_,
404 ++uid_iter_, ++state_iter_)
406 // If ..._dot_ then lookup state name from rules_; otherwise
407 // pass name through.
408 add (dest_dot_ ? rules_.state (state_) : dest_, *regex_iter_,
409 *id_iter_, to_dot_ ? rules_.state (*state_iter_) : to_, true,
// Name-list handling: start_ marks where the current name begins.
416 const CharT *start_ = source_;
// Advance to the end of the current name (',' or the terminator).
421 while (*source_ && *source_ != ',')
426 state_name_.assign (start_, source_);
// Resolve the name to a state index of rules_.
434 state_ = rules_.state (state_name_.c_str ());
// Error path: narrow the name character by character into the message.
438 std::basic_stringstream<CharT> ss_;
439 std::ostringstream os_;
441 os_ << "Unknown state name '";
442 source_ = state_name_.c_str ();
446 os_ << ss_.narrow (*source_++, ' ');
450 throw runtime_error (os_.str ());
// Position the inner iterators on the rules of the resolved source state.
453 regex_iter_ = all_regexes_[state_].begin ();
454 regex_end_ = all_regexes_[state_].end ();
455 id_iter_ = all_ids_[state_].begin ();
456 uid_iter_ = all_unique_ids_[state_].begin ();
457 state_iter_ = all_states_[state_].begin ();
459 for (; regex_iter_ != regex_end_; ++regex_iter_, ++id_iter_,
460 ++uid_iter_, ++state_iter_)
462 // If ..._dot_ then lookup state name from rules_; otherwise
463 // pass name through.
464 add (dest_dot_ ? state_name_.c_str () : dest_, *regex_iter_,
465 *id_iter_, to_dot_ ? rules_.state (*state_iter_) : to_, true,
// Copies every rule of every state of rules_ into curr_state_ of this
// object; the copied rules also switch to curr_state_.
//   curr_state_ - destination state specification, passed to the private
//                 add () as both the current and the new state.
//   rules_      - rule set the rules are copied from.
// NOTE(review): some initialisers and the tail of the inner loop/call are
// not visible in this view.
472 void add (const CharT *curr_state_, const basic_rules<CharT> &rules_)
// References to the parallel per-state containers of the source.
474 const string_deque_deque &regexes_ = rules_.regexes ();
475 const id_vector_deque &ids_ = rules_.ids ();
476 const id_vector_deque &unique_ids_ = rules_.unique_ids ();
477 typename string_deque_deque::const_iterator state_regex_iter_ =
479 typename string_deque_deque::const_iterator state_regex_end_ =
481 typename id_vector_deque::const_iterator state_id_iter_ =
483 typename id_vector_deque::const_iterator state_uid_iter_ =
484 unique_ids_.begin ();
485 typename string_deque::const_iterator regex_iter_;
486 typename string_deque::const_iterator regex_end_;
487 typename id_vector::const_iterator id_iter_;
488 typename id_vector::const_iterator uid_iter_;
// The three outer iterators must advance together. Previously only the regex
// iterator moved, so for every state after the first the ids and unique ids
// were still read from the first state's vectors (wrong values, and a read
// past the end when that state holds fewer rules).
490 for (; state_regex_iter_ != state_regex_end_; ++state_regex_iter_, ++state_id_iter_, ++state_uid_iter_)
492 regex_iter_ = state_regex_iter_->begin ();
493 regex_end_ = state_regex_iter_->end ();
494 id_iter_ = state_id_iter_->begin ();
495 uid_iter_ = state_uid_iter_->begin ();
// Copy each rule of the current source state.
497 for (; regex_iter_ != regex_end_; ++regex_iter_, ++id_iter_,
500 add (curr_state_, *regex_iter_, *id_iter_, curr_state_, true,
// Read-only accessors; each returns a const reference (bodies not visible in
// this view - presumably the member of the matching name).
// State name -> state index map.
506 const string_size_t_map &statemap () const
// Ordered (macro name, macro regex) pairs.
511 const string_pair_deque &macrodeque () const
// Regexes, one list per state.
516 const string_deque_deque &regexes () const
// Rule ids, one list per state.
521 const id_vector_deque &ids () const
// Unique ids, one list per state.
526 const id_vector_deque &unique_ids () const
// Target-state indices, one list per state.
531 const id_vector_deque &states () const
// Fragment that scans the per-state regex lists for one that holds at least
// one regex. NOTE(review): the signature and the statements acting on the
// result are not visible in this view - presumably this is an emptiness
// query over all states; confirm against the full file.
538 typename string_deque_deque::const_iterator iter_ = _regexes.begin ();
539 typename string_deque_deque::const_iterator end_ = _regexes.end ();
542 for (; iter_ != end_; ++iter_)
544 if (!iter_->empty ())
// Name of the initial state, taken from detail::strings for this CharT.
554 static const CharT *initial ()
556 return detail::strings<CharT>::initial ();
// Marker string for "every state", taken from detail::strings.
559 static const CharT *all_states ()
561 return detail::strings<CharT>::all_states ();
// Marker string for "current state", taken from detail::strings.
564 static const CharT *dot ()
566 return detail::strings<CharT>::dot ();
// State name -> state index.
570 string_size_t_map _statemap;
// Macro definitions in insertion order, plus the set of their names used for
// duplicate detection.
571 string_pair_deque _macrodeque;
572 string_set _macroset;
// Parallel per-state containers, indexed by state index: regex text, rule
// id, unique id and target-state index of each rule.
573 string_deque_deque _regexes;
574 id_vector_deque _ids;
575 id_vector_deque _unique_ids;
576 id_vector_deque _states;
// Internal unique-id counter, used when no counter function was supplied.
578 std::size_t _counter;
// Optional user-supplied unique-id generator (may be null).
579 std::size_t (*_counter_ptr) ();
// Names of all states except the initial one, in registration order.
581 string_deque _lexer_state_names;
// Core insertion routine behind the public add () overloads.
//   curr_state_ - exactly "*" for every registered state; otherwise state
//                 names are read from it up to each ','.
//   regex_      - regex text of the rule.
//   id_         - id of the rule.
//   new_state_  - exactly "." to stay in whichever state the rule is added
//                 to; otherwise the name of the state to switch to.
//   check_      - NOTE(review): its use is not visible in this view -
//                 presumably it gates check_for_invalid_id (); confirm.
//   uid_        - unique id to record; defaults to npos. NOTE(review):
//                 presumably npos means "draw a fresh id per state" - the
//                 deciding branch is not visible in this view.
// Returns the first unique id recorded by this call (npos if none was).
// Throws runtime_error ("Unknown state name '...") on the error paths of the
// state lookups; validate () and check_for_invalid_id () may throw as well.
583 std::size_t add (const CharT *curr_state_, const string &regex_,
584 const std::size_t id_, const CharT *new_state_, const bool check_,
585 const std::size_t uid_ = npos)
// Each flag is true only for the exact one-character marker string.
587 const bool star_ = *curr_state_ == '*' && *(curr_state_ + 1) == 0;
588 const bool dot_ = *new_state_ == '.' && *(new_state_ + 1) == 0;
592 check_for_invalid_id (id_);
597 validate (new_state_);
// Index of the target state; stays npos until it has been resolved.
600 std::size_t new_ = string::npos;
601 typename string_size_t_map::const_iterator iter_;
602 typename string_size_t_map::const_iterator end_ = _statemap.end ();
607 iter_ = _statemap.find (new_state_);
// Error path: narrow the name character by character into the message.
611 std::basic_stringstream<CharT> ss_;
612 std::ostringstream os_;
614 os_ << "Unknown state name '";
618 os_ << ss_.narrow (*new_state_++, ' ');
622 throw runtime_error (os_.str ());
625 new_ = iter_->second;
// "Every state": collect all indices 0 .. _statemap.size () - 1.
630 const std::size_t size_ = _statemap.size ();
632 for (std::size_t i_ = 0; i_ < size_; ++i_)
634 states_.push_back (i_);
// Name-list handling: start_ marks where the current name begins.
639 const CharT *start_ = curr_state_;
// Advance to the end of the current name (',' or the terminator).
644 while (*curr_state_ && *curr_state_ != ',')
649 state_.assign (start_, curr_state_);
654 start_ = curr_state_;
// Each listed name must be well-formed and registered.
657 validate (state_.c_str ());
658 iter_ = _statemap.find (state_.c_str ());
662 std::basic_stringstream<CharT> ss_;
663 std::ostringstream os_;
665 os_ << "Unknown state name '";
666 curr_state_ = state_.c_str ();
670 os_ << ss_.narrow (*curr_state_++, ' ');
674 throw runtime_error (os_.str ());
677 states_.push_back (iter_->second);
// Unique id handed back to the caller; set on the first recorded rule.
681 std::size_t first_counter_ = npos;
// Append the rule to the parallel containers of every selected state.
683 for (std::size_t i_ = 0, size_ = states_.size (); i_ < size_; ++i_)
685 const std::size_t curr_ = states_[i_];
687 _regexes[curr_].push_back (regex_);
688 _ids[curr_].push_back (id_);
// Variant that draws a fresh unique id for this state.
692 const std::size_t counter_ = next_unique_id ();
694 if (first_counter_ == npos)
696 first_counter_ = counter_;
699 _unique_ids[curr_].push_back (counter_);
// Variant that records the caller-supplied unique id.
703 if (first_counter_ == npos)
705 first_counter_ = uid_;
708 _unique_ids[curr_].push_back (uid_);
// "." as the new state means the rule stays in the state it was added to.
711 _states[curr_].push_back (dot_ ? curr_ : new_);
714 return first_counter_;
// Checks that name_ is an acceptable name and throws runtime_error if not.
//   name_ - null-terminated name to check.
// Rules visible here: the first character must be '_' or an ASCII letter;
// later characters may also be '-' or a digit; the length is compared
// against max_macro_len.
// Throws runtime_error ("Invalid name '..." or a message ending in
// "' too long.").
// NOTE(review): loop heads and some message statements are not visible in
// this view.
717 void validate (const CharT *name_) const
// Remember the start so the length can be computed later.
719 const CharT *start_ = name_;
// First character: underscore or letter only.
721 if (*name_ != '_' && !(*name_ >= 'A' && *name_ <= 'Z') &&
722 !(*name_ >= 'a' && *name_ <= 'z'))
724 std::basic_stringstream<CharT> ss_;
725 std::ostringstream os_;
727 os_ << "Invalid name '";
731 os_ << ss_.narrow (*name_++, ' ');
735 throw runtime_error (os_.str ());
// Subsequent characters: underscore, hyphen, letter or digit.
744 if (*name_ != '_' && *name_ != '-' &&
745 !(*name_ >= 'A' && *name_ <= 'Z') &&
746 !(*name_ >= 'a' && *name_ <= 'z') &&
747 !(*name_ >= '0' && *name_ <= '9'))
749 std::basic_stringstream<CharT> ss_;
750 std::ostringstream os_;
752 os_ << "Invalid name '";
757 os_ << ss_.narrow (*name_++, ' ');
761 throw runtime_error (os_.str ());
// Length check: characters consumed so far versus max_macro_len.
767 if (name_ - start_ > static_cast<std::ptrdiff_t>(max_macro_len))
769 std::basic_stringstream<CharT> ss_;
770 std::ostringstream os_;
777 os_ << ss_.narrow (*name_++, ' ');
780 os_ << "' too long.";
781 throw runtime_error (os_.str ());
// Rejects reserved rule ids by throwing runtime_error.
//   id_ - rule id to check.
// NOTE(review): the conditions are not visible in this view - going by the
// messages, id 0 and npos are the reserved values; confirm.
785 void check_for_invalid_id (const std::size_t id_) const
790 throw runtime_error ("id 0 is reserved for EOF.");
792 throw runtime_error ("id npos is reserved for the "
// Convenience aliases for the narrow- and wide-character rule sets.
801 typedef basic_rules<char> rules;
802 typedef basic_rules<wchar_t> wrules;