]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/boost/spirit/home/support/detail/lexer/generator.hpp
update sources to v12.2.3
[ceph.git] / ceph / src / boost / boost / spirit / home / support / detail / lexer / generator.hpp
1 // generator.hpp
2 // Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 #ifndef BOOST_LEXER_GENERATOR_HPP
7 #define BOOST_LEXER_GENERATOR_HPP
8
9 #include "char_traits.hpp"
10 // memcmp()
11 #include <cstring>
12 #include "partition/charset.hpp"
13 #include "partition/equivset.hpp"
14 #include <memory>
15 #include <limits>
16 #include "parser/tree/node.hpp"
17 #include "parser/parser.hpp"
18 #include "containers/ptr_list.hpp"
19 #include <boost/move/unique_ptr.hpp>
20 #include "rules.hpp"
21 #include "state_machine.hpp"
22
23 namespace boost
24 {
25 namespace lexer
26 {
27 template<typename CharT, typename Traits = char_traits<CharT> >
28 class basic_generator
29 {
30 public:
31 typedef typename detail::internals::size_t_vector size_t_vector;
32 typedef basic_rules<CharT> rules;
33
34 static void build (const rules &rules_,
35 basic_state_machine<CharT> &state_machine_)
36 {
37 std::size_t index_ = 0;
38 std::size_t size_ = rules_.statemap ().size ();
39 node_ptr_vector node_ptr_vector_;
40 detail::internals &internals_ = const_cast<detail::internals &>
41 (state_machine_.data ());
42 bool seen_BOL_assertion_ = false;
43 bool seen_EOL_assertion_ = false;
44
45 state_machine_.clear ();
46
47 for (; index_ < size_; ++index_)
48 {
49 internals_._lookup->push_back (static_cast<size_t_vector *>(0));
50 internals_._lookup->back () = new size_t_vector;
51 internals_._dfa_alphabet.push_back (0);
52 internals_._dfa->push_back (static_cast<size_t_vector *>(0));
53 internals_._dfa->back () = new size_t_vector;
54 }
55
56 for (index_ = 0, size_ = internals_._lookup->size ();
57 index_ < size_; ++index_)
58 {
59 internals_._lookup[index_]->resize (sizeof (CharT) == 1 ?
60 num_chars : num_wchar_ts, dead_state_index);
61
62 if (!rules_.regexes ()[index_].empty ())
63 {
64 // vector mapping token indexes to partitioned token index sets
65 index_set_vector set_mapping_;
66 // syntax tree
67 detail::node *root_ = build_tree (rules_, index_,
68 node_ptr_vector_, internals_, set_mapping_);
69
70 build_dfa (root_, set_mapping_,
71 internals_._dfa_alphabet[index_],
72 *internals_._dfa[index_]);
73
74 if (internals_._seen_BOL_assertion)
75 {
76 seen_BOL_assertion_ = true;
77 }
78
79 if (internals_._seen_EOL_assertion)
80 {
81 seen_EOL_assertion_ = true;
82 }
83
84 internals_._seen_BOL_assertion = false;
85 internals_._seen_EOL_assertion = false;
86 }
87 }
88
89 internals_._seen_BOL_assertion = seen_BOL_assertion_;
90 internals_._seen_EOL_assertion = seen_EOL_assertion_;
91 }
92
93 static void minimise (basic_state_machine<CharT> &state_machine_)
94 {
95 detail::internals &internals_ = const_cast<detail::internals &>
96 (state_machine_.data ());
97 const std::size_t machines_ = internals_._dfa->size ();
98
99 for (std::size_t i_ = 0; i_ < machines_; ++i_)
100 {
101 const std::size_t dfa_alphabet_ = internals_._dfa_alphabet[i_];
102 size_t_vector *dfa_ = internals_._dfa[i_];
103
104 if (dfa_alphabet_ != 0)
105 {
106 std::size_t size_ = 0;
107
108 do
109 {
110 size_ = dfa_->size ();
111 minimise_dfa (dfa_alphabet_, *dfa_, size_);
112 } while (dfa_->size () != size_);
113 }
114 }
115 }
116
117 protected:
118 typedef detail::basic_charset<CharT> charset;
119 typedef detail::ptr_list<charset> charset_list;
120 typedef boost::movelib::unique_ptr<charset> charset_ptr;
121 typedef detail::equivset equivset;
122 typedef detail::ptr_list<equivset> equivset_list;
123 typedef boost::movelib::unique_ptr<equivset> equivset_ptr;
124 typedef typename charset::index_set index_set;
125 typedef std::vector<index_set> index_set_vector;
126 typedef detail::basic_parser<CharT> parser;
127 typedef typename parser::node_ptr_vector node_ptr_vector;
128 typedef std::set<const detail::node *> node_set;
129 typedef detail::ptr_vector<node_set> node_set_vector;
130 typedef std::vector<const detail::node *> node_vector;
131 typedef detail::ptr_vector<node_vector> node_vector_vector;
132 typedef typename parser::string string;
133 typedef std::pair<string, string> string_pair;
134 typedef typename parser::tokeniser::string_token string_token;
135 typedef std::deque<string_pair> macro_deque;
136 typedef std::pair<string, const detail::node *> macro_pair;
137 typedef typename parser::macro_map::iterator macro_iter;
138 typedef std::pair<macro_iter, bool> macro_iter_pair;
139 typedef typename parser::tokeniser::token_map token_map;
140
141 static detail::node *build_tree (const rules &rules_,
142 const std::size_t state_, node_ptr_vector &node_ptr_vector_,
143 detail::internals &internals_, index_set_vector &set_mapping_)
144 {
145 size_t_vector *lookup_ = internals_._lookup[state_];
146 const typename rules::string_deque_deque &regexes_ =
147 rules_.regexes ();
148 const typename rules::id_vector_deque &ids_ = rules_.ids ();
149 const typename rules::id_vector_deque &unique_ids_ =
150 rules_.unique_ids ();
151 const typename rules::id_vector_deque &states_ = rules_.states ();
152 typename rules::string_deque::const_iterator regex_iter_ =
153 regexes_[state_].begin ();
154 typename rules::string_deque::const_iterator regex_iter_end_ =
155 regexes_[state_].end ();
156 typename rules::id_vector::const_iterator ids_iter_ =
157 ids_[state_].begin ();
158 typename rules::id_vector::const_iterator unique_ids_iter_ =
159 unique_ids_[state_].begin ();
160 typename rules::id_vector::const_iterator states_iter_ =
161 states_[state_].begin ();
162 const typename rules::string &regex_ = *regex_iter_;
163 // map of regex charset tokens (strings) to index
164 token_map token_map_;
165 const typename rules::string_pair_deque &macrodeque_ =
166 rules_.macrodeque ();
167 typename parser::macro_map macromap_;
168 typename detail::node::node_vector tree_vector_;
169
170 build_macros (token_map_, macrodeque_, macromap_,
171 rules_.flags (), rules_.locale (), node_ptr_vector_,
172 internals_._seen_BOL_assertion, internals_._seen_EOL_assertion);
173
174 detail::node *root_ = parser::parse (regex_.c_str (),
175 regex_.c_str () + regex_.size (), *ids_iter_, *unique_ids_iter_,
176 *states_iter_, rules_.flags (), rules_.locale (), node_ptr_vector_,
177 macromap_, token_map_, internals_._seen_BOL_assertion,
178 internals_._seen_EOL_assertion);
179
180 ++regex_iter_;
181 ++ids_iter_;
182 ++unique_ids_iter_;
183 ++states_iter_;
184 tree_vector_.push_back (root_);
185
186 // build syntax trees
187 while (regex_iter_ != regex_iter_end_)
188 {
189 // re-declare var, otherwise we perform an assignment..!
190 const typename rules::string &regex2_ = *regex_iter_;
191
192 root_ = parser::parse (regex2_.c_str (),
193 regex2_.c_str () + regex2_.size (), *ids_iter_,
194 *unique_ids_iter_, *states_iter_, rules_.flags (),
195 rules_.locale (), node_ptr_vector_, macromap_, token_map_,
196 internals_._seen_BOL_assertion,
197 internals_._seen_EOL_assertion);
198 tree_vector_.push_back (root_);
199 ++regex_iter_;
200 ++ids_iter_;
201 ++unique_ids_iter_;
202 ++states_iter_;
203 }
204
205 if (internals_._seen_BOL_assertion)
206 {
207 // Fixup BOLs
208 typename detail::node::node_vector::iterator iter_ =
209 tree_vector_.begin ();
210 typename detail::node::node_vector::iterator end_ =
211 tree_vector_.end ();
212
213 for (; iter_ != end_; ++iter_)
214 {
215 fixup_bol (*iter_, node_ptr_vector_);
216 }
217 }
218
219 // join trees
220 {
221 typename detail::node::node_vector::iterator iter_ =
222 tree_vector_.begin ();
223 typename detail::node::node_vector::iterator end_ =
224 tree_vector_.end ();
225
226 if (iter_ != end_)
227 {
228 root_ = *iter_;
229 ++iter_;
230 }
231
232 for (; iter_ != end_; ++iter_)
233 {
234 node_ptr_vector_->push_back (static_cast<detail::selection_node *>(0));
235 node_ptr_vector_->back () = new detail::selection_node
236 (root_, *iter_);
237 root_ = node_ptr_vector_->back ();
238 }
239 }
240
241 // partitioned token list
242 charset_list token_list_;
243
244 set_mapping_.resize (token_map_.size ());
245 partition_tokens (token_map_, token_list_);
246
247 typename charset_list::list::const_iterator iter_ =
248 token_list_->begin ();
249 typename charset_list::list::const_iterator end_ =
250 token_list_->end ();
251 std::size_t index_ = 0;
252
253 for (; iter_ != end_; ++iter_, ++index_)
254 {
255 const charset *cs_ = *iter_;
256 typename charset::index_set::const_iterator set_iter_ =
257 cs_->_index_set.begin ();
258 typename charset::index_set::const_iterator set_end_ =
259 cs_->_index_set.end ();
260
261 fill_lookup (cs_->_token, lookup_, index_);
262
263 for (; set_iter_ != set_end_; ++set_iter_)
264 {
265 set_mapping_[*set_iter_].insert (index_);
266 }
267 }
268
269 internals_._dfa_alphabet[state_] = token_list_->size () + dfa_offset;
270 return root_;
271 }
272
273 static void build_macros (token_map &token_map_,
274 const macro_deque &macrodeque_,
275 typename parser::macro_map &macromap_, const regex_flags flags_,
276 const std::locale &locale_, node_ptr_vector &node_ptr_vector_,
277 bool &seen_BOL_assertion_, bool &seen_EOL_assertion_)
278 {
279 for (typename macro_deque::const_iterator iter_ =
280 macrodeque_.begin (), end_ = macrodeque_.end ();
281 iter_ != end_; ++iter_)
282 {
283 const typename rules::string &name_ = iter_->first;
284 const typename rules::string &regex_ = iter_->second;
285 detail::node *node_ = parser::parse (regex_.c_str (),
286 regex_.c_str () + regex_.size (), 0, 0, 0, flags_,
287 locale_, node_ptr_vector_, macromap_, token_map_,
288 seen_BOL_assertion_, seen_EOL_assertion_);
289 macro_iter_pair map_iter_ = macromap_.
290 insert (macro_pair (name_, static_cast<const detail::node *>
291 (0)));
292
293 map_iter_.first->second = node_;
294 }
295 }
296
297 static void build_dfa (detail::node *root_,
298 const index_set_vector &set_mapping_, const std::size_t dfa_alphabet_,
299 size_t_vector &dfa_)
300 {
301 typename detail::node::node_vector *followpos_ =
302 &root_->firstpos ();
303 node_set_vector seen_sets_;
304 node_vector_vector seen_vectors_;
305 size_t_vector hash_vector_;
306
307 // 'jam' state
308 dfa_.resize (dfa_alphabet_, 0);
309 closure (followpos_, seen_sets_, seen_vectors_,
310 hash_vector_, dfa_alphabet_, dfa_);
311
312 std::size_t *ptr_ = 0;
313
314 for (std::size_t index_ = 0; index_ < seen_vectors_->size (); ++index_)
315 {
316 equivset_list equiv_list_;
317
318 build_equiv_list (seen_vectors_[index_], set_mapping_, equiv_list_);
319
320 for (typename equivset_list::list::const_iterator iter_ =
321 equiv_list_->begin (), end_ = equiv_list_->end ();
322 iter_ != end_; ++iter_)
323 {
324 equivset *equivset_ = *iter_;
325 const std::size_t transition_ = closure
326 (&equivset_->_followpos, seen_sets_, seen_vectors_,
327 hash_vector_, dfa_alphabet_, dfa_);
328
329 if (transition_ != npos)
330 {
331 ptr_ = &dfa_.front () + ((index_ + 1) * dfa_alphabet_);
332
333 // Prune abstemious transitions from end states.
334 if (*ptr_ && !equivset_->_greedy) continue;
335
336 for (typename detail::equivset::index_vector::const_iterator
337 equiv_iter_ = equivset_->_index_vector.begin (),
338 equiv_end_ = equivset_->_index_vector.end ();
339 equiv_iter_ != equiv_end_; ++equiv_iter_)
340 {
341 const std::size_t equiv_index_ = *equiv_iter_;
342
343 if (equiv_index_ == bol_token)
344 {
345 if (ptr_[eol_index] == 0)
346 {
347 ptr_[bol_index] = transition_;
348 }
349 }
350 else if (equiv_index_ == eol_token)
351 {
352 if (ptr_[bol_index] == 0)
353 {
354 ptr_[eol_index] = transition_;
355 }
356 }
357 else
358 {
359 ptr_[equiv_index_ + dfa_offset] = transition_;
360 }
361 }
362 }
363 }
364 }
365 }
366
367 static std::size_t closure (typename detail::node::node_vector *followpos_,
368 node_set_vector &seen_sets_, node_vector_vector &seen_vectors_,
369 size_t_vector &hash_vector_, const std::size_t size_,
370 size_t_vector &dfa_)
371 {
372 bool end_state_ = false;
373 std::size_t id_ = 0;
374 std::size_t unique_id_ = npos;
375 std::size_t state_ = 0;
376 std::size_t hash_ = 0;
377
378 if (followpos_->empty ()) return npos;
379
380 std::size_t index_ = 0;
381 boost::movelib::unique_ptr<node_set> set_ptr_ (new node_set);
382 boost::movelib::unique_ptr<node_vector> vector_ptr_ (new node_vector);
383
384 for (typename detail::node::node_vector::const_iterator iter_ =
385 followpos_->begin (), end_ = followpos_->end ();
386 iter_ != end_; ++iter_)
387 {
388 closure_ex (*iter_, end_state_, id_, unique_id_, state_,
389 set_ptr_.get (), vector_ptr_.get (), hash_);
390 }
391
392 bool found_ = false;
393 typename size_t_vector::const_iterator hash_iter_ =
394 hash_vector_.begin ();
395 typename size_t_vector::const_iterator hash_end_ =
396 hash_vector_.end ();
397 typename node_set_vector::vector::const_iterator set_iter_ =
398 seen_sets_->begin ();
399
400 for (; hash_iter_ != hash_end_; ++hash_iter_, ++set_iter_)
401 {
402 found_ = *hash_iter_ == hash_ && *(*set_iter_) == *set_ptr_;
403 ++index_;
404
405 if (found_) break;
406 }
407
408 if (!found_)
409 {
410 seen_sets_->push_back (static_cast<node_set *>(0));
411 seen_sets_->back () = set_ptr_.release ();
412 seen_vectors_->push_back (static_cast<node_vector *>(0));
413 seen_vectors_->back () = vector_ptr_.release ();
414 hash_vector_.push_back (hash_);
415 // State 0 is the jam state...
416 index_ = seen_sets_->size ();
417
418 const std::size_t old_size_ = dfa_.size ();
419
420 dfa_.resize (old_size_ + size_, 0);
421
422 if (end_state_)
423 {
424 dfa_[old_size_] |= end_state;
425 dfa_[old_size_ + id_index] = id_;
426 dfa_[old_size_ + unique_id_index] = unique_id_;
427 dfa_[old_size_ + state_index] = state_;
428 }
429 }
430
431 return index_;
432 }
433
434 static void closure_ex (detail::node *node_, bool &end_state_,
435 std::size_t &id_, std::size_t &unique_id_, std::size_t &state_,
436 node_set *set_ptr_, node_vector *vector_ptr_, std::size_t &hash_)
437 {
438 const bool temp_end_state_ = node_->end_state ();
439
440 if (temp_end_state_)
441 {
442 if (!end_state_)
443 {
444 end_state_ = true;
445 id_ = node_->id ();
446 unique_id_ = node_->unique_id ();
447 state_ = node_->lexer_state ();
448 }
449 }
450
451 if (set_ptr_->insert (node_).second)
452 {
453 vector_ptr_->push_back (node_);
454 hash_ += reinterpret_cast<std::size_t> (node_);
455 }
456 }
457
458 static void partition_tokens (const token_map &map_,
459 charset_list &lhs_)
460 {
461 charset_list rhs_;
462
463 fill_rhs_list (map_, rhs_);
464
465 if (!rhs_->empty ())
466 {
467 typename charset_list::list::iterator iter_;
468 typename charset_list::list::iterator end_;
469 charset_ptr overlap_ (new charset);
470
471 lhs_->push_back (static_cast<charset *>(0));
472 lhs_->back () = rhs_->front ();
473 rhs_->pop_front ();
474
475 while (!rhs_->empty ())
476 {
477 charset_ptr r_ (rhs_->front ());
478
479 rhs_->pop_front ();
480 iter_ = lhs_->begin ();
481 end_ = lhs_->end ();
482
483 while (!r_->empty () && iter_ != end_)
484 {
485 typename charset_list::list::iterator l_iter_ = iter_;
486
487 (*l_iter_)->intersect (*r_.get (), *overlap_.get ());
488
489 if (overlap_->empty ())
490 {
491 ++iter_;
492 }
493 else if ((*l_iter_)->empty ())
494 {
495 delete *l_iter_;
496 *l_iter_ = overlap_.release ();
497
498 overlap_.reset (new charset);
499 ++iter_;
500 }
501 else if (r_->empty ())
502 {
503 overlap_.swap (r_);
504 overlap_.reset (new charset);
505 break;
506 }
507 else
508 {
509 iter_ = lhs_->insert (++iter_,
510 static_cast<charset *>(0));
511 *iter_ = overlap_.release ();
512
513 overlap_.reset(new charset);
514 ++iter_;
515 end_ = lhs_->end ();
516 }
517 }
518
519 if (!r_->empty ())
520 {
521 lhs_->push_back (static_cast<charset *>(0));
522 lhs_->back () = r_.release ();
523 }
524 }
525 }
526 }
527
528 static void fill_rhs_list (const token_map &map_,
529 charset_list &list_)
530 {
531 typename parser::tokeniser::token_map::const_iterator iter_ =
532 map_.begin ();
533 typename parser::tokeniser::token_map::const_iterator end_ =
534 map_.end ();
535
536 for (; iter_ != end_; ++iter_)
537 {
538 list_->push_back (static_cast<charset *>(0));
539 list_->back () = new charset (iter_->first, iter_->second);
540 }
541 }
542
543 static void fill_lookup (const string_token &token_,
544 size_t_vector *lookup_, const std::size_t index_)
545 {
546 const CharT *curr_ = token_._charset.c_str ();
547 const CharT *chars_end_ = curr_ + token_._charset.size ();
548 std::size_t *ptr_ = &lookup_->front ();
549 const std::size_t max_ = sizeof (CharT) == 1 ?
550 num_chars : num_wchar_ts;
551
552 if (token_._negated)
553 {
554 // $$$ FIXME JDG July 2014 $$$
555 // this code is problematic on platforms where wchar_t is signed
556 // with min generating negative numbers. This crashes with BAD_ACCESS
557 // because of the vector index below:
558 // ptr_[static_cast<typename Traits::index_type>(curr_char_)]
559 CharT curr_char_ = 0; // (std::numeric_limits<CharT>::min)();
560 std::size_t i_ = 0;
561
562 while (curr_ < chars_end_)
563 {
564 while (*curr_ > curr_char_)
565 {
566 ptr_[static_cast<typename Traits::index_type>
567 (curr_char_)] = index_ + dfa_offset;
568 ++curr_char_;
569 ++i_;
570 }
571
572 ++curr_char_;
573 ++curr_;
574 ++i_;
575 }
576
577 for (; i_ < max_; ++i_)
578 {
579 ptr_[static_cast<typename Traits::index_type>(curr_char_)] =
580 index_ + dfa_offset;
581 ++curr_char_;
582 }
583 }
584 else
585 {
586 while (curr_ < chars_end_)
587 {
588 ptr_[static_cast<typename Traits::index_type>(*curr_)] =
589 index_ + dfa_offset;
590 ++curr_;
591 }
592 }
593 }
594
595 static void build_equiv_list (const node_vector *vector_,
596 const index_set_vector &set_mapping_, equivset_list &lhs_)
597 {
598 equivset_list rhs_;
599
600 fill_rhs_list (vector_, set_mapping_, rhs_);
601
602 if (!rhs_->empty ())
603 {
604 typename equivset_list::list::iterator iter_;
605 typename equivset_list::list::iterator end_;
606 equivset_ptr overlap_ (new equivset);
607
608 lhs_->push_back (static_cast<equivset *>(0));
609 lhs_->back () = rhs_->front ();
610 rhs_->pop_front ();
611
612 while (!rhs_->empty ())
613 {
614 equivset_ptr r_ (rhs_->front ());
615
616 rhs_->pop_front ();
617 iter_ = lhs_->begin ();
618 end_ = lhs_->end ();
619
620 while (!r_->empty () && iter_ != end_)
621 {
622 typename equivset_list::list::iterator l_iter_ = iter_;
623
624 (*l_iter_)->intersect (*r_.get (), *overlap_.get ());
625
626 if (overlap_->empty ())
627 {
628 ++iter_;
629 }
630 else if ((*l_iter_)->empty ())
631 {
632 delete *l_iter_;
633 *l_iter_ = overlap_.release ();
634
635 overlap_.reset (new equivset);
636 ++iter_;
637 }
638 else if (r_->empty ())
639 {
640 overlap_.swap (r_);
641 overlap_.reset (new equivset);
642 break;
643 }
644 else
645 {
646 iter_ = lhs_->insert (++iter_,
647 static_cast<equivset *>(0));
648 *iter_ = overlap_.release ();
649
650 overlap_.reset (new equivset);
651 ++iter_;
652 end_ = lhs_->end ();
653 }
654 }
655
656 if (!r_->empty ())
657 {
658 lhs_->push_back (static_cast<equivset *>(0));
659 lhs_->back () = r_.release ();
660 }
661 }
662 }
663 }
664
665 static void fill_rhs_list (const node_vector *vector_,
666 const index_set_vector &set_mapping_, equivset_list &list_)
667 {
668 typename node_vector::const_iterator iter_ =
669 vector_->begin ();
670 typename node_vector::const_iterator end_ =
671 vector_->end ();
672
673 for (; iter_ != end_; ++iter_)
674 {
675 const detail::node *node_ = *iter_;
676
677 if (!node_->end_state ())
678 {
679 const std::size_t token_ = node_->token ();
680
681 if (token_ != null_token)
682 {
683 list_->push_back (static_cast<equivset *>(0));
684
685 if (token_ == bol_token || token_ == eol_token)
686 {
687 std::set<std::size_t> index_set_;
688
689 index_set_.insert (token_);
690 list_->back () = new equivset (index_set_,
691 node_->greedy (), token_, node_->followpos ());
692 }
693 else
694 {
695 list_->back () = new equivset (set_mapping_[token_],
696 node_->greedy (), token_, node_->followpos ());
697 }
698 }
699 }
700 }
701 }
702
703 static void fixup_bol (detail::node * &root_,
704 node_ptr_vector &node_ptr_vector_)
705 {
706 typename detail::node::node_vector *first_ = &root_->firstpos ();
707 bool found_ = false;
708 typename detail::node::node_vector::const_iterator iter_ =
709 first_->begin ();
710 typename detail::node::node_vector::const_iterator end_ =
711 first_->end ();
712
713 for (; iter_ != end_; ++iter_)
714 {
715 const detail::node *node_ = *iter_;
716
717 found_ = !node_->end_state () && node_->token () == bol_token;
718
719 if (found_) break;
720 }
721
722 if (!found_)
723 {
724 node_ptr_vector_->push_back (static_cast<detail::leaf_node *>(0));
725 node_ptr_vector_->back () = new detail::leaf_node
726 (bol_token, true);
727
728 detail::node *lhs_ = node_ptr_vector_->back ();
729
730 node_ptr_vector_->push_back (static_cast<detail::leaf_node *>(0));
731 node_ptr_vector_->back () = new detail::leaf_node
732 (null_token, true);
733
734 detail::node *rhs_ = node_ptr_vector_->back ();
735
736 node_ptr_vector_->push_back
737 (static_cast<detail::selection_node *>(0));
738 node_ptr_vector_->back () =
739 new detail::selection_node (lhs_, rhs_);
740 lhs_ = node_ptr_vector_->back ();
741
742 node_ptr_vector_->push_back
743 (static_cast<detail::sequence_node *>(0));
744 node_ptr_vector_->back () =
745 new detail::sequence_node (lhs_, root_);
746 root_ = node_ptr_vector_->back ();
747 }
748 }
749
750 static void minimise_dfa (const std::size_t dfa_alphabet_,
751 size_t_vector &dfa_, std::size_t size_)
752 {
753 const std::size_t *first_ = &dfa_.front ();
754 const std::size_t *second_ = 0;
755 const std::size_t *end_ = first_ + size_;
756 std::size_t index_ = 1;
757 std::size_t new_index_ = 1;
758 std::size_t curr_index_ = 0;
759 index_set index_set_;
760 size_t_vector lookup_;
761 std::size_t *lookup_ptr_ = 0;
762
763 lookup_.resize (size_ / dfa_alphabet_, null_token);
764 lookup_ptr_ = &lookup_.front ();
765 *lookup_ptr_ = 0;
766 // Only one 'jam' state, so skip it.
767 first_ += dfa_alphabet_;
768
769 for (; first_ < end_; first_ += dfa_alphabet_, ++index_)
770 {
771 for (second_ = first_ + dfa_alphabet_, curr_index_ = index_ + 1;
772 second_ < end_; second_ += dfa_alphabet_, ++curr_index_)
773 {
774 if (index_set_.find (curr_index_) != index_set_.end ())
775 {
776 continue;
777 }
778
779 // Some systems have memcmp in namespace std.
780 using namespace std;
781
782 if (memcmp (first_, second_, sizeof (std::size_t) *
783 dfa_alphabet_) == 0)
784 {
785 index_set_.insert (curr_index_);
786 lookup_ptr_[curr_index_] = new_index_;
787 }
788 }
789
790 if (lookup_ptr_[index_] == null_token)
791 {
792 lookup_ptr_[index_] = new_index_;
793 ++new_index_;
794 }
795 }
796
797 if (!index_set_.empty ())
798 {
799 const std::size_t *front_ = &dfa_.front ();
800 size_t_vector new_dfa_ (front_, front_ + dfa_alphabet_);
801 typename index_set::iterator set_end_ =
802 index_set_.end ();
803 const std::size_t *ptr_ = front_ + dfa_alphabet_;
804 std::size_t *new_ptr_ = 0;
805
806 new_dfa_.resize (size_ - index_set_.size () * dfa_alphabet_, 0);
807 new_ptr_ = &new_dfa_.front () + dfa_alphabet_;
808 size_ /= dfa_alphabet_;
809
810 for (index_ = 1; index_ < size_; ++index_)
811 {
812 if (index_set_.find (index_) != set_end_)
813 {
814 ptr_ += dfa_alphabet_;
815 continue;
816 }
817
818 new_ptr_[end_state_index] = ptr_[end_state_index];
819 new_ptr_[id_index] = ptr_[id_index];
820 new_ptr_[unique_id_index] = ptr_[unique_id_index];
821 new_ptr_[state_index] = ptr_[state_index];
822 new_ptr_[bol_index] = lookup_ptr_[ptr_[bol_index]];
823 new_ptr_[eol_index] = lookup_ptr_[ptr_[eol_index]];
824 new_ptr_ += dfa_offset;
825 ptr_ += dfa_offset;
826
827 for (std::size_t i_ = dfa_offset; i_ < dfa_alphabet_; ++i_)
828 {
829 *new_ptr_++ = lookup_ptr_[*ptr_++];
830 }
831 }
832
833 dfa_.swap (new_dfa_);
834 }
835 }
836 };
837
838 typedef basic_generator<char> generator;
839 typedef basic_generator<wchar_t> wgenerator;
840 }
841 }
842
843 #endif