]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/libs/spirit/include/boost/spirit/home/support/detail/lexer/generator.hpp
bump version to 12.2.2-pve1
[ceph.git] / ceph / src / boost / libs / spirit / include / boost / spirit / home / support / detail / lexer / generator.hpp
CommitLineData
7c673cae
FG
1// generator.hpp
2// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
3//
4// Distributed under the Boost Software License, Version 1.0. (See accompanying
5// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6#ifndef BOOST_LEXER_GENERATOR_HPP
7#define BOOST_LEXER_GENERATOR_HPP
8
9#include "char_traits.hpp"
10// memcmp()
11#include <cstring>
12#include "partition/charset.hpp"
13#include "partition/equivset.hpp"
14#include <memory>
15#include <limits>
16#include "parser/tree/node.hpp"
17#include "parser/parser.hpp"
18#include "containers/ptr_list.hpp"
19#include "rules.hpp"
20#include "state_machine.hpp"
21
22namespace boost
23{
24namespace lexer
25{
26template<typename CharT, typename Traits = char_traits<CharT> >
27class basic_generator
28{
29public:
30 typedef typename detail::internals::size_t_vector size_t_vector;
31 typedef basic_rules<CharT> rules;
32
33 static void build (const rules &rules_,
34 basic_state_machine<CharT> &state_machine_)
35 {
36 std::size_t index_ = 0;
37 std::size_t size_ = rules_.statemap ().size ();
38 node_ptr_vector node_ptr_vector_;
39 detail::internals &internals_ = const_cast<detail::internals &>
40 (state_machine_.data ());
41 bool seen_BOL_assertion_ = false;
42 bool seen_EOL_assertion_ = false;
43
44 state_machine_.clear ();
45
46 for (; index_ < size_; ++index_)
47 {
48 internals_._lookup->push_back (static_cast<size_t_vector *>(0));
49 internals_._lookup->back () = new size_t_vector;
50 internals_._dfa_alphabet.push_back (0);
51 internals_._dfa->push_back (static_cast<size_t_vector *>(0));
52 internals_._dfa->back () = new size_t_vector;
53 }
54
55 for (index_ = 0, size_ = internals_._lookup->size ();
56 index_ < size_; ++index_)
57 {
58 internals_._lookup[index_]->resize (sizeof (CharT) == 1 ?
59 num_chars : num_wchar_ts, dead_state_index);
60
61 if (!rules_.regexes ()[index_].empty ())
62 {
63 // vector mapping token indexes to partitioned token index sets
64 index_set_vector set_mapping_;
65 // syntax tree
66 detail::node *root_ = build_tree (rules_, index_,
67 node_ptr_vector_, internals_, set_mapping_);
68
69 build_dfa (root_, set_mapping_,
70 internals_._dfa_alphabet[index_],
71 *internals_._dfa[index_]);
72
73 if (internals_._seen_BOL_assertion)
74 {
75 seen_BOL_assertion_ = true;
76 }
77
78 if (internals_._seen_EOL_assertion)
79 {
80 seen_EOL_assertion_ = true;
81 }
82
83 internals_._seen_BOL_assertion = false;
84 internals_._seen_EOL_assertion = false;
85 }
86 }
87
88 internals_._seen_BOL_assertion = seen_BOL_assertion_;
89 internals_._seen_EOL_assertion = seen_EOL_assertion_;
90 }
91
92 static void minimise (basic_state_machine<CharT> &state_machine_)
93 {
94 detail::internals &internals_ = const_cast<detail::internals &>
95 (state_machine_.data ());
96 const std::size_t machines_ = internals_._dfa->size ();
97
98 for (std::size_t i_ = 0; i_ < machines_; ++i_)
99 {
100 const std::size_t dfa_alphabet_ = internals_._dfa_alphabet[i_];
101 size_t_vector *dfa_ = internals_._dfa[i_];
102
103 if (dfa_alphabet_ != 0)
104 {
105 std::size_t size_ = 0;
106
107 do
108 {
109 size_ = dfa_->size ();
110 minimise_dfa (dfa_alphabet_, *dfa_, size_);
111 } while (dfa_->size () != size_);
112 }
113 }
114 }
115
116protected:
117 typedef detail::basic_charset<CharT> charset;
118 typedef detail::ptr_list<charset> charset_list;
119 typedef std::auto_ptr<charset> charset_ptr;
120 typedef detail::equivset equivset;
121 typedef detail::ptr_list<equivset> equivset_list;
122 typedef std::auto_ptr<equivset> equivset_ptr;
123 typedef typename charset::index_set index_set;
124 typedef std::vector<index_set> index_set_vector;
125 typedef detail::basic_parser<CharT> parser;
126 typedef typename parser::node_ptr_vector node_ptr_vector;
127 typedef std::set<const detail::node *> node_set;
128 typedef detail::ptr_vector<node_set> node_set_vector;
129 typedef std::vector<const detail::node *> node_vector;
130 typedef detail::ptr_vector<node_vector> node_vector_vector;
131 typedef typename parser::string string;
132 typedef std::pair<string, string> string_pair;
133 typedef typename parser::tokeniser::string_token string_token;
134 typedef std::deque<string_pair> macro_deque;
135 typedef std::pair<string, const detail::node *> macro_pair;
136 typedef typename parser::macro_map::iterator macro_iter;
137 typedef std::pair<macro_iter, bool> macro_iter_pair;
138 typedef typename parser::tokeniser::token_map token_map;
139
140 static detail::node *build_tree (const rules &rules_,
141 const std::size_t state_, node_ptr_vector &node_ptr_vector_,
142 detail::internals &internals_, index_set_vector &set_mapping_)
143 {
144 size_t_vector *lookup_ = internals_._lookup[state_];
145 const typename rules::string_deque_deque &regexes_ =
146 rules_.regexes ();
147 const typename rules::id_vector_deque &ids_ = rules_.ids ();
148 const typename rules::id_vector_deque &unique_ids_ =
149 rules_.unique_ids ();
150 const typename rules::id_vector_deque &states_ = rules_.states ();
151 typename rules::string_deque::const_iterator regex_iter_ =
152 regexes_[state_].begin ();
153 typename rules::string_deque::const_iterator regex_iter_end_ =
154 regexes_[state_].end ();
155 typename rules::id_vector::const_iterator ids_iter_ =
156 ids_[state_].begin ();
157 typename rules::id_vector::const_iterator unique_ids_iter_ =
158 unique_ids_[state_].begin ();
159 typename rules::id_vector::const_iterator states_iter_ =
160 states_[state_].begin ();
161 const typename rules::string &regex_ = *regex_iter_;
162 // map of regex charset tokens (strings) to index
163 token_map token_map_;
164 const typename rules::string_pair_deque &macrodeque_ =
165 rules_.macrodeque ();
166 typename parser::macro_map macromap_;
167 typename detail::node::node_vector tree_vector_;
168
169 build_macros (token_map_, macrodeque_, macromap_,
170 rules_.flags (), rules_.locale (), node_ptr_vector_,
171 internals_._seen_BOL_assertion, internals_._seen_EOL_assertion);
172
173 detail::node *root_ = parser::parse (regex_.c_str (),
174 regex_.c_str () + regex_.size (), *ids_iter_, *unique_ids_iter_,
175 *states_iter_, rules_.flags (), rules_.locale (), node_ptr_vector_,
176 macromap_, token_map_, internals_._seen_BOL_assertion,
177 internals_._seen_EOL_assertion);
178
179 ++regex_iter_;
180 ++ids_iter_;
181 ++unique_ids_iter_;
182 ++states_iter_;
183 tree_vector_.push_back (root_);
184
185 // build syntax trees
186 while (regex_iter_ != regex_iter_end_)
187 {
188 // re-declare var, otherwise we perform an assignment..!
189 const typename rules::string &regex2_ = *regex_iter_;
190
191 root_ = parser::parse (regex2_.c_str (),
192 regex2_.c_str () + regex2_.size (), *ids_iter_,
193 *unique_ids_iter_, *states_iter_, rules_.flags (),
194 rules_.locale (), node_ptr_vector_, macromap_, token_map_,
195 internals_._seen_BOL_assertion,
196 internals_._seen_EOL_assertion);
197 tree_vector_.push_back (root_);
198 ++regex_iter_;
199 ++ids_iter_;
200 ++unique_ids_iter_;
201 ++states_iter_;
202 }
203
204 if (internals_._seen_BOL_assertion)
205 {
206 // Fixup BOLs
207 typename detail::node::node_vector::iterator iter_ =
208 tree_vector_.begin ();
209 typename detail::node::node_vector::iterator end_ =
210 tree_vector_.end ();
211
212 for (; iter_ != end_; ++iter_)
213 {
214 fixup_bol (*iter_, node_ptr_vector_);
215 }
216 }
217
218 // join trees
219 {
220 typename detail::node::node_vector::iterator iter_ =
221 tree_vector_.begin ();
222 typename detail::node::node_vector::iterator end_ =
223 tree_vector_.end ();
224
225 if (iter_ != end_)
226 {
227 root_ = *iter_;
228 ++iter_;
229 }
230
231 for (; iter_ != end_; ++iter_)
232 {
233 node_ptr_vector_->push_back (static_cast<detail::selection_node *>(0));
234 node_ptr_vector_->back () = new detail::selection_node
235 (root_, *iter_);
236 root_ = node_ptr_vector_->back ();
237 }
238 }
239
240 // partitioned token list
241 charset_list token_list_;
242
243 set_mapping_.resize (token_map_.size ());
244 partition_tokens (token_map_, token_list_);
245
246 typename charset_list::list::const_iterator iter_ =
247 token_list_->begin ();
248 typename charset_list::list::const_iterator end_ =
249 token_list_->end ();
250 std::size_t index_ = 0;
251
252 for (; iter_ != end_; ++iter_, ++index_)
253 {
254 const charset *cs_ = *iter_;
255 typename charset::index_set::const_iterator set_iter_ =
256 cs_->_index_set.begin ();
257 typename charset::index_set::const_iterator set_end_ =
258 cs_->_index_set.end ();
259
260 fill_lookup (cs_->_token, lookup_, index_);
261
262 for (; set_iter_ != set_end_; ++set_iter_)
263 {
264 set_mapping_[*set_iter_].insert (index_);
265 }
266 }
267
268 internals_._dfa_alphabet[state_] = token_list_->size () + dfa_offset;
269 return root_;
270 }
271
272 static void build_macros (token_map &token_map_,
273 const macro_deque &macrodeque_,
274 typename parser::macro_map &macromap_, const regex_flags flags_,
275 const std::locale &locale_, node_ptr_vector &node_ptr_vector_,
276 bool &seen_BOL_assertion_, bool &seen_EOL_assertion_)
277 {
278 for (typename macro_deque::const_iterator iter_ =
279 macrodeque_.begin (), end_ = macrodeque_.end ();
280 iter_ != end_; ++iter_)
281 {
282 const typename rules::string &name_ = iter_->first;
283 const typename rules::string &regex_ = iter_->second;
284 detail::node *node_ = parser::parse (regex_.c_str (),
285 regex_.c_str () + regex_.size (), 0, 0, 0, flags_,
286 locale_, node_ptr_vector_, macromap_, token_map_,
287 seen_BOL_assertion_, seen_EOL_assertion_);
288 macro_iter_pair map_iter_ = macromap_.
289 insert (macro_pair (name_, static_cast<const detail::node *>
290 (0)));
291
292 map_iter_.first->second = node_;
293 }
294 }
295
296 static void build_dfa (detail::node *root_,
297 const index_set_vector &set_mapping_, const std::size_t dfa_alphabet_,
298 size_t_vector &dfa_)
299 {
300 typename detail::node::node_vector *followpos_ =
301 &root_->firstpos ();
302 node_set_vector seen_sets_;
303 node_vector_vector seen_vectors_;
304 size_t_vector hash_vector_;
305
306 // 'jam' state
307 dfa_.resize (dfa_alphabet_, 0);
308 closure (followpos_, seen_sets_, seen_vectors_,
309 hash_vector_, dfa_alphabet_, dfa_);
310
311 std::size_t *ptr_ = 0;
312
313 for (std::size_t index_ = 0; index_ < seen_vectors_->size (); ++index_)
314 {
315 equivset_list equiv_list_;
316
317 build_equiv_list (seen_vectors_[index_], set_mapping_, equiv_list_);
318
319 for (typename equivset_list::list::const_iterator iter_ =
320 equiv_list_->begin (), end_ = equiv_list_->end ();
321 iter_ != end_; ++iter_)
322 {
323 equivset *equivset_ = *iter_;
324 const std::size_t transition_ = closure
325 (&equivset_->_followpos, seen_sets_, seen_vectors_,
326 hash_vector_, dfa_alphabet_, dfa_);
327
328 if (transition_ != npos)
329 {
330 ptr_ = &dfa_.front () + ((index_ + 1) * dfa_alphabet_);
331
332 // Prune abstemious transitions from end states.
333 if (*ptr_ && !equivset_->_greedy) continue;
334
335 for (typename detail::equivset::index_vector::const_iterator
336 equiv_iter_ = equivset_->_index_vector.begin (),
337 equiv_end_ = equivset_->_index_vector.end ();
338 equiv_iter_ != equiv_end_; ++equiv_iter_)
339 {
340 const std::size_t equiv_index_ = *equiv_iter_;
341
342 if (equiv_index_ == bol_token)
343 {
344 if (ptr_[eol_index] == 0)
345 {
346 ptr_[bol_index] = transition_;
347 }
348 }
349 else if (equiv_index_ == eol_token)
350 {
351 if (ptr_[bol_index] == 0)
352 {
353 ptr_[eol_index] = transition_;
354 }
355 }
356 else
357 {
358 ptr_[equiv_index_ + dfa_offset] = transition_;
359 }
360 }
361 }
362 }
363 }
364 }
365
366 static std::size_t closure (typename detail::node::node_vector *followpos_,
367 node_set_vector &seen_sets_, node_vector_vector &seen_vectors_,
368 size_t_vector &hash_vector_, const std::size_t size_,
369 size_t_vector &dfa_)
370 {
371 bool end_state_ = false;
372 std::size_t id_ = 0;
373 std::size_t unique_id_ = npos;
374 std::size_t state_ = 0;
375 std::size_t hash_ = 0;
376
377 if (followpos_->empty ()) return npos;
378
379 std::size_t index_ = 0;
380 std::auto_ptr<node_set> set_ptr_ (new node_set);
381 std::auto_ptr<node_vector> vector_ptr_ (new node_vector);
382
383 for (typename detail::node::node_vector::const_iterator iter_ =
384 followpos_->begin (), end_ = followpos_->end ();
385 iter_ != end_; ++iter_)
386 {
387 closure_ex (*iter_, end_state_, id_, unique_id_, state_,
388 set_ptr_.get (), vector_ptr_.get (), hash_);
389 }
390
391 bool found_ = false;
392 typename size_t_vector::const_iterator hash_iter_ =
393 hash_vector_.begin ();
394 typename size_t_vector::const_iterator hash_end_ =
395 hash_vector_.end ();
396 typename node_set_vector::vector::const_iterator set_iter_ =
397 seen_sets_->begin ();
398
399 for (; hash_iter_ != hash_end_; ++hash_iter_, ++set_iter_)
400 {
401 found_ = *hash_iter_ == hash_ && *(*set_iter_) == *set_ptr_;
402 ++index_;
403
404 if (found_) break;
405 }
406
407 if (!found_)
408 {
409 seen_sets_->push_back (static_cast<node_set *>(0));
410 seen_sets_->back () = set_ptr_.release ();
411 seen_vectors_->push_back (static_cast<node_vector *>(0));
412 seen_vectors_->back () = vector_ptr_.release ();
413 hash_vector_.push_back (hash_);
414 // State 0 is the jam state...
415 index_ = seen_sets_->size ();
416
417 const std::size_t old_size_ = dfa_.size ();
418
419 dfa_.resize (old_size_ + size_, 0);
420
421 if (end_state_)
422 {
423 dfa_[old_size_] |= end_state;
424 dfa_[old_size_ + id_index] = id_;
425 dfa_[old_size_ + unique_id_index] = unique_id_;
426 dfa_[old_size_ + state_index] = state_;
427 }
428 }
429
430 return index_;
431 }
432
433 static void closure_ex (detail::node *node_, bool &end_state_,
434 std::size_t &id_, std::size_t &unique_id_, std::size_t &state_,
435 node_set *set_ptr_, node_vector *vector_ptr_, std::size_t &hash_)
436 {
437 const bool temp_end_state_ = node_->end_state ();
438
439 if (temp_end_state_)
440 {
441 if (!end_state_)
442 {
443 end_state_ = true;
444 id_ = node_->id ();
445 unique_id_ = node_->unique_id ();
446 state_ = node_->lexer_state ();
447 }
448 }
449
450 if (set_ptr_->insert (node_).second)
451 {
452 vector_ptr_->push_back (node_);
453 hash_ += reinterpret_cast<std::size_t> (node_);
454 }
455 }
456
457 static void partition_tokens (const token_map &map_,
458 charset_list &lhs_)
459 {
460 charset_list rhs_;
461
462 fill_rhs_list (map_, rhs_);
463
464 if (!rhs_->empty ())
465 {
466 typename charset_list::list::iterator iter_;
467 typename charset_list::list::iterator end_;
468 charset_ptr overlap_ (new charset);
469
470 lhs_->push_back (static_cast<charset *>(0));
471 lhs_->back () = rhs_->front ();
472 rhs_->pop_front ();
473
474 while (!rhs_->empty ())
475 {
476 charset_ptr r_ (rhs_->front ());
477
478 rhs_->pop_front ();
479 iter_ = lhs_->begin ();
480 end_ = lhs_->end ();
481
482 while (!r_->empty () && iter_ != end_)
483 {
484 typename charset_list::list::iterator l_iter_ = iter_;
485
486 (*l_iter_)->intersect (*r_.get (), *overlap_.get ());
487
488 if (overlap_->empty ())
489 {
490 ++iter_;
491 }
492 else if ((*l_iter_)->empty ())
493 {
494 delete *l_iter_;
495 *l_iter_ = overlap_.release ();
496
497 // VC++ 6 Hack:
498 charset_ptr temp_overlap_ (new charset);
499
500 overlap_ = temp_overlap_;
501 ++iter_;
502 }
503 else if (r_->empty ())
504 {
505 delete r_.release ();
506 r_ = overlap_;
507
508 // VC++ 6 Hack:
509 charset_ptr temp_overlap_ (new charset);
510
511 overlap_ = temp_overlap_;
512 break;
513 }
514 else
515 {
516 iter_ = lhs_->insert (++iter_,
517 static_cast<charset *>(0));
518 *iter_ = overlap_.release ();
519
520 // VC++ 6 Hack:
521 charset_ptr temp_overlap_ (new charset);
522
523 overlap_ = temp_overlap_;
524 ++iter_;
525 end_ = lhs_->end ();
526 }
527 }
528
529 if (!r_->empty ())
530 {
531 lhs_->push_back (static_cast<charset *>(0));
532 lhs_->back () = r_.release ();
533 }
534 }
535 }
536 }
537
538 static void fill_rhs_list (const token_map &map_,
539 charset_list &list_)
540 {
541 typename parser::tokeniser::token_map::const_iterator iter_ =
542 map_.begin ();
543 typename parser::tokeniser::token_map::const_iterator end_ =
544 map_.end ();
545
546 for (; iter_ != end_; ++iter_)
547 {
548 list_->push_back (static_cast<charset *>(0));
549 list_->back () = new charset (iter_->first, iter_->second);
550 }
551 }
552
553 static void fill_lookup (const string_token &token_,
554 size_t_vector *lookup_, const std::size_t index_)
555 {
556 const CharT *curr_ = token_._charset.c_str ();
557 const CharT *chars_end_ = curr_ + token_._charset.size ();
558 std::size_t *ptr_ = &lookup_->front ();
559 const std::size_t max_ = sizeof (CharT) == 1 ?
560 num_chars : num_wchar_ts;
561
562 if (token_._negated)
563 {
564 // $$$ FIXME JDG July 2014 $$$
565 // this code is problematic on platforms where wchar_t is signed
566 // with min generating negative numbers. This crashes with BAD_ACCESS
567 // because of the vector index below:
568 // ptr_[static_cast<typename Traits::index_type>(curr_char_)]
569 CharT curr_char_ = 0; // (std::numeric_limits<CharT>::min)();
570 std::size_t i_ = 0;
571
572 while (curr_ < chars_end_)
573 {
574 while (*curr_ > curr_char_)
575 {
576 ptr_[static_cast<typename Traits::index_type>
577 (curr_char_)] = index_ + dfa_offset;
578 ++curr_char_;
579 ++i_;
580 }
581
582 ++curr_char_;
583 ++curr_;
584 ++i_;
585 }
586
587 for (; i_ < max_; ++i_)
588 {
589 ptr_[static_cast<typename Traits::index_type>(curr_char_)] =
590 index_ + dfa_offset;
591 ++curr_char_;
592 }
593 }
594 else
595 {
596 while (curr_ < chars_end_)
597 {
598 ptr_[static_cast<typename Traits::index_type>(*curr_)] =
599 index_ + dfa_offset;
600 ++curr_;
601 }
602 }
603 }
604
605 static void build_equiv_list (const node_vector *vector_,
606 const index_set_vector &set_mapping_, equivset_list &lhs_)
607 {
608 equivset_list rhs_;
609
610 fill_rhs_list (vector_, set_mapping_, rhs_);
611
612 if (!rhs_->empty ())
613 {
614 typename equivset_list::list::iterator iter_;
615 typename equivset_list::list::iterator end_;
616 equivset_ptr overlap_ (new equivset);
617
618 lhs_->push_back (static_cast<equivset *>(0));
619 lhs_->back () = rhs_->front ();
620 rhs_->pop_front ();
621
622 while (!rhs_->empty ())
623 {
624 equivset_ptr r_ (rhs_->front ());
625
626 rhs_->pop_front ();
627 iter_ = lhs_->begin ();
628 end_ = lhs_->end ();
629
630 while (!r_->empty () && iter_ != end_)
631 {
632 typename equivset_list::list::iterator l_iter_ = iter_;
633
634 (*l_iter_)->intersect (*r_.get (), *overlap_.get ());
635
636 if (overlap_->empty ())
637 {
638 ++iter_;
639 }
640 else if ((*l_iter_)->empty ())
641 {
642 delete *l_iter_;
643 *l_iter_ = overlap_.release ();
644
645 // VC++ 6 Hack:
646 equivset_ptr temp_overlap_ (new equivset);
647
648 overlap_ = temp_overlap_;
649 ++iter_;
650 }
651 else if (r_->empty ())
652 {
653 delete r_.release ();
654 r_ = overlap_;
655
656 // VC++ 6 Hack:
657 equivset_ptr temp_overlap_ (new equivset);
658
659 overlap_ = temp_overlap_;
660 break;
661 }
662 else
663 {
664 iter_ = lhs_->insert (++iter_,
665 static_cast<equivset *>(0));
666 *iter_ = overlap_.release ();
667
668 // VC++ 6 Hack:
669 equivset_ptr temp_overlap_ (new equivset);
670
671 overlap_ = temp_overlap_;
672 ++iter_;
673 end_ = lhs_->end ();
674 }
675 }
676
677 if (!r_->empty ())
678 {
679 lhs_->push_back (static_cast<equivset *>(0));
680 lhs_->back () = r_.release ();
681 }
682 }
683 }
684 }
685
686 static void fill_rhs_list (const node_vector *vector_,
687 const index_set_vector &set_mapping_, equivset_list &list_)
688 {
689 typename node_vector::const_iterator iter_ =
690 vector_->begin ();
691 typename node_vector::const_iterator end_ =
692 vector_->end ();
693
694 for (; iter_ != end_; ++iter_)
695 {
696 const detail::node *node_ = *iter_;
697
698 if (!node_->end_state ())
699 {
700 const std::size_t token_ = node_->token ();
701
702 if (token_ != null_token)
703 {
704 list_->push_back (static_cast<equivset *>(0));
705
706 if (token_ == bol_token || token_ == eol_token)
707 {
708 std::set<std::size_t> index_set_;
709
710 index_set_.insert (token_);
711 list_->back () = new equivset (index_set_,
712 node_->greedy (), token_, node_->followpos ());
713 }
714 else
715 {
716 list_->back () = new equivset (set_mapping_[token_],
717 node_->greedy (), token_, node_->followpos ());
718 }
719 }
720 }
721 }
722 }
723
724 static void fixup_bol (detail::node * &root_,
725 node_ptr_vector &node_ptr_vector_)
726 {
727 typename detail::node::node_vector *first_ = &root_->firstpos ();
728 bool found_ = false;
729 typename detail::node::node_vector::const_iterator iter_ =
730 first_->begin ();
731 typename detail::node::node_vector::const_iterator end_ =
732 first_->end ();
733
734 for (; iter_ != end_; ++iter_)
735 {
736 const detail::node *node_ = *iter_;
737
738 found_ = !node_->end_state () && node_->token () == bol_token;
739
740 if (found_) break;
741 }
742
743 if (!found_)
744 {
745 node_ptr_vector_->push_back (static_cast<detail::leaf_node *>(0));
746 node_ptr_vector_->back () = new detail::leaf_node
747 (bol_token, true);
748
749 detail::node *lhs_ = node_ptr_vector_->back ();
750
751 node_ptr_vector_->push_back (static_cast<detail::leaf_node *>(0));
752 node_ptr_vector_->back () = new detail::leaf_node
753 (null_token, true);
754
755 detail::node *rhs_ = node_ptr_vector_->back ();
756
757 node_ptr_vector_->push_back
758 (static_cast<detail::selection_node *>(0));
759 node_ptr_vector_->back () =
760 new detail::selection_node (lhs_, rhs_);
761 lhs_ = node_ptr_vector_->back ();
762
763 node_ptr_vector_->push_back
764 (static_cast<detail::sequence_node *>(0));
765 node_ptr_vector_->back () =
766 new detail::sequence_node (lhs_, root_);
767 root_ = node_ptr_vector_->back ();
768 }
769 }
770
771 static void minimise_dfa (const std::size_t dfa_alphabet_,
772 size_t_vector &dfa_, std::size_t size_)
773 {
774 const std::size_t *first_ = &dfa_.front ();
775 const std::size_t *second_ = 0;
776 const std::size_t *end_ = first_ + size_;
777 std::size_t index_ = 1;
778 std::size_t new_index_ = 1;
779 std::size_t curr_index_ = 0;
780 index_set index_set_;
781 size_t_vector lookup_;
782 std::size_t *lookup_ptr_ = 0;
783
784 lookup_.resize (size_ / dfa_alphabet_, null_token);
785 lookup_ptr_ = &lookup_.front ();
786 *lookup_ptr_ = 0;
787 // Only one 'jam' state, so skip it.
788 first_ += dfa_alphabet_;
789
790 for (; first_ < end_; first_ += dfa_alphabet_, ++index_)
791 {
792 for (second_ = first_ + dfa_alphabet_, curr_index_ = index_ + 1;
793 second_ < end_; second_ += dfa_alphabet_, ++curr_index_)
794 {
795 if (index_set_.find (curr_index_) != index_set_.end ())
796 {
797 continue;
798 }
799
800 // Some systems have memcmp in namespace std.
801 using namespace std;
802
803 if (memcmp (first_, second_, sizeof (std::size_t) *
804 dfa_alphabet_) == 0)
805 {
806 index_set_.insert (curr_index_);
807 lookup_ptr_[curr_index_] = new_index_;
808 }
809 }
810
811 if (lookup_ptr_[index_] == null_token)
812 {
813 lookup_ptr_[index_] = new_index_;
814 ++new_index_;
815 }
816 }
817
818 if (!index_set_.empty ())
819 {
820 const std::size_t *front_ = &dfa_.front ();
821 size_t_vector new_dfa_ (front_, front_ + dfa_alphabet_);
822 typename index_set::iterator set_end_ =
823 index_set_.end ();
824 const std::size_t *ptr_ = front_ + dfa_alphabet_;
825 std::size_t *new_ptr_ = 0;
826
827 new_dfa_.resize (size_ - index_set_.size () * dfa_alphabet_, 0);
828 new_ptr_ = &new_dfa_.front () + dfa_alphabet_;
829 size_ /= dfa_alphabet_;
830
831 for (index_ = 1; index_ < size_; ++index_)
832 {
833 if (index_set_.find (index_) != set_end_)
834 {
835 ptr_ += dfa_alphabet_;
836 continue;
837 }
838
839 new_ptr_[end_state_index] = ptr_[end_state_index];
840 new_ptr_[id_index] = ptr_[id_index];
841 new_ptr_[unique_id_index] = ptr_[unique_id_index];
842 new_ptr_[state_index] = ptr_[state_index];
843 new_ptr_[bol_index] = lookup_ptr_[ptr_[bol_index]];
844 new_ptr_[eol_index] = lookup_ptr_[ptr_[eol_index]];
845 new_ptr_ += dfa_offset;
846 ptr_ += dfa_offset;
847
848 for (std::size_t i_ = dfa_offset; i_ < dfa_alphabet_; ++i_)
849 {
850 *new_ptr_++ = lookup_ptr_[*ptr_++];
851 }
852 }
853
854 dfa_.swap (new_dfa_);
855 }
856 }
857};
858
859typedef basic_generator<char> generator;
860typedef basic_generator<wchar_t> wgenerator;
861}
862}
863
864#endif