]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/libs/wave/samples/cpp_tokens/slex/lexer.hpp
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / boost / libs / wave / samples / cpp_tokens / slex / lexer.hpp
CommitLineData
7c673cae
FG
1/*=============================================================================
2 Boost.Wave: A Standard compliant C++ preprocessor library
3
4 Spirit based lexer
5
6 http://www.boost.org/
7
8 Copyright (c) 2001, Daniel C. Nuffer.
9 Copyright (c) 2001-2012 Hartmut Kaiser.
10 Distributed under the Boost Software License, Version 1.0. (See accompanying
11 file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
12
13 TODO List:
14 X callback objects (called when a match is made.)
15 X callback passed first & last iterator, and
16 a reference to a lexercontrol object that supports some
17 operations on the lexer.
18 set state
19 terminate
20 state stack (push, pop, top)
21 set new token return value
22 ignore the current token
23 yymore
24 get length of matched token
25 get current lexer state
26 X DFA serialization to save recomputing the DFA.
27
28 lexer states.
29 organize the file into hpp and ipp. arrange the classes in a logical order.
30 use buffering - iterator only needs be an input iterator,
31 lexer & callback are not templatized on iterator type, but char type.
32 next_token is templatized on iterator type.
33 beginning/ending contexts.
34 ^ and $
35 DFA minimization.
36 DFA table compression.
37
38=============================================================================*/
39#ifndef BOOST_SPIRIT_LEXER_HPP
40#define BOOST_SPIRIT_LEXER_HPP
41
42///////////////////////////////////////////////////////////////////////////////
43#include <boost/throw_exception.hpp>
44
45#include <boost/spirit/include/classic_core.hpp>
46#include <boost/spirit/include/classic_symbols.hpp>
47#include <boost/spirit/include/classic_chset.hpp>
48#include <boost/spirit/include/classic_escape_char.hpp>
49
50#include <set>
51#include <map>
52#include <vector>
53#include <stack>
54#include <utility> // for pair
55#include <iostream>
56#include <fstream>
57#include <boost/assert.hpp>
58#include <boost/limits.hpp>
59
60#if defined(BOOST_NO_STD_ITERATOR_TRAITS)
61#define BOOST_SPIRIT_IT_NS impl
62#else
63#define BOOST_SPIRIT_IT_NS std
64#endif
65
66///////////////////////////////////////////////////////////////////////////////
67namespace boost {
68namespace spirit {
69namespace classic {
70
71typedef unsigned char uchar;
72typedef unsigned int node_id_t;
73const node_id_t invalid_node = node_id_t(-1);
74typedef std::set<node_id_t> node_set;
75typedef std::vector<uchar> uchar_vector;
76typedef std::map<node_id_t, node_set> followpos_t;
77typedef std::vector<uchar_vector> state_match_t;
78
79template <typename TokenT>
80class lexer_control;
81
82class bad_regex : public std::exception
83{
84};
85
86namespace lexerimpl
87{
88
89class node
90{
91public:
92
93 virtual ~node() {}
94
95 virtual node* clone() const = 0;
96 virtual bool nullable() const = 0;
97 virtual node_set firstpos() const = 0;
98 virtual node_set lastpos() const = 0;
99 virtual void compute_followpos(followpos_t& followpos) const = 0;
100 virtual void compute_state_match(state_match_t& state_match) const = 0;
101 virtual void get_eof_ids(node_set& eof_set) const = 0;
102 virtual void assign_node_ids(node_id_t& node_count) = 0;
103#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
104 virtual void dump(std::ostream& out) const = 0;
105#endif
106
107};
108
109class char_node : public node
110{
111
112public:
113
114 char_node(const uchar c);
115 char_node(const char_node& x);
116 virtual ~char_node(){}
117
118 virtual node* clone() const;
119 virtual bool nullable() const;
120 virtual node_set firstpos() const;
121 virtual node_set lastpos() const;
122 virtual void compute_followpos(followpos_t& followpos) const;
123 virtual void compute_state_match(state_match_t& state_match ) const;
124 virtual void get_eof_ids(node_set& eof_set) const;
125 virtual void assign_node_ids(node_id_t& node_count);
126#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
127 virtual void dump(std::ostream& out) const;
128#endif
129
130private:
131
132 uchar m_char;
133 node_id_t m_node_num;
134};
135
136inline
137char_node::char_node(const uchar c)
138 : node()
139 , m_char(c)
140 , m_node_num(0)
141{
142}
143
144inline
145char_node::char_node(const char_node& x)
146 : node(x)
147 , m_char(x.m_char)
148 , m_node_num(x.m_node_num)
149{
150}
151
152inline node *
153char_node::clone() const
154{
155 return new char_node(*this);
156}
157
158inline bool
159char_node::nullable() const
160{
161 return false;
162}
163
164inline node_set
165char_node::firstpos() const
166{
167 node_set rval;
168 rval.insert(m_node_num);
169 return rval;
170}
171
172inline node_set
173char_node::lastpos() const
174{
175 return firstpos();
176}
177
178inline void
179char_node::compute_followpos(followpos_t&) const
180{
181 return;
182}
183
184inline void
185char_node::compute_state_match(state_match_t& state_match) const
186{
187 if (state_match.size() < m_node_num + 1)
188 state_match.resize(m_node_num + 1);
189 state_match[m_node_num].resize(256);
190 state_match[m_node_num][m_char] = 1;
191}
192
193inline void
194char_node::get_eof_ids(node_set&) const
195{
196 return;
197}
198
199inline void
200char_node::assign_node_ids(node_id_t& node_count)
201{
202 m_node_num = node_count++;
203}
204
205#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
206inline void
207char_node::dump(std::ostream& out) const
208{
209 out << "\nchar_node m_char = " << m_char;
210 out << " m_node_num = " << m_node_num;
211 out << " nullable() = " << (nullable() ? "true" : "false");
212 out << " firstpos() = ";
213 node_set fp = firstpos();
214 std::copy(fp.begin(), fp.end(),
215 std::ostream_iterator<node_id_t>(out, ","));
216
217 out << " lastpos() = ";
218 node_set lp = lastpos();
219 std::copy(lp.begin(), lp.end(),
220 std::ostream_iterator<node_id_t>(out, ","));
221}
222#endif
223
224
225class epsilon_node : public node
226{
227
228public:
229
230 epsilon_node();
231 epsilon_node(const epsilon_node& x);
232 virtual ~epsilon_node(){}
233
234 virtual node* clone() const;
235 virtual bool nullable() const;
236 virtual node_set firstpos() const;
237 virtual node_set lastpos() const;
238 virtual void compute_followpos(followpos_t& followpos) const;
239 virtual void compute_state_match(state_match_t& state_match ) const;
240 virtual void get_eof_ids(node_set& eof_set) const;
241 virtual void assign_node_ids(node_id_t& node_count);
242#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
243 virtual void dump(std::ostream& out) const;
244#endif
245
246private:
247
248 node_id_t m_node_num;
249};
250
251inline
252epsilon_node::epsilon_node()
253 : node()
254 , m_node_num(0)
255{
256}
257
258inline
259epsilon_node::epsilon_node(const epsilon_node& x)
260 : node(x)
261 , m_node_num(x.m_node_num)
262{
263}
264
265inline node *
266epsilon_node::clone() const
267{
268 return new epsilon_node(*this);
269}
270
271inline bool
272epsilon_node::nullable() const
273{
274 return true;
275}
276
277inline node_set
278epsilon_node::firstpos() const
279{
280 return node_set();
281}
282
283inline node_set
284epsilon_node::lastpos() const
285{
286 return node_set();
287}
288
289inline void
290epsilon_node::compute_followpos(followpos_t&) const
291{
292 return;
293}
294
295inline void
296epsilon_node::compute_state_match(state_match_t& state_match) const
297{
298 if (state_match.size() < m_node_num + 1)
299 state_match.resize(m_node_num + 1);
300 state_match[m_node_num].resize(256, 1);
301}
302
303inline void
304epsilon_node::get_eof_ids(node_set&) const
305{
306 return;
307}
308
309inline void
310epsilon_node::assign_node_ids(node_id_t& node_count)
311{
312 m_node_num = node_count++;
313}
314
315#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
316inline void
317epsilon_node::dump(std::ostream& out) const
318{
319 out << "\nepsilon_node";
320 out << " m_node_num = " << m_node_num;
321 out << " nullable() = " << (nullable() ? "true" : "false");
322 out << " firstpos() = ";
323 node_set fp = firstpos();
324 std::copy(fp.begin(), fp.end(),
325 std::ostream_iterator<node_id_t>(out, ","));
326
327 out << " lastpos() = ";
328 node_set lp = lastpos();
329 std::copy(lp.begin(), lp.end(),
330 std::ostream_iterator<node_id_t>(out, ","));
331}
332#endif
333
334
335class or_node : public node
336{
337
338public:
339
340 or_node(node* left, node* right);
341 or_node(const or_node& x);
342 virtual ~or_node(){}
343
344 virtual node* clone() const;
345 virtual bool nullable() const;
346 virtual node_set firstpos() const;
347 virtual node_set lastpos() const;
348 virtual void compute_followpos(followpos_t& followpos) const;
349 virtual void compute_state_match(state_match_t& state_match ) const;
350 virtual void get_eof_ids(node_set& eof_set) const;
351 virtual void assign_node_ids(node_id_t& node_count);
352#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
353 virtual void dump(std::ostream& out) const;
354#endif
355
356private:
357
358 std::auto_ptr<node> m_left;
359 std::auto_ptr<node> m_right;
360};
361
362inline
363or_node::or_node(node* left, node* right)
364 : node()
365 , m_left(left)
366 , m_right(right)
367{
368}
369
370inline
371or_node::or_node(const or_node& x)
372 : node(x)
373 , m_left(x.m_left->clone())
374 , m_right(x.m_right->clone())
375{
376}
377
378inline node *
379or_node::clone() const
380{
381 return new or_node(m_left->clone(), m_right->clone());
382}
383
384inline bool
385or_node::nullable() const
386{
387 return m_left->nullable() || m_right->nullable();
388}
389
390inline node_set
391or_node::firstpos() const
392{
393 node_set rval;
394 node_set l = m_left->firstpos();
395 node_set r = m_right->firstpos();
396 std::set_union(l.begin(), l.end(), r.begin(), r.end(),
397 std::inserter(rval, rval.begin()));
398 return rval;
399}
400
401inline node_set
402or_node::lastpos() const
403{
404 node_set rval;
405 node_set l = m_left->lastpos();
406 node_set r = m_right->lastpos();
407 std::set_union(l.begin(), l.end(), r.begin(), r.end(),
408 std::inserter(rval, rval.begin()));
409 return rval;
410}
411
412inline void
413or_node::compute_followpos(followpos_t& followpos) const
414{
415 m_left->compute_followpos(followpos);
416 m_right->compute_followpos(followpos);
417}
418
419inline void
420or_node::compute_state_match(state_match_t& state_match) const
421{
422 m_left->compute_state_match(state_match);
423 m_right->compute_state_match(state_match);
424}
425
426inline void
427or_node::get_eof_ids(node_set& eof_nodes) const
428{
429 m_left->get_eof_ids(eof_nodes);
430 m_right->get_eof_ids(eof_nodes);
431}
432
433inline void
434or_node::assign_node_ids(node_id_t& node_count)
435{
436 m_left->assign_node_ids(node_count);
437 m_right->assign_node_ids(node_count);
438}
439
440#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
441inline void
442or_node::dump(std::ostream& out) const
443{
444 m_left->dump(out);
445
446 out << "\nor_node";
447 out << " nullable() = " << (nullable() ? "true" : "false");
448 out << " firstpos() = ";
449 node_set fp = firstpos();
450 std::copy(fp.begin(), fp.end(),
451 std::ostream_iterator<node_id_t>(out, ","));
452
453 out << " lastpos() = ";
454 node_set lp = lastpos();
455 std::copy(lp.begin(), lp.end(),
456 std::ostream_iterator<node_id_t>(out, ","));
457
458 m_right->dump(out);
459}
460#endif
461
462
463class cat_node : public node
464{
465
466public:
467
468 cat_node(node* left, node* right);
469 cat_node(const cat_node& x);
470 virtual ~cat_node(){}
471
472 virtual node* clone() const;
473 virtual bool nullable() const;
474 virtual node_set firstpos() const;
475 virtual node_set lastpos() const;
476 virtual void compute_followpos(followpos_t& followpos) const;
477 virtual void compute_state_match(state_match_t& state_match ) const;
478 virtual void get_eof_ids(node_set& eof_set) const;
479 virtual void assign_node_ids(node_id_t& node_count);
480#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
481 virtual void dump(std::ostream& out) const;
482#endif
483
484private:
485
486 std::auto_ptr<node> m_left;
487 std::auto_ptr<node> m_right;
488};
489
490inline
491cat_node::cat_node(node* left, node* right)
492 : node()
493 , m_left(left)
494 , m_right(right)
495{
496}
497
498inline
499cat_node::cat_node(const cat_node& x)
500 : node(x)
501 , m_left(x.m_left->clone())
502 , m_right(x.m_right->clone())
503{
504}
505
506inline node *
507cat_node::clone() const
508{
509 return new cat_node(m_left->clone(), m_right->clone());
510}
511
512inline bool
513cat_node::nullable() const
514{
515 return m_left->nullable() && m_right->nullable();
516}
517
518inline node_set
519cat_node::firstpos() const
520{
521 if (m_left->nullable())
522 {
523 node_set rval;
524 node_set l = m_left->firstpos();
525 node_set r = m_right->firstpos();
526 std::set_union(l.begin(), l.end(), r.begin(), r.end(),
527 std::inserter(rval, rval.begin()));
528 return rval;
529 }
530 else
531 {
532 return m_left->firstpos();
533 }
534}
535
536inline node_set
537cat_node::lastpos() const
538{
539 if (m_right->nullable())
540 {
541 node_set rval;
542 node_set l = m_left->lastpos();
543 node_set r = m_right->lastpos();
544 std::set_union(l.begin(), l.end(), r.begin(), r.end(),
545 std::inserter(rval, rval.begin()));
546 return rval;
547 }
548 else
549 {
550 return m_right->lastpos();
551 }
552}
553
554inline void
555cat_node::compute_followpos(followpos_t& followpos) const
556{
557 node_set l = m_left->lastpos();
558 for (node_set::iterator i = l.begin();
559 i != l.end();
560 ++i)
561 {
562 node_set rf = m_right->firstpos();
563 followpos[*i].insert(rf.begin(), rf.end());
564 }
565
566 m_left->compute_followpos(followpos);
567 m_right->compute_followpos(followpos);
568}
569
570inline void
571cat_node::compute_state_match(state_match_t& state_match) const
572{
573 m_left->compute_state_match(state_match);
574 m_right->compute_state_match(state_match);
575}
576
577inline void
578cat_node::get_eof_ids(node_set& eof_nodes) const
579{
580 m_left->get_eof_ids(eof_nodes);
581 m_right->get_eof_ids(eof_nodes);
582}
583
584inline void
585cat_node::assign_node_ids(node_id_t& node_count)
586{
587 m_left->assign_node_ids(node_count);
588 m_right->assign_node_ids(node_count);
589}
590
591#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
592inline void
593cat_node::dump(std::ostream& out) const
594{
595 m_left->dump(out);
596
597 out << "\ncat_node";
598 out << " nullable() = " << (nullable() ? "true" : "false");
599 out << " firstpos() = ";
600 node_set fp = firstpos();
601 std::copy(fp.begin(), fp.end(),
602 std::ostream_iterator<node_id_t>(out, ","));
603
604 out << " lastpos() = ";
605 node_set lp = lastpos();
606 std::copy(lp.begin(), lp.end(),
607 std::ostream_iterator<node_id_t>(out, ","));
608
609 m_right->dump(out);
610}
611#endif
612
613
614class star_node : public node
615{
616
617public:
618
619 star_node(node* left);
620 star_node(const star_node& x);
621 virtual ~star_node(){}
622
623 virtual node* clone() const;
624 virtual bool nullable() const;
625 virtual node_set firstpos() const;
626 virtual node_set lastpos() const;
627 virtual void compute_followpos(followpos_t& followpos) const;
628 virtual void compute_state_match(state_match_t& state_match ) const;
629 virtual void get_eof_ids(node_set& eof_set) const;
630 virtual void assign_node_ids(node_id_t& node_count);
631#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
632 virtual void dump(std::ostream& out) const;
633#endif
634
635private:
636
637 std::auto_ptr<node> m_left;
638};
639
640inline
641star_node::star_node(node* left)
642 : node()
643 , m_left(left)
644{
645}
646
647inline
648star_node::star_node(const star_node& x)
649 : node(x)
650 , m_left(x.m_left->clone())
651{
652}
653
654inline node *
655star_node::clone() const
656{
657 return new star_node(m_left->clone());
658}
659
660inline bool
661star_node::nullable() const
662{
663 return true;
664}
665
666inline node_set
667star_node::firstpos() const
668{
669 return m_left->firstpos();
670}
671
672inline node_set
673star_node::lastpos() const
674{
675 return m_left->lastpos();
676}
677
678inline void
679star_node::compute_followpos(followpos_t& followpos) const
680{
681 node_set lp = this->lastpos();
682 for (node_set::iterator i = lp.begin();
683 i != lp.end();
684 ++i)
685 {
686 node_set fp = this->firstpos();
687 followpos[*i].insert(fp.begin(), fp.end());
688 }
689
690 m_left->compute_followpos(followpos);
691}
692
693inline void
694star_node::compute_state_match(state_match_t& state_match) const
695{
696 m_left->compute_state_match(state_match);
697}
698
699inline void
700star_node::get_eof_ids(node_set& eof_nodes) const
701{
702 m_left->get_eof_ids(eof_nodes);
703}
704
705inline void
706star_node::assign_node_ids(node_id_t& node_count)
707{
708 m_left->assign_node_ids(node_count);
709}
710
711#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
712inline void
713star_node::dump(std::ostream& out) const
714{
715 m_left->dump(out);
716
717 out << "\nstar_node";
718 out << " nullable() = " << (nullable() ? "true" : "false");
719 out << " firstpos() = ";
720 node_set fp = firstpos();
721 std::copy(fp.begin(), fp.end(),
722 std::ostream_iterator<node_id_t>(out, ","));
723
724 out << " lastpos() = ";
725 node_set lp = lastpos();
726 std::copy(lp.begin(), lp.end(),
727 std::ostream_iterator<node_id_t>(out, ","));
728
729}
730#endif
731
732
733class eof_node : public node
734{
735
736public:
737
738 eof_node();
739 eof_node(const eof_node& x);
740 virtual ~eof_node(){}
741
742 virtual node* clone() const;
743 virtual bool nullable() const;
744 virtual node_set firstpos() const;
745 virtual node_set lastpos() const;
746 virtual void compute_followpos(followpos_t& followpos) const;
747 virtual void compute_state_match(state_match_t& state_match ) const;
748 virtual void get_eof_ids(node_set& eof_set) const;
749 virtual void assign_node_ids(node_id_t& node_count);
750#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
751 virtual void dump(std::ostream& out) const;
752#endif
753
754private:
755
756 node_id_t m_node_num;
757};
758
759inline
760eof_node::eof_node()
761 : node()
762 , m_node_num(0)
763{
764}
765
766inline
767eof_node::eof_node(const eof_node& x)
768 : node(x)
769 , m_node_num(x.m_node_num)
770{
771}
772
773inline node *
774eof_node::clone() const
775{
776 return new eof_node(*this);
777}
778
779inline bool
780eof_node::nullable() const
781{
782 return false;
783}
784
785inline node_set
786eof_node::firstpos() const
787{
788 node_set rval;
789 rval.insert(m_node_num);
790 return rval;
791}
792
793inline node_set
794eof_node::lastpos() const
795{
796 node_set rval;
797 rval.insert(m_node_num);
798 return rval;
799}
800
801inline void
802eof_node::compute_followpos(followpos_t&) const
803{
804 return;
805}
806
807inline void
808eof_node::compute_state_match(state_match_t& state_match) const
809{
810 if (state_match.size() < m_node_num + 1)
811 state_match.resize(m_node_num + 1);
812 state_match[m_node_num].resize(256, 0);
813}
814
815inline void
816eof_node::get_eof_ids(node_set& eof_nodes) const
817{
818 eof_nodes.insert(m_node_num);
819}
820
821inline void
822eof_node::assign_node_ids(node_id_t& node_count)
823{
824 m_node_num = node_count++;
825}
826
827#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
828inline void
829eof_node::dump(std::ostream& out) const
830{
831 out << "\neof_node";
832 out << " m_node_num = " << m_node_num;
833 out << " nullable() = " << (nullable() ? "true" : "false");
834 out << " firstpos() = ";
835 node_set fp = firstpos();
836 std::copy(fp.begin(), fp.end(),
837 std::ostream_iterator<node_id_t>(out, ","));
838
839 out << " lastpos() = ";
840 node_set lp = lastpos();
841 std::copy(lp.begin(), lp.end(),
842 std::ostream_iterator<node_id_t>(out, ","));
843}
844#endif
845
846class ccl_node : public node
847{
848
849public:
850
851 ccl_node(const std::vector<uchar>& v);
852 ccl_node(const uchar c1, const uchar c2);
853 ccl_node(const ccl_node& x);
854 virtual ~ccl_node(){}
855
856 virtual node* clone() const;
857 virtual bool nullable() const;
858 virtual node_set firstpos() const;
859 virtual node_set lastpos() const;
860 virtual void compute_followpos(followpos_t& followpos) const;
861 virtual void compute_state_match(state_match_t& state_match ) const;
862 virtual void get_eof_ids(node_set& eof_set) const;
863 virtual void assign_node_ids(node_id_t& node_count);
864#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
865 virtual void dump(std::ostream& out) const;
866#endif
867
868private:
869
870 std::vector<uchar> m_match;
871 node_id_t m_node_num;
872};
873
874inline
875ccl_node::ccl_node(const std::vector<uchar>& v)
876 : node()
877 , m_match(v)
878 , m_node_num(0)
879{
880 m_match.resize(256); // make sure it's the right size
881}
882
883inline
884ccl_node::ccl_node(const uchar c1, const uchar c2)
885 : node()
886 , m_match(256, uchar(0))
887 , m_node_num(0)
888{
889 BOOST_ASSERT(c1 < c2);
890 for (std::size_t i = c1; i <= std::size_t(c2); ++i)
891 {
892 m_match[i] = 1;
893 }
894}
895
896inline
897ccl_node::ccl_node(const ccl_node& x)
898 : node(x)
899 , m_match(x.m_match)
900 , m_node_num(x.m_node_num)
901{
902}
903
904inline node *
905ccl_node::clone() const
906{
907 return new ccl_node(*this);
908}
909
910inline bool
911ccl_node::nullable() const
912{
913 return false;
914}
915
916inline node_set
917ccl_node::firstpos() const
918{
919 node_set rval;
920 rval.insert(m_node_num);
921 return rval;
922}
923
924inline node_set
925ccl_node::lastpos() const
926{
927 return firstpos();
928}
929
930inline void
931ccl_node::compute_followpos(followpos_t&) const
932{
933 return;
934}
935
936inline void
937ccl_node::compute_state_match(state_match_t& state_match) const
938{
939 if (state_match.size() < m_node_num + 1)
940 state_match.resize(m_node_num + 1);
941 state_match[m_node_num] = m_match;
942}
943
944inline void
945ccl_node::get_eof_ids(node_set&) const
946{
947 return;
948}
949
950inline void
951ccl_node::assign_node_ids(node_id_t& node_count)
952{
953 m_node_num = node_count++;
954}
955
956#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
957inline void
958ccl_node::dump(std::ostream& out) const
959{
960 out << "\nccl_node m_match = ";
961 for (std::size_t i = 0; i < m_match.size(); ++i)
962 {
963 if (m_match[i])
964 out << i << ", ";
965 }
966 out << " m_node_num = " << m_node_num;
967 out << " nullable() = " << (nullable() ? "true" : "false");
968 out << " firstpos() = ";
969 node_set fp = firstpos();
970 std::copy(fp.begin(), fp.end(),
971 std::ostream_iterator<node_id_t>(out, ","));
972
973 out << " lastpos() = ";
974 node_set lp = lastpos();
975 std::copy(lp.begin(), lp.end(),
976 std::ostream_iterator<node_id_t>(out, ","));
977}
978#endif
979
980template <typename ScannerT>
981class make_concat
982{
983 typedef typename ScannerT::iterator_t iterator_type;
984
985public:
986
987 make_concat(std::stack<node*>& the_stack)
988 : m_stack(the_stack)
989 {}
990
991 void operator()(iterator_type const &, iterator_type const &) const
992 {
993 node* right = m_stack.top();
994 m_stack.pop();
995 node* left = m_stack.top();
996 m_stack.pop();
997 node* newnode = new cat_node(left, right);
998 m_stack.push(newnode);
999 }
1000
1001 std::stack<node*>& m_stack;
1002};
1003
1004template <int CharTSize>
1005struct get_byte_aux;
1006
1007template<>
1008struct get_byte_aux<1>
1009{
1010 template <typename CharT>
1011 unsigned char operator()(CharT c, unsigned int byte)
1012 {
1013 BOOST_ASSERT(byte == 0);
1014 return c;
1015 }
1016};
1017
1018template<>
1019struct get_byte_aux<2>
1020{
1021 template <typename CharT>
1022 unsigned char operator()(CharT c, unsigned int byte)
1023 {
1024 static unsigned long mask[] =
1025 {
1026 0xFF00,
1027 0x00FF
1028 };
1029
1030 BOOST_ASSERT(byte < 2);
1031 return (c & mask[byte]) >> ((sizeof(c) - 1 - byte) * 8);
1032 }
1033};
1034
1035template<>
1036struct get_byte_aux<4>
1037{
1038 template <typename CharT>
1039 unsigned char operator()(CharT c, unsigned int byte)
1040 {
1041 static unsigned long mask[] =
1042 {
1043 0xFF000000,
1044 0x00FF0000,
1045 0x0000FF00,
1046 0x000000FF
1047 };
1048
1049 BOOST_ASSERT(byte < 4);
1050 return (c & mask[byte]) >> ((sizeof(c) - 1 - byte) * 8);
1051 }
1052};
1053
1054template <typename CharT>
1055inline unsigned char
1056get_byte(CharT c, unsigned int byte)
1057{
1058 return get_byte_aux<sizeof(c)>()(c, byte);
1059}
1060
1061template <typename ScannerT>
1062class make_star
1063{
1064 typedef typename ScannerT::iterator_t iterator_type;
1065
1066public:
1067 typedef
1068 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1069 char_t;
1070
1071 make_star(std::stack<node*>& the_stack)
1072 : m_stack(the_stack)
1073 {}
1074
1075 void operator()(const char_t) const
1076 {
1077 node* left = m_stack.top();
1078 m_stack.pop();
1079 node* newnode = new star_node(left);
1080 m_stack.push(newnode);
1081 }
1082
1083 std::stack<node*>& m_stack;
1084};
1085
1086template <typename ScannerT>
1087class make_or
1088{
1089 typedef typename ScannerT::iterator_t iterator_type;
1090
1091public:
1092
1093 make_or(std::stack<node*>& the_stack)
1094 : m_stack(the_stack)
1095 {}
1096
1097 void operator()(iterator_type const&, iterator_type const&) const
1098 {
1099 node* right = m_stack.top();
1100 m_stack.pop();
1101 node* left = m_stack.top();
1102 m_stack.pop();
1103 node* newnode = new or_node(left, right);
1104 m_stack.push(newnode);
1105 }
1106
1107 std::stack<node*>& m_stack;
1108};
1109
1110template <typename ScannerT>
1111class make_plus
1112{
1113 typedef typename ScannerT::iterator_t iterator_type;
1114
1115public:
1116 typedef
1117 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1118 char_t;
1119
1120 make_plus(std::stack<node*>& the_stack)
1121 : m_stack(the_stack)
1122 {}
1123
1124 void operator()(const char_t) const
1125 {
1126 node* left = m_stack.top();
1127 m_stack.pop();
1128
1129 node* copy = left->clone();
1130
1131 node* new_star = new star_node(copy);
1132 node* new_cat = new cat_node(left, new_star);
1133 m_stack.push(new_cat);
1134 }
1135
1136 std::stack<node*>& m_stack;
1137};
1138
1139template <typename ScannerT>
1140class make_optional
1141{
1142 typedef typename ScannerT::iterator_t iterator_type;
1143
1144public:
1145 typedef
1146 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1147 char_t;
1148
1149 make_optional(std::stack<node*>& the_stack)
1150 : m_stack(the_stack)
1151 {}
1152
1153 void operator()(const char_t) const
1154 {
1155 node* left = m_stack.top();
1156 m_stack.pop();
1157
1158 node* new_or = new or_node(left, new epsilon_node());
1159 m_stack.push(new_or);
1160 }
1161
1162 std::stack<node*>& m_stack;
1163};
1164
1165///////////////////////////////////////////////////////////////////////////////
1166// utility function
1167template <typename CharT>
1168inline utility::impl::range<CharT> const&
1169full_range()
1170{
1171 static utility::impl::range<CharT> full((std::numeric_limits<CharT>::min)(),
1172 (std::numeric_limits<CharT>::max)());
1173 return full;
1174}
1175
1176namespace ccl_utils
1177{
1178 template <typename char_t>
1179 inline utility::impl::range_run<char_t>
1180 negate_range_run(
1181 const utility::impl::range_run<char_t>& rr)
1182 {
1183 utility::impl::range_run<char_t> newrr;
1184 newrr.set(full_range<char_t>());
1185 for (typename utility::impl::range_run<char_t>::const_iterator iter = rr.begin();
1186 iter != rr.end(); ++iter)
1187 newrr.clear(*iter);
1188 return newrr;
1189 }
1190
1191 template <typename char_t>
1192 inline node*
1193 create_mb_node_seq(char_t c)
1194 {
1195 node* newnode = new char_node(get_byte(c, 0));
1196 for (unsigned int i = 1; i < sizeof(c); ++i)
1197 {
1198 node* cnode = new char_node(get_byte(c, i));
1199 node* top_node = new cat_node(newnode, cnode);
1200 newnode = top_node;
1201 }
1202 return newnode;
1203 }
1204
1205 template <typename char_t>
1206 inline void
1207 handle_mb_char(char_t c, bool first_time,
1208 std::stack<node*>& stack)
1209 {
1210 node* newnode = create_mb_node_seq(c);
1211
1212 if (first_time)
1213 {
1214 stack.push(newnode);
1215 }
1216 else
1217 {
1218 node* top = stack.top();
1219 stack.pop();
1220
1221 node* newtop = new or_node(top, newnode);
1222 stack.push(newtop);
1223 }
1224 }
1225
1226 // forward decl only
1227 template <typename char_t>
1228 inline void
1229 handle_mb_range(char_t c1, char_t c2, bool first_time,
1230 std::stack<node*>& stack);
1231
1232 template <typename char_t>
1233 inline void
1234 create_nodes(const utility::impl::range_run<char_t>& rr,
1235 std::stack<node*>& stack)
1236 {
1237
1238 if (sizeof(char_t) == 1)
1239 {
1240 std::vector<uchar> ccl;
1241 ccl.resize(256);
1242 for (typename utility::impl::range_run<char_t>::const_iterator iter = rr.begin();
1243 iter != rr.end(); ++iter)
1244 {
1245 for (int i = iter->first; i <= iter->last; ++i)
1246 {
1247// this is always true because of the limited datatype
1248// BOOST_ASSERT(uchar(i) < 256 && ccl.size() == 256);
1249 ccl[uchar(i)] = 1;
1250 }
1251 }
1252
1253 node* new_ccl = new ccl_node(ccl);
1254 stack.push(new_ccl);
1255 }
1256 else
1257 {
1258 bool mb_first_time = true;
1259 for (typename utility::impl::range_run<char_t>::const_iterator iter = rr.begin();
1260 iter != rr.end(); ++iter)
1261 {
1262 if (iter->first == iter->last)
1263 {
1264 handle_mb_char(iter->first, mb_first_time, stack);
1265 }
1266 else
1267 {
1268 handle_mb_range(iter->first, iter->last, mb_first_time, stack);
1269 }
1270 mb_first_time = false;
1271 }
1272 }
1273 }
1274
1275 template <typename char_t>
1276 inline std::size_t
1277 compute_differing_byte(char_t c1, char_t c2)
1278 {
1279 std::size_t rval = 0;
1280 while (rval < sizeof(c1) &&
1281 get_byte(c1, (unsigned int)rval) == get_byte(c2, (unsigned int)rval))
1282 {
1283 ++rval;
1284 }
1285 return rval;
1286 }
1287
1288 template <typename char_t>
1289 inline node*
1290 create_mb_node_type1(std::size_t j, char_t c1, char_t c2)
1291 {
1292 std::size_t diff = get_byte(c2, (unsigned int)j) -
1293 get_byte(c1, (unsigned int)j);
1294 if (diff == 1) {
1295 return 0;
1296 }
1297 else if (diff == 2) {
1298 return new char_node(get_byte(c1, (unsigned int)j)+1);
1299 }
1300 else {
1301 return new ccl_node(get_byte(c1, (unsigned int)j)+1,
1302 get_byte(c2, (unsigned int)j)-1);
1303 }
1304 }
1305
1306 template <typename char_t>
1307 inline node *
1308 create_mb_node_for_byte(std::size_t i, std::size_t j, std::size_t sizem1,
1309 std::size_t differing_byte, char_t c1, char_t c2, node* newnode)
1310 {
1311 node* cnode;
1312 if (i == sizem1 && j == differing_byte && j != sizem1)
1313 {
1314 node* tmp = create_mb_node_type1(j, c1, c2);
1315 if (tmp == 0)
1316 {
1317 delete newnode;
1318 return 0;
1319 }
1320 else
1321 cnode = tmp;
1322 }
1323 else if (i == differing_byte && j == sizem1)
1324 {
1325 if (i != sizem1) {
1326 cnode = new ccl_node(get_byte(c1, (unsigned int)j), 0xFF);
1327 }
1328 else {
1329 cnode = new ccl_node(get_byte(c1, (unsigned int)j),
1330 get_byte(c2, (unsigned int)j));
1331 }
1332 }
1333 else if (i != differing_byte && i != sizem1 &&
1334 j == (sizem1 - i + differing_byte))
1335 {
1336 cnode = new ccl_node(get_byte(c1, (unsigned int)j)+1, 0xFF);
1337 }
1338 else if (i + j - differing_byte > sizem1) {
1339 cnode = new ccl_node(0, 0xFF);
1340 }
1341 else {//if (is plain)
1342 cnode = new char_node(get_byte(c1, (unsigned int)j));
1343 }
1344
1345 node* top_node = new cat_node(newnode, cnode);
1346 return top_node;
1347 }
1348
1349// On platforms, where wchar_t is a typedef for unsigned short, the
1350// comparision for a negative value is pointless
1351 template <bool is_signed>
1352 struct correct_char_aux {
1353 };
1354
1355 template <>
1356 struct correct_char_aux<true> {
1357
1358 template <typename char_t>
1359 static char_t correct(char_t c) { if (c < 0) c = 0; return c; }
1360 };
1361
1362 template <>
1363 struct correct_char_aux<false> {
1364
1365 template <typename char_t>
1366 static char_t correct(char_t c) { return c; }
1367 };
1368
1369 template <typename char_t>
1370 struct correct_char
1371 {
1372 static char_t correct(char_t c)
1373 {
1374 return correct_char_aux<std::numeric_limits<char_t>::is_signed >::
1375 correct(c);
1376 }
1377 };
1378
1379 template <typename char_t>
1380 inline void
1381 handle_mb_range(char_t c1, char_t c2, bool first_time,
1382 std::stack<node*>& stack)
1383 {
1384 // The algorithm can't handle negative value chars, which don't make
1385 // much sense anyway. This comparision is pointless for wchar_t's on
1386 // platforms, where wchar_t is a typedef for unsigned short
1387
1388 c1 = correct_char<char_t>::correct(c1);
1389 //if (c1 < 0)
1390 // c1 = 0;
1391
1392 BOOST_ASSERT(c1 < c2);
1393 node* newnode = 0;
1394 node* savednode = 0;
1395 const std::size_t differing_byte = compute_differing_byte(c1, c2);
1396 const std::size_t sizem1 = sizeof(c1) - 1;
1397 const std::size_t ndb = sizem1 - differing_byte;
1398 for (std::size_t i = differing_byte; i < sizeof(c1); ++i)
1399 {
1400 // generate node for the first byte
1401 if (differing_byte == 0 && i == ndb)
1402 {
1403 node* tmp = create_mb_node_type1(0, c1, c2);
1404 if (tmp == 0)
1405 continue;
1406 else
1407 newnode = tmp;
1408 }
1409 else
1410 {
1411 newnode = new char_node(get_byte(c1, 0));
1412 }
1413 for (std::size_t j = 1; j < sizeof(c1); ++j)
1414 {
1415 newnode = create_mb_node_for_byte(i, j, sizem1, differing_byte,
1416 c1, c2, newnode);
1417 if (newnode == 0)
1418 goto end_outer_for;
1419 }
1420
1421 // or together the various parts
1422 if (savednode)
1423 {
1424 node* top_node = new or_node(savednode, newnode);
1425 savednode = top_node;
1426 }
1427 else
1428 {
1429 savednode = newnode;
1430 }
1431end_outer_for:
1432 continue;
1433 }
1434
1435 for (std::size_t k = 0; k < ndb; ++k)
1436 {
1437 newnode = new char_node(get_byte(c2, 0));
1438 for (std::size_t j = 1; j < sizeof(c2); ++j)
1439 {
1440 node* cnode;
1441 if (k == differing_byte && j == sizem1 && k != sizem1)
1442 cnode = new ccl_node(0, get_byte(c2, (unsigned int)j));
1443
1444 else if (k != differing_byte && k != sizem1 &&
1445 j == (sizem1 - k + differing_byte))
1446 cnode = new ccl_node(0, get_byte(c2, (unsigned int)j)-1);
1447
1448 else if (k + j - differing_byte > sizem1)
1449 cnode = new ccl_node(0, 0xFF);
1450
1451 else //if (is plain)
1452 cnode = new char_node(get_byte(c2, (unsigned int)j));
1453
1454
1455 node* top_node = new cat_node(newnode, cnode);
1456 newnode = top_node;
1457 }
1458
1459 // or together the various parts
1460 if (savednode)
1461 {
1462 node* top_node = new or_node(savednode, newnode);
1463 savednode = top_node;
1464 }
1465 else
1466 {
1467 savednode = newnode;
1468 }
1469 }
1470
1471
1472 if (first_time)
1473 {
1474 stack.push(savednode);
1475 }
1476 else
1477 {
1478 node* top = stack.top();
1479 stack.pop();
1480
1481 node* newtop = new or_node(top, savednode);
1482 stack.push(newtop);
1483 }
1484 }
1485} // namespace ccl_utils
1486
1487template <typename ScannerT>
1488class make_char
1489{
1490 typedef typename ScannerT::iterator_t iterator_type;
1491
1492public:
1493 typedef
1494 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1495 char_t;
1496
1497 make_char(std::stack<node*>& the_stack)
1498 : m_stack(the_stack)
1499 {}
1500
1501 void operator()(iterator_type const& first, iterator_type const& last) const
1502 {
1503 const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
1504 escape_char_parser<lex_escapes, char_t>();
1505 char_t the_char;
1506 iterator_type first_ = first;
1507 ScannerT scan(first_, last);
1508 lex_escape_ch[assign(the_char)].parse(scan);
1509 node* newnode = ccl_utils::create_mb_node_seq(the_char);
1510 m_stack.push(newnode);
1511 }
1512
1513 std::stack<node*>& m_stack;
1514};
1515
1516
1517template <typename ScannerT>
1518class make_ccl
1519{
1520 typedef typename ScannerT::iterator_t iterator_type;
1521
1522public:
1523 typedef
1524 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1525 char_t;
1526
1527 make_ccl(std::stack<node*>& the_stack)
1528 : m_stack(the_stack)
1529 {}
1530
1531 static bool is_equal_to_string(iterator_type first,
1532 iterator_type const & last, const char* str)
1533 {
1534 while (first != last &&*str &&*first ==*str)
1535 {
1536 ++first;
1537 ++str;
1538 }
1539 return*str == 0;
1540 }
1541
1542 template <typename ParserT>
1543 static void fill_ccl(utility::impl::range_run<char_t>& rr, const ParserT& parser)
1544 {
1545 for (int i = 0; i < 256; ++i)
1546 {
1547 if (parser.test(static_cast<char_t>(uchar(i))))
1548 rr.set(utility::impl::range<char_t>(char_t(i), char_t(i)));
1549 }
1550 }
1551
1552 void operator()(iterator_type const& first_, iterator_type const& last) const
1553 {
1554 BOOST_ASSERT(*first_ == '[');
1555
1556 iterator_type first = first_;
1557 ++first; // skip over '['
1558 bool negated_ccl = false;
1559 if (*first == '^')
1560 {
1561 negated_ccl = true;
1562 ++first;
1563 }
1564
1565 utility::impl::range_run<char_t> rr;
1566 while (first != last &&*first != ']')
1567 {
1568 if (*first == '[') // it's a ccl_expr like [:space:]
1569 {
1570 // check for [:space:], etc.
1571 if (is_equal_to_string(first, last, "[:alnum:]"))
1572 {
1573 fill_ccl(rr, alnum_p);
1574 }
1575 else if (is_equal_to_string(first, last, "[:alpha:]"))
1576 {
1577 fill_ccl(rr, alpha_p);
1578 }
1579 else if (is_equal_to_string(first, last, "[:blank:]"))
1580 {
1581 fill_ccl(rr, blank_p);
1582 }
1583 else if (is_equal_to_string(first, last, "[:cntrl:]"))
1584 {
1585 fill_ccl(rr, cntrl_p);
1586 }
1587 else if (is_equal_to_string(first, last, "[:digit:]"))
1588 {
1589 fill_ccl(rr, digit_p);
1590 }
1591 else if (is_equal_to_string(first, last, "[:graph:]"))
1592 {
1593 fill_ccl(rr, graph_p);
1594 }
1595 else if (is_equal_to_string(first, last, "[:lower:]"))
1596 {
1597 fill_ccl(rr, lower_p);
1598 }
1599 else if (is_equal_to_string(first, last, "[:print:]"))
1600 {
1601 fill_ccl(rr, print_p);
1602 }
1603 else if (is_equal_to_string(first, last, "[:punct:]"))
1604 {
1605 fill_ccl(rr, punct_p);
1606 }
1607 else if (is_equal_to_string(first, last, "[:space:]"))
1608 {
1609 fill_ccl(rr, space_p);
1610 }
1611 else if (is_equal_to_string(first, last, "[:upper:]"))
1612 {
1613 fill_ccl(rr, upper_p);
1614 }
1615 else if (is_equal_to_string(first, last, "[:xdigit:]"))
1616 {
1617 fill_ccl(rr, xdigit_p);
1618 }
1619 // this can't happen, because it's parsed before we get here.
1620 //else
1621 // throw bad_regex();
1622
1623 // Advance past the character class expression
1624 while (first != last &&*first != ']')
1625 ++first;
1626 BOOST_ASSERT(*first == ']');
1627 ++first;
1628 }
1629 else {
1630 const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
1631 escape_char_parser<lex_escapes, char_t>();
1632
1633 char_t c1;
1634 ScannerT scan(first, last);
1635 lex_escape_ch[assign(c1)].parse(scan);
1636 if (*scan.first == '-') // insert a range
1637 {
1638 ++scan.first;
1639 char_t c2;
1640 lex_escape_ch[assign(c2)].parse(scan);
1641 BOOST_ASSERT(c1 < c2); // Throw exception?
1642 rr.set(utility::impl::range<char_t>(c1, c2));
1643 }
1644 else // insert 1 char
1645 {
1646 rr.set(utility::impl::range<char_t>(c1, c1));
1647 }
1648 }
1649 }
1650
1651 if (negated_ccl)
1652 {
1653 rr = ccl_utils::negate_range_run(rr);
1654 }
1655
1656 ccl_utils::create_nodes(rr, m_stack);
1657 }
1658
1659 std::stack<node*>& m_stack;
1660};
1661
1662template <typename ScannerT>
1663class make_any_char
1664{
1665 typedef typename ScannerT::iterator_t iterator_type;
1666
1667public:
1668 typedef
1669 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1670 char_t;
1671
1672 std::stack<node*>& m_stack;
1673
1674 make_any_char(std::stack<node*>& the_stack)
1675 : m_stack(the_stack)
1676 {}
1677
1678 void operator()(const char_t c) const
1679 {
1680 BOOST_ASSERT(c == '.');
1681 do_any_char();
1682 }
1683
1684 void do_any_char() const
1685 {
1686 static utility::impl::range_run<char_t> rr;
1687 rr.set(full_range<char_t>());
1688 char_t newline = '\n';
1689 rr.clear(utility::impl::range<char_t>(newline, newline));
1690
1691 ccl_utils::create_nodes(rr, m_stack);
1692 }
1693};
1694
1695template <typename ScannerT>
1696class make_string
1697{
1698 typedef typename ScannerT::iterator_t iterator_type;
1699
1700public:
1701 typedef
1702 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1703 char_t;
1704
1705 std::stack<node*>& m_stack;
1706
1707 make_string(std::stack<node*>& the_stack)
1708 : m_stack(the_stack)
1709 {}
1710
1711 void operator()(iterator_type const& first, iterator_type const& last) const
1712 {
1713 BOOST_ASSERT(*first == '"');
1714
1715 iterator_type first_ = first;
1716 ScannerT scan(first_, last);
1717 ++scan.first; // skip over '"'
1718
1719 // empty string not allowed
1720 if (*scan.first == '"')
1721 {
1722 boost::throw_exception(bad_regex());
1723 }
1724
1725 const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
1726 escape_char_parser<lex_escapes, char_t>();
1727
1728 char_t c;
1729 lex_escape_ch[assign(c)].parse(scan);
1730 node* top_node = ccl_utils::create_mb_node_seq(c);
1731
1732 while (*scan.first != '"' && scan.first != scan.last)
1733 {
1734 lex_escape_ch[assign(c)].parse(scan);
1735 node* cur_node = ccl_utils::create_mb_node_seq(c);
1736 top_node = new cat_node(top_node, cur_node);
1737 }
1738 m_stack.push(top_node);
1739 }
1740};
1741
1742inline
1743node* repeat_node(node* n, int num)
1744{
1745 node* list_of_nodes = n;
1746 for (int i = 1; i < num; ++i)
1747 {
1748 list_of_nodes = new cat_node(list_of_nodes, n->clone());
1749 }
1750 return list_of_nodes;
1751}
1752
1753inline
1754node* optional_node(node* n)
1755{
1756 return new or_node(n, new epsilon_node());
1757}
1758
1759template <typename ScannerT>
1760class make_rep1
1761{
1762 typedef typename ScannerT::iterator_t iterator_type;
1763
1764public:
1765 typedef
1766 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1767 char_t;
1768
1769 std::stack<node*>& m_stack;
1770
1771 make_rep1(std::stack<node*>& the_stack)
1772 : m_stack(the_stack)
1773 {}
1774
1775 void operator()(iterator_type const& first, iterator_type const& last) const
1776 {
1777 BOOST_ASSERT(*first == '{');
1778
1779 iterator_type first_ = first;
1780 ScannerT scan(first_, last);
1781 ++scan.first; // skip over '{'
1782
1783 unsigned int count;
1784 uint_p[assign(count)].parse(scan);
1785 if (count == 0)
1786 boost::throw_exception(bad_regex());
1787
1788 node* top_node = m_stack.top();
1789 m_stack.pop();
1790 top_node = repeat_node(top_node, count);
1791 m_stack.push(top_node);
1792 }
1793};
1794
1795template <typename ScannerT>
1796class make_rep2
1797{
1798 typedef typename ScannerT::iterator_t iterator_type;
1799
1800public:
1801 typedef
1802 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1803 char_t;
1804
1805 std::stack<node*>& m_stack;
1806
1807 make_rep2(std::stack<node*>& the_stack)
1808 : m_stack(the_stack)
1809 {}
1810
1811 void operator()(iterator_type const& first, iterator_type const& last) const
1812 {
1813 BOOST_ASSERT(*first == '{');
1814
1815 iterator_type first_ = first;
1816 ScannerT scan (first_, last);
1817 ++scan.first; // skip over '{'
1818
1819 unsigned int count;
1820 uint_p[assign(count)].parse(scan);
1821 if (count == 0)
1822 boost::throw_exception(bad_regex());
1823
1824 node* top_node = m_stack.top();
1825 m_stack.pop();
1826 top_node = new cat_node(repeat_node(top_node, count),
1827 new star_node(top_node->clone()));
1828 m_stack.push(top_node);
1829
1830 }
1831};
1832
1833template <typename ScannerT>
1834class make_rep3
1835{
1836 typedef typename ScannerT::iterator_t iterator_type;
1837
1838public:
1839 typedef
1840 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1841 char_t;
1842
1843 std::stack<node*>& m_stack;
1844
1845 make_rep3(std::stack<node*>& the_stack)
1846 : m_stack(the_stack)
1847 {}
1848
1849 void operator()(iterator_type const& first, iterator_type const& last) const
1850 {
1851 BOOST_ASSERT(*first == '{');
1852
1853 iterator_type first_ = first;
1854 ScannerT scan(first_, last);
1855 ++scan.first; // skip over '{'
1856
1857 unsigned int count1, count2;
1858 uint_p[assign(count1)].parse(scan);
1859 if (count1 == 0)
1860 boost::throw_exception(bad_regex());
1861
1862 ++scan.first; // skip over ','
1863
1864 uint_p[assign(count2)].parse(scan);
1865 if (count2 <= count1)
1866 boost::throw_exception(bad_regex());
1867
1868 node* top_node = m_stack.top();
1869 m_stack.pop();
1870 node* repeats = repeat_node(top_node, count1);
1871 top_node = new cat_node(repeats,
1872 repeat_node(optional_node(top_node->clone()),
1873 count2 - count1));
1874
1875 m_stack.push(top_node);
1876 }
1877};
1878
1879///////////////////////////////////////////////////////////////////////////////
1880//
1881// Lexer grammar
1882//
1883// Defines the grammar, which mandates the syntax of the understood
1884// lexeme definitions passed to lexer::register_regex.
1885//
1886///////////////////////////////////////////////////////////////////////////////
1887class lexer_grammar : public boost::spirit::classic::grammar<lexer_grammar>
1888{
1889public:
1890 lexer_grammar(std::stack<node*> &node_stack_)
1891 : node_stack(node_stack_) {}
1892
1893 template <typename ScannerT>
1894 struct definition
1895 {
1896 typedef rule<ScannerT> rule_t;
1897 typedef typename ScannerT::iterator_t iterator_type;
1898 typedef
1899 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1900 char_t;
1901
1902 rule_t regex, re, series, singleton, singleton2, fullccl, ccl, string,
1903 escseq, ccl_char;
1904 symbols<> ccl_expr;
1905
1906 definition(lexer_grammar const &self)
1907 {
1908 regex =
1909 re >> !('/' >> re) >> !ch_p('$')
1910 ;
1911
1912 re =
1913 series
1914 >>*( ('|' >> series)[make_or<ScannerT>(self.node_stack)] )
1915 ;
1916
1917 series =
1918 singleton
1919 >>*( singleton[make_concat<ScannerT>(self.node_stack)] )
1920 ;
1921
1922 singleton =
1923 ch_p('.')[make_any_char<ScannerT>(self.node_stack)]
1924 >> singleton2
1925 | fullccl
1926 >> singleton2
1927 | ('"' >> string >> '"')
1928 [
1929 make_string<ScannerT>(self.node_stack)
1930 ]
1931 >> singleton2
1932 | '(' >> re >> ')'
1933 >> singleton2
1934 | ((anychar_p - chset<>("/|*+?.(){}\\")) | escseq)
1935 [
1936 make_char<ScannerT>(self.node_stack)
1937 ]
1938 >> singleton2
1939 ;
1940
1941 singleton2 =
1942 ch_p('*')[make_star<ScannerT>(self.node_stack)]
1943 >> singleton2
1944 | ch_p('+')[make_plus<ScannerT>(self.node_stack)]
1945 >> singleton2
1946 | ch_p('?')[make_optional<ScannerT>(self.node_stack)]
1947 >> singleton2
1948 | ('{' >> uint_p >> '}')
1949 [
1950 make_rep1<ScannerT>(self.node_stack)
1951 ]
1952 >> singleton2
1953 | ('{' >> uint_p >> ',' >> '}')
1954 [
1955 make_rep2<ScannerT>(self.node_stack)
1956 ]
1957 >> singleton2
1958 | ('{' >> uint_p >> ',' >> uint_p >> '}')
1959 [
1960 make_rep3<ScannerT>(self.node_stack)
1961 ]
1962 >> singleton2
1963 | epsilon_p
1964 ;
1965
1966 fullccl =
1967 ('[' >> !ch_p('^') >> ccl >> ']')
1968 [
1969 make_ccl<ScannerT>(self.node_stack)
1970 ]
1971 ;
1972
1973 ccl =
1974 *(ccl_expr | (ccl_char >> !('-' >> ccl_char)))
1975 ;
1976
1977 ccl_char =
1978 ( (anychar_p - chset<>("\\\n]")) | escseq )
1979 ;
1980
1981 ccl_expr =
1982 "[:alnum:]",
1983 "[:alpha:]",
1984 "[:blank:]",
1985 "[:cntrl:]",
1986 "[:digit:]",
1987 "[:graph:]",
1988 "[:lower:]",
1989 "[:print:]",
1990 "[:punct:]",
1991 "[:space:]",
1992 "[:upper:]",
1993 "[:xdigit:]"
1994 ;
1995
1996 string =
1997 +( (anychar_p - chset<>("\"\\")) | escseq )
1998 ;
1999
2000 typedef
2001 uint_parser<char_t, 8, 1,
2002 std::numeric_limits<char_t>::digits / 3 + 1
2003 > oct_parser_t;
2004 typedef
2005 uint_parser<char_t, 16, 1,
2006 std::numeric_limits<char_t>::digits / 4 + 1
2007 > hex_parser_t;
2008
2009 escseq =
2010 ch_p('\\')
2011 >> (
2012 oct_parser_t()
2013 | as_lower_d['x'] >> hex_parser_t()
2014 | (anychar_p - chset<>('\n'))
2015 )
2016 ;
2017
2018#define BOOST_SLEX_DEBUG (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
2019
2020 BOOST_SPIRIT_DEBUG_TRACE_RULE(regex, BOOST_SLEX_DEBUG);
2021 BOOST_SPIRIT_DEBUG_TRACE_RULE(re, BOOST_SLEX_DEBUG);
2022 BOOST_SPIRIT_DEBUG_TRACE_RULE(series, BOOST_SLEX_DEBUG);
2023 BOOST_SPIRIT_DEBUG_TRACE_RULE(singleton, BOOST_SLEX_DEBUG);
2024 BOOST_SPIRIT_DEBUG_TRACE_RULE(singleton2, BOOST_SLEX_DEBUG);
2025 BOOST_SPIRIT_DEBUG_TRACE_RULE(fullccl, BOOST_SLEX_DEBUG);
2026 BOOST_SPIRIT_DEBUG_TRACE_RULE(ccl, BOOST_SLEX_DEBUG);
2027 BOOST_SPIRIT_DEBUG_TRACE_RULE(string, BOOST_SLEX_DEBUG);
2028 BOOST_SPIRIT_DEBUG_TRACE_RULE(escseq, BOOST_SLEX_DEBUG);
2029 BOOST_SPIRIT_DEBUG_TRACE_RULE(ccl_char, BOOST_SLEX_DEBUG);
2030
2031#undef BOOST_SLEX_DEBUG
2032 }
2033
2034 rule<ScannerT> const&
2035 start() const { return regex; }
2036 };
2037
2038 std::stack<node*> &node_stack;
2039
2040}; // class lexer_grammar
2041
2042template <typename StringT>
2043inline node *
2044parse(lexer_grammar& g, StringT const& str)
2045{
2046 typedef
2047 scanner<typename StringT::const_iterator, scanner_policies<> >
2048 scanner_t;
2049 typedef typename rule<scanner_t>::template result<scanner_t>::type
2050 result_t;
2051
2052 typename StringT::const_iterator first = str.begin();
2053 typename StringT::const_iterator last = str.end();
2054
2055 scanner_t scan(first, last);
2056// typename rule<scanner_t>::result_t hit = g.parse(scan);
2057 result_t hit = g.parse(scan);
2058 if (!hit || !scan.at_end())
2059 {
2060 while (g.node_stack.size())
2061 {
2062 delete g.node_stack.top();
2063 g.node_stack.pop();
2064 }
2065 return 0;
2066 }
2067
2068 BOOST_ASSERT(g.node_stack.size() == 1);
2069 node* rval = g.node_stack.top();
2070 g.node_stack.pop();
2071 node* an_eof_node = new eof_node();
2072 rval = new cat_node(rval, an_eof_node);
2073 return rval;
2074}
2075
2076inline
2077void make_case_insensitive(state_match_t& state_match)
2078{
2079 // TODO: Fix this.
2080 // This doesn't take into account foreign languages, figure out how to
2081 // do that. Also this approach is broken for this implementation of
2082 // wide chars.
2083 for (state_match_t::iterator iter = state_match.begin();
2084 iter != state_match.end(); ++iter)
2085 {
2086 int i, j;
2087 for (i = 'A', j = 'a'; i <= 'Z'; ++i, ++j)
2088 {
2089 // if either is set, turn them both on
2090 (*iter)[i] = (*iter)[j] = uchar((*iter)[i] | (*iter)[j]);
2091 }
2092 }
2093}
2094
2095template<bool wide_char>
2096struct regex_match_helper;
2097
2098template<>
2099struct regex_match_helper<false> // single byte char
2100{
2101 template <typename DfaT, typename IteratorT>
2102 static bool
2103 do_match(DfaT const& dfa, IteratorT &first, IteratorT const& last,
2104 int& regex_index,
2105 std::basic_string<
2106 typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2107 > *token)
2108 {
2109 typedef std::basic_string<
2110 typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2111 > string_type;
2112 typedef typename string_type::size_type size_type;
2113
2114 node_id_t s = 0;
2115 node_id_t last_accepting_index = invalid_node;
2116 IteratorT p = first;
2117 IteratorT last_accepting_cpos = first;
2118 while (p != last)
2119 {
2120 s = dfa.transition_table[s][(uchar)*p];
2121 if (s == invalid_node)
2122 break;
2123 if (token) token->append((size_type)1, *p);
2124 ++p;
2125 if (dfa.acceptance_index[s] != invalid_node)
2126 {
2127 last_accepting_index = s;
2128 last_accepting_cpos = p;
2129 }
2130 }
2131 if (last_accepting_index != invalid_node)
2132 {
2133#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
2134 std::cout << "dfa.acceptance_index[" << last_accepting_index << "] = " <<
2135 dfa.acceptance_index[last_accepting_index] << '\n';
2136#endif
2137
2138 first = last_accepting_cpos;
2139 regex_index = dfa.acceptance_index[last_accepting_index];
2140 return true;
2141 }
2142 else
2143 return false;
2144 }
2145};
2146
2147template<>
2148struct regex_match_helper<true> // wide char
2149{
2150 template <typename DfaT, typename IteratorT>
2151 static bool
2152 do_match(DfaT const& dfa, IteratorT &first, IteratorT const& last,
2153 int& regex_index,
2154 std::basic_string<
2155 typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2156 > *token)
2157 {
2158 typedef
2159 typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2160 char_t;
2161 typedef std::basic_string<char_t> string_type;
2162 typedef typename string_type::size_type size_type;
2163
2164 node_id_t s = 0;
2165 node_id_t last_accepting_index = invalid_node;
2166 IteratorT wp = first;
2167 IteratorT last_accepting_cpos = first;
2168
2169 while (wp != last)
2170 {
2171 for (unsigned int i = 0; i < sizeof(char_t); ++i)
2172 {
2173 s = dfa.transition_table[s][get_byte(*wp,i)];
2174 if (s == invalid_node)
2175 {
2176 goto break_while;
2177 }
2178 }
2179 if (token) token->append((size_type)1, *wp);
2180 ++wp;
2181 if (dfa.acceptance_index[s] != invalid_node)
2182 {
2183 last_accepting_index = s;
2184 last_accepting_cpos = wp;
2185 }
2186
2187 }
2188
2189 break_while:
2190 if (last_accepting_index != invalid_node)
2191 {
2192#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
2193 std::cout << "dfa.acceptance_index[" << last_accepting_index << "] = " <<
2194 dfa.acceptance_index[last_accepting_index] << '\n';
2195#endif
2196 first = last_accepting_cpos;
2197 regex_index = dfa.acceptance_index[last_accepting_index];
2198
2199 return true;
2200 }
2201 else
2202 return false;
2203 }
2204};
2205
2206template <typename DfaT, typename IteratorT, bool wide_char>
2207struct regex_match
2208{
2209 static bool
2210 do_match(DfaT const& dfa, IteratorT &first, IteratorT const& last,
2211 int& regex_index,
2212 std::basic_string<
2213 typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2214 > *token)
2215 {
2216 return regex_match_helper<wide_char>::do_match(
2217 dfa, first, last, regex_index, token);
2218 }
2219};
2220
2221} // namespace lexerimpl
2222
2223///////////////////////////////////////////////////////////////////////////////
2224//
2225template <typename IteratorT = char const*, typename TokenT = int,
2226 typename CallbackT = void(*)(IteratorT const &,
2227 IteratorT &,
2228 IteratorT const&,
2229 TokenT const&,
2230 lexer_control<TokenT> &)>
2231class lexer
2232{
2233public:
2234 typedef CallbackT callback_t;
2235 typedef
2236 typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2237 char_t;
2238
2239 struct regex_info
2240 {
2241 std::basic_string<char_t> str;
2242 TokenT token;
2243 CallbackT callback;
2244
2245 regex_info(const std::basic_string<char_t>& _str,
2246 const TokenT& _token,
2247 const CallbackT& _callback)
2248 : str(_str)
2249 , token(_token)
2250 , callback(_callback)
2251 {}
2252
2253 };
2254
2255 struct dfa_table
2256 {
2257 std::vector<std::vector<node_id_t> > transition_table;
2258 std::vector<node_id_t> acceptance_index;
2259 };
2260 typedef std::vector<node_id_t> node_table_t;
2261 typedef std::vector<node_table_t> transition_table_t;
2262 typedef std::vector<dfa_table> dfa_t;
2263
2264
2265 lexer(unsigned int states = 1);
2266
2267 void register_regex(const std::basic_string<char_t>& regex,
2268 const TokenT& id, const CallbackT& cb = CallbackT(),
2269 unsigned int state = 0);
2270
2271 TokenT next_token(IteratorT &first, IteratorT const &last,
2272 std::basic_string<char_t> *token = 0);
2273
2274 void create_dfa();
2275 bool has_compiled_dfa() { return m_compiled_dfa; }
2276
2277 void set_case_insensitive(bool insensitive);
2278
2279#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
2280 void dump(std::ostream& out);
2281#endif
2282 typedef std::vector<std::vector<regex_info> > regex_list_t;
2283
2284 bool load (std::ifstream &in, long unique_id = 0);
2285 bool save (std::ofstream &out, long unique_id = 0);
2286 enum {
2287 SLEX_SIGNATURE = 0x58454C53, // "SLEX"
2288 SLEX_VERSION_100 = 0x0100, // persistance version
2289 SLEX_LAST_KNOWN_VERSION = SLEX_VERSION_100,
2290 SLEX_MINOR_VERSION_MASK = 0xFF
2291 };
2292
2293private:
2294
2295 void create_dfa_for_state(int state);
2296
2297 static bool regex_match(const dfa_t& dfa, IteratorT& first,
2298 IteratorT& last, int& regex_index);
2299
2300 mutable std::stack<lexerimpl::node*> node_stack;
2301 lexerimpl::lexer_grammar g;
2302
2303 mutable bool m_compiled_dfa;
2304 mutable dfa_t m_dfa;
2305
2306 regex_list_t m_regex_list;
2307 bool m_case_insensitive;
2308
2309 unsigned int m_state;
2310 std::stack<unsigned int> m_state_stack;
2311 unsigned int m_num_states;
2312};
2313
2314
2315template <typename IteratorT, typename TokenT, typename CallbackT>
2316inline
2317lexer<IteratorT, TokenT, CallbackT>::lexer(unsigned int states)
2318 : g(node_stack)
2319 , m_compiled_dfa(false)
2320 , m_regex_list(states)
2321 , m_case_insensitive(false)
2322 , m_state(0)
2323 , m_state_stack()
2324 , m_num_states(states)
2325{
2326 BOOST_SPIRIT_DEBUG_TRACE_NODE_NAME(g, "slex::lexer",
2327 BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX);
2328}
2329
2330template <typename IteratorT, typename TokenT, typename CallbackT>
2331inline void
2332lexer<IteratorT, TokenT, CallbackT>::register_regex(
2333 const std::basic_string<char_t>& regex, const TokenT& id,
2334 const CallbackT& callback, unsigned int state)
2335{
2336 if (state > m_num_states) {
2337 m_regex_list.resize(state);
2338 m_num_states = state;
2339 }
2340 m_regex_list[state].push_back(regex_info(regex, id, callback));
2341}
2342
2343template <typename IteratorT, typename TokenT, typename CallbackT>
2344inline TokenT
2345lexer<IteratorT, TokenT, CallbackT>::next_token(
2346 IteratorT &first, IteratorT const& last,
2347 std::basic_string<
2348 typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2349 > *token)
2350{
2351 if (!m_compiled_dfa)
2352 {
2353 create_dfa();
2354 }
2355
2356 IteratorT saved = first;
2357 int regex_index;
2358 if (!lexerimpl::regex_match<dfa_table, IteratorT, (sizeof(char_t) > 1)>::
2359 do_match(m_dfa[m_state], first, last, regex_index, token))
2360 return -1; // TODO: can't return -1, need to return some invalid token.
2361 // how to figure this out? We can use traits I guess.
2362 else
2363 {
2364 regex_info regex = m_regex_list[m_state][regex_index];
2365 TokenT rval = regex.token;
2366 if (regex.callback)
2367 {
2368 // execute corresponding callback
2369 lexer_control<TokenT> controller(rval, m_state, m_state_stack);
2370 regex.callback(saved, first, last, regex.token, controller);
2371 if (controller.ignore_current_token_set()) {
2372 if (token)
2373 token->erase();
2374 return next_token(first, last, token);
2375 }
2376 }
2377 return rval;
2378 }
2379}
2380
2381namespace lexerimpl
2382{
2383
2384inline
2385bool find_acceptance_state(const node_set& eof_node_ids,
2386 const node_set& current_set,
2387 node_id_t& acceptance_node_id)
2388{
2389 for(node_set::const_iterator nsi = eof_node_ids.begin();
2390 nsi != eof_node_ids.end(); ++nsi)
2391 {
2392 node_id_t eof_node_id =*nsi;
2393 if (current_set.end() != current_set.find(eof_node_id))
2394 {
2395 // store the first one we come to as the
2396 // matched pattern
2397 acceptance_node_id = eof_node_id;
2398 // don't bother searching for more
2399 return true;
2400 }
2401 }
2402 return false;
2403}
2404
2405template <typename RegexListT, typename GrammarT>
2406inline std::auto_ptr<node>
2407parse_regexes(const RegexListT& regex_list, GrammarT& g)
2408{
2409 // parse the expressions into a tree
2410 if (regex_list.begin() == regex_list.end())
2411 boost::throw_exception(bad_regex());
2412
2413 typename RegexListT::const_iterator ri = regex_list.begin();
2414 std::auto_ptr<node> tree(lexerimpl::parse(g, (*ri).str));
2415 if (tree.get() == 0)
2416 boost::throw_exception(bad_regex());
2417
2418 ++ri;
2419 for (/**/; ri != regex_list.end(); ++ri)
2420 {
2421 std::auto_ptr<node> next_tree(lexerimpl::parse(g, (*ri).str));
2422 if (next_tree.get() == 0)
2423 boost::throw_exception(bad_regex());
2424 std::auto_ptr<node> newnode(new or_node(tree.release(), next_tree.release()));
2425 tree = newnode;
2426 }
2427 return tree;
2428}
2429
2430} //namespace lexerimpl
2431
2432template <typename IteratorT, typename TokenT, typename CallbackT>
2433inline void
2434lexer<IteratorT, TokenT, CallbackT>::create_dfa()
2435{
2436 m_dfa.resize(m_num_states);
2437 for (unsigned int i = 0; i < m_num_states; ++i)
2438 create_dfa_for_state(i);
2439}
2440
2441// Algorithm from Compilers: Principles, Techniques, and Tools p. 141
2442template <typename IteratorT, typename TokenT, typename CallbackT>
2443inline void
2444lexer<IteratorT, TokenT, CallbackT>::create_dfa_for_state(int state)
2445{
2446 using lexerimpl::node;
2447 std::auto_ptr<node> tree = lexerimpl::parse_regexes(m_regex_list[state], g);
2448 node_id_t dummy = 0;
2449 tree->assign_node_ids(dummy);
2450
2451#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
2452 tree->dump(std::cout);
2453#endif
2454
2455 // compute followpos(root)
2456 followpos_t followpos;
2457 tree->compute_followpos(followpos);
2458
2459 // the dfa states <-> nfa state groups
2460 std::map<node_set, node_id_t> dstates1;
2461 std::map<node_id_t, node_set> dstates2;
2462
2463 // the dfa transitions
2464 m_dfa[state].transition_table.push_back(
2465 std::vector<node_id_t>(256, invalid_node));
2466 m_dfa[state].acceptance_index.push_back(invalid_node);
2467
2468 // whether the dfa state has been processed yet
2469 std::vector<node_id_t> marked;
2470
2471 // used to give a unique id to each dfa state
2472 node_id_t num_states = 0;
2473
2474 // initially, the only unmarked state in Dstates is firstpos(root).
2475 marked.push_back(0);
2476 node_set fpr = tree->firstpos();
2477 dstates1[fpr] = 0;
2478 dstates2[0] = fpr;
2479 state_match_t state_match;
2480 tree->compute_state_match(state_match);
2481
2482 if (m_case_insensitive)
2483 lexerimpl::make_case_insensitive(state_match);
2484
2485 node_set eof_node_ids;
2486 tree->get_eof_ids(eof_node_ids);
2487 // translate the eof_node_ids into a 0-based index
2488 std::map<node_id_t, node_id_t> eof_node_id_map;
2489 unsigned int x = 0;
2490 for (node_set::iterator node_id_it = eof_node_ids.begin();
2491 node_id_it != eof_node_ids.end();
2492 ++node_id_it)
2493 {
2494 eof_node_id_map[*node_id_it] = x++;
2495 }
2496
2497 // figure out if this is an acceptance state
2498 node_id_t eof_node_id;
2499 if (lexerimpl::find_acceptance_state(eof_node_ids, fpr, eof_node_id))
2500 {
2501 m_dfa[state].acceptance_index[0] = eof_node_id_map[eof_node_id];
2502 }
2503
2504 std::vector<node_id_t>::iterator i = std::find(marked.begin(), marked.end(),
2505 node_id_t(0));
2506 while (marked.end() != i)
2507 {
2508 *i = 1;
2509 node_id_t T = node_id_t(std::distance(marked.begin(), i));
2510 BOOST_ASSERT(T < dstates2.size());
2511 node_set Tstates = dstates2[T];
2512 for (node_id_t j = 0; j < 256; ++j)
2513 {
2514 node_set U;
2515 for (node_set::iterator k = Tstates.begin();
2516 k != Tstates.end(); ++k)
2517 {
2518 node_id_t p =*k;
2519 BOOST_ASSERT(p < state_match.size());
2520 BOOST_ASSERT(j < state_match[p].size());
2521 if (state_match[p][j])
2522 {
2523 node_set fpp = followpos[p];
2524 U.insert(fpp.begin(), fpp.end());
2525 }
2526 }
2527 if (U.size() > 0)
2528 {
2529 std::map<node_set, node_id_t>::iterator l = dstates1.find(U);
2530 node_id_t target_state;
2531 if (l == dstates1.end()) // not in the states yet
2532 {
2533 ++num_states;
2534 dstates1[U] = target_state = num_states;
2535 dstates2[target_state] = U;
2536 marked.push_back(0);
2537 m_dfa[state].transition_table.push_back(
2538 std::vector<node_id_t>(256, invalid_node));
2539 m_dfa[state].acceptance_index.push_back(invalid_node);
2540 // figure out if this is an acceptance state
2541 node_id_t eof_node_id;
2542 if (lexerimpl::find_acceptance_state(eof_node_ids, U, eof_node_id))
2543 {
2544 m_dfa[state].acceptance_index[target_state] =
2545 eof_node_id_map[eof_node_id];
2546 }
2547 }
2548 else
2549 {
2550 target_state = dstates1[U];
2551 }
2552
2553 BOOST_ASSERT(T < m_dfa[state].transition_table.size());
2554 BOOST_ASSERT(j < m_dfa[state].transition_table[T].size());
2555 m_dfa[state].transition_table[T][j] = target_state;
2556 }
2557
2558 }
2559
2560 i = std::find(marked.begin(), marked.end(), node_id_t(0));
2561 }
2562 m_compiled_dfa = true;
2563}
2564
2565template <typename IteratorT, typename TokenT, typename CallbackT>
2566inline void
2567lexer<IteratorT, TokenT, CallbackT>::set_case_insensitive(bool insensitive)
2568{
2569 m_case_insensitive = insensitive;
2570}
2571
2572
2573#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
2574template <typename IteratorT, typename TokenT, typename CallbackT>
2575inline void
2576lexer<IteratorT, TokenT, CallbackT>::dump(std::ostream& out)
2577{
2578 for (unsigned x = 0; x < m_dfa.size(); ++x)
2579 {
2580 out << "\nm_dfa[" << x << "] has " << m_dfa[x].transition_table.size() << " states\n";
2581 for (node_id_t i = 0; i < m_dfa[x].transition_table.size(); ++i)
2582 {
2583 out << "state " << i << ":";
2584 for (node_id_t j = 0; j < m_dfa[x].transition_table[i].size(); ++j)
2585 {
2586 if (m_dfa[x].transition_table[i][j] != invalid_node)
2587 out << j << "->" << m_dfa[x].transition_table[i][j] << " ";
2588 }
2589 out << "\n";
2590 }
2591 out << "acceptance states: ";
2592 for(unsigned int k = 0; k < m_dfa[x].acceptance_index.size(); ++k)
2593 {
2594 if (m_dfa[x].acceptance_index[k] != invalid_node)
2595 out << '<' << k << ',' << m_dfa[x].acceptance_index[k] << "> ";
2596 }
2597 out << endl;
2598 }
2599}
2600#endif
2601
2602///////////////////////////////////////////////////////////////////////////////
2603// load the lexer tables
2604#define slex_in(strm, val) \
2605 strm.read((char*)&val, sizeof(val)); \
2606 if (std::ios::goodbit != strm.rdstate()) return false
2607
2608template <typename IteratorT, typename TokenT, typename CallbackT>
2609inline bool
2610lexer<IteratorT, TokenT, CallbackT>::load (std::ifstream &in, long unique_id)
2611{
2612// ensure correct signature and version
2613long signature = 0;
2614
2615 slex_in (in, signature);
2616 if (signature != SLEX_SIGNATURE)
2617 return false; // not for us
2618
2619long version = 0;
2620
2621 slex_in (in, version);
2622 if ((version & ~SLEX_MINOR_VERSION_MASK) > SLEX_LAST_KNOWN_VERSION)
2623 return false; // to new for us
2624
2625long uid = 0;
2626
2627 slex_in (in, uid);
2628 if (uid != unique_id)
2629 return false; // not saved by us
2630
2631// load auxiliary members
2632int num_states = 0;
2633
2634 slex_in (in, num_states);
2635
2636// load the dfa tables
2637dfa_t in_dfa;
2638std::size_t dfa_size = 0;
2639
2640 slex_in (in, dfa_size);
2641 in_dfa.resize(dfa_size);
2642 for (std::size_t dfa = 0; dfa < dfa_size; ++dfa)
2643 {
2644 // load the transition tables
2645 std::size_t tt_size = 0;
2646 transition_table_t &tt_table = in_dfa[dfa].transition_table;
2647
2648 slex_in (in, tt_size);
2649 tt_table.resize(tt_size);
2650 for (std::size_t tt = 0; tt < tt_size; ++tt)
2651 {
2652 std::size_t nt_size = 0;
2653 node_table_t &nt_table = tt_table[tt];
2654
2655 slex_in (in, nt_size);
2656 nt_table.resize(nt_size);
2657 for (std::size_t nt = 0; nt < nt_size; ++nt)
2658 {
2659 slex_in (in, nt_table[nt]);
2660 }
2661 }
2662
2663 // load the acceptance index table
2664 std::size_t ai_size = 0;
2665 node_table_t &ai_table = in_dfa[dfa].acceptance_index;
2666
2667 slex_in (in, ai_size);
2668 ai_table.resize(ai_size);
2669 for (std::size_t ai = 0; ai < ai_size; ++ai)
2670 {
2671 slex_in (in, ai_table[ai]);
2672 }
2673 }
2674
2675 m_dfa.swap(in_dfa); // success, swap in the read values
2676 m_num_states = num_states;
2677
2678 m_compiled_dfa = true;
2679 return true;
2680}
2681
2682#undef slex_in
2683
2684///////////////////////////////////////////////////////////////////////////////
2685// save the lexer tables
2686#define slex_out(strm, val) \
2687 strm.write((char*)&val, sizeof(val)); \
2688 if (std::ios::goodbit != strm.rdstate()) return false
2689
2690template <typename IteratorT, typename TokenT, typename CallbackT>
2691inline bool
2692lexer<IteratorT, TokenT, CallbackT>::save (std::ofstream &out, long unique_id)
2693{
2694// save signature and version information
2695long out_long = SLEX_SIGNATURE;
2696
2697 slex_out(out, out_long);
2698 out_long = SLEX_VERSION_100;
2699 slex_out(out, out_long);
2700 slex_out(out, unique_id);
2701
2702// save auxiliary members
2703 slex_out(out, m_num_states);
2704
2705// save the dfa tables
2706 typedef typename dfa_t::const_iterator dfa_iter_t;
2707 typedef transition_table_t::const_iterator transition_table_iter_t;
2708 typedef node_table_t::const_iterator node_table_iter_t;
2709
2710 std::size_t out_size_t = m_dfa.size();
2711 slex_out(out, out_size_t);
2712
2713 dfa_iter_t end = m_dfa.end();
2714 for (dfa_iter_t it = m_dfa.begin(); it != end; ++it)
2715 {
2716 // save the transition table
2717 out_size_t = (*it).transition_table.size();
2718 slex_out(out, out_size_t);
2719
2720 transition_table_iter_t tt_end = (*it).transition_table.end();
2721 for (transition_table_iter_t tt_it = (*it).transition_table.begin();
2722 tt_it != tt_end;
2723 ++tt_it)
2724 {
2725 out_size_t = (*tt_it).size();
2726 slex_out(out, out_size_t);
2727
2728 node_table_iter_t nt_end = (*tt_it).end();
2729 for (node_table_iter_t nt_it = (*tt_it).begin();
2730 nt_it != nt_end;
2731 ++nt_it)
2732 {
2733 slex_out(out, (*nt_it));
2734 }
2735 }
2736
2737 // save the acceptance index table
2738 out_size_t = (*it).acceptance_index.size();
2739 slex_out(out, out_size_t);
2740
2741 node_table_iter_t nt_end = (*it).acceptance_index.end();
2742 for (node_table_iter_t nt_it = (*it).acceptance_index.begin();
2743 nt_it != nt_end;
2744 ++nt_it)
2745 {
2746 slex_out(out, (*nt_it));
2747 }
2748 }
2749 return true;
2750}
2751
2752#undef slex_out
2753
2754/*
2755a lexer_control object supports some operations on the lexer.
2756 get current lexer state
2757 set state
2758 terminate
2759 state stack (push, pop, top)
2760 set new token return value
2761 ignore the current token
2762 yymore
2763 get length of matched token
2764*/
2765template <typename TokenT>
2766class lexer_control
2767{
2768public:
2769
2770 lexer_control(TokenT& token, unsigned int& current_state,
2771 std::stack<unsigned int>& state_stack);
2772 // functions dealing with the lexer state
2773
2774 // set the state to state
2775 void set_state(unsigned int state);
2776
2777 // get the current state
2778 unsigned int get_state();
2779
2780 // pushes the current state onto the top of the state stack and
2781 // switches to new_state
2782 void push_state(unsigned int new_state);
2783
2784 // pops the top of the state stack and switches to it.
2785 void pop_state();
2786
2787 // returns the top of the stack without altering the stack's contents.
2788 unsigned int top_state();
2789
2790 // functions dealing with the token returned.
2791
2792 // set a new TokenT return value, overriding that one that was
2793 // registered via. register_regex()
2794 void set_token(const TokenT& token);
2795
2796 // tell the lexer to return an invalid token, signifying termination.
2797 void terminate();
2798
2799 // ignore the current token, and move on to the next one. The current
2800 // token will NOT be returned from next_token()
2801 void ignore_current_token();
2802
2803 // returns true if ignore_current_token() has been called,
2804 // false otherwise.
2805 bool ignore_current_token_set();
2806
2807private:
2808 TokenT& m_token;
2809 bool m_ignore_current_token;
2810 unsigned int& m_current_state;
2811 std::stack<unsigned int>& m_state_stack;
2812};
2813
2814template <typename TokenT>
2815inline
2816lexer_control<TokenT>::lexer_control(TokenT& token, unsigned int& current_state,
2817 std::stack<unsigned int>& state_stack)
2818 : m_token(token)
2819 , m_ignore_current_token(false)
2820 , m_current_state(current_state)
2821 , m_state_stack(state_stack)
2822{
2823}
2824
2825template <typename TokenT>
2826inline void
2827lexer_control<TokenT>::set_state(unsigned int state)
2828{
2829 m_current_state = state;
2830}
2831
2832template <typename TokenT>
2833inline unsigned int
2834lexer_control<TokenT>::get_state()
2835{
2836 return m_current_state;
2837}
2838
2839template <typename TokenT>
2840inline void
2841lexer_control<TokenT>::push_state(unsigned int new_state)
2842{
2843 m_state_stack.push(m_current_state);
2844 m_current_state = new_state;
2845}
2846
2847template <typename TokenT>
2848inline void
2849lexer_control<TokenT>::pop_state()
2850{
2851 m_current_state = m_state_stack.top();
2852 m_state_stack.pop();
2853}
2854
2855template <typename TokenT>
2856inline unsigned int
2857lexer_control<TokenT>::top_state()
2858{
2859 return m_state_stack.top();
2860}
2861
2862template <typename TokenT>
2863inline void
2864lexer_control<TokenT>::set_token(const TokenT& token)
2865{
2866 m_token = token;
2867}
2868
2869template <typename TokenT>
2870inline void
2871lexer_control<TokenT>::terminate()
2872{
2873 m_token = -1; // TOOD: fix this, need to be determined by traits
2874}
2875
2876template <typename TokenT>
2877inline void
2878lexer_control<TokenT>::ignore_current_token()
2879{
2880 m_ignore_current_token = true;
2881}
2882
2883template <typename TokenT>
2884inline bool
2885lexer_control<TokenT>::ignore_current_token_set()
2886{
2887 return m_ignore_current_token;
2888}
2889
2890} // namespace classic
2891} // namespace spirit
2892} // namespace boost
2893
2894#undef BOOST_SPIRIT_IT_NS
2895
2896#endif
2897