]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/libs/wave/samples/cpp_tokens/slex/lexer.hpp
update sources to v12.2.3
[ceph.git] / ceph / src / boost / libs / wave / samples / cpp_tokens / slex / lexer.hpp
CommitLineData
7c673cae
FG
1/*=============================================================================
2 Boost.Wave: A Standard compliant C++ preprocessor library
3
4 Spirit based lexer
5
6 http://www.boost.org/
7
8 Copyright (c) 2001, Daniel C. Nuffer.
9 Copyright (c) 2001-2012 Hartmut Kaiser.
10 Distributed under the Boost Software License, Version 1.0. (See accompanying
11 file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
12
13 TODO List:
14 X callback objects (called when a match is made.)
15 X callback passed first & last iterator, and
16 a reference to a lexercontrol object that supports some
17 operations on the lexer.
18 set state
19 terminate
20 state stack (push, pop, top)
21 set new token return value
22 ignore the current token
23 yymore
24 get length of matched token
25 get current lexer state
26 X DFA serialization to save recomputing the DFA.
27
28 lexer states.
29 organize the file into hpp and ipp. arrange the classes in a logical order.
30 use buffering - iterator only needs be an input iterator,
31 lexer & callback are not templatized on iterator type, but char type.
32 next_token is templatized on iterator type.
33 beginning/ending contexts.
34 ^ and $
35 DFA minimization.
36 DFA table compression.
37
38=============================================================================*/
39#ifndef BOOST_SPIRIT_LEXER_HPP
40#define BOOST_SPIRIT_LEXER_HPP
41
42///////////////////////////////////////////////////////////////////////////////
43#include <boost/throw_exception.hpp>
44
45#include <boost/spirit/include/classic_core.hpp>
46#include <boost/spirit/include/classic_symbols.hpp>
47#include <boost/spirit/include/classic_chset.hpp>
48#include <boost/spirit/include/classic_escape_char.hpp>
49
50#include <set>
51#include <map>
52#include <vector>
53#include <stack>
54#include <utility> // for pair
55#include <iostream>
56#include <fstream>
57#include <boost/assert.hpp>
58#include <boost/limits.hpp>
59
60#if defined(BOOST_NO_STD_ITERATOR_TRAITS)
61#define BOOST_SPIRIT_IT_NS impl
62#else
63#define BOOST_SPIRIT_IT_NS std
64#endif
65
66///////////////////////////////////////////////////////////////////////////////
67namespace boost {
68namespace spirit {
69namespace classic {
70
71typedef unsigned char uchar;
72typedef unsigned int node_id_t;
73const node_id_t invalid_node = node_id_t(-1);
74typedef std::set<node_id_t> node_set;
75typedef std::vector<uchar> uchar_vector;
76typedef std::map<node_id_t, node_set> followpos_t;
77typedef std::vector<uchar_vector> state_match_t;
78
79template <typename TokenT>
80class lexer_control;
81
82class bad_regex : public std::exception
83{
84};
85
86namespace lexerimpl
87{
88
89class node
90{
91public:
92
93 virtual ~node() {}
94
95 virtual node* clone() const = 0;
96 virtual bool nullable() const = 0;
97 virtual node_set firstpos() const = 0;
98 virtual node_set lastpos() const = 0;
99 virtual void compute_followpos(followpos_t& followpos) const = 0;
100 virtual void compute_state_match(state_match_t& state_match) const = 0;
101 virtual void get_eof_ids(node_set& eof_set) const = 0;
102 virtual void assign_node_ids(node_id_t& node_count) = 0;
103#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
104 virtual void dump(std::ostream& out) const = 0;
105#endif
106
107};
108
109class char_node : public node
110{
111
112public:
113
114 char_node(const uchar c);
115 char_node(const char_node& x);
116 virtual ~char_node(){}
117
118 virtual node* clone() const;
119 virtual bool nullable() const;
120 virtual node_set firstpos() const;
121 virtual node_set lastpos() const;
122 virtual void compute_followpos(followpos_t& followpos) const;
123 virtual void compute_state_match(state_match_t& state_match ) const;
124 virtual void get_eof_ids(node_set& eof_set) const;
125 virtual void assign_node_ids(node_id_t& node_count);
126#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
127 virtual void dump(std::ostream& out) const;
128#endif
129
130private:
131
132 uchar m_char;
133 node_id_t m_node_num;
134};
135
136inline
137char_node::char_node(const uchar c)
138 : node()
139 , m_char(c)
140 , m_node_num(0)
141{
142}
143
144inline
145char_node::char_node(const char_node& x)
146 : node(x)
147 , m_char(x.m_char)
148 , m_node_num(x.m_node_num)
149{
150}
151
152inline node *
153char_node::clone() const
154{
155 return new char_node(*this);
156}
157
158inline bool
159char_node::nullable() const
160{
161 return false;
162}
163
164inline node_set
165char_node::firstpos() const
166{
167 node_set rval;
168 rval.insert(m_node_num);
169 return rval;
170}
171
172inline node_set
173char_node::lastpos() const
174{
175 return firstpos();
176}
177
178inline void
179char_node::compute_followpos(followpos_t&) const
180{
181 return;
182}
183
184inline void
185char_node::compute_state_match(state_match_t& state_match) const
186{
187 if (state_match.size() < m_node_num + 1)
188 state_match.resize(m_node_num + 1);
189 state_match[m_node_num].resize(256);
190 state_match[m_node_num][m_char] = 1;
191}
192
193inline void
194char_node::get_eof_ids(node_set&) const
195{
196 return;
197}
198
199inline void
200char_node::assign_node_ids(node_id_t& node_count)
201{
202 m_node_num = node_count++;
203}
204
205#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
206inline void
207char_node::dump(std::ostream& out) const
208{
209 out << "\nchar_node m_char = " << m_char;
210 out << " m_node_num = " << m_node_num;
211 out << " nullable() = " << (nullable() ? "true" : "false");
212 out << " firstpos() = ";
213 node_set fp = firstpos();
214 std::copy(fp.begin(), fp.end(),
215 std::ostream_iterator<node_id_t>(out, ","));
216
217 out << " lastpos() = ";
218 node_set lp = lastpos();
219 std::copy(lp.begin(), lp.end(),
220 std::ostream_iterator<node_id_t>(out, ","));
221}
222#endif
223
224
225class epsilon_node : public node
226{
227
228public:
229
230 epsilon_node();
231 epsilon_node(const epsilon_node& x);
232 virtual ~epsilon_node(){}
233
234 virtual node* clone() const;
235 virtual bool nullable() const;
236 virtual node_set firstpos() const;
237 virtual node_set lastpos() const;
238 virtual void compute_followpos(followpos_t& followpos) const;
239 virtual void compute_state_match(state_match_t& state_match ) const;
240 virtual void get_eof_ids(node_set& eof_set) const;
241 virtual void assign_node_ids(node_id_t& node_count);
242#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
243 virtual void dump(std::ostream& out) const;
244#endif
245
246private:
247
248 node_id_t m_node_num;
249};
250
251inline
252epsilon_node::epsilon_node()
253 : node()
254 , m_node_num(0)
255{
256}
257
258inline
259epsilon_node::epsilon_node(const epsilon_node& x)
260 : node(x)
261 , m_node_num(x.m_node_num)
262{
263}
264
265inline node *
266epsilon_node::clone() const
267{
268 return new epsilon_node(*this);
269}
270
271inline bool
272epsilon_node::nullable() const
273{
274 return true;
275}
276
277inline node_set
278epsilon_node::firstpos() const
279{
280 return node_set();
281}
282
283inline node_set
284epsilon_node::lastpos() const
285{
286 return node_set();
287}
288
289inline void
290epsilon_node::compute_followpos(followpos_t&) const
291{
292 return;
293}
294
295inline void
296epsilon_node::compute_state_match(state_match_t& state_match) const
297{
298 if (state_match.size() < m_node_num + 1)
299 state_match.resize(m_node_num + 1);
300 state_match[m_node_num].resize(256, 1);
301}
302
303inline void
304epsilon_node::get_eof_ids(node_set&) const
305{
306 return;
307}
308
309inline void
310epsilon_node::assign_node_ids(node_id_t& node_count)
311{
312 m_node_num = node_count++;
313}
314
315#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
316inline void
317epsilon_node::dump(std::ostream& out) const
318{
319 out << "\nepsilon_node";
320 out << " m_node_num = " << m_node_num;
321 out << " nullable() = " << (nullable() ? "true" : "false");
322 out << " firstpos() = ";
323 node_set fp = firstpos();
324 std::copy(fp.begin(), fp.end(),
325 std::ostream_iterator<node_id_t>(out, ","));
326
327 out << " lastpos() = ";
328 node_set lp = lastpos();
329 std::copy(lp.begin(), lp.end(),
330 std::ostream_iterator<node_id_t>(out, ","));
331}
332#endif
333
334
335class or_node : public node
336{
337
338public:
339
340 or_node(node* left, node* right);
341 or_node(const or_node& x);
342 virtual ~or_node(){}
343
344 virtual node* clone() const;
345 virtual bool nullable() const;
346 virtual node_set firstpos() const;
347 virtual node_set lastpos() const;
348 virtual void compute_followpos(followpos_t& followpos) const;
349 virtual void compute_state_match(state_match_t& state_match ) const;
350 virtual void get_eof_ids(node_set& eof_set) const;
351 virtual void assign_node_ids(node_id_t& node_count);
352#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
353 virtual void dump(std::ostream& out) const;
354#endif
355
356private:
357
b32b8144
FG
358#ifdef BOOST_NO_AUTO_PTR
359 std::unique_ptr<node> m_left;
360 std::unique_ptr<node> m_right;
361#else
7c673cae
FG
362 std::auto_ptr<node> m_left;
363 std::auto_ptr<node> m_right;
b32b8144 364#endif
7c673cae
FG
365};
366
367inline
368or_node::or_node(node* left, node* right)
369 : node()
370 , m_left(left)
371 , m_right(right)
372{
373}
374
375inline
376or_node::or_node(const or_node& x)
377 : node(x)
378 , m_left(x.m_left->clone())
379 , m_right(x.m_right->clone())
380{
381}
382
383inline node *
384or_node::clone() const
385{
386 return new or_node(m_left->clone(), m_right->clone());
387}
388
389inline bool
390or_node::nullable() const
391{
392 return m_left->nullable() || m_right->nullable();
393}
394
395inline node_set
396or_node::firstpos() const
397{
398 node_set rval;
399 node_set l = m_left->firstpos();
400 node_set r = m_right->firstpos();
401 std::set_union(l.begin(), l.end(), r.begin(), r.end(),
402 std::inserter(rval, rval.begin()));
403 return rval;
404}
405
406inline node_set
407or_node::lastpos() const
408{
409 node_set rval;
410 node_set l = m_left->lastpos();
411 node_set r = m_right->lastpos();
412 std::set_union(l.begin(), l.end(), r.begin(), r.end(),
413 std::inserter(rval, rval.begin()));
414 return rval;
415}
416
417inline void
418or_node::compute_followpos(followpos_t& followpos) const
419{
420 m_left->compute_followpos(followpos);
421 m_right->compute_followpos(followpos);
422}
423
424inline void
425or_node::compute_state_match(state_match_t& state_match) const
426{
427 m_left->compute_state_match(state_match);
428 m_right->compute_state_match(state_match);
429}
430
431inline void
432or_node::get_eof_ids(node_set& eof_nodes) const
433{
434 m_left->get_eof_ids(eof_nodes);
435 m_right->get_eof_ids(eof_nodes);
436}
437
438inline void
439or_node::assign_node_ids(node_id_t& node_count)
440{
441 m_left->assign_node_ids(node_count);
442 m_right->assign_node_ids(node_count);
443}
444
445#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
446inline void
447or_node::dump(std::ostream& out) const
448{
449 m_left->dump(out);
450
451 out << "\nor_node";
452 out << " nullable() = " << (nullable() ? "true" : "false");
453 out << " firstpos() = ";
454 node_set fp = firstpos();
455 std::copy(fp.begin(), fp.end(),
456 std::ostream_iterator<node_id_t>(out, ","));
457
458 out << " lastpos() = ";
459 node_set lp = lastpos();
460 std::copy(lp.begin(), lp.end(),
461 std::ostream_iterator<node_id_t>(out, ","));
462
463 m_right->dump(out);
464}
465#endif
466
467
468class cat_node : public node
469{
470
471public:
472
473 cat_node(node* left, node* right);
474 cat_node(const cat_node& x);
475 virtual ~cat_node(){}
476
477 virtual node* clone() const;
478 virtual bool nullable() const;
479 virtual node_set firstpos() const;
480 virtual node_set lastpos() const;
481 virtual void compute_followpos(followpos_t& followpos) const;
482 virtual void compute_state_match(state_match_t& state_match ) const;
483 virtual void get_eof_ids(node_set& eof_set) const;
484 virtual void assign_node_ids(node_id_t& node_count);
485#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
486 virtual void dump(std::ostream& out) const;
487#endif
488
489private:
490
b32b8144
FG
491#ifdef BOOST_NO_AUTO_PTR
492 std::unique_ptr<node> m_left;
493 std::unique_ptr<node> m_right;
494#else
7c673cae
FG
495 std::auto_ptr<node> m_left;
496 std::auto_ptr<node> m_right;
b32b8144 497#endif
7c673cae
FG
498};
499
500inline
501cat_node::cat_node(node* left, node* right)
502 : node()
503 , m_left(left)
504 , m_right(right)
505{
506}
507
508inline
509cat_node::cat_node(const cat_node& x)
510 : node(x)
511 , m_left(x.m_left->clone())
512 , m_right(x.m_right->clone())
513{
514}
515
516inline node *
517cat_node::clone() const
518{
519 return new cat_node(m_left->clone(), m_right->clone());
520}
521
522inline bool
523cat_node::nullable() const
524{
525 return m_left->nullable() && m_right->nullable();
526}
527
528inline node_set
529cat_node::firstpos() const
530{
531 if (m_left->nullable())
532 {
533 node_set rval;
534 node_set l = m_left->firstpos();
535 node_set r = m_right->firstpos();
536 std::set_union(l.begin(), l.end(), r.begin(), r.end(),
537 std::inserter(rval, rval.begin()));
538 return rval;
539 }
540 else
541 {
542 return m_left->firstpos();
543 }
544}
545
546inline node_set
547cat_node::lastpos() const
548{
549 if (m_right->nullable())
550 {
551 node_set rval;
552 node_set l = m_left->lastpos();
553 node_set r = m_right->lastpos();
554 std::set_union(l.begin(), l.end(), r.begin(), r.end(),
555 std::inserter(rval, rval.begin()));
556 return rval;
557 }
558 else
559 {
560 return m_right->lastpos();
561 }
562}
563
564inline void
565cat_node::compute_followpos(followpos_t& followpos) const
566{
567 node_set l = m_left->lastpos();
568 for (node_set::iterator i = l.begin();
569 i != l.end();
570 ++i)
571 {
572 node_set rf = m_right->firstpos();
573 followpos[*i].insert(rf.begin(), rf.end());
574 }
575
576 m_left->compute_followpos(followpos);
577 m_right->compute_followpos(followpos);
578}
579
580inline void
581cat_node::compute_state_match(state_match_t& state_match) const
582{
583 m_left->compute_state_match(state_match);
584 m_right->compute_state_match(state_match);
585}
586
587inline void
588cat_node::get_eof_ids(node_set& eof_nodes) const
589{
590 m_left->get_eof_ids(eof_nodes);
591 m_right->get_eof_ids(eof_nodes);
592}
593
594inline void
595cat_node::assign_node_ids(node_id_t& node_count)
596{
597 m_left->assign_node_ids(node_count);
598 m_right->assign_node_ids(node_count);
599}
600
601#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
602inline void
603cat_node::dump(std::ostream& out) const
604{
605 m_left->dump(out);
606
607 out << "\ncat_node";
608 out << " nullable() = " << (nullable() ? "true" : "false");
609 out << " firstpos() = ";
610 node_set fp = firstpos();
611 std::copy(fp.begin(), fp.end(),
612 std::ostream_iterator<node_id_t>(out, ","));
613
614 out << " lastpos() = ";
615 node_set lp = lastpos();
616 std::copy(lp.begin(), lp.end(),
617 std::ostream_iterator<node_id_t>(out, ","));
618
619 m_right->dump(out);
620}
621#endif
622
623
624class star_node : public node
625{
626
627public:
628
629 star_node(node* left);
630 star_node(const star_node& x);
631 virtual ~star_node(){}
632
633 virtual node* clone() const;
634 virtual bool nullable() const;
635 virtual node_set firstpos() const;
636 virtual node_set lastpos() const;
637 virtual void compute_followpos(followpos_t& followpos) const;
638 virtual void compute_state_match(state_match_t& state_match ) const;
639 virtual void get_eof_ids(node_set& eof_set) const;
640 virtual void assign_node_ids(node_id_t& node_count);
641#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
642 virtual void dump(std::ostream& out) const;
643#endif
644
645private:
646
b32b8144
FG
647#ifdef BOOST_NO_AUTO_PTR
648 std::unique_ptr<node> m_left;
649#else
7c673cae 650 std::auto_ptr<node> m_left;
b32b8144 651#endif
7c673cae
FG
652};
653
654inline
655star_node::star_node(node* left)
656 : node()
657 , m_left(left)
658{
659}
660
661inline
662star_node::star_node(const star_node& x)
663 : node(x)
664 , m_left(x.m_left->clone())
665{
666}
667
668inline node *
669star_node::clone() const
670{
671 return new star_node(m_left->clone());
672}
673
674inline bool
675star_node::nullable() const
676{
677 return true;
678}
679
680inline node_set
681star_node::firstpos() const
682{
683 return m_left->firstpos();
684}
685
686inline node_set
687star_node::lastpos() const
688{
689 return m_left->lastpos();
690}
691
692inline void
693star_node::compute_followpos(followpos_t& followpos) const
694{
695 node_set lp = this->lastpos();
696 for (node_set::iterator i = lp.begin();
697 i != lp.end();
698 ++i)
699 {
700 node_set fp = this->firstpos();
701 followpos[*i].insert(fp.begin(), fp.end());
702 }
703
704 m_left->compute_followpos(followpos);
705}
706
707inline void
708star_node::compute_state_match(state_match_t& state_match) const
709{
710 m_left->compute_state_match(state_match);
711}
712
713inline void
714star_node::get_eof_ids(node_set& eof_nodes) const
715{
716 m_left->get_eof_ids(eof_nodes);
717}
718
719inline void
720star_node::assign_node_ids(node_id_t& node_count)
721{
722 m_left->assign_node_ids(node_count);
723}
724
725#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
726inline void
727star_node::dump(std::ostream& out) const
728{
729 m_left->dump(out);
730
731 out << "\nstar_node";
732 out << " nullable() = " << (nullable() ? "true" : "false");
733 out << " firstpos() = ";
734 node_set fp = firstpos();
735 std::copy(fp.begin(), fp.end(),
736 std::ostream_iterator<node_id_t>(out, ","));
737
738 out << " lastpos() = ";
739 node_set lp = lastpos();
740 std::copy(lp.begin(), lp.end(),
741 std::ostream_iterator<node_id_t>(out, ","));
742
743}
744#endif
745
746
747class eof_node : public node
748{
749
750public:
751
752 eof_node();
753 eof_node(const eof_node& x);
754 virtual ~eof_node(){}
755
756 virtual node* clone() const;
757 virtual bool nullable() const;
758 virtual node_set firstpos() const;
759 virtual node_set lastpos() const;
760 virtual void compute_followpos(followpos_t& followpos) const;
761 virtual void compute_state_match(state_match_t& state_match ) const;
762 virtual void get_eof_ids(node_set& eof_set) const;
763 virtual void assign_node_ids(node_id_t& node_count);
764#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
765 virtual void dump(std::ostream& out) const;
766#endif
767
768private:
769
770 node_id_t m_node_num;
771};
772
773inline
774eof_node::eof_node()
775 : node()
776 , m_node_num(0)
777{
778}
779
780inline
781eof_node::eof_node(const eof_node& x)
782 : node(x)
783 , m_node_num(x.m_node_num)
784{
785}
786
787inline node *
788eof_node::clone() const
789{
790 return new eof_node(*this);
791}
792
793inline bool
794eof_node::nullable() const
795{
796 return false;
797}
798
799inline node_set
800eof_node::firstpos() const
801{
802 node_set rval;
803 rval.insert(m_node_num);
804 return rval;
805}
806
807inline node_set
808eof_node::lastpos() const
809{
810 node_set rval;
811 rval.insert(m_node_num);
812 return rval;
813}
814
815inline void
816eof_node::compute_followpos(followpos_t&) const
817{
818 return;
819}
820
821inline void
822eof_node::compute_state_match(state_match_t& state_match) const
823{
824 if (state_match.size() < m_node_num + 1)
825 state_match.resize(m_node_num + 1);
826 state_match[m_node_num].resize(256, 0);
827}
828
829inline void
830eof_node::get_eof_ids(node_set& eof_nodes) const
831{
832 eof_nodes.insert(m_node_num);
833}
834
835inline void
836eof_node::assign_node_ids(node_id_t& node_count)
837{
838 m_node_num = node_count++;
839}
840
841#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
842inline void
843eof_node::dump(std::ostream& out) const
844{
845 out << "\neof_node";
846 out << " m_node_num = " << m_node_num;
847 out << " nullable() = " << (nullable() ? "true" : "false");
848 out << " firstpos() = ";
849 node_set fp = firstpos();
850 std::copy(fp.begin(), fp.end(),
851 std::ostream_iterator<node_id_t>(out, ","));
852
853 out << " lastpos() = ";
854 node_set lp = lastpos();
855 std::copy(lp.begin(), lp.end(),
856 std::ostream_iterator<node_id_t>(out, ","));
857}
858#endif
859
860class ccl_node : public node
861{
862
863public:
864
865 ccl_node(const std::vector<uchar>& v);
866 ccl_node(const uchar c1, const uchar c2);
867 ccl_node(const ccl_node& x);
868 virtual ~ccl_node(){}
869
870 virtual node* clone() const;
871 virtual bool nullable() const;
872 virtual node_set firstpos() const;
873 virtual node_set lastpos() const;
874 virtual void compute_followpos(followpos_t& followpos) const;
875 virtual void compute_state_match(state_match_t& state_match ) const;
876 virtual void get_eof_ids(node_set& eof_set) const;
877 virtual void assign_node_ids(node_id_t& node_count);
878#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
879 virtual void dump(std::ostream& out) const;
880#endif
881
882private:
883
884 std::vector<uchar> m_match;
885 node_id_t m_node_num;
886};
887
888inline
889ccl_node::ccl_node(const std::vector<uchar>& v)
890 : node()
891 , m_match(v)
892 , m_node_num(0)
893{
894 m_match.resize(256); // make sure it's the right size
895}
896
897inline
898ccl_node::ccl_node(const uchar c1, const uchar c2)
899 : node()
900 , m_match(256, uchar(0))
901 , m_node_num(0)
902{
903 BOOST_ASSERT(c1 < c2);
904 for (std::size_t i = c1; i <= std::size_t(c2); ++i)
905 {
906 m_match[i] = 1;
907 }
908}
909
910inline
911ccl_node::ccl_node(const ccl_node& x)
912 : node(x)
913 , m_match(x.m_match)
914 , m_node_num(x.m_node_num)
915{
916}
917
918inline node *
919ccl_node::clone() const
920{
921 return new ccl_node(*this);
922}
923
924inline bool
925ccl_node::nullable() const
926{
927 return false;
928}
929
930inline node_set
931ccl_node::firstpos() const
932{
933 node_set rval;
934 rval.insert(m_node_num);
935 return rval;
936}
937
938inline node_set
939ccl_node::lastpos() const
940{
941 return firstpos();
942}
943
944inline void
945ccl_node::compute_followpos(followpos_t&) const
946{
947 return;
948}
949
950inline void
951ccl_node::compute_state_match(state_match_t& state_match) const
952{
953 if (state_match.size() < m_node_num + 1)
954 state_match.resize(m_node_num + 1);
955 state_match[m_node_num] = m_match;
956}
957
958inline void
959ccl_node::get_eof_ids(node_set&) const
960{
961 return;
962}
963
964inline void
965ccl_node::assign_node_ids(node_id_t& node_count)
966{
967 m_node_num = node_count++;
968}
969
970#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
971inline void
972ccl_node::dump(std::ostream& out) const
973{
974 out << "\nccl_node m_match = ";
975 for (std::size_t i = 0; i < m_match.size(); ++i)
976 {
977 if (m_match[i])
978 out << i << ", ";
979 }
980 out << " m_node_num = " << m_node_num;
981 out << " nullable() = " << (nullable() ? "true" : "false");
982 out << " firstpos() = ";
983 node_set fp = firstpos();
984 std::copy(fp.begin(), fp.end(),
985 std::ostream_iterator<node_id_t>(out, ","));
986
987 out << " lastpos() = ";
988 node_set lp = lastpos();
989 std::copy(lp.begin(), lp.end(),
990 std::ostream_iterator<node_id_t>(out, ","));
991}
992#endif
993
994template <typename ScannerT>
995class make_concat
996{
997 typedef typename ScannerT::iterator_t iterator_type;
998
999public:
1000
1001 make_concat(std::stack<node*>& the_stack)
1002 : m_stack(the_stack)
1003 {}
1004
1005 void operator()(iterator_type const &, iterator_type const &) const
1006 {
1007 node* right = m_stack.top();
1008 m_stack.pop();
1009 node* left = m_stack.top();
1010 m_stack.pop();
1011 node* newnode = new cat_node(left, right);
1012 m_stack.push(newnode);
1013 }
1014
1015 std::stack<node*>& m_stack;
1016};
1017
1018template <int CharTSize>
1019struct get_byte_aux;
1020
1021template<>
1022struct get_byte_aux<1>
1023{
1024 template <typename CharT>
1025 unsigned char operator()(CharT c, unsigned int byte)
1026 {
1027 BOOST_ASSERT(byte == 0);
1028 return c;
1029 }
1030};
1031
1032template<>
1033struct get_byte_aux<2>
1034{
1035 template <typename CharT>
1036 unsigned char operator()(CharT c, unsigned int byte)
1037 {
1038 static unsigned long mask[] =
1039 {
1040 0xFF00,
1041 0x00FF
1042 };
1043
1044 BOOST_ASSERT(byte < 2);
1045 return (c & mask[byte]) >> ((sizeof(c) - 1 - byte) * 8);
1046 }
1047};
1048
1049template<>
1050struct get_byte_aux<4>
1051{
1052 template <typename CharT>
1053 unsigned char operator()(CharT c, unsigned int byte)
1054 {
1055 static unsigned long mask[] =
1056 {
1057 0xFF000000,
1058 0x00FF0000,
1059 0x0000FF00,
1060 0x000000FF
1061 };
1062
1063 BOOST_ASSERT(byte < 4);
1064 return (c & mask[byte]) >> ((sizeof(c) - 1 - byte) * 8);
1065 }
1066};
1067
1068template <typename CharT>
1069inline unsigned char
1070get_byte(CharT c, unsigned int byte)
1071{
1072 return get_byte_aux<sizeof(c)>()(c, byte);
1073}
1074
1075template <typename ScannerT>
1076class make_star
1077{
1078 typedef typename ScannerT::iterator_t iterator_type;
1079
1080public:
1081 typedef
1082 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1083 char_t;
1084
1085 make_star(std::stack<node*>& the_stack)
1086 : m_stack(the_stack)
1087 {}
1088
1089 void operator()(const char_t) const
1090 {
1091 node* left = m_stack.top();
1092 m_stack.pop();
1093 node* newnode = new star_node(left);
1094 m_stack.push(newnode);
1095 }
1096
1097 std::stack<node*>& m_stack;
1098};
1099
1100template <typename ScannerT>
1101class make_or
1102{
1103 typedef typename ScannerT::iterator_t iterator_type;
1104
1105public:
1106
1107 make_or(std::stack<node*>& the_stack)
1108 : m_stack(the_stack)
1109 {}
1110
1111 void operator()(iterator_type const&, iterator_type const&) const
1112 {
1113 node* right = m_stack.top();
1114 m_stack.pop();
1115 node* left = m_stack.top();
1116 m_stack.pop();
1117 node* newnode = new or_node(left, right);
1118 m_stack.push(newnode);
1119 }
1120
1121 std::stack<node*>& m_stack;
1122};
1123
1124template <typename ScannerT>
1125class make_plus
1126{
1127 typedef typename ScannerT::iterator_t iterator_type;
1128
1129public:
1130 typedef
1131 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1132 char_t;
1133
1134 make_plus(std::stack<node*>& the_stack)
1135 : m_stack(the_stack)
1136 {}
1137
1138 void operator()(const char_t) const
1139 {
1140 node* left = m_stack.top();
1141 m_stack.pop();
1142
1143 node* copy = left->clone();
1144
1145 node* new_star = new star_node(copy);
1146 node* new_cat = new cat_node(left, new_star);
1147 m_stack.push(new_cat);
1148 }
1149
1150 std::stack<node*>& m_stack;
1151};
1152
1153template <typename ScannerT>
1154class make_optional
1155{
1156 typedef typename ScannerT::iterator_t iterator_type;
1157
1158public:
1159 typedef
1160 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1161 char_t;
1162
1163 make_optional(std::stack<node*>& the_stack)
1164 : m_stack(the_stack)
1165 {}
1166
1167 void operator()(const char_t) const
1168 {
1169 node* left = m_stack.top();
1170 m_stack.pop();
1171
1172 node* new_or = new or_node(left, new epsilon_node());
1173 m_stack.push(new_or);
1174 }
1175
1176 std::stack<node*>& m_stack;
1177};
1178
1179///////////////////////////////////////////////////////////////////////////////
1180// utility function
1181template <typename CharT>
1182inline utility::impl::range<CharT> const&
1183full_range()
1184{
1185 static utility::impl::range<CharT> full((std::numeric_limits<CharT>::min)(),
1186 (std::numeric_limits<CharT>::max)());
1187 return full;
1188}
1189
1190namespace ccl_utils
1191{
1192 template <typename char_t>
1193 inline utility::impl::range_run<char_t>
1194 negate_range_run(
1195 const utility::impl::range_run<char_t>& rr)
1196 {
1197 utility::impl::range_run<char_t> newrr;
1198 newrr.set(full_range<char_t>());
1199 for (typename utility::impl::range_run<char_t>::const_iterator iter = rr.begin();
1200 iter != rr.end(); ++iter)
1201 newrr.clear(*iter);
1202 return newrr;
1203 }
1204
1205 template <typename char_t>
1206 inline node*
1207 create_mb_node_seq(char_t c)
1208 {
1209 node* newnode = new char_node(get_byte(c, 0));
1210 for (unsigned int i = 1; i < sizeof(c); ++i)
1211 {
1212 node* cnode = new char_node(get_byte(c, i));
1213 node* top_node = new cat_node(newnode, cnode);
1214 newnode = top_node;
1215 }
1216 return newnode;
1217 }
1218
1219 template <typename char_t>
1220 inline void
1221 handle_mb_char(char_t c, bool first_time,
1222 std::stack<node*>& stack)
1223 {
1224 node* newnode = create_mb_node_seq(c);
1225
1226 if (first_time)
1227 {
1228 stack.push(newnode);
1229 }
1230 else
1231 {
1232 node* top = stack.top();
1233 stack.pop();
1234
1235 node* newtop = new or_node(top, newnode);
1236 stack.push(newtop);
1237 }
1238 }
1239
1240 // forward decl only
1241 template <typename char_t>
1242 inline void
1243 handle_mb_range(char_t c1, char_t c2, bool first_time,
1244 std::stack<node*>& stack);
1245
1246 template <typename char_t>
1247 inline void
1248 create_nodes(const utility::impl::range_run<char_t>& rr,
1249 std::stack<node*>& stack)
1250 {
1251
1252 if (sizeof(char_t) == 1)
1253 {
1254 std::vector<uchar> ccl;
1255 ccl.resize(256);
1256 for (typename utility::impl::range_run<char_t>::const_iterator iter = rr.begin();
1257 iter != rr.end(); ++iter)
1258 {
1259 for (int i = iter->first; i <= iter->last; ++i)
1260 {
1261// this is always true because of the limited datatype
1262// BOOST_ASSERT(uchar(i) < 256 && ccl.size() == 256);
1263 ccl[uchar(i)] = 1;
1264 }
1265 }
1266
1267 node* new_ccl = new ccl_node(ccl);
1268 stack.push(new_ccl);
1269 }
1270 else
1271 {
1272 bool mb_first_time = true;
1273 for (typename utility::impl::range_run<char_t>::const_iterator iter = rr.begin();
1274 iter != rr.end(); ++iter)
1275 {
1276 if (iter->first == iter->last)
1277 {
1278 handle_mb_char(iter->first, mb_first_time, stack);
1279 }
1280 else
1281 {
1282 handle_mb_range(iter->first, iter->last, mb_first_time, stack);
1283 }
1284 mb_first_time = false;
1285 }
1286 }
1287 }
1288
1289 template <typename char_t>
1290 inline std::size_t
1291 compute_differing_byte(char_t c1, char_t c2)
1292 {
1293 std::size_t rval = 0;
1294 while (rval < sizeof(c1) &&
1295 get_byte(c1, (unsigned int)rval) == get_byte(c2, (unsigned int)rval))
1296 {
1297 ++rval;
1298 }
1299 return rval;
1300 }
1301
1302 template <typename char_t>
1303 inline node*
1304 create_mb_node_type1(std::size_t j, char_t c1, char_t c2)
1305 {
1306 std::size_t diff = get_byte(c2, (unsigned int)j) -
1307 get_byte(c1, (unsigned int)j);
1308 if (diff == 1) {
1309 return 0;
1310 }
1311 else if (diff == 2) {
1312 return new char_node(get_byte(c1, (unsigned int)j)+1);
1313 }
1314 else {
1315 return new ccl_node(get_byte(c1, (unsigned int)j)+1,
1316 get_byte(c2, (unsigned int)j)-1);
1317 }
1318 }
1319
1320 template <typename char_t>
1321 inline node *
1322 create_mb_node_for_byte(std::size_t i, std::size_t j, std::size_t sizem1,
1323 std::size_t differing_byte, char_t c1, char_t c2, node* newnode)
1324 {
1325 node* cnode;
1326 if (i == sizem1 && j == differing_byte && j != sizem1)
1327 {
1328 node* tmp = create_mb_node_type1(j, c1, c2);
1329 if (tmp == 0)
1330 {
1331 delete newnode;
1332 return 0;
1333 }
1334 else
1335 cnode = tmp;
1336 }
1337 else if (i == differing_byte && j == sizem1)
1338 {
1339 if (i != sizem1) {
1340 cnode = new ccl_node(get_byte(c1, (unsigned int)j), 0xFF);
1341 }
1342 else {
1343 cnode = new ccl_node(get_byte(c1, (unsigned int)j),
1344 get_byte(c2, (unsigned int)j));
1345 }
1346 }
1347 else if (i != differing_byte && i != sizem1 &&
1348 j == (sizem1 - i + differing_byte))
1349 {
1350 cnode = new ccl_node(get_byte(c1, (unsigned int)j)+1, 0xFF);
1351 }
1352 else if (i + j - differing_byte > sizem1) {
1353 cnode = new ccl_node(0, 0xFF);
1354 }
1355 else {//if (is plain)
1356 cnode = new char_node(get_byte(c1, (unsigned int)j));
1357 }
1358
1359 node* top_node = new cat_node(newnode, cnode);
1360 return top_node;
1361 }
1362
1363// On platforms, where wchar_t is a typedef for unsigned short, the
1364// comparision for a negative value is pointless
1365 template <bool is_signed>
1366 struct correct_char_aux {
1367 };
1368
1369 template <>
1370 struct correct_char_aux<true> {
1371
1372 template <typename char_t>
1373 static char_t correct(char_t c) { if (c < 0) c = 0; return c; }
1374 };
1375
1376 template <>
1377 struct correct_char_aux<false> {
1378
1379 template <typename char_t>
1380 static char_t correct(char_t c) { return c; }
1381 };
1382
1383 template <typename char_t>
1384 struct correct_char
1385 {
1386 static char_t correct(char_t c)
1387 {
1388 return correct_char_aux<std::numeric_limits<char_t>::is_signed >::
1389 correct(c);
1390 }
1391 };
1392
1393 template <typename char_t>
1394 inline void
1395 handle_mb_range(char_t c1, char_t c2, bool first_time,
1396 std::stack<node*>& stack)
1397 {
1398 // The algorithm can't handle negative value chars, which don't make
1399 // much sense anyway. This comparision is pointless for wchar_t's on
1400 // platforms, where wchar_t is a typedef for unsigned short
1401
1402 c1 = correct_char<char_t>::correct(c1);
1403 //if (c1 < 0)
1404 // c1 = 0;
1405
1406 BOOST_ASSERT(c1 < c2);
1407 node* newnode = 0;
1408 node* savednode = 0;
1409 const std::size_t differing_byte = compute_differing_byte(c1, c2);
1410 const std::size_t sizem1 = sizeof(c1) - 1;
1411 const std::size_t ndb = sizem1 - differing_byte;
1412 for (std::size_t i = differing_byte; i < sizeof(c1); ++i)
1413 {
1414 // generate node for the first byte
1415 if (differing_byte == 0 && i == ndb)
1416 {
1417 node* tmp = create_mb_node_type1(0, c1, c2);
1418 if (tmp == 0)
1419 continue;
1420 else
1421 newnode = tmp;
1422 }
1423 else
1424 {
1425 newnode = new char_node(get_byte(c1, 0));
1426 }
1427 for (std::size_t j = 1; j < sizeof(c1); ++j)
1428 {
1429 newnode = create_mb_node_for_byte(i, j, sizem1, differing_byte,
1430 c1, c2, newnode);
1431 if (newnode == 0)
1432 goto end_outer_for;
1433 }
1434
1435 // or together the various parts
1436 if (savednode)
1437 {
1438 node* top_node = new or_node(savednode, newnode);
1439 savednode = top_node;
1440 }
1441 else
1442 {
1443 savednode = newnode;
1444 }
1445end_outer_for:
1446 continue;
1447 }
1448
1449 for (std::size_t k = 0; k < ndb; ++k)
1450 {
1451 newnode = new char_node(get_byte(c2, 0));
1452 for (std::size_t j = 1; j < sizeof(c2); ++j)
1453 {
1454 node* cnode;
1455 if (k == differing_byte && j == sizem1 && k != sizem1)
1456 cnode = new ccl_node(0, get_byte(c2, (unsigned int)j));
1457
1458 else if (k != differing_byte && k != sizem1 &&
1459 j == (sizem1 - k + differing_byte))
1460 cnode = new ccl_node(0, get_byte(c2, (unsigned int)j)-1);
1461
1462 else if (k + j - differing_byte > sizem1)
1463 cnode = new ccl_node(0, 0xFF);
1464
1465 else //if (is plain)
1466 cnode = new char_node(get_byte(c2, (unsigned int)j));
1467
1468
1469 node* top_node = new cat_node(newnode, cnode);
1470 newnode = top_node;
1471 }
1472
1473 // or together the various parts
1474 if (savednode)
1475 {
1476 node* top_node = new or_node(savednode, newnode);
1477 savednode = top_node;
1478 }
1479 else
1480 {
1481 savednode = newnode;
1482 }
1483 }
1484
1485
1486 if (first_time)
1487 {
1488 stack.push(savednode);
1489 }
1490 else
1491 {
1492 node* top = stack.top();
1493 stack.pop();
1494
1495 node* newtop = new or_node(top, savednode);
1496 stack.push(newtop);
1497 }
1498 }
1499} // namespace ccl_utils
1500
1501template <typename ScannerT>
1502class make_char
1503{
1504 typedef typename ScannerT::iterator_t iterator_type;
1505
1506public:
1507 typedef
1508 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1509 char_t;
1510
1511 make_char(std::stack<node*>& the_stack)
1512 : m_stack(the_stack)
1513 {}
1514
1515 void operator()(iterator_type const& first, iterator_type const& last) const
1516 {
1517 const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
1518 escape_char_parser<lex_escapes, char_t>();
1519 char_t the_char;
1520 iterator_type first_ = first;
1521 ScannerT scan(first_, last);
1522 lex_escape_ch[assign(the_char)].parse(scan);
1523 node* newnode = ccl_utils::create_mb_node_seq(the_char);
1524 m_stack.push(newnode);
1525 }
1526
1527 std::stack<node*>& m_stack;
1528};
1529
1530
1531template <typename ScannerT>
1532class make_ccl
1533{
1534 typedef typename ScannerT::iterator_t iterator_type;
1535
1536public:
1537 typedef
1538 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1539 char_t;
1540
1541 make_ccl(std::stack<node*>& the_stack)
1542 : m_stack(the_stack)
1543 {}
1544
1545 static bool is_equal_to_string(iterator_type first,
1546 iterator_type const & last, const char* str)
1547 {
1548 while (first != last &&*str &&*first ==*str)
1549 {
1550 ++first;
1551 ++str;
1552 }
1553 return*str == 0;
1554 }
1555
1556 template <typename ParserT>
1557 static void fill_ccl(utility::impl::range_run<char_t>& rr, const ParserT& parser)
1558 {
1559 for (int i = 0; i < 256; ++i)
1560 {
1561 if (parser.test(static_cast<char_t>(uchar(i))))
1562 rr.set(utility::impl::range<char_t>(char_t(i), char_t(i)));
1563 }
1564 }
1565
1566 void operator()(iterator_type const& first_, iterator_type const& last) const
1567 {
1568 BOOST_ASSERT(*first_ == '[');
1569
1570 iterator_type first = first_;
1571 ++first; // skip over '['
1572 bool negated_ccl = false;
1573 if (*first == '^')
1574 {
1575 negated_ccl = true;
1576 ++first;
1577 }
1578
1579 utility::impl::range_run<char_t> rr;
1580 while (first != last &&*first != ']')
1581 {
1582 if (*first == '[') // it's a ccl_expr like [:space:]
1583 {
1584 // check for [:space:], etc.
1585 if (is_equal_to_string(first, last, "[:alnum:]"))
1586 {
1587 fill_ccl(rr, alnum_p);
1588 }
1589 else if (is_equal_to_string(first, last, "[:alpha:]"))
1590 {
1591 fill_ccl(rr, alpha_p);
1592 }
1593 else if (is_equal_to_string(first, last, "[:blank:]"))
1594 {
1595 fill_ccl(rr, blank_p);
1596 }
1597 else if (is_equal_to_string(first, last, "[:cntrl:]"))
1598 {
1599 fill_ccl(rr, cntrl_p);
1600 }
1601 else if (is_equal_to_string(first, last, "[:digit:]"))
1602 {
1603 fill_ccl(rr, digit_p);
1604 }
1605 else if (is_equal_to_string(first, last, "[:graph:]"))
1606 {
1607 fill_ccl(rr, graph_p);
1608 }
1609 else if (is_equal_to_string(first, last, "[:lower:]"))
1610 {
1611 fill_ccl(rr, lower_p);
1612 }
1613 else if (is_equal_to_string(first, last, "[:print:]"))
1614 {
1615 fill_ccl(rr, print_p);
1616 }
1617 else if (is_equal_to_string(first, last, "[:punct:]"))
1618 {
1619 fill_ccl(rr, punct_p);
1620 }
1621 else if (is_equal_to_string(first, last, "[:space:]"))
1622 {
1623 fill_ccl(rr, space_p);
1624 }
1625 else if (is_equal_to_string(first, last, "[:upper:]"))
1626 {
1627 fill_ccl(rr, upper_p);
1628 }
1629 else if (is_equal_to_string(first, last, "[:xdigit:]"))
1630 {
1631 fill_ccl(rr, xdigit_p);
1632 }
1633 // this can't happen, because it's parsed before we get here.
1634 //else
1635 // throw bad_regex();
1636
1637 // Advance past the character class expression
1638 while (first != last &&*first != ']')
1639 ++first;
1640 BOOST_ASSERT(*first == ']');
1641 ++first;
1642 }
1643 else {
1644 const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
1645 escape_char_parser<lex_escapes, char_t>();
1646
1647 char_t c1;
1648 ScannerT scan(first, last);
1649 lex_escape_ch[assign(c1)].parse(scan);
1650 if (*scan.first == '-') // insert a range
1651 {
1652 ++scan.first;
1653 char_t c2;
1654 lex_escape_ch[assign(c2)].parse(scan);
1655 BOOST_ASSERT(c1 < c2); // Throw exception?
1656 rr.set(utility::impl::range<char_t>(c1, c2));
1657 }
1658 else // insert 1 char
1659 {
1660 rr.set(utility::impl::range<char_t>(c1, c1));
1661 }
1662 }
1663 }
1664
1665 if (negated_ccl)
1666 {
1667 rr = ccl_utils::negate_range_run(rr);
1668 }
1669
1670 ccl_utils::create_nodes(rr, m_stack);
1671 }
1672
1673 std::stack<node*>& m_stack;
1674};
1675
1676template <typename ScannerT>
1677class make_any_char
1678{
1679 typedef typename ScannerT::iterator_t iterator_type;
1680
1681public:
1682 typedef
1683 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1684 char_t;
1685
1686 std::stack<node*>& m_stack;
1687
1688 make_any_char(std::stack<node*>& the_stack)
1689 : m_stack(the_stack)
1690 {}
1691
1692 void operator()(const char_t c) const
1693 {
1694 BOOST_ASSERT(c == '.');
1695 do_any_char();
1696 }
1697
1698 void do_any_char() const
1699 {
1700 static utility::impl::range_run<char_t> rr;
1701 rr.set(full_range<char_t>());
1702 char_t newline = '\n';
1703 rr.clear(utility::impl::range<char_t>(newline, newline));
1704
1705 ccl_utils::create_nodes(rr, m_stack);
1706 }
1707};
1708
1709template <typename ScannerT>
1710class make_string
1711{
1712 typedef typename ScannerT::iterator_t iterator_type;
1713
1714public:
1715 typedef
1716 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1717 char_t;
1718
1719 std::stack<node*>& m_stack;
1720
1721 make_string(std::stack<node*>& the_stack)
1722 : m_stack(the_stack)
1723 {}
1724
1725 void operator()(iterator_type const& first, iterator_type const& last) const
1726 {
1727 BOOST_ASSERT(*first == '"');
1728
1729 iterator_type first_ = first;
1730 ScannerT scan(first_, last);
1731 ++scan.first; // skip over '"'
1732
1733 // empty string not allowed
1734 if (*scan.first == '"')
1735 {
1736 boost::throw_exception(bad_regex());
1737 }
1738
1739 const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
1740 escape_char_parser<lex_escapes, char_t>();
1741
1742 char_t c;
1743 lex_escape_ch[assign(c)].parse(scan);
1744 node* top_node = ccl_utils::create_mb_node_seq(c);
1745
1746 while (*scan.first != '"' && scan.first != scan.last)
1747 {
1748 lex_escape_ch[assign(c)].parse(scan);
1749 node* cur_node = ccl_utils::create_mb_node_seq(c);
1750 top_node = new cat_node(top_node, cur_node);
1751 }
1752 m_stack.push(top_node);
1753 }
1754};
1755
1756inline
1757node* repeat_node(node* n, int num)
1758{
1759 node* list_of_nodes = n;
1760 for (int i = 1; i < num; ++i)
1761 {
1762 list_of_nodes = new cat_node(list_of_nodes, n->clone());
1763 }
1764 return list_of_nodes;
1765}
1766
1767inline
1768node* optional_node(node* n)
1769{
1770 return new or_node(n, new epsilon_node());
1771}
1772
1773template <typename ScannerT>
1774class make_rep1
1775{
1776 typedef typename ScannerT::iterator_t iterator_type;
1777
1778public:
1779 typedef
1780 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1781 char_t;
1782
1783 std::stack<node*>& m_stack;
1784
1785 make_rep1(std::stack<node*>& the_stack)
1786 : m_stack(the_stack)
1787 {}
1788
1789 void operator()(iterator_type const& first, iterator_type const& last) const
1790 {
1791 BOOST_ASSERT(*first == '{');
1792
1793 iterator_type first_ = first;
1794 ScannerT scan(first_, last);
1795 ++scan.first; // skip over '{'
1796
1797 unsigned int count;
1798 uint_p[assign(count)].parse(scan);
1799 if (count == 0)
1800 boost::throw_exception(bad_regex());
1801
1802 node* top_node = m_stack.top();
1803 m_stack.pop();
1804 top_node = repeat_node(top_node, count);
1805 m_stack.push(top_node);
1806 }
1807};
1808
1809template <typename ScannerT>
1810class make_rep2
1811{
1812 typedef typename ScannerT::iterator_t iterator_type;
1813
1814public:
1815 typedef
1816 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1817 char_t;
1818
1819 std::stack<node*>& m_stack;
1820
1821 make_rep2(std::stack<node*>& the_stack)
1822 : m_stack(the_stack)
1823 {}
1824
1825 void operator()(iterator_type const& first, iterator_type const& last) const
1826 {
1827 BOOST_ASSERT(*first == '{');
1828
1829 iterator_type first_ = first;
1830 ScannerT scan (first_, last);
1831 ++scan.first; // skip over '{'
1832
1833 unsigned int count;
1834 uint_p[assign(count)].parse(scan);
1835 if (count == 0)
1836 boost::throw_exception(bad_regex());
1837
1838 node* top_node = m_stack.top();
1839 m_stack.pop();
1840 top_node = new cat_node(repeat_node(top_node, count),
1841 new star_node(top_node->clone()));
1842 m_stack.push(top_node);
1843
1844 }
1845};
1846
1847template <typename ScannerT>
1848class make_rep3
1849{
1850 typedef typename ScannerT::iterator_t iterator_type;
1851
1852public:
1853 typedef
1854 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1855 char_t;
1856
1857 std::stack<node*>& m_stack;
1858
1859 make_rep3(std::stack<node*>& the_stack)
1860 : m_stack(the_stack)
1861 {}
1862
1863 void operator()(iterator_type const& first, iterator_type const& last) const
1864 {
1865 BOOST_ASSERT(*first == '{');
1866
1867 iterator_type first_ = first;
1868 ScannerT scan(first_, last);
1869 ++scan.first; // skip over '{'
1870
1871 unsigned int count1, count2;
1872 uint_p[assign(count1)].parse(scan);
1873 if (count1 == 0)
1874 boost::throw_exception(bad_regex());
1875
1876 ++scan.first; // skip over ','
1877
1878 uint_p[assign(count2)].parse(scan);
1879 if (count2 <= count1)
1880 boost::throw_exception(bad_regex());
1881
1882 node* top_node = m_stack.top();
1883 m_stack.pop();
1884 node* repeats = repeat_node(top_node, count1);
1885 top_node = new cat_node(repeats,
1886 repeat_node(optional_node(top_node->clone()),
1887 count2 - count1));
1888
1889 m_stack.push(top_node);
1890 }
1891};
1892
1893///////////////////////////////////////////////////////////////////////////////
1894//
1895// Lexer grammar
1896//
1897// Defines the grammar, which mandates the syntax of the understood
1898// lexeme definitions passed to lexer::register_regex.
1899//
1900///////////////////////////////////////////////////////////////////////////////
1901class lexer_grammar : public boost::spirit::classic::grammar<lexer_grammar>
1902{
1903public:
1904 lexer_grammar(std::stack<node*> &node_stack_)
1905 : node_stack(node_stack_) {}
1906
1907 template <typename ScannerT>
1908 struct definition
1909 {
1910 typedef rule<ScannerT> rule_t;
1911 typedef typename ScannerT::iterator_t iterator_type;
1912 typedef
1913 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1914 char_t;
1915
1916 rule_t regex, re, series, singleton, singleton2, fullccl, ccl, string,
1917 escseq, ccl_char;
1918 symbols<> ccl_expr;
1919
1920 definition(lexer_grammar const &self)
1921 {
1922 regex =
1923 re >> !('/' >> re) >> !ch_p('$')
1924 ;
1925
1926 re =
1927 series
1928 >>*( ('|' >> series)[make_or<ScannerT>(self.node_stack)] )
1929 ;
1930
1931 series =
1932 singleton
1933 >>*( singleton[make_concat<ScannerT>(self.node_stack)] )
1934 ;
1935
1936 singleton =
1937 ch_p('.')[make_any_char<ScannerT>(self.node_stack)]
1938 >> singleton2
1939 | fullccl
1940 >> singleton2
1941 | ('"' >> string >> '"')
1942 [
1943 make_string<ScannerT>(self.node_stack)
1944 ]
1945 >> singleton2
1946 | '(' >> re >> ')'
1947 >> singleton2
1948 | ((anychar_p - chset<>("/|*+?.(){}\\")) | escseq)
1949 [
1950 make_char<ScannerT>(self.node_stack)
1951 ]
1952 >> singleton2
1953 ;
1954
1955 singleton2 =
1956 ch_p('*')[make_star<ScannerT>(self.node_stack)]
1957 >> singleton2
1958 | ch_p('+')[make_plus<ScannerT>(self.node_stack)]
1959 >> singleton2
1960 | ch_p('?')[make_optional<ScannerT>(self.node_stack)]
1961 >> singleton2
1962 | ('{' >> uint_p >> '}')
1963 [
1964 make_rep1<ScannerT>(self.node_stack)
1965 ]
1966 >> singleton2
1967 | ('{' >> uint_p >> ',' >> '}')
1968 [
1969 make_rep2<ScannerT>(self.node_stack)
1970 ]
1971 >> singleton2
1972 | ('{' >> uint_p >> ',' >> uint_p >> '}')
1973 [
1974 make_rep3<ScannerT>(self.node_stack)
1975 ]
1976 >> singleton2
1977 | epsilon_p
1978 ;
1979
1980 fullccl =
1981 ('[' >> !ch_p('^') >> ccl >> ']')
1982 [
1983 make_ccl<ScannerT>(self.node_stack)
1984 ]
1985 ;
1986
1987 ccl =
1988 *(ccl_expr | (ccl_char >> !('-' >> ccl_char)))
1989 ;
1990
1991 ccl_char =
1992 ( (anychar_p - chset<>("\\\n]")) | escseq )
1993 ;
1994
1995 ccl_expr =
1996 "[:alnum:]",
1997 "[:alpha:]",
1998 "[:blank:]",
1999 "[:cntrl:]",
2000 "[:digit:]",
2001 "[:graph:]",
2002 "[:lower:]",
2003 "[:print:]",
2004 "[:punct:]",
2005 "[:space:]",
2006 "[:upper:]",
2007 "[:xdigit:]"
2008 ;
2009
2010 string =
2011 +( (anychar_p - chset<>("\"\\")) | escseq )
2012 ;
2013
2014 typedef
2015 uint_parser<char_t, 8, 1,
2016 std::numeric_limits<char_t>::digits / 3 + 1
2017 > oct_parser_t;
2018 typedef
2019 uint_parser<char_t, 16, 1,
2020 std::numeric_limits<char_t>::digits / 4 + 1
2021 > hex_parser_t;
2022
2023 escseq =
2024 ch_p('\\')
2025 >> (
2026 oct_parser_t()
2027 | as_lower_d['x'] >> hex_parser_t()
2028 | (anychar_p - chset<>('\n'))
2029 )
2030 ;
2031
2032#define BOOST_SLEX_DEBUG (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
2033
2034 BOOST_SPIRIT_DEBUG_TRACE_RULE(regex, BOOST_SLEX_DEBUG);
2035 BOOST_SPIRIT_DEBUG_TRACE_RULE(re, BOOST_SLEX_DEBUG);
2036 BOOST_SPIRIT_DEBUG_TRACE_RULE(series, BOOST_SLEX_DEBUG);
2037 BOOST_SPIRIT_DEBUG_TRACE_RULE(singleton, BOOST_SLEX_DEBUG);
2038 BOOST_SPIRIT_DEBUG_TRACE_RULE(singleton2, BOOST_SLEX_DEBUG);
2039 BOOST_SPIRIT_DEBUG_TRACE_RULE(fullccl, BOOST_SLEX_DEBUG);
2040 BOOST_SPIRIT_DEBUG_TRACE_RULE(ccl, BOOST_SLEX_DEBUG);
2041 BOOST_SPIRIT_DEBUG_TRACE_RULE(string, BOOST_SLEX_DEBUG);
2042 BOOST_SPIRIT_DEBUG_TRACE_RULE(escseq, BOOST_SLEX_DEBUG);
2043 BOOST_SPIRIT_DEBUG_TRACE_RULE(ccl_char, BOOST_SLEX_DEBUG);
2044
2045#undef BOOST_SLEX_DEBUG
2046 }
2047
2048 rule<ScannerT> const&
2049 start() const { return regex; }
2050 };
2051
2052 std::stack<node*> &node_stack;
2053
2054}; // class lexer_grammar
2055
2056template <typename StringT>
2057inline node *
2058parse(lexer_grammar& g, StringT const& str)
2059{
2060 typedef
2061 scanner<typename StringT::const_iterator, scanner_policies<> >
2062 scanner_t;
2063 typedef typename rule<scanner_t>::template result<scanner_t>::type
2064 result_t;
2065
2066 typename StringT::const_iterator first = str.begin();
2067 typename StringT::const_iterator last = str.end();
2068
2069 scanner_t scan(first, last);
2070// typename rule<scanner_t>::result_t hit = g.parse(scan);
2071 result_t hit = g.parse(scan);
2072 if (!hit || !scan.at_end())
2073 {
2074 while (g.node_stack.size())
2075 {
2076 delete g.node_stack.top();
2077 g.node_stack.pop();
2078 }
2079 return 0;
2080 }
2081
2082 BOOST_ASSERT(g.node_stack.size() == 1);
2083 node* rval = g.node_stack.top();
2084 g.node_stack.pop();
2085 node* an_eof_node = new eof_node();
2086 rval = new cat_node(rval, an_eof_node);
2087 return rval;
2088}
2089
2090inline
2091void make_case_insensitive(state_match_t& state_match)
2092{
2093 // TODO: Fix this.
2094 // This doesn't take into account foreign languages, figure out how to
2095 // do that. Also this approach is broken for this implementation of
2096 // wide chars.
2097 for (state_match_t::iterator iter = state_match.begin();
2098 iter != state_match.end(); ++iter)
2099 {
2100 int i, j;
2101 for (i = 'A', j = 'a'; i <= 'Z'; ++i, ++j)
2102 {
2103 // if either is set, turn them both on
2104 (*iter)[i] = (*iter)[j] = uchar((*iter)[i] | (*iter)[j]);
2105 }
2106 }
2107}
2108
2109template<bool wide_char>
2110struct regex_match_helper;
2111
2112template<>
2113struct regex_match_helper<false> // single byte char
2114{
2115 template <typename DfaT, typename IteratorT>
2116 static bool
2117 do_match(DfaT const& dfa, IteratorT &first, IteratorT const& last,
2118 int& regex_index,
2119 std::basic_string<
2120 typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2121 > *token)
2122 {
2123 typedef std::basic_string<
2124 typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2125 > string_type;
2126 typedef typename string_type::size_type size_type;
2127
2128 node_id_t s = 0;
2129 node_id_t last_accepting_index = invalid_node;
2130 IteratorT p = first;
2131 IteratorT last_accepting_cpos = first;
2132 while (p != last)
2133 {
2134 s = dfa.transition_table[s][(uchar)*p];
2135 if (s == invalid_node)
2136 break;
2137 if (token) token->append((size_type)1, *p);
2138 ++p;
2139 if (dfa.acceptance_index[s] != invalid_node)
2140 {
2141 last_accepting_index = s;
2142 last_accepting_cpos = p;
2143 }
2144 }
2145 if (last_accepting_index != invalid_node)
2146 {
2147#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
2148 std::cout << "dfa.acceptance_index[" << last_accepting_index << "] = " <<
2149 dfa.acceptance_index[last_accepting_index] << '\n';
2150#endif
2151
2152 first = last_accepting_cpos;
2153 regex_index = dfa.acceptance_index[last_accepting_index];
2154 return true;
2155 }
2156 else
2157 return false;
2158 }
2159};
2160
2161template<>
2162struct regex_match_helper<true> // wide char
2163{
2164 template <typename DfaT, typename IteratorT>
2165 static bool
2166 do_match(DfaT const& dfa, IteratorT &first, IteratorT const& last,
2167 int& regex_index,
2168 std::basic_string<
2169 typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2170 > *token)
2171 {
2172 typedef
2173 typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2174 char_t;
2175 typedef std::basic_string<char_t> string_type;
2176 typedef typename string_type::size_type size_type;
2177
2178 node_id_t s = 0;
2179 node_id_t last_accepting_index = invalid_node;
2180 IteratorT wp = first;
2181 IteratorT last_accepting_cpos = first;
2182
2183 while (wp != last)
2184 {
2185 for (unsigned int i = 0; i < sizeof(char_t); ++i)
2186 {
2187 s = dfa.transition_table[s][get_byte(*wp,i)];
2188 if (s == invalid_node)
2189 {
2190 goto break_while;
2191 }
2192 }
2193 if (token) token->append((size_type)1, *wp);
2194 ++wp;
2195 if (dfa.acceptance_index[s] != invalid_node)
2196 {
2197 last_accepting_index = s;
2198 last_accepting_cpos = wp;
2199 }
2200
2201 }
2202
2203 break_while:
2204 if (last_accepting_index != invalid_node)
2205 {
2206#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
2207 std::cout << "dfa.acceptance_index[" << last_accepting_index << "] = " <<
2208 dfa.acceptance_index[last_accepting_index] << '\n';
2209#endif
2210 first = last_accepting_cpos;
2211 regex_index = dfa.acceptance_index[last_accepting_index];
2212
2213 return true;
2214 }
2215 else
2216 return false;
2217 }
2218};
2219
2220template <typename DfaT, typename IteratorT, bool wide_char>
2221struct regex_match
2222{
2223 static bool
2224 do_match(DfaT const& dfa, IteratorT &first, IteratorT const& last,
2225 int& regex_index,
2226 std::basic_string<
2227 typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2228 > *token)
2229 {
2230 return regex_match_helper<wide_char>::do_match(
2231 dfa, first, last, regex_index, token);
2232 }
2233};
2234
2235} // namespace lexerimpl
2236
2237///////////////////////////////////////////////////////////////////////////////
2238//
2239template <typename IteratorT = char const*, typename TokenT = int,
2240 typename CallbackT = void(*)(IteratorT const &,
2241 IteratorT &,
2242 IteratorT const&,
2243 TokenT const&,
2244 lexer_control<TokenT> &)>
2245class lexer
2246{
2247public:
2248 typedef CallbackT callback_t;
2249 typedef
2250 typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2251 char_t;
2252
2253 struct regex_info
2254 {
2255 std::basic_string<char_t> str;
2256 TokenT token;
2257 CallbackT callback;
2258
2259 regex_info(const std::basic_string<char_t>& _str,
2260 const TokenT& _token,
2261 const CallbackT& _callback)
2262 : str(_str)
2263 , token(_token)
2264 , callback(_callback)
2265 {}
2266
2267 };
2268
2269 struct dfa_table
2270 {
2271 std::vector<std::vector<node_id_t> > transition_table;
2272 std::vector<node_id_t> acceptance_index;
2273 };
2274 typedef std::vector<node_id_t> node_table_t;
2275 typedef std::vector<node_table_t> transition_table_t;
2276 typedef std::vector<dfa_table> dfa_t;
2277
2278
2279 lexer(unsigned int states = 1);
2280
2281 void register_regex(const std::basic_string<char_t>& regex,
2282 const TokenT& id, const CallbackT& cb = CallbackT(),
2283 unsigned int state = 0);
2284
2285 TokenT next_token(IteratorT &first, IteratorT const &last,
2286 std::basic_string<char_t> *token = 0);
2287
2288 void create_dfa();
2289 bool has_compiled_dfa() { return m_compiled_dfa; }
2290
2291 void set_case_insensitive(bool insensitive);
2292
2293#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
2294 void dump(std::ostream& out);
2295#endif
2296 typedef std::vector<std::vector<regex_info> > regex_list_t;
2297
2298 bool load (std::ifstream &in, long unique_id = 0);
2299 bool save (std::ofstream &out, long unique_id = 0);
2300 enum {
2301 SLEX_SIGNATURE = 0x58454C53, // "SLEX"
2302 SLEX_VERSION_100 = 0x0100, // persistance version
2303 SLEX_LAST_KNOWN_VERSION = SLEX_VERSION_100,
2304 SLEX_MINOR_VERSION_MASK = 0xFF
2305 };
2306
2307private:
2308
2309 void create_dfa_for_state(int state);
2310
2311 static bool regex_match(const dfa_t& dfa, IteratorT& first,
2312 IteratorT& last, int& regex_index);
2313
2314 mutable std::stack<lexerimpl::node*> node_stack;
2315 lexerimpl::lexer_grammar g;
2316
2317 mutable bool m_compiled_dfa;
2318 mutable dfa_t m_dfa;
2319
2320 regex_list_t m_regex_list;
2321 bool m_case_insensitive;
2322
2323 unsigned int m_state;
2324 std::stack<unsigned int> m_state_stack;
2325 unsigned int m_num_states;
2326};
2327
2328
2329template <typename IteratorT, typename TokenT, typename CallbackT>
2330inline
2331lexer<IteratorT, TokenT, CallbackT>::lexer(unsigned int states)
2332 : g(node_stack)
2333 , m_compiled_dfa(false)
2334 , m_regex_list(states)
2335 , m_case_insensitive(false)
2336 , m_state(0)
2337 , m_state_stack()
2338 , m_num_states(states)
2339{
2340 BOOST_SPIRIT_DEBUG_TRACE_NODE_NAME(g, "slex::lexer",
2341 BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX);
2342}
2343
2344template <typename IteratorT, typename TokenT, typename CallbackT>
2345inline void
2346lexer<IteratorT, TokenT, CallbackT>::register_regex(
2347 const std::basic_string<char_t>& regex, const TokenT& id,
2348 const CallbackT& callback, unsigned int state)
2349{
2350 if (state > m_num_states) {
2351 m_regex_list.resize(state);
2352 m_num_states = state;
2353 }
2354 m_regex_list[state].push_back(regex_info(regex, id, callback));
2355}
2356
2357template <typename IteratorT, typename TokenT, typename CallbackT>
2358inline TokenT
2359lexer<IteratorT, TokenT, CallbackT>::next_token(
2360 IteratorT &first, IteratorT const& last,
2361 std::basic_string<
2362 typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2363 > *token)
2364{
2365 if (!m_compiled_dfa)
2366 {
2367 create_dfa();
2368 }
2369
2370 IteratorT saved = first;
2371 int regex_index;
2372 if (!lexerimpl::regex_match<dfa_table, IteratorT, (sizeof(char_t) > 1)>::
2373 do_match(m_dfa[m_state], first, last, regex_index, token))
2374 return -1; // TODO: can't return -1, need to return some invalid token.
2375 // how to figure this out? We can use traits I guess.
2376 else
2377 {
2378 regex_info regex = m_regex_list[m_state][regex_index];
2379 TokenT rval = regex.token;
2380 if (regex.callback)
2381 {
2382 // execute corresponding callback
2383 lexer_control<TokenT> controller(rval, m_state, m_state_stack);
2384 regex.callback(saved, first, last, regex.token, controller);
2385 if (controller.ignore_current_token_set()) {
2386 if (token)
2387 token->erase();
2388 return next_token(first, last, token);
2389 }
2390 }
2391 return rval;
2392 }
2393}
2394
2395namespace lexerimpl
2396{
2397
2398inline
2399bool find_acceptance_state(const node_set& eof_node_ids,
2400 const node_set& current_set,
2401 node_id_t& acceptance_node_id)
2402{
2403 for(node_set::const_iterator nsi = eof_node_ids.begin();
2404 nsi != eof_node_ids.end(); ++nsi)
2405 {
2406 node_id_t eof_node_id =*nsi;
2407 if (current_set.end() != current_set.find(eof_node_id))
2408 {
2409 // store the first one we come to as the
2410 // matched pattern
2411 acceptance_node_id = eof_node_id;
2412 // don't bother searching for more
2413 return true;
2414 }
2415 }
2416 return false;
2417}
2418
2419template <typename RegexListT, typename GrammarT>
b32b8144
FG
2420#ifdef BOOST_NO_AUTO_PTR
2421inline std::unique_ptr<node>
2422#else
7c673cae 2423inline std::auto_ptr<node>
b32b8144 2424#endif
7c673cae
FG
2425parse_regexes(const RegexListT& regex_list, GrammarT& g)
2426{
2427 // parse the expressions into a tree
2428 if (regex_list.begin() == regex_list.end())
2429 boost::throw_exception(bad_regex());
2430
2431 typename RegexListT::const_iterator ri = regex_list.begin();
b32b8144
FG
2432#ifdef BOOST_NO_AUTO_PTR
2433 std::unique_ptr<node> tree(lexerimpl::parse(g, (*ri).str));
2434#else
7c673cae 2435 std::auto_ptr<node> tree(lexerimpl::parse(g, (*ri).str));
b32b8144 2436#endif
7c673cae
FG
2437 if (tree.get() == 0)
2438 boost::throw_exception(bad_regex());
2439
2440 ++ri;
2441 for (/**/; ri != regex_list.end(); ++ri)
2442 {
b32b8144
FG
2443#ifdef BOOST_NO_AUTO_PTR
2444 std::unique_ptr<node> next_tree(lexerimpl::parse(g, (*ri).str));
2445#else
7c673cae 2446 std::auto_ptr<node> next_tree(lexerimpl::parse(g, (*ri).str));
b32b8144 2447#endif
7c673cae
FG
2448 if (next_tree.get() == 0)
2449 boost::throw_exception(bad_regex());
b32b8144
FG
2450#ifdef BOOST_NO_AUTO_PTR
2451 tree = std::unique_ptr<node>(new or_node(tree.release(), next_tree.release()));
2452#else
2453 tree = std::auto_ptr<node>(new or_node(tree.release(), next_tree.release()));
2454#endif
7c673cae
FG
2455 }
2456 return tree;
2457}
2458
2459} //namespace lexerimpl
2460
2461template <typename IteratorT, typename TokenT, typename CallbackT>
2462inline void
2463lexer<IteratorT, TokenT, CallbackT>::create_dfa()
2464{
2465 m_dfa.resize(m_num_states);
2466 for (unsigned int i = 0; i < m_num_states; ++i)
2467 create_dfa_for_state(i);
2468}
2469
2470// Algorithm from Compilers: Principles, Techniques, and Tools p. 141
2471template <typename IteratorT, typename TokenT, typename CallbackT>
2472inline void
2473lexer<IteratorT, TokenT, CallbackT>::create_dfa_for_state(int state)
2474{
2475 using lexerimpl::node;
b32b8144
FG
2476#ifdef BOOST_NO_AUTO_PTR
2477 std::unique_ptr<node> tree = lexerimpl::parse_regexes(m_regex_list[state], g);
2478#else
7c673cae 2479 std::auto_ptr<node> tree = lexerimpl::parse_regexes(m_regex_list[state], g);
b32b8144 2480#endif
7c673cae
FG
2481 node_id_t dummy = 0;
2482 tree->assign_node_ids(dummy);
2483
2484#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
2485 tree->dump(std::cout);
2486#endif
2487
2488 // compute followpos(root)
2489 followpos_t followpos;
2490 tree->compute_followpos(followpos);
2491
2492 // the dfa states <-> nfa state groups
2493 std::map<node_set, node_id_t> dstates1;
2494 std::map<node_id_t, node_set> dstates2;
2495
2496 // the dfa transitions
2497 m_dfa[state].transition_table.push_back(
2498 std::vector<node_id_t>(256, invalid_node));
2499 m_dfa[state].acceptance_index.push_back(invalid_node);
2500
2501 // whether the dfa state has been processed yet
2502 std::vector<node_id_t> marked;
2503
2504 // used to give a unique id to each dfa state
2505 node_id_t num_states = 0;
2506
2507 // initially, the only unmarked state in Dstates is firstpos(root).
2508 marked.push_back(0);
2509 node_set fpr = tree->firstpos();
2510 dstates1[fpr] = 0;
2511 dstates2[0] = fpr;
2512 state_match_t state_match;
2513 tree->compute_state_match(state_match);
2514
2515 if (m_case_insensitive)
2516 lexerimpl::make_case_insensitive(state_match);
2517
2518 node_set eof_node_ids;
2519 tree->get_eof_ids(eof_node_ids);
2520 // translate the eof_node_ids into a 0-based index
2521 std::map<node_id_t, node_id_t> eof_node_id_map;
2522 unsigned int x = 0;
2523 for (node_set::iterator node_id_it = eof_node_ids.begin();
2524 node_id_it != eof_node_ids.end();
2525 ++node_id_it)
2526 {
2527 eof_node_id_map[*node_id_it] = x++;
2528 }
2529
2530 // figure out if this is an acceptance state
2531 node_id_t eof_node_id;
2532 if (lexerimpl::find_acceptance_state(eof_node_ids, fpr, eof_node_id))
2533 {
2534 m_dfa[state].acceptance_index[0] = eof_node_id_map[eof_node_id];
2535 }
2536
2537 std::vector<node_id_t>::iterator i = std::find(marked.begin(), marked.end(),
2538 node_id_t(0));
2539 while (marked.end() != i)
2540 {
2541 *i = 1;
2542 node_id_t T = node_id_t(std::distance(marked.begin(), i));
2543 BOOST_ASSERT(T < dstates2.size());
2544 node_set Tstates = dstates2[T];
2545 for (node_id_t j = 0; j < 256; ++j)
2546 {
2547 node_set U;
2548 for (node_set::iterator k = Tstates.begin();
2549 k != Tstates.end(); ++k)
2550 {
2551 node_id_t p =*k;
2552 BOOST_ASSERT(p < state_match.size());
2553 BOOST_ASSERT(j < state_match[p].size());
2554 if (state_match[p][j])
2555 {
2556 node_set fpp = followpos[p];
2557 U.insert(fpp.begin(), fpp.end());
2558 }
2559 }
2560 if (U.size() > 0)
2561 {
2562 std::map<node_set, node_id_t>::iterator l = dstates1.find(U);
2563 node_id_t target_state;
2564 if (l == dstates1.end()) // not in the states yet
2565 {
2566 ++num_states;
2567 dstates1[U] = target_state = num_states;
2568 dstates2[target_state] = U;
2569 marked.push_back(0);
2570 m_dfa[state].transition_table.push_back(
2571 std::vector<node_id_t>(256, invalid_node));
2572 m_dfa[state].acceptance_index.push_back(invalid_node);
2573 // figure out if this is an acceptance state
2574 node_id_t eof_node_id;
2575 if (lexerimpl::find_acceptance_state(eof_node_ids, U, eof_node_id))
2576 {
2577 m_dfa[state].acceptance_index[target_state] =
2578 eof_node_id_map[eof_node_id];
2579 }
2580 }
2581 else
2582 {
2583 target_state = dstates1[U];
2584 }
2585
2586 BOOST_ASSERT(T < m_dfa[state].transition_table.size());
2587 BOOST_ASSERT(j < m_dfa[state].transition_table[T].size());
2588 m_dfa[state].transition_table[T][j] = target_state;
2589 }
2590
2591 }
2592
2593 i = std::find(marked.begin(), marked.end(), node_id_t(0));
2594 }
2595 m_compiled_dfa = true;
2596}
2597
2598template <typename IteratorT, typename TokenT, typename CallbackT>
2599inline void
2600lexer<IteratorT, TokenT, CallbackT>::set_case_insensitive(bool insensitive)
2601{
2602 m_case_insensitive = insensitive;
2603}
2604
2605
2606#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
2607template <typename IteratorT, typename TokenT, typename CallbackT>
2608inline void
2609lexer<IteratorT, TokenT, CallbackT>::dump(std::ostream& out)
2610{
2611 for (unsigned x = 0; x < m_dfa.size(); ++x)
2612 {
2613 out << "\nm_dfa[" << x << "] has " << m_dfa[x].transition_table.size() << " states\n";
2614 for (node_id_t i = 0; i < m_dfa[x].transition_table.size(); ++i)
2615 {
2616 out << "state " << i << ":";
2617 for (node_id_t j = 0; j < m_dfa[x].transition_table[i].size(); ++j)
2618 {
2619 if (m_dfa[x].transition_table[i][j] != invalid_node)
2620 out << j << "->" << m_dfa[x].transition_table[i][j] << " ";
2621 }
2622 out << "\n";
2623 }
2624 out << "acceptance states: ";
2625 for(unsigned int k = 0; k < m_dfa[x].acceptance_index.size(); ++k)
2626 {
2627 if (m_dfa[x].acceptance_index[k] != invalid_node)
2628 out << '<' << k << ',' << m_dfa[x].acceptance_index[k] << "> ";
2629 }
2630 out << endl;
2631 }
2632}
2633#endif
2634
2635///////////////////////////////////////////////////////////////////////////////
2636// load the lexer tables
2637#define slex_in(strm, val) \
2638 strm.read((char*)&val, sizeof(val)); \
2639 if (std::ios::goodbit != strm.rdstate()) return false
2640
2641template <typename IteratorT, typename TokenT, typename CallbackT>
2642inline bool
2643lexer<IteratorT, TokenT, CallbackT>::load (std::ifstream &in, long unique_id)
2644{
2645// ensure correct signature and version
2646long signature = 0;
2647
2648 slex_in (in, signature);
2649 if (signature != SLEX_SIGNATURE)
2650 return false; // not for us
2651
2652long version = 0;
2653
2654 slex_in (in, version);
2655 if ((version & ~SLEX_MINOR_VERSION_MASK) > SLEX_LAST_KNOWN_VERSION)
2656 return false; // to new for us
2657
2658long uid = 0;
2659
2660 slex_in (in, uid);
2661 if (uid != unique_id)
2662 return false; // not saved by us
2663
2664// load auxiliary members
2665int num_states = 0;
2666
2667 slex_in (in, num_states);
2668
2669// load the dfa tables
2670dfa_t in_dfa;
2671std::size_t dfa_size = 0;
2672
2673 slex_in (in, dfa_size);
2674 in_dfa.resize(dfa_size);
2675 for (std::size_t dfa = 0; dfa < dfa_size; ++dfa)
2676 {
2677 // load the transition tables
2678 std::size_t tt_size = 0;
2679 transition_table_t &tt_table = in_dfa[dfa].transition_table;
2680
2681 slex_in (in, tt_size);
2682 tt_table.resize(tt_size);
2683 for (std::size_t tt = 0; tt < tt_size; ++tt)
2684 {
2685 std::size_t nt_size = 0;
2686 node_table_t &nt_table = tt_table[tt];
2687
2688 slex_in (in, nt_size);
2689 nt_table.resize(nt_size);
2690 for (std::size_t nt = 0; nt < nt_size; ++nt)
2691 {
2692 slex_in (in, nt_table[nt]);
2693 }
2694 }
2695
2696 // load the acceptance index table
2697 std::size_t ai_size = 0;
2698 node_table_t &ai_table = in_dfa[dfa].acceptance_index;
2699
2700 slex_in (in, ai_size);
2701 ai_table.resize(ai_size);
2702 for (std::size_t ai = 0; ai < ai_size; ++ai)
2703 {
2704 slex_in (in, ai_table[ai]);
2705 }
2706 }
2707
2708 m_dfa.swap(in_dfa); // success, swap in the read values
2709 m_num_states = num_states;
2710
2711 m_compiled_dfa = true;
2712 return true;
2713}
2714
2715#undef slex_in
2716
2717///////////////////////////////////////////////////////////////////////////////
2718// save the lexer tables
2719#define slex_out(strm, val) \
2720 strm.write((char*)&val, sizeof(val)); \
2721 if (std::ios::goodbit != strm.rdstate()) return false
2722
2723template <typename IteratorT, typename TokenT, typename CallbackT>
2724inline bool
2725lexer<IteratorT, TokenT, CallbackT>::save (std::ofstream &out, long unique_id)
2726{
2727// save signature and version information
2728long out_long = SLEX_SIGNATURE;
2729
2730 slex_out(out, out_long);
2731 out_long = SLEX_VERSION_100;
2732 slex_out(out, out_long);
2733 slex_out(out, unique_id);
2734
2735// save auxiliary members
2736 slex_out(out, m_num_states);
2737
2738// save the dfa tables
2739 typedef typename dfa_t::const_iterator dfa_iter_t;
2740 typedef transition_table_t::const_iterator transition_table_iter_t;
2741 typedef node_table_t::const_iterator node_table_iter_t;
2742
2743 std::size_t out_size_t = m_dfa.size();
2744 slex_out(out, out_size_t);
2745
2746 dfa_iter_t end = m_dfa.end();
2747 for (dfa_iter_t it = m_dfa.begin(); it != end; ++it)
2748 {
2749 // save the transition table
2750 out_size_t = (*it).transition_table.size();
2751 slex_out(out, out_size_t);
2752
2753 transition_table_iter_t tt_end = (*it).transition_table.end();
2754 for (transition_table_iter_t tt_it = (*it).transition_table.begin();
2755 tt_it != tt_end;
2756 ++tt_it)
2757 {
2758 out_size_t = (*tt_it).size();
2759 slex_out(out, out_size_t);
2760
2761 node_table_iter_t nt_end = (*tt_it).end();
2762 for (node_table_iter_t nt_it = (*tt_it).begin();
2763 nt_it != nt_end;
2764 ++nt_it)
2765 {
2766 slex_out(out, (*nt_it));
2767 }
2768 }
2769
2770 // save the acceptance index table
2771 out_size_t = (*it).acceptance_index.size();
2772 slex_out(out, out_size_t);
2773
2774 node_table_iter_t nt_end = (*it).acceptance_index.end();
2775 for (node_table_iter_t nt_it = (*it).acceptance_index.begin();
2776 nt_it != nt_end;
2777 ++nt_it)
2778 {
2779 slex_out(out, (*nt_it));
2780 }
2781 }
2782 return true;
2783}
2784
2785#undef slex_out
2786
2787/*
2788a lexer_control object supports some operations on the lexer.
2789 get current lexer state
2790 set state
2791 terminate
2792 state stack (push, pop, top)
2793 set new token return value
2794 ignore the current token
2795 yymore
2796 get length of matched token
2797*/
2798template <typename TokenT>
2799class lexer_control
2800{
2801public:
2802
2803 lexer_control(TokenT& token, unsigned int& current_state,
2804 std::stack<unsigned int>& state_stack);
2805 // functions dealing with the lexer state
2806
2807 // set the state to state
2808 void set_state(unsigned int state);
2809
2810 // get the current state
2811 unsigned int get_state();
2812
2813 // pushes the current state onto the top of the state stack and
2814 // switches to new_state
2815 void push_state(unsigned int new_state);
2816
2817 // pops the top of the state stack and switches to it.
2818 void pop_state();
2819
2820 // returns the top of the stack without altering the stack's contents.
2821 unsigned int top_state();
2822
2823 // functions dealing with the token returned.
2824
2825 // set a new TokenT return value, overriding that one that was
2826 // registered via. register_regex()
2827 void set_token(const TokenT& token);
2828
2829 // tell the lexer to return an invalid token, signifying termination.
2830 void terminate();
2831
2832 // ignore the current token, and move on to the next one. The current
2833 // token will NOT be returned from next_token()
2834 void ignore_current_token();
2835
2836 // returns true if ignore_current_token() has been called,
2837 // false otherwise.
2838 bool ignore_current_token_set();
2839
2840private:
2841 TokenT& m_token;
2842 bool m_ignore_current_token;
2843 unsigned int& m_current_state;
2844 std::stack<unsigned int>& m_state_stack;
2845};
2846
2847template <typename TokenT>
2848inline
2849lexer_control<TokenT>::lexer_control(TokenT& token, unsigned int& current_state,
2850 std::stack<unsigned int>& state_stack)
2851 : m_token(token)
2852 , m_ignore_current_token(false)
2853 , m_current_state(current_state)
2854 , m_state_stack(state_stack)
2855{
2856}
2857
2858template <typename TokenT>
2859inline void
2860lexer_control<TokenT>::set_state(unsigned int state)
2861{
2862 m_current_state = state;
2863}
2864
2865template <typename TokenT>
2866inline unsigned int
2867lexer_control<TokenT>::get_state()
2868{
2869 return m_current_state;
2870}
2871
2872template <typename TokenT>
2873inline void
2874lexer_control<TokenT>::push_state(unsigned int new_state)
2875{
2876 m_state_stack.push(m_current_state);
2877 m_current_state = new_state;
2878}
2879
2880template <typename TokenT>
2881inline void
2882lexer_control<TokenT>::pop_state()
2883{
2884 m_current_state = m_state_stack.top();
2885 m_state_stack.pop();
2886}
2887
2888template <typename TokenT>
2889inline unsigned int
2890lexer_control<TokenT>::top_state()
2891{
2892 return m_state_stack.top();
2893}
2894
2895template <typename TokenT>
2896inline void
2897lexer_control<TokenT>::set_token(const TokenT& token)
2898{
2899 m_token = token;
2900}
2901
2902template <typename TokenT>
2903inline void
2904lexer_control<TokenT>::terminate()
2905{
2906 m_token = -1; // TOOD: fix this, need to be determined by traits
2907}
2908
2909template <typename TokenT>
2910inline void
2911lexer_control<TokenT>::ignore_current_token()
2912{
2913 m_ignore_current_token = true;
2914}
2915
2916template <typename TokenT>
2917inline bool
2918lexer_control<TokenT>::ignore_current_token_set()
2919{
2920 return m_ignore_current_token;
2921}
2922
2923} // namespace classic
2924} // namespace spirit
2925} // namespace boost
2926
2927#undef BOOST_SPIRIT_IT_NS
2928
2929#endif
2930