]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/libs/wave/samples/cpp_tokens/slex/lexer.hpp
import quincy beta 17.1.0
[ceph.git] / ceph / src / boost / libs / wave / samples / cpp_tokens / slex / lexer.hpp
CommitLineData
7c673cae
FG
1/*=============================================================================
2 Boost.Wave: A Standard compliant C++ preprocessor library
3
4 Spirit based lexer
5
6 http://www.boost.org/
7
8 Copyright (c) 2001, Daniel C. Nuffer.
9 Copyright (c) 2001-2012 Hartmut Kaiser.
10 Distributed under the Boost Software License, Version 1.0. (See accompanying
11 file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
12
13 TODO List:
14 X callback objects (called when a match is made.)
15 X callback passed first & last iterator, and
16 a reference to a lexercontrol object that supports some
17 operations on the lexer.
18 set state
19 terminate
20 state stack (push, pop, top)
21 set new token return value
22 ignore the current token
23 yymore
24 get length of matched token
25 get current lexer state
26 X DFA serialization to save recomputing the DFA.
27
28 lexer states.
29 organize the file into hpp and ipp. arrange the classes in a logical order.
30 use buffering - iterator only needs be an input iterator,
31 lexer & callback are not templatized on iterator type, but char type.
32 next_token is templatized on iterator type.
33 beginning/ending contexts.
34 ^ and $
35 DFA minimization.
36 DFA table compression.
37
38=============================================================================*/
39#ifndef BOOST_SPIRIT_LEXER_HPP
40#define BOOST_SPIRIT_LEXER_HPP
41
42///////////////////////////////////////////////////////////////////////////////
92f5a8d4 43#include <boost/config.hpp>
7c673cae
FG
44#include <boost/throw_exception.hpp>
45
46#include <boost/spirit/include/classic_core.hpp>
47#include <boost/spirit/include/classic_symbols.hpp>
48#include <boost/spirit/include/classic_chset.hpp>
49#include <boost/spirit/include/classic_escape_char.hpp>
50
51#include <set>
52#include <map>
92f5a8d4 53#include <memory> // for auto_ptr/unique_ptr
7c673cae
FG
54#include <vector>
55#include <stack>
56#include <utility> // for pair
20effc67 57#if defined(BOOST_SPIRIT_DEBUG)
7c673cae 58#include <iostream>
20effc67 59#endif
7c673cae
FG
60#include <fstream>
61#include <boost/assert.hpp>
62#include <boost/limits.hpp>
63
64#if defined(BOOST_NO_STD_ITERATOR_TRAITS)
65#define BOOST_SPIRIT_IT_NS impl
66#else
67#define BOOST_SPIRIT_IT_NS std
68#endif
69
70///////////////////////////////////////////////////////////////////////////////
71namespace boost {
72namespace spirit {
73namespace classic {
74
75typedef unsigned char uchar;
76typedef unsigned int node_id_t;
77const node_id_t invalid_node = node_id_t(-1);
78typedef std::set<node_id_t> node_set;
79typedef std::vector<uchar> uchar_vector;
80typedef std::map<node_id_t, node_set> followpos_t;
81typedef std::vector<uchar_vector> state_match_t;
82
83template <typename TokenT>
84class lexer_control;
85
86class bad_regex : public std::exception
87{
88};
89
90namespace lexerimpl
91{
92
93class node
94{
95public:
96
97 virtual ~node() {}
98
99 virtual node* clone() const = 0;
100 virtual bool nullable() const = 0;
101 virtual node_set firstpos() const = 0;
102 virtual node_set lastpos() const = 0;
103 virtual void compute_followpos(followpos_t& followpos) const = 0;
104 virtual void compute_state_match(state_match_t& state_match) const = 0;
105 virtual void get_eof_ids(node_set& eof_set) const = 0;
106 virtual void assign_node_ids(node_id_t& node_count) = 0;
107#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
108 virtual void dump(std::ostream& out) const = 0;
109#endif
110
111};
112
113class char_node : public node
114{
115
116public:
117
118 char_node(const uchar c);
119 char_node(const char_node& x);
120 virtual ~char_node(){}
121
20effc67
TL
122 node* clone() const BOOST_OVERRIDE;
123 bool nullable() const BOOST_OVERRIDE;
124 node_set firstpos() const BOOST_OVERRIDE;
125 node_set lastpos() const BOOST_OVERRIDE;
126 void compute_followpos(followpos_t& followpos) const BOOST_OVERRIDE;
127 void compute_state_match(state_match_t& state_match ) const BOOST_OVERRIDE;
128 void get_eof_ids(node_set& eof_set) const BOOST_OVERRIDE;
129 void assign_node_ids(node_id_t& node_count) BOOST_OVERRIDE;
7c673cae 130#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
20effc67 131 void dump(std::ostream& out) const BOOST_OVERRIDE;
7c673cae
FG
132#endif
133
134private:
135
136 uchar m_char;
137 node_id_t m_node_num;
138};
139
140inline
141char_node::char_node(const uchar c)
142 : node()
143 , m_char(c)
144 , m_node_num(0)
145{
146}
147
148inline
149char_node::char_node(const char_node& x)
150 : node(x)
151 , m_char(x.m_char)
152 , m_node_num(x.m_node_num)
153{
154}
155
156inline node *
157char_node::clone() const
158{
159 return new char_node(*this);
160}
161
162inline bool
163char_node::nullable() const
164{
165 return false;
166}
167
168inline node_set
169char_node::firstpos() const
170{
171 node_set rval;
172 rval.insert(m_node_num);
173 return rval;
174}
175
176inline node_set
177char_node::lastpos() const
178{
179 return firstpos();
180}
181
182inline void
183char_node::compute_followpos(followpos_t&) const
184{
185 return;
186}
187
188inline void
189char_node::compute_state_match(state_match_t& state_match) const
190{
191 if (state_match.size() < m_node_num + 1)
192 state_match.resize(m_node_num + 1);
193 state_match[m_node_num].resize(256);
194 state_match[m_node_num][m_char] = 1;
195}
196
197inline void
198char_node::get_eof_ids(node_set&) const
199{
200 return;
201}
202
203inline void
204char_node::assign_node_ids(node_id_t& node_count)
205{
206 m_node_num = node_count++;
207}
208
209#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
210inline void
211char_node::dump(std::ostream& out) const
212{
213 out << "\nchar_node m_char = " << m_char;
214 out << " m_node_num = " << m_node_num;
215 out << " nullable() = " << (nullable() ? "true" : "false");
216 out << " firstpos() = ";
217 node_set fp = firstpos();
218 std::copy(fp.begin(), fp.end(),
219 std::ostream_iterator<node_id_t>(out, ","));
220
221 out << " lastpos() = ";
222 node_set lp = lastpos();
223 std::copy(lp.begin(), lp.end(),
224 std::ostream_iterator<node_id_t>(out, ","));
225}
226#endif
227
228
229class epsilon_node : public node
230{
231
232public:
233
234 epsilon_node();
235 epsilon_node(const epsilon_node& x);
236 virtual ~epsilon_node(){}
237
20effc67
TL
238 node* clone() const BOOST_OVERRIDE;
239 bool nullable() const BOOST_OVERRIDE;
240 node_set firstpos() const BOOST_OVERRIDE;
241 node_set lastpos() const BOOST_OVERRIDE;
242 void compute_followpos(followpos_t& followpos) const BOOST_OVERRIDE;
243 void compute_state_match(state_match_t& state_match ) const BOOST_OVERRIDE;
244 void get_eof_ids(node_set& eof_set) const BOOST_OVERRIDE;
245 void assign_node_ids(node_id_t& node_count) BOOST_OVERRIDE;
7c673cae 246#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
20effc67 247 void dump(std::ostream& out) const BOOST_OVERRIDE;
7c673cae
FG
248#endif
249
250private:
251
252 node_id_t m_node_num;
253};
254
255inline
256epsilon_node::epsilon_node()
257 : node()
258 , m_node_num(0)
259{
260}
261
262inline
263epsilon_node::epsilon_node(const epsilon_node& x)
264 : node(x)
265 , m_node_num(x.m_node_num)
266{
267}
268
269inline node *
270epsilon_node::clone() const
271{
272 return new epsilon_node(*this);
273}
274
275inline bool
276epsilon_node::nullable() const
277{
278 return true;
279}
280
281inline node_set
282epsilon_node::firstpos() const
283{
284 return node_set();
285}
286
287inline node_set
288epsilon_node::lastpos() const
289{
290 return node_set();
291}
292
293inline void
294epsilon_node::compute_followpos(followpos_t&) const
295{
296 return;
297}
298
299inline void
300epsilon_node::compute_state_match(state_match_t& state_match) const
301{
302 if (state_match.size() < m_node_num + 1)
303 state_match.resize(m_node_num + 1);
304 state_match[m_node_num].resize(256, 1);
305}
306
307inline void
308epsilon_node::get_eof_ids(node_set&) const
309{
310 return;
311}
312
313inline void
314epsilon_node::assign_node_ids(node_id_t& node_count)
315{
316 m_node_num = node_count++;
317}
318
319#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
320inline void
321epsilon_node::dump(std::ostream& out) const
322{
323 out << "\nepsilon_node";
324 out << " m_node_num = " << m_node_num;
325 out << " nullable() = " << (nullable() ? "true" : "false");
326 out << " firstpos() = ";
327 node_set fp = firstpos();
328 std::copy(fp.begin(), fp.end(),
329 std::ostream_iterator<node_id_t>(out, ","));
330
331 out << " lastpos() = ";
332 node_set lp = lastpos();
333 std::copy(lp.begin(), lp.end(),
334 std::ostream_iterator<node_id_t>(out, ","));
335}
336#endif
337
338
339class or_node : public node
340{
341
342public:
343
344 or_node(node* left, node* right);
345 or_node(const or_node& x);
346 virtual ~or_node(){}
347
20effc67
TL
348 node* clone() const BOOST_OVERRIDE;
349 bool nullable() const BOOST_OVERRIDE;
350 node_set firstpos() const BOOST_OVERRIDE;
351 node_set lastpos() const BOOST_OVERRIDE;
352 void compute_followpos(followpos_t& followpos) const BOOST_OVERRIDE;
353 void compute_state_match(state_match_t& state_match ) const BOOST_OVERRIDE;
354 void get_eof_ids(node_set& eof_set) const BOOST_OVERRIDE;
355 void assign_node_ids(node_id_t& node_count) BOOST_OVERRIDE;
7c673cae 356#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
20effc67 357 void dump(std::ostream& out) const BOOST_OVERRIDE;
7c673cae
FG
358#endif
359
360private:
361
92f5a8d4 362#ifndef BOOST_NO_CXX11_SMART_PTR
b32b8144
FG
363 std::unique_ptr<node> m_left;
364 std::unique_ptr<node> m_right;
365#else
7c673cae
FG
366 std::auto_ptr<node> m_left;
367 std::auto_ptr<node> m_right;
b32b8144 368#endif
7c673cae
FG
369};
370
371inline
372or_node::or_node(node* left, node* right)
373 : node()
374 , m_left(left)
375 , m_right(right)
376{
377}
378
379inline
380or_node::or_node(const or_node& x)
381 : node(x)
382 , m_left(x.m_left->clone())
383 , m_right(x.m_right->clone())
384{
385}
386
387inline node *
388or_node::clone() const
389{
390 return new or_node(m_left->clone(), m_right->clone());
391}
392
393inline bool
394or_node::nullable() const
395{
396 return m_left->nullable() || m_right->nullable();
397}
398
399inline node_set
400or_node::firstpos() const
401{
402 node_set rval;
403 node_set l = m_left->firstpos();
404 node_set r = m_right->firstpos();
405 std::set_union(l.begin(), l.end(), r.begin(), r.end(),
406 std::inserter(rval, rval.begin()));
407 return rval;
408}
409
410inline node_set
411or_node::lastpos() const
412{
413 node_set rval;
414 node_set l = m_left->lastpos();
415 node_set r = m_right->lastpos();
416 std::set_union(l.begin(), l.end(), r.begin(), r.end(),
417 std::inserter(rval, rval.begin()));
418 return rval;
419}
420
421inline void
422or_node::compute_followpos(followpos_t& followpos) const
423{
424 m_left->compute_followpos(followpos);
425 m_right->compute_followpos(followpos);
426}
427
428inline void
429or_node::compute_state_match(state_match_t& state_match) const
430{
431 m_left->compute_state_match(state_match);
432 m_right->compute_state_match(state_match);
433}
434
435inline void
436or_node::get_eof_ids(node_set& eof_nodes) const
437{
438 m_left->get_eof_ids(eof_nodes);
439 m_right->get_eof_ids(eof_nodes);
440}
441
442inline void
443or_node::assign_node_ids(node_id_t& node_count)
444{
445 m_left->assign_node_ids(node_count);
446 m_right->assign_node_ids(node_count);
447}
448
449#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
450inline void
451or_node::dump(std::ostream& out) const
452{
453 m_left->dump(out);
454
455 out << "\nor_node";
456 out << " nullable() = " << (nullable() ? "true" : "false");
457 out << " firstpos() = ";
458 node_set fp = firstpos();
459 std::copy(fp.begin(), fp.end(),
460 std::ostream_iterator<node_id_t>(out, ","));
461
462 out << " lastpos() = ";
463 node_set lp = lastpos();
464 std::copy(lp.begin(), lp.end(),
465 std::ostream_iterator<node_id_t>(out, ","));
466
467 m_right->dump(out);
468}
469#endif
470
471
472class cat_node : public node
473{
474
475public:
476
477 cat_node(node* left, node* right);
478 cat_node(const cat_node& x);
479 virtual ~cat_node(){}
480
20effc67
TL
481 node* clone() const BOOST_OVERRIDE;
482 bool nullable() const BOOST_OVERRIDE;
483 node_set firstpos() const BOOST_OVERRIDE;
484 node_set lastpos() const BOOST_OVERRIDE;
485 void compute_followpos(followpos_t& followpos) const BOOST_OVERRIDE;
486 void compute_state_match(state_match_t& state_match ) const BOOST_OVERRIDE;
487 void get_eof_ids(node_set& eof_set) const BOOST_OVERRIDE;
488 void assign_node_ids(node_id_t& node_count) BOOST_OVERRIDE;
7c673cae 489#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
20effc67 490 void dump(std::ostream& out) const BOOST_OVERRIDE;
7c673cae
FG
491#endif
492
493private:
494
92f5a8d4 495#ifndef BOOST_NO_CXX11_SMART_PTR
b32b8144
FG
496 std::unique_ptr<node> m_left;
497 std::unique_ptr<node> m_right;
498#else
7c673cae
FG
499 std::auto_ptr<node> m_left;
500 std::auto_ptr<node> m_right;
b32b8144 501#endif
7c673cae
FG
502};
503
504inline
505cat_node::cat_node(node* left, node* right)
506 : node()
507 , m_left(left)
508 , m_right(right)
509{
510}
511
512inline
513cat_node::cat_node(const cat_node& x)
514 : node(x)
515 , m_left(x.m_left->clone())
516 , m_right(x.m_right->clone())
517{
518}
519
520inline node *
521cat_node::clone() const
522{
523 return new cat_node(m_left->clone(), m_right->clone());
524}
525
526inline bool
527cat_node::nullable() const
528{
529 return m_left->nullable() && m_right->nullable();
530}
531
532inline node_set
533cat_node::firstpos() const
534{
535 if (m_left->nullable())
536 {
537 node_set rval;
538 node_set l = m_left->firstpos();
539 node_set r = m_right->firstpos();
540 std::set_union(l.begin(), l.end(), r.begin(), r.end(),
541 std::inserter(rval, rval.begin()));
542 return rval;
543 }
544 else
545 {
546 return m_left->firstpos();
547 }
548}
549
550inline node_set
551cat_node::lastpos() const
552{
553 if (m_right->nullable())
554 {
555 node_set rval;
556 node_set l = m_left->lastpos();
557 node_set r = m_right->lastpos();
558 std::set_union(l.begin(), l.end(), r.begin(), r.end(),
559 std::inserter(rval, rval.begin()));
560 return rval;
561 }
562 else
563 {
564 return m_right->lastpos();
565 }
566}
567
568inline void
569cat_node::compute_followpos(followpos_t& followpos) const
570{
571 node_set l = m_left->lastpos();
572 for (node_set::iterator i = l.begin();
573 i != l.end();
574 ++i)
575 {
576 node_set rf = m_right->firstpos();
577 followpos[*i].insert(rf.begin(), rf.end());
578 }
579
580 m_left->compute_followpos(followpos);
581 m_right->compute_followpos(followpos);
582}
583
584inline void
585cat_node::compute_state_match(state_match_t& state_match) const
586{
587 m_left->compute_state_match(state_match);
588 m_right->compute_state_match(state_match);
589}
590
591inline void
592cat_node::get_eof_ids(node_set& eof_nodes) const
593{
594 m_left->get_eof_ids(eof_nodes);
595 m_right->get_eof_ids(eof_nodes);
596}
597
598inline void
599cat_node::assign_node_ids(node_id_t& node_count)
600{
601 m_left->assign_node_ids(node_count);
602 m_right->assign_node_ids(node_count);
603}
604
605#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
606inline void
607cat_node::dump(std::ostream& out) const
608{
609 m_left->dump(out);
610
611 out << "\ncat_node";
612 out << " nullable() = " << (nullable() ? "true" : "false");
613 out << " firstpos() = ";
614 node_set fp = firstpos();
615 std::copy(fp.begin(), fp.end(),
616 std::ostream_iterator<node_id_t>(out, ","));
617
618 out << " lastpos() = ";
619 node_set lp = lastpos();
620 std::copy(lp.begin(), lp.end(),
621 std::ostream_iterator<node_id_t>(out, ","));
622
623 m_right->dump(out);
624}
625#endif
626
627
628class star_node : public node
629{
630
631public:
632
633 star_node(node* left);
634 star_node(const star_node& x);
635 virtual ~star_node(){}
636
20effc67
TL
637 node* clone() const BOOST_OVERRIDE;
638 bool nullable() const BOOST_OVERRIDE;
639 node_set firstpos() const BOOST_OVERRIDE;
640 node_set lastpos() const BOOST_OVERRIDE;
641 void compute_followpos(followpos_t& followpos) const BOOST_OVERRIDE;
642 void compute_state_match(state_match_t& state_match ) const BOOST_OVERRIDE;
643 void get_eof_ids(node_set& eof_set) const BOOST_OVERRIDE;
644 void assign_node_ids(node_id_t& node_count) BOOST_OVERRIDE;
7c673cae 645#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
20effc67 646 void dump(std::ostream& out) const BOOST_OVERRIDE;
7c673cae
FG
647#endif
648
649private:
650
92f5a8d4 651#ifndef BOOST_NO_CXX11_SMART_PTR
b32b8144
FG
652 std::unique_ptr<node> m_left;
653#else
7c673cae 654 std::auto_ptr<node> m_left;
b32b8144 655#endif
7c673cae
FG
656};
657
658inline
659star_node::star_node(node* left)
660 : node()
661 , m_left(left)
662{
663}
664
665inline
666star_node::star_node(const star_node& x)
667 : node(x)
668 , m_left(x.m_left->clone())
669{
670}
671
672inline node *
673star_node::clone() const
674{
675 return new star_node(m_left->clone());
676}
677
678inline bool
679star_node::nullable() const
680{
681 return true;
682}
683
684inline node_set
685star_node::firstpos() const
686{
687 return m_left->firstpos();
688}
689
690inline node_set
691star_node::lastpos() const
692{
693 return m_left->lastpos();
694}
695
696inline void
697star_node::compute_followpos(followpos_t& followpos) const
698{
699 node_set lp = this->lastpos();
700 for (node_set::iterator i = lp.begin();
701 i != lp.end();
702 ++i)
703 {
704 node_set fp = this->firstpos();
705 followpos[*i].insert(fp.begin(), fp.end());
706 }
707
708 m_left->compute_followpos(followpos);
709}
710
711inline void
712star_node::compute_state_match(state_match_t& state_match) const
713{
714 m_left->compute_state_match(state_match);
715}
716
717inline void
718star_node::get_eof_ids(node_set& eof_nodes) const
719{
720 m_left->get_eof_ids(eof_nodes);
721}
722
723inline void
724star_node::assign_node_ids(node_id_t& node_count)
725{
726 m_left->assign_node_ids(node_count);
727}
728
729#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
730inline void
731star_node::dump(std::ostream& out) const
732{
733 m_left->dump(out);
734
735 out << "\nstar_node";
736 out << " nullable() = " << (nullable() ? "true" : "false");
737 out << " firstpos() = ";
738 node_set fp = firstpos();
739 std::copy(fp.begin(), fp.end(),
740 std::ostream_iterator<node_id_t>(out, ","));
741
742 out << " lastpos() = ";
743 node_set lp = lastpos();
744 std::copy(lp.begin(), lp.end(),
745 std::ostream_iterator<node_id_t>(out, ","));
746
747}
748#endif
749
750
751class eof_node : public node
752{
753
754public:
755
756 eof_node();
757 eof_node(const eof_node& x);
758 virtual ~eof_node(){}
759
20effc67
TL
760 node* clone() const BOOST_OVERRIDE;
761 bool nullable() const BOOST_OVERRIDE;
762 node_set firstpos() const BOOST_OVERRIDE;
763 node_set lastpos() const BOOST_OVERRIDE;
764 void compute_followpos(followpos_t& followpos) const BOOST_OVERRIDE;
765 void compute_state_match(state_match_t& state_match ) const BOOST_OVERRIDE;
766 void get_eof_ids(node_set& eof_set) const BOOST_OVERRIDE;
767 void assign_node_ids(node_id_t& node_count) BOOST_OVERRIDE;
7c673cae 768#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
20effc67 769 void dump(std::ostream& out) const BOOST_OVERRIDE;
7c673cae
FG
770#endif
771
772private:
773
774 node_id_t m_node_num;
775};
776
777inline
778eof_node::eof_node()
779 : node()
780 , m_node_num(0)
781{
782}
783
784inline
785eof_node::eof_node(const eof_node& x)
786 : node(x)
787 , m_node_num(x.m_node_num)
788{
789}
790
791inline node *
792eof_node::clone() const
793{
794 return new eof_node(*this);
795}
796
797inline bool
798eof_node::nullable() const
799{
800 return false;
801}
802
803inline node_set
804eof_node::firstpos() const
805{
806 node_set rval;
807 rval.insert(m_node_num);
808 return rval;
809}
810
811inline node_set
812eof_node::lastpos() const
813{
814 node_set rval;
815 rval.insert(m_node_num);
816 return rval;
817}
818
819inline void
820eof_node::compute_followpos(followpos_t&) const
821{
822 return;
823}
824
825inline void
826eof_node::compute_state_match(state_match_t& state_match) const
827{
828 if (state_match.size() < m_node_num + 1)
829 state_match.resize(m_node_num + 1);
830 state_match[m_node_num].resize(256, 0);
831}
832
833inline void
834eof_node::get_eof_ids(node_set& eof_nodes) const
835{
836 eof_nodes.insert(m_node_num);
837}
838
839inline void
840eof_node::assign_node_ids(node_id_t& node_count)
841{
842 m_node_num = node_count++;
843}
844
845#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
846inline void
847eof_node::dump(std::ostream& out) const
848{
849 out << "\neof_node";
850 out << " m_node_num = " << m_node_num;
851 out << " nullable() = " << (nullable() ? "true" : "false");
852 out << " firstpos() = ";
853 node_set fp = firstpos();
854 std::copy(fp.begin(), fp.end(),
855 std::ostream_iterator<node_id_t>(out, ","));
856
857 out << " lastpos() = ";
858 node_set lp = lastpos();
859 std::copy(lp.begin(), lp.end(),
860 std::ostream_iterator<node_id_t>(out, ","));
861}
862#endif
863
864class ccl_node : public node
865{
866
867public:
868
869 ccl_node(const std::vector<uchar>& v);
870 ccl_node(const uchar c1, const uchar c2);
871 ccl_node(const ccl_node& x);
872 virtual ~ccl_node(){}
873
20effc67
TL
874 node* clone() const BOOST_OVERRIDE;
875 bool nullable() const BOOST_OVERRIDE;
876 node_set firstpos() const BOOST_OVERRIDE;
877 node_set lastpos() const BOOST_OVERRIDE;
878 void compute_followpos(followpos_t& followpos) const BOOST_OVERRIDE;
879 void compute_state_match(state_match_t& state_match ) const BOOST_OVERRIDE;
880 void get_eof_ids(node_set& eof_set) const BOOST_OVERRIDE;
881 void assign_node_ids(node_id_t& node_count) BOOST_OVERRIDE;
7c673cae 882#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
20effc67 883 void dump(std::ostream& out) const BOOST_OVERRIDE;
7c673cae
FG
884#endif
885
886private:
887
888 std::vector<uchar> m_match;
889 node_id_t m_node_num;
890};
891
892inline
893ccl_node::ccl_node(const std::vector<uchar>& v)
894 : node()
895 , m_match(v)
896 , m_node_num(0)
897{
898 m_match.resize(256); // make sure it's the right size
899}
900
901inline
902ccl_node::ccl_node(const uchar c1, const uchar c2)
903 : node()
904 , m_match(256, uchar(0))
905 , m_node_num(0)
906{
907 BOOST_ASSERT(c1 < c2);
908 for (std::size_t i = c1; i <= std::size_t(c2); ++i)
909 {
910 m_match[i] = 1;
911 }
912}
913
914inline
915ccl_node::ccl_node(const ccl_node& x)
916 : node(x)
917 , m_match(x.m_match)
918 , m_node_num(x.m_node_num)
919{
920}
921
922inline node *
923ccl_node::clone() const
924{
925 return new ccl_node(*this);
926}
927
928inline bool
929ccl_node::nullable() const
930{
931 return false;
932}
933
934inline node_set
935ccl_node::firstpos() const
936{
937 node_set rval;
938 rval.insert(m_node_num);
939 return rval;
940}
941
942inline node_set
943ccl_node::lastpos() const
944{
945 return firstpos();
946}
947
948inline void
949ccl_node::compute_followpos(followpos_t&) const
950{
951 return;
952}
953
954inline void
955ccl_node::compute_state_match(state_match_t& state_match) const
956{
957 if (state_match.size() < m_node_num + 1)
958 state_match.resize(m_node_num + 1);
959 state_match[m_node_num] = m_match;
960}
961
962inline void
963ccl_node::get_eof_ids(node_set&) const
964{
965 return;
966}
967
968inline void
969ccl_node::assign_node_ids(node_id_t& node_count)
970{
971 m_node_num = node_count++;
972}
973
974#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
975inline void
976ccl_node::dump(std::ostream& out) const
977{
978 out << "\nccl_node m_match = ";
979 for (std::size_t i = 0; i < m_match.size(); ++i)
980 {
981 if (m_match[i])
982 out << i << ", ";
983 }
984 out << " m_node_num = " << m_node_num;
985 out << " nullable() = " << (nullable() ? "true" : "false");
986 out << " firstpos() = ";
987 node_set fp = firstpos();
988 std::copy(fp.begin(), fp.end(),
989 std::ostream_iterator<node_id_t>(out, ","));
990
991 out << " lastpos() = ";
992 node_set lp = lastpos();
993 std::copy(lp.begin(), lp.end(),
994 std::ostream_iterator<node_id_t>(out, ","));
995}
996#endif
997
998template <typename ScannerT>
999class make_concat
1000{
1001 typedef typename ScannerT::iterator_t iterator_type;
1002
1003public:
1004
1005 make_concat(std::stack<node*>& the_stack)
1006 : m_stack(the_stack)
1007 {}
1008
1009 void operator()(iterator_type const &, iterator_type const &) const
1010 {
1011 node* right = m_stack.top();
1012 m_stack.pop();
1013 node* left = m_stack.top();
1014 m_stack.pop();
1015 node* newnode = new cat_node(left, right);
1016 m_stack.push(newnode);
1017 }
1018
1019 std::stack<node*>& m_stack;
1020};
1021
1022template <int CharTSize>
1023struct get_byte_aux;
1024
1025template<>
1026struct get_byte_aux<1>
1027{
1028 template <typename CharT>
1029 unsigned char operator()(CharT c, unsigned int byte)
1030 {
1031 BOOST_ASSERT(byte == 0);
1032 return c;
1033 }
1034};
1035
1036template<>
1037struct get_byte_aux<2>
1038{
1039 template <typename CharT>
1040 unsigned char operator()(CharT c, unsigned int byte)
1041 {
1042 static unsigned long mask[] =
1043 {
1044 0xFF00,
1045 0x00FF
1046 };
1047
1048 BOOST_ASSERT(byte < 2);
1049 return (c & mask[byte]) >> ((sizeof(c) - 1 - byte) * 8);
1050 }
1051};
1052
1053template<>
1054struct get_byte_aux<4>
1055{
1056 template <typename CharT>
1057 unsigned char operator()(CharT c, unsigned int byte)
1058 {
1059 static unsigned long mask[] =
1060 {
1061 0xFF000000,
1062 0x00FF0000,
1063 0x0000FF00,
1064 0x000000FF
1065 };
1066
1067 BOOST_ASSERT(byte < 4);
1068 return (c & mask[byte]) >> ((sizeof(c) - 1 - byte) * 8);
1069 }
1070};
1071
1072template <typename CharT>
1073inline unsigned char
1074get_byte(CharT c, unsigned int byte)
1075{
1076 return get_byte_aux<sizeof(c)>()(c, byte);
1077}
1078
1079template <typename ScannerT>
1080class make_star
1081{
1082 typedef typename ScannerT::iterator_t iterator_type;
1083
1084public:
1085 typedef
1086 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1087 char_t;
1088
1089 make_star(std::stack<node*>& the_stack)
1090 : m_stack(the_stack)
1091 {}
1092
1093 void operator()(const char_t) const
1094 {
1095 node* left = m_stack.top();
1096 m_stack.pop();
1097 node* newnode = new star_node(left);
1098 m_stack.push(newnode);
1099 }
1100
1101 std::stack<node*>& m_stack;
1102};
1103
1104template <typename ScannerT>
1105class make_or
1106{
1107 typedef typename ScannerT::iterator_t iterator_type;
1108
1109public:
1110
1111 make_or(std::stack<node*>& the_stack)
1112 : m_stack(the_stack)
1113 {}
1114
1115 void operator()(iterator_type const&, iterator_type const&) const
1116 {
1117 node* right = m_stack.top();
1118 m_stack.pop();
1119 node* left = m_stack.top();
1120 m_stack.pop();
1121 node* newnode = new or_node(left, right);
1122 m_stack.push(newnode);
1123 }
1124
1125 std::stack<node*>& m_stack;
1126};
1127
1128template <typename ScannerT>
1129class make_plus
1130{
1131 typedef typename ScannerT::iterator_t iterator_type;
1132
1133public:
1134 typedef
1135 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1136 char_t;
1137
1138 make_plus(std::stack<node*>& the_stack)
1139 : m_stack(the_stack)
1140 {}
1141
1142 void operator()(const char_t) const
1143 {
1144 node* left = m_stack.top();
1145 m_stack.pop();
1146
1147 node* copy = left->clone();
1148
1149 node* new_star = new star_node(copy);
1150 node* new_cat = new cat_node(left, new_star);
1151 m_stack.push(new_cat);
1152 }
1153
1154 std::stack<node*>& m_stack;
1155};
1156
1157template <typename ScannerT>
1158class make_optional
1159{
1160 typedef typename ScannerT::iterator_t iterator_type;
1161
1162public:
1163 typedef
1164 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1165 char_t;
1166
1167 make_optional(std::stack<node*>& the_stack)
1168 : m_stack(the_stack)
1169 {}
1170
1171 void operator()(const char_t) const
1172 {
1173 node* left = m_stack.top();
1174 m_stack.pop();
1175
1176 node* new_or = new or_node(left, new epsilon_node());
1177 m_stack.push(new_or);
1178 }
1179
1180 std::stack<node*>& m_stack;
1181};
1182
1183///////////////////////////////////////////////////////////////////////////////
1184// utility function
1185template <typename CharT>
1186inline utility::impl::range<CharT> const&
1187full_range()
1188{
1189 static utility::impl::range<CharT> full((std::numeric_limits<CharT>::min)(),
1190 (std::numeric_limits<CharT>::max)());
1191 return full;
1192}
1193
1194namespace ccl_utils
1195{
1196 template <typename char_t>
1197 inline utility::impl::range_run<char_t>
1198 negate_range_run(
1199 const utility::impl::range_run<char_t>& rr)
1200 {
1201 utility::impl::range_run<char_t> newrr;
1202 newrr.set(full_range<char_t>());
1203 for (typename utility::impl::range_run<char_t>::const_iterator iter = rr.begin();
1204 iter != rr.end(); ++iter)
1205 newrr.clear(*iter);
1206 return newrr;
1207 }
1208
1209 template <typename char_t>
1210 inline node*
1211 create_mb_node_seq(char_t c)
1212 {
1213 node* newnode = new char_node(get_byte(c, 0));
1214 for (unsigned int i = 1; i < sizeof(c); ++i)
1215 {
1216 node* cnode = new char_node(get_byte(c, i));
1217 node* top_node = new cat_node(newnode, cnode);
1218 newnode = top_node;
1219 }
1220 return newnode;
1221 }
1222
1223 template <typename char_t>
1224 inline void
1225 handle_mb_char(char_t c, bool first_time,
1226 std::stack<node*>& stack)
1227 {
1228 node* newnode = create_mb_node_seq(c);
1229
1230 if (first_time)
1231 {
1232 stack.push(newnode);
1233 }
1234 else
1235 {
1236 node* top = stack.top();
1237 stack.pop();
1238
1239 node* newtop = new or_node(top, newnode);
1240 stack.push(newtop);
1241 }
1242 }
1243
1244 // forward decl only
1245 template <typename char_t>
1246 inline void
1247 handle_mb_range(char_t c1, char_t c2, bool first_time,
1248 std::stack<node*>& stack);
1249
1250 template <typename char_t>
1251 inline void
1252 create_nodes(const utility::impl::range_run<char_t>& rr,
1253 std::stack<node*>& stack)
1254 {
1255
1256 if (sizeof(char_t) == 1)
1257 {
1258 std::vector<uchar> ccl;
1259 ccl.resize(256);
1260 for (typename utility::impl::range_run<char_t>::const_iterator iter = rr.begin();
1261 iter != rr.end(); ++iter)
1262 {
1263 for (int i = iter->first; i <= iter->last; ++i)
1264 {
1265// this is always true because of the limited datatype
1266// BOOST_ASSERT(uchar(i) < 256 && ccl.size() == 256);
1267 ccl[uchar(i)] = 1;
1268 }
1269 }
1270
1271 node* new_ccl = new ccl_node(ccl);
1272 stack.push(new_ccl);
1273 }
1274 else
1275 {
1276 bool mb_first_time = true;
1277 for (typename utility::impl::range_run<char_t>::const_iterator iter = rr.begin();
1278 iter != rr.end(); ++iter)
1279 {
1280 if (iter->first == iter->last)
1281 {
1282 handle_mb_char(iter->first, mb_first_time, stack);
1283 }
1284 else
1285 {
1286 handle_mb_range(iter->first, iter->last, mb_first_time, stack);
1287 }
1288 mb_first_time = false;
1289 }
1290 }
1291 }
1292
1293 template <typename char_t>
1294 inline std::size_t
1295 compute_differing_byte(char_t c1, char_t c2)
1296 {
1297 std::size_t rval = 0;
1298 while (rval < sizeof(c1) &&
1299 get_byte(c1, (unsigned int)rval) == get_byte(c2, (unsigned int)rval))
1300 {
1301 ++rval;
1302 }
1303 return rval;
1304 }
1305
1306 template <typename char_t>
1307 inline node*
1308 create_mb_node_type1(std::size_t j, char_t c1, char_t c2)
1309 {
1310 std::size_t diff = get_byte(c2, (unsigned int)j) -
1311 get_byte(c1, (unsigned int)j);
1312 if (diff == 1) {
1313 return 0;
1314 }
1315 else if (diff == 2) {
1316 return new char_node(get_byte(c1, (unsigned int)j)+1);
1317 }
1318 else {
1319 return new ccl_node(get_byte(c1, (unsigned int)j)+1,
1320 get_byte(c2, (unsigned int)j)-1);
1321 }
1322 }
1323
1324 template <typename char_t>
1325 inline node *
1326 create_mb_node_for_byte(std::size_t i, std::size_t j, std::size_t sizem1,
1327 std::size_t differing_byte, char_t c1, char_t c2, node* newnode)
1328 {
1329 node* cnode;
1330 if (i == sizem1 && j == differing_byte && j != sizem1)
1331 {
1332 node* tmp = create_mb_node_type1(j, c1, c2);
1333 if (tmp == 0)
1334 {
1335 delete newnode;
1336 return 0;
1337 }
1338 else
1339 cnode = tmp;
1340 }
1341 else if (i == differing_byte && j == sizem1)
1342 {
1343 if (i != sizem1) {
1344 cnode = new ccl_node(get_byte(c1, (unsigned int)j), 0xFF);
1345 }
1346 else {
1347 cnode = new ccl_node(get_byte(c1, (unsigned int)j),
1348 get_byte(c2, (unsigned int)j));
1349 }
1350 }
1351 else if (i != differing_byte && i != sizem1 &&
1352 j == (sizem1 - i + differing_byte))
1353 {
1354 cnode = new ccl_node(get_byte(c1, (unsigned int)j)+1, 0xFF);
1355 }
1356 else if (i + j - differing_byte > sizem1) {
1357 cnode = new ccl_node(0, 0xFF);
1358 }
1359 else {//if (is plain)
1360 cnode = new char_node(get_byte(c1, (unsigned int)j));
1361 }
1362
1363 node* top_node = new cat_node(newnode, cnode);
1364 return top_node;
1365 }
1366
1367// On platforms, where wchar_t is a typedef for unsigned short, the
1368// comparision for a negative value is pointless
1369 template <bool is_signed>
1370 struct correct_char_aux {
1371 };
1372
1373 template <>
1374 struct correct_char_aux<true> {
1375
1376 template <typename char_t>
1377 static char_t correct(char_t c) { if (c < 0) c = 0; return c; }
1378 };
1379
1380 template <>
1381 struct correct_char_aux<false> {
1382
1383 template <typename char_t>
1384 static char_t correct(char_t c) { return c; }
1385 };
1386
1387 template <typename char_t>
1388 struct correct_char
1389 {
1390 static char_t correct(char_t c)
1391 {
1392 return correct_char_aux<std::numeric_limits<char_t>::is_signed >::
1393 correct(c);
1394 }
1395 };
1396
1397 template <typename char_t>
1398 inline void
1399 handle_mb_range(char_t c1, char_t c2, bool first_time,
1400 std::stack<node*>& stack)
1401 {
1402 // The algorithm can't handle negative value chars, which don't make
1403 // much sense anyway. This comparision is pointless for wchar_t's on
1404 // platforms, where wchar_t is a typedef for unsigned short
1405
1406 c1 = correct_char<char_t>::correct(c1);
1407 //if (c1 < 0)
1408 // c1 = 0;
1409
1410 BOOST_ASSERT(c1 < c2);
1411 node* newnode = 0;
1412 node* savednode = 0;
1413 const std::size_t differing_byte = compute_differing_byte(c1, c2);
1414 const std::size_t sizem1 = sizeof(c1) - 1;
1415 const std::size_t ndb = sizem1 - differing_byte;
1416 for (std::size_t i = differing_byte; i < sizeof(c1); ++i)
1417 {
1418 // generate node for the first byte
1419 if (differing_byte == 0 && i == ndb)
1420 {
1421 node* tmp = create_mb_node_type1(0, c1, c2);
1422 if (tmp == 0)
1423 continue;
1424 else
1425 newnode = tmp;
1426 }
1427 else
1428 {
1429 newnode = new char_node(get_byte(c1, 0));
1430 }
1431 for (std::size_t j = 1; j < sizeof(c1); ++j)
1432 {
1433 newnode = create_mb_node_for_byte(i, j, sizem1, differing_byte,
1434 c1, c2, newnode);
1435 if (newnode == 0)
1436 goto end_outer_for;
1437 }
1438
1439 // or together the various parts
1440 if (savednode)
1441 {
1442 node* top_node = new or_node(savednode, newnode);
1443 savednode = top_node;
1444 }
1445 else
1446 {
1447 savednode = newnode;
1448 }
1449end_outer_for:
1450 continue;
1451 }
1452
1453 for (std::size_t k = 0; k < ndb; ++k)
1454 {
1455 newnode = new char_node(get_byte(c2, 0));
1456 for (std::size_t j = 1; j < sizeof(c2); ++j)
1457 {
1458 node* cnode;
1459 if (k == differing_byte && j == sizem1 && k != sizem1)
1460 cnode = new ccl_node(0, get_byte(c2, (unsigned int)j));
1461
1462 else if (k != differing_byte && k != sizem1 &&
1463 j == (sizem1 - k + differing_byte))
1464 cnode = new ccl_node(0, get_byte(c2, (unsigned int)j)-1);
1465
1466 else if (k + j - differing_byte > sizem1)
1467 cnode = new ccl_node(0, 0xFF);
1468
1469 else //if (is plain)
1470 cnode = new char_node(get_byte(c2, (unsigned int)j));
1471
1472
1473 node* top_node = new cat_node(newnode, cnode);
1474 newnode = top_node;
1475 }
1476
1477 // or together the various parts
1478 if (savednode)
1479 {
1480 node* top_node = new or_node(savednode, newnode);
1481 savednode = top_node;
1482 }
1483 else
1484 {
1485 savednode = newnode;
1486 }
1487 }
1488
1489
1490 if (first_time)
1491 {
1492 stack.push(savednode);
1493 }
1494 else
1495 {
1496 node* top = stack.top();
1497 stack.pop();
1498
1499 node* newtop = new or_node(top, savednode);
1500 stack.push(newtop);
1501 }
1502 }
1503} // namespace ccl_utils
1504
1505template <typename ScannerT>
1506class make_char
1507{
1508 typedef typename ScannerT::iterator_t iterator_type;
1509
1510public:
1511 typedef
1512 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1513 char_t;
1514
1515 make_char(std::stack<node*>& the_stack)
1516 : m_stack(the_stack)
1517 {}
1518
1519 void operator()(iterator_type const& first, iterator_type const& last) const
1520 {
1521 const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
1522 escape_char_parser<lex_escapes, char_t>();
1523 char_t the_char;
1524 iterator_type first_ = first;
1525 ScannerT scan(first_, last);
1526 lex_escape_ch[assign(the_char)].parse(scan);
1527 node* newnode = ccl_utils::create_mb_node_seq(the_char);
1528 m_stack.push(newnode);
1529 }
1530
1531 std::stack<node*>& m_stack;
1532};
1533
1534
1535template <typename ScannerT>
1536class make_ccl
1537{
1538 typedef typename ScannerT::iterator_t iterator_type;
1539
1540public:
1541 typedef
1542 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1543 char_t;
1544
1545 make_ccl(std::stack<node*>& the_stack)
1546 : m_stack(the_stack)
1547 {}
1548
1549 static bool is_equal_to_string(iterator_type first,
1550 iterator_type const & last, const char* str)
1551 {
1552 while (first != last &&*str &&*first ==*str)
1553 {
1554 ++first;
1555 ++str;
1556 }
1557 return*str == 0;
1558 }
1559
1560 template <typename ParserT>
1561 static void fill_ccl(utility::impl::range_run<char_t>& rr, const ParserT& parser)
1562 {
1563 for (int i = 0; i < 256; ++i)
1564 {
1565 if (parser.test(static_cast<char_t>(uchar(i))))
1566 rr.set(utility::impl::range<char_t>(char_t(i), char_t(i)));
1567 }
1568 }
1569
1570 void operator()(iterator_type const& first_, iterator_type const& last) const
1571 {
1572 BOOST_ASSERT(*first_ == '[');
1573
1574 iterator_type first = first_;
1575 ++first; // skip over '['
1576 bool negated_ccl = false;
1577 if (*first == '^')
1578 {
1579 negated_ccl = true;
1580 ++first;
1581 }
1582
1583 utility::impl::range_run<char_t> rr;
1584 while (first != last &&*first != ']')
1585 {
1586 if (*first == '[') // it's a ccl_expr like [:space:]
1587 {
1588 // check for [:space:], etc.
1589 if (is_equal_to_string(first, last, "[:alnum:]"))
1590 {
1591 fill_ccl(rr, alnum_p);
1592 }
1593 else if (is_equal_to_string(first, last, "[:alpha:]"))
1594 {
1595 fill_ccl(rr, alpha_p);
1596 }
1597 else if (is_equal_to_string(first, last, "[:blank:]"))
1598 {
1599 fill_ccl(rr, blank_p);
1600 }
1601 else if (is_equal_to_string(first, last, "[:cntrl:]"))
1602 {
1603 fill_ccl(rr, cntrl_p);
1604 }
1605 else if (is_equal_to_string(first, last, "[:digit:]"))
1606 {
1607 fill_ccl(rr, digit_p);
1608 }
1609 else if (is_equal_to_string(first, last, "[:graph:]"))
1610 {
1611 fill_ccl(rr, graph_p);
1612 }
1613 else if (is_equal_to_string(first, last, "[:lower:]"))
1614 {
1615 fill_ccl(rr, lower_p);
1616 }
1617 else if (is_equal_to_string(first, last, "[:print:]"))
1618 {
1619 fill_ccl(rr, print_p);
1620 }
1621 else if (is_equal_to_string(first, last, "[:punct:]"))
1622 {
1623 fill_ccl(rr, punct_p);
1624 }
1625 else if (is_equal_to_string(first, last, "[:space:]"))
1626 {
1627 fill_ccl(rr, space_p);
1628 }
1629 else if (is_equal_to_string(first, last, "[:upper:]"))
1630 {
1631 fill_ccl(rr, upper_p);
1632 }
1633 else if (is_equal_to_string(first, last, "[:xdigit:]"))
1634 {
1635 fill_ccl(rr, xdigit_p);
1636 }
1637 // this can't happen, because it's parsed before we get here.
1638 //else
1639 // throw bad_regex();
1640
1641 // Advance past the character class expression
1642 while (first != last &&*first != ']')
1643 ++first;
1644 BOOST_ASSERT(*first == ']');
1645 ++first;
1646 }
1647 else {
1648 const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
1649 escape_char_parser<lex_escapes, char_t>();
1650
1651 char_t c1;
1652 ScannerT scan(first, last);
1653 lex_escape_ch[assign(c1)].parse(scan);
1654 if (*scan.first == '-') // insert a range
1655 {
1656 ++scan.first;
1657 char_t c2;
1658 lex_escape_ch[assign(c2)].parse(scan);
1659 BOOST_ASSERT(c1 < c2); // Throw exception?
1660 rr.set(utility::impl::range<char_t>(c1, c2));
1661 }
1662 else // insert 1 char
1663 {
1664 rr.set(utility::impl::range<char_t>(c1, c1));
1665 }
1666 }
1667 }
1668
1669 if (negated_ccl)
1670 {
1671 rr = ccl_utils::negate_range_run(rr);
1672 }
1673
1674 ccl_utils::create_nodes(rr, m_stack);
1675 }
1676
1677 std::stack<node*>& m_stack;
1678};
1679
1680template <typename ScannerT>
1681class make_any_char
1682{
1683 typedef typename ScannerT::iterator_t iterator_type;
1684
1685public:
1686 typedef
1687 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1688 char_t;
1689
1690 std::stack<node*>& m_stack;
1691
1692 make_any_char(std::stack<node*>& the_stack)
1693 : m_stack(the_stack)
1694 {}
1695
1696 void operator()(const char_t c) const
1697 {
1698 BOOST_ASSERT(c == '.');
1699 do_any_char();
1700 }
1701
1702 void do_any_char() const
1703 {
1704 static utility::impl::range_run<char_t> rr;
1705 rr.set(full_range<char_t>());
1706 char_t newline = '\n';
1707 rr.clear(utility::impl::range<char_t>(newline, newline));
1708
1709 ccl_utils::create_nodes(rr, m_stack);
1710 }
1711};
1712
1713template <typename ScannerT>
1714class make_string
1715{
1716 typedef typename ScannerT::iterator_t iterator_type;
1717
1718public:
1719 typedef
1720 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1721 char_t;
1722
1723 std::stack<node*>& m_stack;
1724
1725 make_string(std::stack<node*>& the_stack)
1726 : m_stack(the_stack)
1727 {}
1728
1729 void operator()(iterator_type const& first, iterator_type const& last) const
1730 {
1731 BOOST_ASSERT(*first == '"');
1732
1733 iterator_type first_ = first;
1734 ScannerT scan(first_, last);
1735 ++scan.first; // skip over '"'
1736
1737 // empty string not allowed
1738 if (*scan.first == '"')
1739 {
1740 boost::throw_exception(bad_regex());
1741 }
1742
1743 const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
1744 escape_char_parser<lex_escapes, char_t>();
1745
1746 char_t c;
1747 lex_escape_ch[assign(c)].parse(scan);
1748 node* top_node = ccl_utils::create_mb_node_seq(c);
1749
1750 while (*scan.first != '"' && scan.first != scan.last)
1751 {
1752 lex_escape_ch[assign(c)].parse(scan);
1753 node* cur_node = ccl_utils::create_mb_node_seq(c);
1754 top_node = new cat_node(top_node, cur_node);
1755 }
1756 m_stack.push(top_node);
1757 }
1758};
1759
1760inline
1761node* repeat_node(node* n, int num)
1762{
1763 node* list_of_nodes = n;
1764 for (int i = 1; i < num; ++i)
1765 {
1766 list_of_nodes = new cat_node(list_of_nodes, n->clone());
1767 }
1768 return list_of_nodes;
1769}
1770
1771inline
1772node* optional_node(node* n)
1773{
1774 return new or_node(n, new epsilon_node());
1775}
1776
1777template <typename ScannerT>
1778class make_rep1
1779{
1780 typedef typename ScannerT::iterator_t iterator_type;
1781
1782public:
1783 typedef
1784 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1785 char_t;
1786
1787 std::stack<node*>& m_stack;
1788
1789 make_rep1(std::stack<node*>& the_stack)
1790 : m_stack(the_stack)
1791 {}
1792
1793 void operator()(iterator_type const& first, iterator_type const& last) const
1794 {
1795 BOOST_ASSERT(*first == '{');
1796
1797 iterator_type first_ = first;
1798 ScannerT scan(first_, last);
1799 ++scan.first; // skip over '{'
1800
1801 unsigned int count;
1802 uint_p[assign(count)].parse(scan);
1803 if (count == 0)
1804 boost::throw_exception(bad_regex());
1805
1806 node* top_node = m_stack.top();
1807 m_stack.pop();
1808 top_node = repeat_node(top_node, count);
1809 m_stack.push(top_node);
1810 }
1811};
1812
1813template <typename ScannerT>
1814class make_rep2
1815{
1816 typedef typename ScannerT::iterator_t iterator_type;
1817
1818public:
1819 typedef
1820 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1821 char_t;
1822
1823 std::stack<node*>& m_stack;
1824
1825 make_rep2(std::stack<node*>& the_stack)
1826 : m_stack(the_stack)
1827 {}
1828
1829 void operator()(iterator_type const& first, iterator_type const& last) const
1830 {
1831 BOOST_ASSERT(*first == '{');
1832
1833 iterator_type first_ = first;
1834 ScannerT scan (first_, last);
1835 ++scan.first; // skip over '{'
1836
1837 unsigned int count;
1838 uint_p[assign(count)].parse(scan);
1839 if (count == 0)
1840 boost::throw_exception(bad_regex());
1841
1842 node* top_node = m_stack.top();
1843 m_stack.pop();
1844 top_node = new cat_node(repeat_node(top_node, count),
1845 new star_node(top_node->clone()));
1846 m_stack.push(top_node);
1847
1848 }
1849};
1850
1851template <typename ScannerT>
1852class make_rep3
1853{
1854 typedef typename ScannerT::iterator_t iterator_type;
1855
1856public:
1857 typedef
1858 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1859 char_t;
1860
1861 std::stack<node*>& m_stack;
1862
1863 make_rep3(std::stack<node*>& the_stack)
1864 : m_stack(the_stack)
1865 {}
1866
1867 void operator()(iterator_type const& first, iterator_type const& last) const
1868 {
1869 BOOST_ASSERT(*first == '{');
1870
1871 iterator_type first_ = first;
1872 ScannerT scan(first_, last);
1873 ++scan.first; // skip over '{'
1874
1875 unsigned int count1, count2;
1876 uint_p[assign(count1)].parse(scan);
1877 if (count1 == 0)
1878 boost::throw_exception(bad_regex());
1879
1880 ++scan.first; // skip over ','
1881
1882 uint_p[assign(count2)].parse(scan);
1883 if (count2 <= count1)
1884 boost::throw_exception(bad_regex());
1885
1886 node* top_node = m_stack.top();
1887 m_stack.pop();
1888 node* repeats = repeat_node(top_node, count1);
1889 top_node = new cat_node(repeats,
1890 repeat_node(optional_node(top_node->clone()),
1891 count2 - count1));
1892
1893 m_stack.push(top_node);
1894 }
1895};
1896
1897///////////////////////////////////////////////////////////////////////////////
1898//
1899// Lexer grammar
1900//
1901// Defines the grammar, which mandates the syntax of the understood
1902// lexeme definitions passed to lexer::register_regex.
1903//
1904///////////////////////////////////////////////////////////////////////////////
1905class lexer_grammar : public boost::spirit::classic::grammar<lexer_grammar>
1906{
1907public:
1908 lexer_grammar(std::stack<node*> &node_stack_)
1909 : node_stack(node_stack_) {}
1910
1911 template <typename ScannerT>
1912 struct definition
1913 {
1914 typedef rule<ScannerT> rule_t;
1915 typedef typename ScannerT::iterator_t iterator_type;
1916 typedef
1917 typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1918 char_t;
1919
1920 rule_t regex, re, series, singleton, singleton2, fullccl, ccl, string,
1921 escseq, ccl_char;
1922 symbols<> ccl_expr;
1923
1924 definition(lexer_grammar const &self)
1925 {
1926 regex =
1927 re >> !('/' >> re) >> !ch_p('$')
1928 ;
1929
1930 re =
1931 series
1932 >>*( ('|' >> series)[make_or<ScannerT>(self.node_stack)] )
1933 ;
1934
1935 series =
1936 singleton
1937 >>*( singleton[make_concat<ScannerT>(self.node_stack)] )
1938 ;
1939
1940 singleton =
1941 ch_p('.')[make_any_char<ScannerT>(self.node_stack)]
1942 >> singleton2
1943 | fullccl
1944 >> singleton2
1945 | ('"' >> string >> '"')
1946 [
1947 make_string<ScannerT>(self.node_stack)
1948 ]
1949 >> singleton2
1950 | '(' >> re >> ')'
1951 >> singleton2
1952 | ((anychar_p - chset<>("/|*+?.(){}\\")) | escseq)
1953 [
1954 make_char<ScannerT>(self.node_stack)
1955 ]
1956 >> singleton2
1957 ;
1958
1959 singleton2 =
1960 ch_p('*')[make_star<ScannerT>(self.node_stack)]
1961 >> singleton2
1962 | ch_p('+')[make_plus<ScannerT>(self.node_stack)]
1963 >> singleton2
1964 | ch_p('?')[make_optional<ScannerT>(self.node_stack)]
1965 >> singleton2
1966 | ('{' >> uint_p >> '}')
1967 [
1968 make_rep1<ScannerT>(self.node_stack)
1969 ]
1970 >> singleton2
1971 | ('{' >> uint_p >> ',' >> '}')
1972 [
1973 make_rep2<ScannerT>(self.node_stack)
1974 ]
1975 >> singleton2
1976 | ('{' >> uint_p >> ',' >> uint_p >> '}')
1977 [
1978 make_rep3<ScannerT>(self.node_stack)
1979 ]
1980 >> singleton2
1981 | epsilon_p
1982 ;
1983
1984 fullccl =
1985 ('[' >> !ch_p('^') >> ccl >> ']')
1986 [
1987 make_ccl<ScannerT>(self.node_stack)
1988 ]
1989 ;
1990
1991 ccl =
1992 *(ccl_expr | (ccl_char >> !('-' >> ccl_char)))
1993 ;
1994
1995 ccl_char =
1996 ( (anychar_p - chset<>("\\\n]")) | escseq )
1997 ;
1998
1999 ccl_expr =
2000 "[:alnum:]",
2001 "[:alpha:]",
2002 "[:blank:]",
2003 "[:cntrl:]",
2004 "[:digit:]",
2005 "[:graph:]",
2006 "[:lower:]",
2007 "[:print:]",
2008 "[:punct:]",
2009 "[:space:]",
2010 "[:upper:]",
2011 "[:xdigit:]"
2012 ;
2013
2014 string =
2015 +( (anychar_p - chset<>("\"\\")) | escseq )
2016 ;
2017
2018 typedef
2019 uint_parser<char_t, 8, 1,
2020 std::numeric_limits<char_t>::digits / 3 + 1
2021 > oct_parser_t;
2022 typedef
2023 uint_parser<char_t, 16, 1,
2024 std::numeric_limits<char_t>::digits / 4 + 1
2025 > hex_parser_t;
2026
2027 escseq =
2028 ch_p('\\')
2029 >> (
2030 oct_parser_t()
2031 | as_lower_d['x'] >> hex_parser_t()
2032 | (anychar_p - chset<>('\n'))
2033 )
2034 ;
2035
2036#define BOOST_SLEX_DEBUG (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
2037
2038 BOOST_SPIRIT_DEBUG_TRACE_RULE(regex, BOOST_SLEX_DEBUG);
2039 BOOST_SPIRIT_DEBUG_TRACE_RULE(re, BOOST_SLEX_DEBUG);
2040 BOOST_SPIRIT_DEBUG_TRACE_RULE(series, BOOST_SLEX_DEBUG);
2041 BOOST_SPIRIT_DEBUG_TRACE_RULE(singleton, BOOST_SLEX_DEBUG);
2042 BOOST_SPIRIT_DEBUG_TRACE_RULE(singleton2, BOOST_SLEX_DEBUG);
2043 BOOST_SPIRIT_DEBUG_TRACE_RULE(fullccl, BOOST_SLEX_DEBUG);
2044 BOOST_SPIRIT_DEBUG_TRACE_RULE(ccl, BOOST_SLEX_DEBUG);
2045 BOOST_SPIRIT_DEBUG_TRACE_RULE(string, BOOST_SLEX_DEBUG);
2046 BOOST_SPIRIT_DEBUG_TRACE_RULE(escseq, BOOST_SLEX_DEBUG);
2047 BOOST_SPIRIT_DEBUG_TRACE_RULE(ccl_char, BOOST_SLEX_DEBUG);
2048
2049#undef BOOST_SLEX_DEBUG
2050 }
2051
2052 rule<ScannerT> const&
2053 start() const { return regex; }
2054 };
2055
2056 std::stack<node*> &node_stack;
2057
2058}; // class lexer_grammar
2059
2060template <typename StringT>
2061inline node *
2062parse(lexer_grammar& g, StringT const& str)
2063{
2064 typedef
2065 scanner<typename StringT::const_iterator, scanner_policies<> >
2066 scanner_t;
2067 typedef typename rule<scanner_t>::template result<scanner_t>::type
2068 result_t;
2069
2070 typename StringT::const_iterator first = str.begin();
2071 typename StringT::const_iterator last = str.end();
2072
2073 scanner_t scan(first, last);
2074// typename rule<scanner_t>::result_t hit = g.parse(scan);
2075 result_t hit = g.parse(scan);
2076 if (!hit || !scan.at_end())
2077 {
2078 while (g.node_stack.size())
2079 {
2080 delete g.node_stack.top();
2081 g.node_stack.pop();
2082 }
2083 return 0;
2084 }
2085
2086 BOOST_ASSERT(g.node_stack.size() == 1);
2087 node* rval = g.node_stack.top();
2088 g.node_stack.pop();
2089 node* an_eof_node = new eof_node();
2090 rval = new cat_node(rval, an_eof_node);
2091 return rval;
2092}
2093
2094inline
2095void make_case_insensitive(state_match_t& state_match)
2096{
2097 // TODO: Fix this.
2098 // This doesn't take into account foreign languages, figure out how to
2099 // do that. Also this approach is broken for this implementation of
2100 // wide chars.
2101 for (state_match_t::iterator iter = state_match.begin();
2102 iter != state_match.end(); ++iter)
2103 {
2104 int i, j;
2105 for (i = 'A', j = 'a'; i <= 'Z'; ++i, ++j)
2106 {
2107 // if either is set, turn them both on
2108 (*iter)[i] = (*iter)[j] = uchar((*iter)[i] | (*iter)[j]);
2109 }
2110 }
2111}
2112
2113template<bool wide_char>
2114struct regex_match_helper;
2115
2116template<>
2117struct regex_match_helper<false> // single byte char
2118{
2119 template <typename DfaT, typename IteratorT>
2120 static bool
2121 do_match(DfaT const& dfa, IteratorT &first, IteratorT const& last,
2122 int& regex_index,
2123 std::basic_string<
2124 typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2125 > *token)
2126 {
2127 typedef std::basic_string<
2128 typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2129 > string_type;
2130 typedef typename string_type::size_type size_type;
2131
2132 node_id_t s = 0;
2133 node_id_t last_accepting_index = invalid_node;
2134 IteratorT p = first;
2135 IteratorT last_accepting_cpos = first;
2136 while (p != last)
2137 {
2138 s = dfa.transition_table[s][(uchar)*p];
2139 if (s == invalid_node)
2140 break;
2141 if (token) token->append((size_type)1, *p);
2142 ++p;
2143 if (dfa.acceptance_index[s] != invalid_node)
2144 {
2145 last_accepting_index = s;
2146 last_accepting_cpos = p;
2147 }
2148 }
2149 if (last_accepting_index != invalid_node)
2150 {
2151#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
2152 std::cout << "dfa.acceptance_index[" << last_accepting_index << "] = " <<
2153 dfa.acceptance_index[last_accepting_index] << '\n';
2154#endif
2155
2156 first = last_accepting_cpos;
2157 regex_index = dfa.acceptance_index[last_accepting_index];
2158 return true;
2159 }
2160 else
2161 return false;
2162 }
2163};
2164
2165template<>
2166struct regex_match_helper<true> // wide char
2167{
2168 template <typename DfaT, typename IteratorT>
2169 static bool
2170 do_match(DfaT const& dfa, IteratorT &first, IteratorT const& last,
2171 int& regex_index,
2172 std::basic_string<
2173 typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2174 > *token)
2175 {
2176 typedef
2177 typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2178 char_t;
2179 typedef std::basic_string<char_t> string_type;
2180 typedef typename string_type::size_type size_type;
2181
2182 node_id_t s = 0;
2183 node_id_t last_accepting_index = invalid_node;
2184 IteratorT wp = first;
2185 IteratorT last_accepting_cpos = first;
2186
2187 while (wp != last)
2188 {
2189 for (unsigned int i = 0; i < sizeof(char_t); ++i)
2190 {
2191 s = dfa.transition_table[s][get_byte(*wp,i)];
2192 if (s == invalid_node)
2193 {
2194 goto break_while;
2195 }
2196 }
2197 if (token) token->append((size_type)1, *wp);
2198 ++wp;
2199 if (dfa.acceptance_index[s] != invalid_node)
2200 {
2201 last_accepting_index = s;
2202 last_accepting_cpos = wp;
2203 }
2204
2205 }
2206
2207 break_while:
2208 if (last_accepting_index != invalid_node)
2209 {
2210#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
2211 std::cout << "dfa.acceptance_index[" << last_accepting_index << "] = " <<
2212 dfa.acceptance_index[last_accepting_index] << '\n';
2213#endif
2214 first = last_accepting_cpos;
2215 regex_index = dfa.acceptance_index[last_accepting_index];
2216
2217 return true;
2218 }
2219 else
2220 return false;
2221 }
2222};
2223
2224template <typename DfaT, typename IteratorT, bool wide_char>
2225struct regex_match
2226{
2227 static bool
2228 do_match(DfaT const& dfa, IteratorT &first, IteratorT const& last,
2229 int& regex_index,
2230 std::basic_string<
2231 typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2232 > *token)
2233 {
2234 return regex_match_helper<wide_char>::do_match(
2235 dfa, first, last, regex_index, token);
2236 }
2237};
2238
2239} // namespace lexerimpl
2240
2241///////////////////////////////////////////////////////////////////////////////
2242//
2243template <typename IteratorT = char const*, typename TokenT = int,
2244 typename CallbackT = void(*)(IteratorT const &,
2245 IteratorT &,
2246 IteratorT const&,
2247 TokenT const&,
2248 lexer_control<TokenT> &)>
2249class lexer
2250{
2251public:
2252 typedef CallbackT callback_t;
2253 typedef
2254 typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2255 char_t;
2256
2257 struct regex_info
2258 {
2259 std::basic_string<char_t> str;
2260 TokenT token;
2261 CallbackT callback;
2262
2263 regex_info(const std::basic_string<char_t>& _str,
2264 const TokenT& _token,
2265 const CallbackT& _callback)
2266 : str(_str)
2267 , token(_token)
2268 , callback(_callback)
2269 {}
2270
2271 };
2272
2273 struct dfa_table
2274 {
2275 std::vector<std::vector<node_id_t> > transition_table;
2276 std::vector<node_id_t> acceptance_index;
2277 };
2278 typedef std::vector<node_id_t> node_table_t;
2279 typedef std::vector<node_table_t> transition_table_t;
2280 typedef std::vector<dfa_table> dfa_t;
2281
2282
2283 lexer(unsigned int states = 1);
2284
2285 void register_regex(const std::basic_string<char_t>& regex,
2286 const TokenT& id, const CallbackT& cb = CallbackT(),
2287 unsigned int state = 0);
2288
2289 TokenT next_token(IteratorT &first, IteratorT const &last,
2290 std::basic_string<char_t> *token = 0);
2291
2292 void create_dfa();
2293 bool has_compiled_dfa() { return m_compiled_dfa; }
2294
2295 void set_case_insensitive(bool insensitive);
2296
2297#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
2298 void dump(std::ostream& out);
2299#endif
2300 typedef std::vector<std::vector<regex_info> > regex_list_t;
2301
2302 bool load (std::ifstream &in, long unique_id = 0);
2303 bool save (std::ofstream &out, long unique_id = 0);
2304 enum {
2305 SLEX_SIGNATURE = 0x58454C53, // "SLEX"
2306 SLEX_VERSION_100 = 0x0100, // persistance version
2307 SLEX_LAST_KNOWN_VERSION = SLEX_VERSION_100,
2308 SLEX_MINOR_VERSION_MASK = 0xFF
2309 };
2310
2311private:
2312
2313 void create_dfa_for_state(int state);
2314
2315 static bool regex_match(const dfa_t& dfa, IteratorT& first,
2316 IteratorT& last, int& regex_index);
2317
2318 mutable std::stack<lexerimpl::node*> node_stack;
2319 lexerimpl::lexer_grammar g;
2320
2321 mutable bool m_compiled_dfa;
2322 mutable dfa_t m_dfa;
2323
2324 regex_list_t m_regex_list;
2325 bool m_case_insensitive;
2326
2327 unsigned int m_state;
2328 std::stack<unsigned int> m_state_stack;
2329 unsigned int m_num_states;
2330};
2331
2332
2333template <typename IteratorT, typename TokenT, typename CallbackT>
2334inline
2335lexer<IteratorT, TokenT, CallbackT>::lexer(unsigned int states)
2336 : g(node_stack)
2337 , m_compiled_dfa(false)
2338 , m_regex_list(states)
2339 , m_case_insensitive(false)
2340 , m_state(0)
2341 , m_state_stack()
2342 , m_num_states(states)
2343{
2344 BOOST_SPIRIT_DEBUG_TRACE_NODE_NAME(g, "slex::lexer",
2345 BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX);
2346}
2347
2348template <typename IteratorT, typename TokenT, typename CallbackT>
2349inline void
2350lexer<IteratorT, TokenT, CallbackT>::register_regex(
2351 const std::basic_string<char_t>& regex, const TokenT& id,
2352 const CallbackT& callback, unsigned int state)
2353{
2354 if (state > m_num_states) {
2355 m_regex_list.resize(state);
2356 m_num_states = state;
2357 }
2358 m_regex_list[state].push_back(regex_info(regex, id, callback));
2359}
2360
2361template <typename IteratorT, typename TokenT, typename CallbackT>
2362inline TokenT
2363lexer<IteratorT, TokenT, CallbackT>::next_token(
2364 IteratorT &first, IteratorT const& last,
2365 std::basic_string<
2366 typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2367 > *token)
2368{
2369 if (!m_compiled_dfa)
2370 {
2371 create_dfa();
2372 }
2373
2374 IteratorT saved = first;
2375 int regex_index;
2376 if (!lexerimpl::regex_match<dfa_table, IteratorT, (sizeof(char_t) > 1)>::
2377 do_match(m_dfa[m_state], first, last, regex_index, token))
2378 return -1; // TODO: can't return -1, need to return some invalid token.
2379 // how to figure this out? We can use traits I guess.
2380 else
2381 {
2382 regex_info regex = m_regex_list[m_state][regex_index];
2383 TokenT rval = regex.token;
2384 if (regex.callback)
2385 {
2386 // execute corresponding callback
2387 lexer_control<TokenT> controller(rval, m_state, m_state_stack);
2388 regex.callback(saved, first, last, regex.token, controller);
2389 if (controller.ignore_current_token_set()) {
2390 if (token)
2391 token->erase();
2392 return next_token(first, last, token);
2393 }
2394 }
2395 return rval;
2396 }
2397}
2398
2399namespace lexerimpl
2400{
2401
2402inline
2403bool find_acceptance_state(const node_set& eof_node_ids,
2404 const node_set& current_set,
2405 node_id_t& acceptance_node_id)
2406{
2407 for(node_set::const_iterator nsi = eof_node_ids.begin();
2408 nsi != eof_node_ids.end(); ++nsi)
2409 {
2410 node_id_t eof_node_id =*nsi;
2411 if (current_set.end() != current_set.find(eof_node_id))
2412 {
2413 // store the first one we come to as the
2414 // matched pattern
2415 acceptance_node_id = eof_node_id;
2416 // don't bother searching for more
2417 return true;
2418 }
2419 }
2420 return false;
2421}
2422
2423template <typename RegexListT, typename GrammarT>
92f5a8d4 2424#ifndef BOOST_NO_CXX11_SMART_PTR
b32b8144
FG
2425inline std::unique_ptr<node>
2426#else
7c673cae 2427inline std::auto_ptr<node>
b32b8144 2428#endif
7c673cae
FG
2429parse_regexes(const RegexListT& regex_list, GrammarT& g)
2430{
2431 // parse the expressions into a tree
2432 if (regex_list.begin() == regex_list.end())
2433 boost::throw_exception(bad_regex());
2434
2435 typename RegexListT::const_iterator ri = regex_list.begin();
92f5a8d4 2436#ifndef BOOST_NO_CXX11_SMART_PTR
b32b8144
FG
2437 std::unique_ptr<node> tree(lexerimpl::parse(g, (*ri).str));
2438#else
7c673cae 2439 std::auto_ptr<node> tree(lexerimpl::parse(g, (*ri).str));
b32b8144 2440#endif
7c673cae
FG
2441 if (tree.get() == 0)
2442 boost::throw_exception(bad_regex());
2443
2444 ++ri;
2445 for (/**/; ri != regex_list.end(); ++ri)
2446 {
92f5a8d4 2447#ifndef BOOST_NO_CXX11_SMART_PTR
b32b8144
FG
2448 std::unique_ptr<node> next_tree(lexerimpl::parse(g, (*ri).str));
2449#else
7c673cae 2450 std::auto_ptr<node> next_tree(lexerimpl::parse(g, (*ri).str));
b32b8144 2451#endif
7c673cae
FG
2452 if (next_tree.get() == 0)
2453 boost::throw_exception(bad_regex());
92f5a8d4 2454#ifndef BOOST_NO_CXX11_SMART_PTR
b32b8144
FG
2455 tree = std::unique_ptr<node>(new or_node(tree.release(), next_tree.release()));
2456#else
2457 tree = std::auto_ptr<node>(new or_node(tree.release(), next_tree.release()));
2458#endif
7c673cae
FG
2459 }
2460 return tree;
2461}
2462
2463} //namespace lexerimpl
2464
2465template <typename IteratorT, typename TokenT, typename CallbackT>
2466inline void
2467lexer<IteratorT, TokenT, CallbackT>::create_dfa()
2468{
2469 m_dfa.resize(m_num_states);
2470 for (unsigned int i = 0; i < m_num_states; ++i)
2471 create_dfa_for_state(i);
2472}
2473
2474// Algorithm from Compilers: Principles, Techniques, and Tools p. 141
2475template <typename IteratorT, typename TokenT, typename CallbackT>
2476inline void
2477lexer<IteratorT, TokenT, CallbackT>::create_dfa_for_state(int state)
2478{
2479 using lexerimpl::node;
92f5a8d4 2480#ifndef BOOST_NO_CXX11_SMART_PTR
b32b8144
FG
2481 std::unique_ptr<node> tree = lexerimpl::parse_regexes(m_regex_list[state], g);
2482#else
7c673cae 2483 std::auto_ptr<node> tree = lexerimpl::parse_regexes(m_regex_list[state], g);
b32b8144 2484#endif
7c673cae
FG
2485 node_id_t dummy = 0;
2486 tree->assign_node_ids(dummy);
2487
2488#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
2489 tree->dump(std::cout);
2490#endif
2491
2492 // compute followpos(root)
2493 followpos_t followpos;
2494 tree->compute_followpos(followpos);
2495
2496 // the dfa states <-> nfa state groups
2497 std::map<node_set, node_id_t> dstates1;
2498 std::map<node_id_t, node_set> dstates2;
2499
2500 // the dfa transitions
2501 m_dfa[state].transition_table.push_back(
2502 std::vector<node_id_t>(256, invalid_node));
2503 m_dfa[state].acceptance_index.push_back(invalid_node);
2504
2505 // whether the dfa state has been processed yet
2506 std::vector<node_id_t> marked;
2507
2508 // used to give a unique id to each dfa state
2509 node_id_t num_states = 0;
2510
2511 // initially, the only unmarked state in Dstates is firstpos(root).
2512 marked.push_back(0);
2513 node_set fpr = tree->firstpos();
2514 dstates1[fpr] = 0;
2515 dstates2[0] = fpr;
2516 state_match_t state_match;
2517 tree->compute_state_match(state_match);
2518
2519 if (m_case_insensitive)
2520 lexerimpl::make_case_insensitive(state_match);
2521
2522 node_set eof_node_ids;
2523 tree->get_eof_ids(eof_node_ids);
2524 // translate the eof_node_ids into a 0-based index
2525 std::map<node_id_t, node_id_t> eof_node_id_map;
2526 unsigned int x = 0;
2527 for (node_set::iterator node_id_it = eof_node_ids.begin();
2528 node_id_it != eof_node_ids.end();
2529 ++node_id_it)
2530 {
2531 eof_node_id_map[*node_id_it] = x++;
2532 }
2533
2534 // figure out if this is an acceptance state
2535 node_id_t eof_node_id;
2536 if (lexerimpl::find_acceptance_state(eof_node_ids, fpr, eof_node_id))
2537 {
2538 m_dfa[state].acceptance_index[0] = eof_node_id_map[eof_node_id];
2539 }
2540
2541 std::vector<node_id_t>::iterator i = std::find(marked.begin(), marked.end(),
2542 node_id_t(0));
2543 while (marked.end() != i)
2544 {
2545 *i = 1;
2546 node_id_t T = node_id_t(std::distance(marked.begin(), i));
2547 BOOST_ASSERT(T < dstates2.size());
2548 node_set Tstates = dstates2[T];
2549 for (node_id_t j = 0; j < 256; ++j)
2550 {
2551 node_set U;
2552 for (node_set::iterator k = Tstates.begin();
2553 k != Tstates.end(); ++k)
2554 {
2555 node_id_t p =*k;
2556 BOOST_ASSERT(p < state_match.size());
2557 BOOST_ASSERT(j < state_match[p].size());
2558 if (state_match[p][j])
2559 {
2560 node_set fpp = followpos[p];
2561 U.insert(fpp.begin(), fpp.end());
2562 }
2563 }
2564 if (U.size() > 0)
2565 {
2566 std::map<node_set, node_id_t>::iterator l = dstates1.find(U);
2567 node_id_t target_state;
2568 if (l == dstates1.end()) // not in the states yet
2569 {
2570 ++num_states;
2571 dstates1[U] = target_state = num_states;
2572 dstates2[target_state] = U;
2573 marked.push_back(0);
2574 m_dfa[state].transition_table.push_back(
2575 std::vector<node_id_t>(256, invalid_node));
2576 m_dfa[state].acceptance_index.push_back(invalid_node);
2577 // figure out if this is an acceptance state
2578 node_id_t eof_node_id;
2579 if (lexerimpl::find_acceptance_state(eof_node_ids, U, eof_node_id))
2580 {
2581 m_dfa[state].acceptance_index[target_state] =
2582 eof_node_id_map[eof_node_id];
2583 }
2584 }
2585 else
2586 {
2587 target_state = dstates1[U];
2588 }
2589
2590 BOOST_ASSERT(T < m_dfa[state].transition_table.size());
2591 BOOST_ASSERT(j < m_dfa[state].transition_table[T].size());
2592 m_dfa[state].transition_table[T][j] = target_state;
2593 }
2594
2595 }
2596
2597 i = std::find(marked.begin(), marked.end(), node_id_t(0));
2598 }
2599 m_compiled_dfa = true;
2600}
2601
2602template <typename IteratorT, typename TokenT, typename CallbackT>
2603inline void
2604lexer<IteratorT, TokenT, CallbackT>::set_case_insensitive(bool insensitive)
2605{
2606 m_case_insensitive = insensitive;
2607}
2608
2609
2610#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
2611template <typename IteratorT, typename TokenT, typename CallbackT>
2612inline void
2613lexer<IteratorT, TokenT, CallbackT>::dump(std::ostream& out)
2614{
2615 for (unsigned x = 0; x < m_dfa.size(); ++x)
2616 {
2617 out << "\nm_dfa[" << x << "] has " << m_dfa[x].transition_table.size() << " states\n";
2618 for (node_id_t i = 0; i < m_dfa[x].transition_table.size(); ++i)
2619 {
2620 out << "state " << i << ":";
2621 for (node_id_t j = 0; j < m_dfa[x].transition_table[i].size(); ++j)
2622 {
2623 if (m_dfa[x].transition_table[i][j] != invalid_node)
2624 out << j << "->" << m_dfa[x].transition_table[i][j] << " ";
2625 }
2626 out << "\n";
2627 }
2628 out << "acceptance states: ";
2629 for(unsigned int k = 0; k < m_dfa[x].acceptance_index.size(); ++k)
2630 {
2631 if (m_dfa[x].acceptance_index[k] != invalid_node)
2632 out << '<' << k << ',' << m_dfa[x].acceptance_index[k] << "> ";
2633 }
2634 out << endl;
2635 }
2636}
2637#endif
2638
2639///////////////////////////////////////////////////////////////////////////////
2640// load the lexer tables
2641#define slex_in(strm, val) \
2642 strm.read((char*)&val, sizeof(val)); \
2643 if (std::ios::goodbit != strm.rdstate()) return false
2644
2645template <typename IteratorT, typename TokenT, typename CallbackT>
2646inline bool
2647lexer<IteratorT, TokenT, CallbackT>::load (std::ifstream &in, long unique_id)
2648{
2649// ensure correct signature and version
2650long signature = 0;
2651
2652 slex_in (in, signature);
2653 if (signature != SLEX_SIGNATURE)
2654 return false; // not for us
2655
2656long version = 0;
2657
2658 slex_in (in, version);
2659 if ((version & ~SLEX_MINOR_VERSION_MASK) > SLEX_LAST_KNOWN_VERSION)
2660 return false; // to new for us
2661
2662long uid = 0;
2663
2664 slex_in (in, uid);
2665 if (uid != unique_id)
2666 return false; // not saved by us
2667
2668// load auxiliary members
2669int num_states = 0;
2670
2671 slex_in (in, num_states);
2672
2673// load the dfa tables
2674dfa_t in_dfa;
2675std::size_t dfa_size = 0;
2676
2677 slex_in (in, dfa_size);
2678 in_dfa.resize(dfa_size);
2679 for (std::size_t dfa = 0; dfa < dfa_size; ++dfa)
2680 {
2681 // load the transition tables
2682 std::size_t tt_size = 0;
2683 transition_table_t &tt_table = in_dfa[dfa].transition_table;
2684
2685 slex_in (in, tt_size);
2686 tt_table.resize(tt_size);
2687 for (std::size_t tt = 0; tt < tt_size; ++tt)
2688 {
2689 std::size_t nt_size = 0;
2690 node_table_t &nt_table = tt_table[tt];
2691
2692 slex_in (in, nt_size);
2693 nt_table.resize(nt_size);
2694 for (std::size_t nt = 0; nt < nt_size; ++nt)
2695 {
2696 slex_in (in, nt_table[nt]);
2697 }
2698 }
2699
2700 // load the acceptance index table
2701 std::size_t ai_size = 0;
2702 node_table_t &ai_table = in_dfa[dfa].acceptance_index;
2703
2704 slex_in (in, ai_size);
2705 ai_table.resize(ai_size);
2706 for (std::size_t ai = 0; ai < ai_size; ++ai)
2707 {
2708 slex_in (in, ai_table[ai]);
2709 }
2710 }
2711
2712 m_dfa.swap(in_dfa); // success, swap in the read values
2713 m_num_states = num_states;
2714
2715 m_compiled_dfa = true;
2716 return true;
2717}
2718
2719#undef slex_in
2720
2721///////////////////////////////////////////////////////////////////////////////
2722// save the lexer tables
2723#define slex_out(strm, val) \
2724 strm.write((char*)&val, sizeof(val)); \
2725 if (std::ios::goodbit != strm.rdstate()) return false
2726
2727template <typename IteratorT, typename TokenT, typename CallbackT>
2728inline bool
2729lexer<IteratorT, TokenT, CallbackT>::save (std::ofstream &out, long unique_id)
2730{
2731// save signature and version information
2732long out_long = SLEX_SIGNATURE;
2733
2734 slex_out(out, out_long);
2735 out_long = SLEX_VERSION_100;
2736 slex_out(out, out_long);
2737 slex_out(out, unique_id);
2738
2739// save auxiliary members
2740 slex_out(out, m_num_states);
2741
2742// save the dfa tables
2743 typedef typename dfa_t::const_iterator dfa_iter_t;
2744 typedef transition_table_t::const_iterator transition_table_iter_t;
2745 typedef node_table_t::const_iterator node_table_iter_t;
2746
2747 std::size_t out_size_t = m_dfa.size();
2748 slex_out(out, out_size_t);
2749
2750 dfa_iter_t end = m_dfa.end();
2751 for (dfa_iter_t it = m_dfa.begin(); it != end; ++it)
2752 {
2753 // save the transition table
2754 out_size_t = (*it).transition_table.size();
2755 slex_out(out, out_size_t);
2756
2757 transition_table_iter_t tt_end = (*it).transition_table.end();
2758 for (transition_table_iter_t tt_it = (*it).transition_table.begin();
2759 tt_it != tt_end;
2760 ++tt_it)
2761 {
2762 out_size_t = (*tt_it).size();
2763 slex_out(out, out_size_t);
2764
2765 node_table_iter_t nt_end = (*tt_it).end();
2766 for (node_table_iter_t nt_it = (*tt_it).begin();
2767 nt_it != nt_end;
2768 ++nt_it)
2769 {
2770 slex_out(out, (*nt_it));
2771 }
2772 }
2773
2774 // save the acceptance index table
2775 out_size_t = (*it).acceptance_index.size();
2776 slex_out(out, out_size_t);
2777
2778 node_table_iter_t nt_end = (*it).acceptance_index.end();
2779 for (node_table_iter_t nt_it = (*it).acceptance_index.begin();
2780 nt_it != nt_end;
2781 ++nt_it)
2782 {
2783 slex_out(out, (*nt_it));
2784 }
2785 }
2786 return true;
2787}
2788
2789#undef slex_out
2790
2791/*
2792a lexer_control object supports some operations on the lexer.
2793 get current lexer state
2794 set state
2795 terminate
2796 state stack (push, pop, top)
2797 set new token return value
2798 ignore the current token
2799 yymore
2800 get length of matched token
2801*/
2802template <typename TokenT>
2803class lexer_control
2804{
2805public:
2806
2807 lexer_control(TokenT& token, unsigned int& current_state,
2808 std::stack<unsigned int>& state_stack);
2809 // functions dealing with the lexer state
2810
2811 // set the state to state
2812 void set_state(unsigned int state);
2813
2814 // get the current state
2815 unsigned int get_state();
2816
2817 // pushes the current state onto the top of the state stack and
2818 // switches to new_state
2819 void push_state(unsigned int new_state);
2820
2821 // pops the top of the state stack and switches to it.
2822 void pop_state();
2823
2824 // returns the top of the stack without altering the stack's contents.
2825 unsigned int top_state();
2826
2827 // functions dealing with the token returned.
2828
2829 // set a new TokenT return value, overriding that one that was
2830 // registered via. register_regex()
2831 void set_token(const TokenT& token);
2832
2833 // tell the lexer to return an invalid token, signifying termination.
2834 void terminate();
2835
2836 // ignore the current token, and move on to the next one. The current
2837 // token will NOT be returned from next_token()
2838 void ignore_current_token();
2839
2840 // returns true if ignore_current_token() has been called,
2841 // false otherwise.
2842 bool ignore_current_token_set();
2843
2844private:
2845 TokenT& m_token;
2846 bool m_ignore_current_token;
2847 unsigned int& m_current_state;
2848 std::stack<unsigned int>& m_state_stack;
2849};
2850
2851template <typename TokenT>
2852inline
2853lexer_control<TokenT>::lexer_control(TokenT& token, unsigned int& current_state,
2854 std::stack<unsigned int>& state_stack)
2855 : m_token(token)
2856 , m_ignore_current_token(false)
2857 , m_current_state(current_state)
2858 , m_state_stack(state_stack)
2859{
2860}
2861
2862template <typename TokenT>
2863inline void
2864lexer_control<TokenT>::set_state(unsigned int state)
2865{
2866 m_current_state = state;
2867}
2868
2869template <typename TokenT>
2870inline unsigned int
2871lexer_control<TokenT>::get_state()
2872{
2873 return m_current_state;
2874}
2875
2876template <typename TokenT>
2877inline void
2878lexer_control<TokenT>::push_state(unsigned int new_state)
2879{
2880 m_state_stack.push(m_current_state);
2881 m_current_state = new_state;
2882}
2883
2884template <typename TokenT>
2885inline void
2886lexer_control<TokenT>::pop_state()
2887{
2888 m_current_state = m_state_stack.top();
2889 m_state_stack.pop();
2890}
2891
2892template <typename TokenT>
2893inline unsigned int
2894lexer_control<TokenT>::top_state()
2895{
2896 return m_state_stack.top();
2897}
2898
2899template <typename TokenT>
2900inline void
2901lexer_control<TokenT>::set_token(const TokenT& token)
2902{
2903 m_token = token;
2904}
2905
2906template <typename TokenT>
2907inline void
2908lexer_control<TokenT>::terminate()
2909{
2910 m_token = -1; // TOOD: fix this, need to be determined by traits
2911}
2912
2913template <typename TokenT>
2914inline void
2915lexer_control<TokenT>::ignore_current_token()
2916{
2917 m_ignore_current_token = true;
2918}
2919
2920template <typename TokenT>
2921inline bool
2922lexer_control<TokenT>::ignore_current_token_set()
2923{
2924 return m_ignore_current_token;
2925}
2926
2927} // namespace classic
2928} // namespace spirit
2929} // namespace boost
2930
2931#undef BOOST_SPIRIT_IT_NS
2932
2933#endif
2934