//  Copyright (c) 2001-2010 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example shows how to create a simple lexer recognizing a couple of
//  different tokens and how to use this with a grammar. This example has a
//  heavily backtracking grammar, which makes it a candidate for lexer-based
//  parsing (all tokens are scanned and generated only once, even if
//  backtracking is required), which speeds up the overall parsing process
//  considerably, outweighing the overhead needed for setting up the lexer.
//
//  Additionally, this example demonstrates how to define a token set usable
//  as the skip parser during parsing, allowing several tokens to be ignored
//  while matching the grammar.
//
//  This example recognizes couplets, which are sequences of numbers enclosed
//  in matching pairs of parentheses. See the comments below for details
//  and examples.
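//
//  For instance, an input such as
//      (1) ((1 2)(3 4)) ... (((1 2 (3) 4)))
//  would be accepted (the "..." matches the ellipses token defined below),
//  with whitespace and C-style comments skipped in between.
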
// #define BOOST_SPIRIT_LEXERTL_DEBUG
// #define BOOST_SPIRIT_DEBUG

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>

#include <iostream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;

///////////////////////////////////////////////////////////////////////////////
//  Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example3_tokens : lex::lexer<Lexer>
{
    example3_tokens()
    {
        // define the tokens to match
        ellipses = "\\.\\.\\.";
        number = "[0-9]+";

        // associate the tokens and the token set with the lexer
        this->self = ellipses | '(' | ')' | number;

        // define the whitespace to ignore (spaces, tabs, newlines and C-style
        // comments)
        this->self("WS")
            =   lex::token_def<>("[ \\t\\n]+")          // whitespace
            |   "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"   // C style comments
            ;
    }

    // these tokens expose the iterator_range of the matched input sequence
    lex::token_def<> ellipses, identifier, number;
};

///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example3_grammar
  : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
{
    template <typename TokenDef>
    example3_grammar(TokenDef const& tok)
      : example3_grammar::base_type(start)
    {
        start
            =  +(couplet | tok.ellipses)
            ;

        //  A couplet matches nested left and right parentheses.
        //  For example:
        //      (1) (1 2) (1 2 3) ...
        //      ((1)) ((1 2)(3 4)) (((1) (2 3) (1 2 (3) 4))) ...
        couplet
            =   tok.number
            |   '(' >> +couplet >> ')'
            ;

        BOOST_SPIRIT_DEBUG_NODE(start);
        BOOST_SPIRIT_DEBUG_NODE(couplet);
    }

    qi::rule<Iterator, qi::in_state_skipper<Lexer> > start, couplet;
};
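
// Note: qi::in_state_skipper<Lexer> is the skipper type corresponding to the
// qi::in_state("WS")[...] skip parser passed to phrase_parse() in main(); the
// grammar and its rules have to use the same skipper type.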

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // This is the token type to return from the lexer iterator
    typedef lex::lexertl::token<base_iterator_type> token_type;

    // This is the lexer type to use to tokenize the input.
    // Here we use the lexertl based lexer engine.
    typedef lex::lexertl::lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example3_tokens<lexer_type> example3_tokens;

    // this is the iterator type exposed by the lexer
    typedef example3_tokens::iterator_type iterator_type;

    // this is the type of the grammar to parse
    typedef example3_grammar<iterator_type, example3_tokens::lexer_def> example3_grammar;
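
    // The second template parameter, example3_tokens::lexer_def, is the lexer
    // type the grammar's in_state_skipper is parameterized with (see the
    // grammar definition above).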

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example3_tokens tokens;                         // Our lexer
    example3_grammar calc(tokens);                  // Our parser

    // read in the file into memory
    std::string str (read_from_file("example3.input"));

    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = tokens.begin(it, str.end());
    iterator_type end = tokens.end();

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    // Note how we use the lexer defined above as the skip parser.
    bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[tokens.self]);
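
    // qi::in_state("WS")[tokens.self] switches the lexer into the "WS" state
    // while skipping, so the whitespace and comment tokens defined in that
    // state are consumed silently between the grammar's tokens.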

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}