]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // Copyright (c) 2001-2010 Hartmut Kaiser |
2 | // | |
3 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | |
4 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
5 | ||
6 | // This example shows how to create a simple lexer recognizing a couple of | |
7 | // different tokens aimed at a simple language and how to use this lexer with | |
8 | // a grammar. It shows how to associate attributes to tokens and how to access | |
9 | // the token attributes from inside the grammar. | |
10 | // | |
11 | // We use explicit token attribute types, making the corresponding token instances | |
12 | // convert the matched input into an instance of that type. The token |
13 | // attribute is exposed as the parser attribute if this token is used as a | |
14 | // parser component somewhere in a grammar. | |
15 | // | |
16 | // Additionally, this example demonstrates, how to define a token set usable | |
17 | // as the skip parser during parsing, allowing to define several tokens to be | |
18 | // ignored. | |
19 | // | |
20 | // This example recognizes a very simple programming language having | |
21 | // assignment statements and if and while control structures. Look at the file | |
22 | // example4.input for an example. | |
23 | ||
7c673cae FG |
24 | #include <boost/spirit/include/qi.hpp> |
25 | #include <boost/spirit/include/lex_lexertl.hpp> | |
1e59de90 | 26 | #include <boost/phoenix/operator.hpp> |
7c673cae FG |
27 | |
28 | #include <iostream> | |
29 | #include <fstream> | |
30 | #include <string> | |
31 | ||
32 | #include "example.hpp" | |
33 | ||
34 | using namespace boost::spirit; | |
35 | using boost::phoenix::val; | |
36 | ||
37 | /////////////////////////////////////////////////////////////////////////////// | |
38 | // Token definition | |
39 | /////////////////////////////////////////////////////////////////////////////// | |
///////////////////////////////////////////////////////////////////////////////
//  Token definition
//
//  Defines the tokens of the toy language: keywords (if, else, while),
//  identifiers, integer constants, single-character operators/punctuation,
//  and a "WS" lexer state holding the tokens to be skipped (whitespace and
//  C-style comments).
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example4_tokens : lex::lexer<Lexer>
{
    example4_tokens()
    {
        // define the patterns each token matches
        identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
        constant = "[0-9]+";
        if_ = "if";
        else_ = "else";
        while_ = "while";

        // associate the tokens and the token set with the lexer; the
        // single characters are exposed as char_token_type tokens
        this->self = lex::token_def<>('(') | ')' | '{' | '}' | '=' | ';' | constant;

        // NOTE: the keyword tokens are added before 'identifier' —
        // definition order determines match priority, so "if"/"else"/"while"
        // are recognized as keywords rather than as plain identifiers
        this->self += if_ | else_ | while_ | identifier;

        // define the tokens to ignore while parsing in the "WS" lexer state:
        // spaces, tabs, newlines, and C-style /* ... */ comments
        this->self("WS")
            =   lex::token_def<>("[ \\t\\n]+")
            |   "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"
            ;
    }

//[example4_token_def
    // these tokens have no attribute type: they expose only the
    // iterator_range of the matched input sequence
    lex::token_def<> if_, else_, while_;

    // The following two tokens have an associated attribute type: 'identifier'
    // carries a string (the identifier name) and 'constant' carries the
    // matched integer value.
    //
    // Note: any token attribute type explicitly specified in a token_def<>
    //       declaration needs to be listed during token type definition as
    //       well (see the typedef for the token_type in main() below).
    //
    // The conversion of the matched input to an instance of this type occurs
    // once (on first access), which makes token attributes as efficient as
    // possible. Moreover, token instances are constructed once by the lexer
    // library. From this point on tokens are passed by reference only,
    // avoiding them being copied around.
    lex::token_def<std::string> identifier;
    lex::token_def<unsigned int> constant;
//]
};
85 | ||
86 | /////////////////////////////////////////////////////////////////////////////// | |
87 | // Grammar definition | |
88 | /////////////////////////////////////////////////////////////////////////////// | |
///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
//
//  Parses the token stream produced by example4_tokens: a program is one or
//  more '{ ... }' blocks containing assignment, if, and while statements.
//  The skipper is qi::in_state_skipper<Lexer>, i.e. the tokens defined in
//  the lexer's "WS" state are ignored between the tokens matched here.
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example4_grammar
  : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
{
    // 'tok' gives the grammar access to the token_def instances defined in
    // example4_tokens, so the tokens can be used as parser components
    template <typename TokenDef>
    example4_grammar(TokenDef const& tok)
      : example4_grammar::base_type(program)
    {
        using boost::spirit::_val;

        program
            =  +block                   // one or more blocks
            ;

        block
            =   '{' >> *statement >> '}'
            ;

        statement
            =   assignment
            |   if_stmt
            |   while_stmt
            ;

        // the semantic action prints the identifier carried by the first
        // component of the sequence (_1, a std::string — see the lexer)
        assignment
            =   (tok.identifier >> '=' >> expression >> ';')
                [
                    std::cout << val("assignment statement to: ") << _1 << "\n"
                ]
            ;

        // _2 is the attribute of the parenthesized condition (the
        // expression's variant value); the else-branch is optional
        if_stmt
            =   (   tok.if_ >> '(' >> expression >> ')' >> block
                    >> -(tok.else_ >> block)
                )
                [
                    std::cout << val("if expression: ") << _2 << "\n"
                ]
            ;

        while_stmt
            =   (tok.while_ >> '(' >> expression >> ')' >> block)
                [
                    std::cout << val("while expression: ") << _2 << "\n"
                ]
            ;

        // since expression has a variant return type accommodating for
        // std::string and unsigned integer, both possible values may be
        // returned to the calling rule
        expression
            =   tok.identifier [ _val = _1 ]
            |   tok.constant   [ _val = _1 ]
            ;
    }

    // attribute type of 'expression': either a constant or an identifier name
    typedef boost::variant<unsigned int, std::string> expression_type;

    qi::rule<Iterator, qi::in_state_skipper<Lexer> > program, block, statement;
    qi::rule<Iterator, qi::in_state_skipper<Lexer> > assignment, if_stmt;
    qi::rule<Iterator, qi::in_state_skipper<Lexer> > while_stmt;

    // the expression is the only rule having a return value
    qi::rule<Iterator, expression_type(), qi::in_state_skipper<Lexer> > expression;
};
154 | ||
155 | /////////////////////////////////////////////////////////////////////////////// | |
156 | int main() | |
157 | { | |
158 | // iterator type used to expose the underlying input stream | |
159 | typedef std::string::iterator base_iterator_type; | |
160 | ||
161 | //[example4_token | |
162 | // This is the lexer token type to use. The second template parameter lists | |
163 | // all attribute types used for token_def's during token definition (see | |
164 | // calculator_tokens<> above). Here we use the predefined lexertl token | |
165 | // type, but any compatible token type may be used instead. | |
166 | // | |
167 | // If you don't list any token attribute types in the following declaration | |
168 | // (or just use the default token type: lexertl_token<base_iterator_type>) | |
169 | // it will compile and work just fine, just a bit less efficient. This is | |
170 | // because the token attribute will be generated from the matched input | |
171 | // sequence every time it is requested. But as soon as you specify at | |
172 | // least one token attribute type you'll have to list all attribute types | |
173 | // used for token_def<> declarations in the token definition class above, | |
174 | // otherwise compilation errors will occur. | |
175 | typedef lex::lexertl::token< | |
176 | base_iterator_type, boost::mpl::vector<unsigned int, std::string> | |
177 | > token_type; | |
178 | //] | |
179 | // Here we use the lexertl based lexer engine. | |
180 | typedef lex::lexertl::lexer<token_type> lexer_type; | |
181 | ||
182 | // This is the token definition type (derived from the given lexer type). | |
183 | typedef example4_tokens<lexer_type> example4_tokens; | |
184 | ||
185 | // this is the iterator type exposed by the lexer | |
186 | typedef example4_tokens::iterator_type iterator_type; | |
187 | ||
188 | // this is the type of the grammar to parse | |
189 | typedef example4_grammar<iterator_type, example4_tokens::lexer_def> example4_grammar; | |
190 | ||
191 | // now we use the types defined above to create the lexer and grammar | |
192 | // object instances needed to invoke the parsing process | |
193 | example4_tokens tokens; // Our lexer | |
194 | example4_grammar calc(tokens); // Our parser | |
195 | ||
196 | std::string str (read_from_file("example4.input")); | |
197 | ||
198 | // At this point we generate the iterator pair used to expose the | |
199 | // tokenized input stream. | |
200 | std::string::iterator it = str.begin(); | |
201 | iterator_type iter = tokens.begin(it, str.end()); | |
202 | iterator_type end = tokens.end(); | |
203 | ||
204 | // Parsing is done based on the token stream, not the character | |
205 | // stream read from the input. | |
206 | // Note how we use the lexer defined above as the skip parser. It must | |
207 | // be explicitly wrapped inside a state directive, switching the lexer | |
208 | // state for the duration of skipping whitespace. | |
209 | bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[tokens.self]); | |
210 | ||
211 | if (r && iter == end) | |
212 | { | |
213 | std::cout << "-------------------------\n"; | |
214 | std::cout << "Parsing succeeded\n"; | |
215 | std::cout << "-------------------------\n"; | |
216 | } | |
217 | else | |
218 | { | |
219 | std::cout << "-------------------------\n"; | |
220 | std::cout << "Parsing failed\n"; | |
221 | std::cout << "-------------------------\n"; | |
222 | } | |
223 | ||
224 | std::cout << "Bye... :-) \n\n"; | |
225 | return 0; | |
226 | } |