]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // Copyright (c) 2001-2011 Hartmut Kaiser |
2 | // | |
3 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | |
4 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
5 | ||
6 | #if !defined(BOOST_SPIRIT_LEXER_PARSE_NOV_17_2007_0246PM) | |
7 | #define BOOST_SPIRIT_LEXER_PARSE_NOV_17_2007_0246PM | |
8 | ||
9 | #if defined(_MSC_VER) | |
10 | #pragma once | |
11 | #endif | |
12 | ||
13 | #include <boost/spirit/home/qi/skip_over.hpp> | |
14 | #include <boost/spirit/home/qi/parse.hpp> | |
15 | #include <boost/spirit/home/qi/nonterminal/grammar.hpp> | |
16 | #include <boost/spirit/home/support/unused.hpp> | |
17 | #include <boost/spirit/home/lex/lexer.hpp> | |
18 | #include <boost/mpl/assert.hpp> | |
19 | ||
20 | namespace boost { namespace spirit { namespace lex | |
21 | { | |
22 | /////////////////////////////////////////////////////////////////////////// | |
23 | // Import skip_flag enumerator type from Qi namespace | |
24 | using qi::skip_flag; | |
25 | ||
26 | /////////////////////////////////////////////////////////////////////////// | |
27 | // | |
28 | // The tokenize_and_parse() function is one of the main Spirit API | |
29 | // functions. It simplifies using a lexer as the underlying token source | |
30 | // while parsing a given input sequence. | |
31 | // | |
32 | // The function takes a pair of iterators spanning the underlying input | |
33 | // stream to parse, the lexer object (built from the token definitions) | |
34 | // and a parser object (built from the parser grammar definition). | |
35 | // | |
36 | // The second version of this function additionally takes an attribute to | |
37 | // be used as the top level data structure instance the parser should use | |
38 | // to store the recognized input to. | |
39 | // | |
40 | // The function returns true if the parsing succeeded (the given input | |
41 | // sequence has been successfully matched by the given grammar). | |
42 | // | |
43 | // first, last: The pair of iterators spanning the underlying input | |
44 | // sequence to parse. These iterators must at least | |
    //                  conform to the requirements of the std::input_iterator
46 | // category. | |
47 | // On exit the iterator 'first' will be updated to the | |
48 | // position right after the last successfully matched | |
49 | // token. | |
50 | // lex: The lexer object (encoding the token definitions) to be | |
51 | // used to convert the input sequence into a sequence of | |
52 | // tokens. This token sequence is passed to the parsing | |
53 | // process. The LexerExpr type must conform to the | |
54 | // lexer interface described in the corresponding section | |
55 | // of the documentation. | |
56 | // xpr: The grammar object (encoding the parser grammar) to be | |
57 | // used to match the token sequence generated by the lex | |
58 | // object instance. The ParserExpr type must conform to | |
59 | // the grammar interface described in the corresponding | |
60 | // section of the documentation. | |
61 | // attr: The top level attribute passed to the parser. It will | |
62 | // be populated during the parsing of the input sequence. | |
63 | // On exit it will hold the 'parser result' corresponding | |
64 | // to the matched input sequence. | |
65 | // | |
66 | /////////////////////////////////////////////////////////////////////////// | |
67 | template <typename Iterator, typename Lexer, typename ParserExpr> | |
68 | inline bool | |
69 | tokenize_and_parse(Iterator& first, Iterator last, Lexer const& lex, | |
70 | ParserExpr const& xpr) | |
71 | { | |
72 | // Report invalid expression error as early as possible. | |
73 | // If you got an error_invalid_expression error message here, | |
74 | // then the expression (expr) is not a valid spirit qi expression. | |
75 | BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr); | |
76 | ||
77 | typename Lexer::iterator_type iter = lex.begin(first, last); | |
78 | return compile<qi::domain>(xpr).parse( | |
79 | iter, lex.end(), unused, unused, unused); | |
80 | } | |
81 | ||
82 | /////////////////////////////////////////////////////////////////////////// | |
83 | template <typename Iterator, typename Lexer, typename ParserExpr | |
84 | , typename Attribute> | |
85 | inline bool | |
86 | tokenize_and_parse(Iterator& first, Iterator last, Lexer const& lex | |
87 | , ParserExpr const& xpr, Attribute& attr) | |
88 | { | |
89 | // Report invalid expression error as early as possible. | |
90 | // If you got an error_invalid_expression error message here, | |
91 | // then the expression (expr) is not a valid spirit qi expression. | |
92 | BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr); | |
93 | ||
94 | typename Lexer::iterator_type iter = lex.begin(first, last); | |
95 | return compile<qi::domain>(xpr).parse( | |
96 | iter, lex.end(), unused, unused, attr); | |
97 | } | |
98 | ||
99 | /////////////////////////////////////////////////////////////////////////// | |
100 | // | |
101 | // The tokenize_and_phrase_parse() function is one of the main Spirit API | |
102 | // functions. It simplifies using a lexer as the underlying token source | |
103 | // while phrase parsing a given input sequence. | |
104 | // | |
105 | // The function takes a pair of iterators spanning the underlying input | |
106 | // stream to parse, the lexer object (built from the token definitions) | |
107 | // and a parser object (built from the parser grammar definition). The | |
108 | // additional skipper parameter will be used as the skip parser during | |
109 | // the parsing process. | |
110 | // | |
111 | // The second version of this function additionally takes an attribute to | |
112 | // be used as the top level data structure instance the parser should use | |
113 | // to store the recognized input to. | |
114 | // | |
115 | // The function returns true if the parsing succeeded (the given input | |
116 | // sequence has been successfully matched by the given grammar). | |
117 | // | |
118 | // first, last: The pair of iterators spanning the underlying input | |
119 | // sequence to parse. These iterators must at least | |
    //                  conform to the requirements of the std::input_iterator
121 | // category. | |
122 | // On exit the iterator 'first' will be updated to the | |
123 | // position right after the last successfully matched | |
124 | // token. | |
125 | // lex: The lexer object (encoding the token definitions) to be | |
126 | // used to convert the input sequence into a sequence of | |
127 | // tokens. This token sequence is passed to the parsing | |
128 | // process. The LexerExpr type must conform to the | |
129 | // lexer interface described in the corresponding section | |
130 | // of the documentation. | |
131 | // xpr: The grammar object (encoding the parser grammar) to be | |
132 | // used to match the token sequence generated by the lex | |
133 | // object instance. The ParserExpr type must conform to | |
134 | // the grammar interface described in the corresponding | |
135 | // section of the documentation. | |
136 | // skipper: The skip parser to be used while parsing the given | |
137 | // input sequence. Note, the skip parser will have to | |
138 | // act on the same token sequence as the main parser | |
139 | // 'xpr'. | |
140 | // post_skip: The post_skip flag controls whether the function will | |
141 | // invoke an additional post skip after the main parser | |
142 | // returned. | |
143 | // attr: The top level attribute passed to the parser. It will | |
144 | // be populated during the parsing of the input sequence. | |
145 | // On exit it will hold the 'parser result' corresponding | |
146 | // to the matched input sequence. | |
147 | // | |
148 | /////////////////////////////////////////////////////////////////////////// | |
149 | template <typename Iterator, typename Lexer, typename ParserExpr | |
150 | , typename Skipper> | |
151 | inline bool | |
152 | tokenize_and_phrase_parse(Iterator& first, Iterator last | |
153 | , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper | |
154 | , BOOST_SCOPED_ENUM(skip_flag) post_skip = skip_flag::postskip) | |
155 | { | |
156 | // Report invalid expression error as early as possible. | |
157 | // If you got an error_invalid_expression error message here, | |
158 | // then the expression (expr) is not a valid spirit qi expression. | |
159 | BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr); | |
160 | BOOST_SPIRIT_ASSERT_MATCH(qi::domain, Skipper); | |
161 | ||
162 | typedef | |
163 | typename spirit::result_of::compile<qi::domain, Skipper>::type | |
164 | skipper_type; | |
165 | skipper_type const skipper_ = compile<qi::domain>(skipper); | |
166 | ||
167 | typename Lexer::iterator_type iter = lex.begin(first, last); | |
168 | typename Lexer::iterator_type end = lex.end(); | |
169 | if (!compile<qi::domain>(xpr).parse( | |
170 | iter, end, unused, skipper_, unused)) | |
171 | return false; | |
172 | ||
173 | // do a final post-skip | |
174 | if (post_skip == skip_flag::postskip) | |
175 | qi::skip_over(iter, end, skipper_); | |
176 | return true; | |
177 | } | |
178 | ||
179 | template <typename Iterator, typename Lexer, typename ParserExpr | |
180 | , typename Skipper, typename Attribute> | |
181 | inline bool | |
182 | tokenize_and_phrase_parse(Iterator& first, Iterator last | |
183 | , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper | |
184 | , BOOST_SCOPED_ENUM(skip_flag) post_skip, Attribute& attr) | |
185 | { | |
186 | // Report invalid expression error as early as possible. | |
187 | // If you got an error_invalid_expression error message here, | |
188 | // then the expression (expr) is not a valid spirit qi expression. | |
189 | BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr); | |
190 | BOOST_SPIRIT_ASSERT_MATCH(qi::domain, Skipper); | |
191 | ||
192 | typedef | |
193 | typename spirit::result_of::compile<qi::domain, Skipper>::type | |
194 | skipper_type; | |
195 | skipper_type const skipper_ = compile<qi::domain>(skipper); | |
196 | ||
197 | typename Lexer::iterator_type iter = lex.begin(first, last); | |
198 | typename Lexer::iterator_type end = lex.end(); | |
199 | if (!compile<qi::domain>(xpr).parse( | |
200 | iter, end, unused, skipper_, attr)) | |
201 | return false; | |
202 | ||
203 | // do a final post-skip | |
204 | if (post_skip == skip_flag::postskip) | |
205 | qi::skip_over(iter, end, skipper_); | |
206 | return true; | |
207 | } | |
208 | ||
209 | /////////////////////////////////////////////////////////////////////////// | |
210 | template <typename Iterator, typename Lexer, typename ParserExpr | |
211 | , typename Skipper, typename Attribute> | |
212 | inline bool | |
213 | tokenize_and_phrase_parse(Iterator& first, Iterator last | |
214 | , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper | |
215 | , Attribute& attr) | |
216 | { | |
217 | return tokenize_and_phrase_parse(first, last, lex, xpr, skipper | |
218 | , skip_flag::postskip, attr); | |
219 | } | |
220 | ||
221 | /////////////////////////////////////////////////////////////////////////// | |
222 | // | |
223 | // The tokenize() function is one of the main Spirit API functions. It | |
    //  simplifies using a lexer to tokenize a given input sequence. Its main
225 | // purpose is to use the lexer to tokenize all the input. | |
226 | // | |
227 | // The second version below discards all generated tokens afterwards. | |
228 | // This is useful whenever all the needed functionality has been | |
229 | // implemented directly inside the lexer semantic actions, which are being | |
230 | // executed while the tokens are matched. | |
231 | // | |
232 | // The function takes a pair of iterators spanning the underlying input | |
233 | // stream to scan, the lexer object (built from the token definitions), | |
234 | // and a (optional) functor being called for each of the generated tokens. | |
235 | // | |
236 | // The function returns true if the scanning of the input succeeded (the | |
237 | // given input sequence has been successfully matched by the given token | |
238 | // definitions). | |
239 | // | |
240 | // first, last: The pair of iterators spanning the underlying input | |
241 | // sequence to parse. These iterators must at least | |
    //                  conform to the requirements of the std::input_iterator
243 | // category. | |
244 | // On exit the iterator 'first' will be updated to the | |
245 | // position right after the last successfully matched | |
246 | // token. | |
247 | // lex: The lexer object (encoding the token definitions) to be | |
248 | // used to convert the input sequence into a sequence of | |
249 | // tokens. The LexerExpr type must conform to the | |
250 | // lexer interface described in the corresponding section | |
251 | // of the documentation. | |
252 | // f: A functor (callable object) taking a single argument of | |
253 | // the token type and returning a bool, indicating whether | |
254 | // the tokenization should be canceled. | |
255 | // initial_state: The name of the state the lexer should start matching. | |
256 | // The default value is zero, causing the lexer to start | |
257 | // in its 'INITIAL' state. | |
258 | // | |
259 | /////////////////////////////////////////////////////////////////////////// | |
260 | namespace detail | |
261 | { | |
262 | template <typename Token, typename F> | |
263 | bool tokenize_callback(Token const& t, F f) | |
264 | { | |
265 | return f(t); | |
266 | } | |
267 | ||
268 | template <typename Token, typename Eval> | |
269 | bool tokenize_callback(Token const& t, phoenix::actor<Eval> const& f) | |
270 | { | |
271 | f(t); | |
272 | return true; | |
273 | } | |
274 | ||
275 | template <typename Token> | |
276 | bool tokenize_callback(Token const& t, void (*f)(Token const&)) | |
277 | { | |
278 | f(t); | |
279 | return true; | |
280 | } | |
281 | ||
282 | template <typename Token> | |
283 | bool tokenize_callback(Token const& t, bool (*f)(Token const&)) | |
284 | { | |
285 | return f(t); | |
286 | } | |
287 | } | |
288 | ||
289 | template <typename Iterator, typename Lexer, typename F> | |
290 | inline bool | |
291 | tokenize(Iterator& first, Iterator last, Lexer const& lex, F f | |
292 | , typename Lexer::char_type const* initial_state = 0) | |
293 | { | |
294 | typedef typename Lexer::iterator_type iterator_type; | |
295 | ||
296 | iterator_type iter = lex.begin(first, last, initial_state); | |
297 | iterator_type end = lex.end(); | |
298 | for (/**/; iter != end && token_is_valid(*iter); ++iter) | |
299 | { | |
300 | if (!detail::tokenize_callback(*iter, f)) | |
301 | return false; | |
302 | } | |
303 | return (iter == end) ? true : false; | |
304 | } | |
305 | ||
306 | /////////////////////////////////////////////////////////////////////////// | |
307 | template <typename Iterator, typename Lexer> | |
308 | inline bool | |
309 | tokenize(Iterator& first, Iterator last, Lexer const& lex | |
310 | , typename Lexer::char_type const* initial_state = 0) | |
311 | { | |
312 | typedef typename Lexer::iterator_type iterator_type; | |
313 | ||
314 | iterator_type iter = lex.begin(first, last, initial_state); | |
315 | iterator_type end = lex.end(); | |
316 | ||
317 | while (iter != end && token_is_valid(*iter)) | |
318 | ++iter; | |
319 | ||
320 | return (iter == end) ? true : false; | |
321 | } | |
322 | ||
323 | }}} | |
324 | ||
325 | #endif |