]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // Copyright (c) 2001-2011 Hartmut Kaiser |
2 | // | |
3 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | |
4 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
5 | ||
6 | #if !defined(BOOST_SPIRIT_LEX_LEXER_FUNCTOR_DATA_JUN_10_2009_0954AM) | |
7 | #define BOOST_SPIRIT_LEX_LEXER_FUNCTOR_DATA_JUN_10_2009_0954AM | |
8 | ||
9 | #if defined(_MSC_VER) | |
10 | #pragma once | |
11 | #endif | |
12 | ||
13 | #include <boost/spirit/home/qi/detail/assign_to.hpp> | |
14 | #include <boost/spirit/home/support/detail/lexer/generator.hpp> | |
15 | #include <boost/spirit/home/support/detail/lexer/rules.hpp> | |
16 | #include <boost/spirit/home/support/detail/lexer/state_machine.hpp> | |
17 | #include <boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp> | |
18 | #include <boost/spirit/home/lex/lexer/lexertl/semantic_action_data.hpp> | |
19 | #include <boost/spirit/home/lex/lexer/lexertl/wrap_action.hpp> | |
20 | #include <boost/mpl/bool.hpp> | |
21 | #include <boost/optional.hpp> | |
22 | ||
23 | namespace boost { namespace spirit { namespace lex { namespace lexertl | |
24 | { | |
25 | namespace detail | |
26 | { | |
27 | /////////////////////////////////////////////////////////////////////// | |
// Primary template of the lexer functor data. It is declared only and
// deliberately never defined (there is no default specialization): every
// usable combination of HasActors/HasState/TokenValue is provided by one
// of the partial specializations that follow.
template <
    typename Iterator
  , typename HasActors
  , typename HasState
  , typename TokenValue
>
class data;
31 | ||
32 | /////////////////////////////////////////////////////////////////////// | |
33 | // neither supports state, nor actors | |
34 | template <typename Iterator, typename TokenValue> | |
35 | class data<Iterator, mpl::false_, mpl::false_, TokenValue> | |
36 | { | |
37 | protected: | |
38 | typedef typename | |
39 | boost::detail::iterator_traits<Iterator>::value_type | |
40 | char_type; | |
41 | ||
42 | public: | |
43 | typedef Iterator base_iterator_type; | |
44 | typedef iterator_range<Iterator> token_value_type; | |
45 | typedef token_value_type get_value_type; | |
46 | typedef std::size_t state_type; | |
47 | typedef char_type const* state_name_type; | |
48 | typedef unused_type semantic_actions_type; | |
49 | typedef detail::wrap_action<unused_type, Iterator, data, std::size_t> | |
50 | wrap_action_type; | |
51 | ||
52 | typedef unused_type next_token_functor; | |
53 | typedef unused_type get_state_name_type; | |
54 | ||
55 | // initialize the shared data | |
56 | template <typename IterData> | |
57 | data (IterData const& data_, Iterator& first, Iterator const& last) | |
58 | : first_(first), last_(last) | |
59 | , state_machine_(data_.state_machine_) | |
60 | , rules_(data_.rules_) | |
61 | , bol_(data_.state_machine_.data()._seen_BOL_assertion) {} | |
62 | ||
63 | // The following functions are used by the implementation of the | |
64 | // placeholder '_state'. | |
65 | template <typename Char> | |
66 | void set_state_name (Char const*) | |
67 | { | |
68 | // some (random) versions of gcc instantiate this function even if it's not | |
69 | // needed leading to false static asserts | |
70 | #if !defined(__GNUC__) | |
71 | // If you see a compile time assertion below you're probably | |
72 | // using a token type not supporting lexer states (the 3rd | |
73 | // template parameter of the token is mpl::false_), but your | |
74 | // code uses state changes anyways. | |
75 | BOOST_STATIC_ASSERT(false); | |
76 | #endif | |
77 | } | |
78 | char_type const* get_state_name() const { return rules_.initial(); } | |
79 | std::size_t get_state_id (char_type const*) const | |
80 | { | |
81 | return 0; | |
82 | } | |
83 | ||
84 | // The function get_eoi() is used by the implementation of the | |
85 | // placeholder '_eoi'. | |
86 | Iterator const& get_eoi() const { return last_; } | |
87 | ||
88 | // The function less() is used by the implementation of the support | |
89 | // function lex::less(). Its functionality is equivalent to flex' | |
90 | // function yyless(): it returns an iterator positioned to the | |
91 | // nth input character beyond the current start iterator (i.e. by | |
92 | // assigning the return value to the placeholder '_end' it is | |
93 | // possible to return all but the first n characters of the current | |
94 | // token back to the input stream. | |
95 | // | |
96 | // This function does nothing as long as no semantic actions are | |
97 | // used. | |
98 | Iterator const& less(Iterator const& it, int) | |
99 | { | |
100 | // The following assertion fires most likely because you are | |
101 | // using lexer semantic actions without using the actor_lexer | |
102 | // as the base class for your token definition class. | |
103 | BOOST_ASSERT(false && | |
104 | "Are you using lexer semantic actions without using the " | |
105 | "actor_lexer base?"); | |
106 | return it; | |
107 | } | |
108 | ||
109 | // The function more() is used by the implementation of the support | |
110 | // function lex::more(). Its functionality is equivalent to flex' | |
111 | // function yymore(): it tells the lexer that the next time it | |
112 | // matches a rule, the corresponding token should be appended onto | |
113 | // the current token value rather than replacing it. | |
114 | // | |
115 | // These functions do nothing as long as no semantic actions are | |
116 | // used. | |
117 | void more() | |
118 | { | |
119 | // The following assertion fires most likely because you are | |
120 | // using lexer semantic actions without using the actor_lexer | |
121 | // as the base class for your token definition class. | |
122 | BOOST_ASSERT(false && | |
123 | "Are you using lexer semantic actions without using the " | |
124 | "actor_lexer base?"); | |
125 | } | |
126 | bool adjust_start() { return false; } | |
127 | void revert_adjust_start() {} | |
128 | ||
129 | // The function lookahead() is used by the implementation of the | |
130 | // support function lex::lookahead. It can be used to implement | |
131 | // lookahead for lexer engines not supporting constructs like flex' | |
132 | // a/b (match a, but only when followed by b): | |
133 | // | |
134 | // This function does nothing as long as no semantic actions are | |
135 | // used. | |
136 | bool lookahead(std::size_t, std::size_t /*state*/ = std::size_t(~0)) | |
137 | { | |
138 | // The following assertion fires most likely because you are | |
139 | // using lexer semantic actions without using the actor_lexer | |
140 | // as the base class for your token definition class. | |
141 | BOOST_ASSERT(false && | |
142 | "Are you using lexer semantic actions without using the " | |
143 | "actor_lexer base?"); | |
144 | return false; | |
145 | } | |
146 | ||
147 | // the functions next, invoke_actions, and get_state are used by | |
148 | // the functor implementation below | |
149 | ||
150 | // The function next() tries to match the next token from the | |
151 | // underlying input sequence. | |
152 | std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol) | |
153 | { | |
154 | prev_bol = bol_; | |
155 | ||
156 | typedef basic_iterator_tokeniser<Iterator> tokenizer; | |
157 | return tokenizer::next(state_machine_, bol_, end, last_ | |
158 | , unique_id); | |
159 | } | |
160 | ||
161 | // nothing to invoke, so this is empty | |
162 | BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t | |
163 | , std::size_t, std::size_t, Iterator const&) | |
164 | { | |
165 | return pass_flags::pass_normal; // always accept | |
166 | } | |
167 | ||
168 | std::size_t get_state() const { return 0; } | |
169 | void set_state(std::size_t) {} | |
170 | ||
171 | void set_end(Iterator const& /*it*/) {} | |
172 | ||
173 | Iterator& get_first() { return first_; } | |
174 | Iterator const& get_first() const { return first_; } | |
175 | Iterator const& get_last() const { return last_; } | |
176 | ||
177 | iterator_range<Iterator> get_value() const | |
178 | { | |
179 | return iterator_range<Iterator>(first_, last_); | |
180 | } | |
181 | bool has_value() const { return false; } | |
182 | void reset_value() {} | |
183 | ||
184 | void reset_bol(bool bol) { bol_ = bol; } | |
185 | ||
186 | protected: | |
187 | Iterator& first_; | |
188 | Iterator last_; | |
189 | ||
190 | boost::lexer::basic_state_machine<char_type> const& state_machine_; | |
191 | boost::lexer::basic_rules<char_type> const& rules_; | |
192 | ||
193 | bool bol_; // helper storing whether last character was \n | |
194 | ||
195 | private: | |
196 | // silence MSVC warning C4512: assignment operator could not be generated | |
197 | data& operator= (data const&); | |
198 | }; | |
199 | ||
200 | /////////////////////////////////////////////////////////////////////// | |
201 | // doesn't support lexer semantic actions, but supports state | |
202 | template <typename Iterator, typename TokenValue> | |
203 | class data<Iterator, mpl::false_, mpl::true_, TokenValue> | |
204 | : public data<Iterator, mpl::false_, mpl::false_, TokenValue> | |
205 | { | |
206 | protected: | |
207 | typedef data<Iterator, mpl::false_, mpl::false_, TokenValue> base_type; | |
208 | typedef typename base_type::char_type char_type; | |
209 | ||
210 | public: | |
211 | typedef Iterator base_iterator_type; | |
212 | typedef iterator_range<Iterator> token_value_type; | |
213 | typedef token_value_type get_value_type; | |
214 | typedef typename base_type::state_type state_type; | |
215 | typedef typename base_type::state_name_type state_name_type; | |
216 | typedef typename base_type::semantic_actions_type | |
217 | semantic_actions_type; | |
218 | ||
219 | // initialize the shared data | |
220 | template <typename IterData> | |
221 | data (IterData const& data_, Iterator& first, Iterator const& last) | |
222 | : base_type(data_, first, last) | |
223 | , state_(0) {} | |
224 | ||
225 | // The following functions are used by the implementation of the | |
226 | // placeholder '_state'. | |
227 | void set_state_name (char_type const* new_state) | |
228 | { | |
229 | std::size_t state_id = this->rules_.state(new_state); | |
230 | ||
231 | // If the following assertion fires you've probably been using | |
232 | // a lexer state name which was not defined in your token | |
233 | // definition. | |
234 | BOOST_ASSERT(state_id != boost::lexer::npos); | |
235 | ||
236 | if (state_id != boost::lexer::npos) | |
237 | state_ = state_id; | |
238 | } | |
239 | char_type const* get_state_name() const | |
240 | { | |
241 | return this->rules_.state(state_); | |
242 | } | |
243 | std::size_t get_state_id (char_type const* state) const | |
244 | { | |
245 | return this->rules_.state(state); | |
246 | } | |
247 | ||
248 | // the functions next() and get_state() are used by the functor | |
249 | // implementation below | |
250 | ||
251 | // The function next() tries to match the next token from the | |
252 | // underlying input sequence. | |
253 | std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol) | |
254 | { | |
255 | prev_bol = this->bol_; | |
256 | ||
257 | typedef basic_iterator_tokeniser<Iterator> tokenizer; | |
258 | return tokenizer::next(this->state_machine_, state_, | |
259 | this->bol_, end, this->get_eoi(), unique_id); | |
260 | } | |
261 | ||
262 | std::size_t& get_state() { return state_; } | |
263 | void set_state(std::size_t state) { state_ = state; } | |
264 | ||
265 | protected: | |
266 | std::size_t state_; | |
267 | ||
268 | private: | |
269 | // silence MSVC warning C4512: assignment operator could not be generated | |
270 | data& operator= (data const&); | |
271 | }; | |
272 | ||
273 | /////////////////////////////////////////////////////////////////////// | |
274 | // does support lexer semantic actions, may support state | |
275 | template <typename Iterator, typename HasState, typename TokenValue> | |
276 | class data<Iterator, mpl::true_, HasState, TokenValue> | |
277 | : public data<Iterator, mpl::false_, HasState, TokenValue> | |
278 | { | |
279 | public: | |
280 | typedef semantic_actions<Iterator, HasState, data> | |
281 | semantic_actions_type; | |
282 | ||
283 | protected: | |
284 | typedef data<Iterator, mpl::false_, HasState, TokenValue> base_type; | |
285 | typedef typename base_type::char_type char_type; | |
286 | typedef typename semantic_actions_type::functor_wrapper_type | |
287 | functor_wrapper_type; | |
288 | ||
289 | public: | |
290 | typedef Iterator base_iterator_type; | |
291 | typedef TokenValue token_value_type; | |
292 | typedef TokenValue const& get_value_type; | |
293 | typedef typename base_type::state_type state_type; | |
294 | typedef typename base_type::state_name_type state_name_type; | |
295 | ||
296 | typedef detail::wrap_action<functor_wrapper_type | |
297 | , Iterator, data, std::size_t> wrap_action_type; | |
298 | ||
299 | template <typename IterData> | |
300 | data (IterData const& data_, Iterator& first, Iterator const& last) | |
301 | : base_type(data_, first, last) | |
302 | , actions_(data_.actions_), hold_() | |
303 | , value_(iterator_range<Iterator>(last, last)) | |
304 | , has_value_(false), has_hold_(false) {} | |
305 | ||
306 | // invoke attached semantic actions, if defined | |
307 | BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state | |
308 | , std::size_t& id, std::size_t unique_id, Iterator& end) | |
309 | { | |
310 | return actions_.invoke_actions(state, id, unique_id, end, *this); | |
311 | } | |
312 | ||
313 | // The function less() is used by the implementation of the support | |
314 | // function lex::less(). Its functionality is equivalent to flex' | |
315 | // function yyless(): it returns an iterator positioned to the | |
316 | // nth input character beyond the current start iterator (i.e. by | |
317 | // assigning the return value to the placeholder '_end' it is | |
318 | // possible to return all but the first n characters of the current | |
319 | // token back to the input stream). | |
320 | Iterator const& less(Iterator& it, int n) | |
321 | { | |
322 | it = this->get_first(); | |
323 | std::advance(it, n); | |
324 | return it; | |
325 | } | |
326 | ||
327 | // The function more() is used by the implementation of the support | |
328 | // function lex::more(). Its functionality is equivalent to flex' | |
329 | // function yymore(): it tells the lexer that the next time it | |
330 | // matches a rule, the corresponding token should be appended onto | |
331 | // the current token value rather than replacing it. | |
332 | void more() | |
333 | { | |
334 | hold_ = this->get_first(); | |
335 | has_hold_ = true; | |
336 | } | |
337 | ||
338 | // The function lookahead() is used by the implementation of the | |
339 | // support function lex::lookahead. It can be used to implement | |
340 | // lookahead for lexer engines not supporting constructs like flex' | |
341 | // a/b (match a, but only when followed by b) | |
342 | bool lookahead(std::size_t id, std::size_t state = std::size_t(~0)) | |
343 | { | |
344 | Iterator end = end_; | |
345 | std::size_t unique_id = boost::lexer::npos; | |
346 | bool bol = this->bol_; | |
347 | ||
348 | if (std::size_t(~0) == state) | |
349 | state = this->state_; | |
350 | ||
351 | typedef basic_iterator_tokeniser<Iterator> tokenizer; | |
352 | return id == tokenizer::next(this->state_machine_, state, | |
353 | bol, end, this->get_eoi(), unique_id); | |
354 | } | |
355 | ||
356 | // The adjust_start() and revert_adjust_start() are helper | |
357 | // functions needed to implement the functionality required for | |
358 | // lex::more(). It is called from the functor body below. | |
359 | bool adjust_start() | |
360 | { | |
361 | if (!has_hold_) | |
362 | return false; | |
363 | ||
364 | std::swap(this->get_first(), hold_); | |
365 | has_hold_ = false; | |
366 | return true; | |
367 | } | |
368 | void revert_adjust_start() | |
369 | { | |
370 | // this will be called only if adjust_start above returned true | |
371 | std::swap(this->get_first(), hold_); | |
372 | has_hold_ = true; | |
373 | } | |
374 | ||
375 | TokenValue const& get_value() const | |
376 | { | |
377 | if (!has_value_) { | |
378 | value_ = iterator_range<Iterator>(this->get_first(), end_); | |
379 | has_value_ = true; | |
380 | } | |
381 | return value_; | |
382 | } | |
383 | template <typename Value> | |
384 | void set_value(Value const& val) | |
385 | { | |
386 | value_ = val; | |
387 | has_value_ = true; | |
388 | } | |
389 | void set_end(Iterator const& it) | |
390 | { | |
391 | end_ = it; | |
392 | } | |
393 | bool has_value() const { return has_value_; } | |
394 | void reset_value() { has_value_ = false; } | |
395 | ||
396 | protected: | |
397 | semantic_actions_type const& actions_; | |
398 | Iterator hold_; // iterator needed to support lex::more() | |
399 | Iterator end_; // iterator pointing to end of matched token | |
400 | mutable TokenValue value_; // token value to use | |
401 | mutable bool has_value_; // 'true' if value_ is valid | |
402 | bool has_hold_; // 'true' if hold_ is valid | |
403 | ||
404 | private: | |
405 | // silence MSVC warning C4512: assignment operator could not be generated | |
406 | data& operator= (data const&); | |
407 | }; | |
408 | ||
409 | /////////////////////////////////////////////////////////////////////// | |
410 | // does support lexer semantic actions, may support state, is used for | |
411 | // position_token exposing exactly one type | |
412 | template <typename Iterator, typename HasState, typename TokenValue> | |
413 | class data<Iterator, mpl::true_, HasState, boost::optional<TokenValue> > | |
414 | : public data<Iterator, mpl::false_, HasState, TokenValue> | |
415 | { | |
416 | public: | |
417 | typedef semantic_actions<Iterator, HasState, data> | |
418 | semantic_actions_type; | |
419 | ||
420 | protected: | |
421 | typedef data<Iterator, mpl::false_, HasState, TokenValue> base_type; | |
422 | typedef typename base_type::char_type char_type; | |
423 | typedef typename semantic_actions_type::functor_wrapper_type | |
424 | functor_wrapper_type; | |
425 | ||
426 | public: | |
427 | typedef Iterator base_iterator_type; | |
428 | typedef boost::optional<TokenValue> token_value_type; | |
429 | typedef boost::optional<TokenValue> const& get_value_type; | |
430 | typedef typename base_type::state_type state_type; | |
431 | typedef typename base_type::state_name_type state_name_type; | |
432 | ||
433 | typedef detail::wrap_action<functor_wrapper_type | |
434 | , Iterator, data, std::size_t> wrap_action_type; | |
435 | ||
436 | template <typename IterData> | |
437 | data (IterData const& data_, Iterator& first, Iterator const& last) | |
438 | : base_type(data_, first, last) | |
439 | , actions_(data_.actions_), hold_() | |
440 | , has_value_(false), has_hold_(false) | |
441 | { | |
442 | spirit::traits::assign_to(first, last, value_); | |
443 | has_value_ = true; | |
444 | } | |
445 | ||
446 | // invoke attached semantic actions, if defined | |
447 | BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state | |
448 | , std::size_t& id, std::size_t unique_id, Iterator& end) | |
449 | { | |
450 | return actions_.invoke_actions(state, id, unique_id, end, *this); | |
451 | } | |
452 | ||
453 | // The function less() is used by the implementation of the support | |
454 | // function lex::less(). Its functionality is equivalent to flex' | |
455 | // function yyless(): it returns an iterator positioned to the | |
456 | // nth input character beyond the current start iterator (i.e. by | |
457 | // assigning the return value to the placeholder '_end' it is | |
458 | // possible to return all but the first n characters of the current | |
459 | // token back to the input stream). | |
460 | Iterator const& less(Iterator& it, int n) | |
461 | { | |
462 | it = this->get_first(); | |
463 | std::advance(it, n); | |
464 | return it; | |
465 | } | |
466 | ||
467 | // The function more() is used by the implementation of the support | |
468 | // function lex::more(). Its functionality is equivalent to flex' | |
469 | // function yymore(): it tells the lexer that the next time it | |
470 | // matches a rule, the corresponding token should be appended onto | |
471 | // the current token value rather than replacing it. | |
472 | void more() | |
473 | { | |
474 | hold_ = this->get_first(); | |
475 | has_hold_ = true; | |
476 | } | |
477 | ||
478 | // The function lookahead() is used by the implementation of the | |
479 | // support function lex::lookahead. It can be used to implement | |
480 | // lookahead for lexer engines not supporting constructs like flex' | |
481 | // a/b (match a, but only when followed by b) | |
482 | bool lookahead(std::size_t id, std::size_t state = std::size_t(~0)) | |
483 | { | |
484 | Iterator end = end_; | |
485 | std::size_t unique_id = boost::lexer::npos; | |
486 | bool bol = this->bol_; | |
487 | ||
488 | if (std::size_t(~0) == state) | |
489 | state = this->state_; | |
490 | ||
491 | typedef basic_iterator_tokeniser<Iterator> tokenizer; | |
492 | return id == tokenizer::next(this->state_machine_, state, | |
493 | bol, end, this->get_eoi(), unique_id); | |
494 | } | |
495 | ||
496 | // The adjust_start() and revert_adjust_start() are helper | |
497 | // functions needed to implement the functionality required for | |
498 | // lex::more(). It is called from the functor body below. | |
499 | bool adjust_start() | |
500 | { | |
501 | if (!has_hold_) | |
502 | return false; | |
503 | ||
504 | std::swap(this->get_first(), hold_); | |
505 | has_hold_ = false; | |
506 | return true; | |
507 | } | |
508 | void revert_adjust_start() | |
509 | { | |
510 | // this will be called only if adjust_start above returned true | |
511 | std::swap(this->get_first(), hold_); | |
512 | has_hold_ = true; | |
513 | } | |
514 | ||
515 | token_value_type const& get_value() const | |
516 | { | |
517 | if (!has_value_) { | |
518 | spirit::traits::assign_to(this->get_first(), end_, value_); | |
519 | has_value_ = true; | |
520 | } | |
521 | return value_; | |
522 | } | |
523 | template <typename Value> | |
524 | void set_value(Value const& val) | |
525 | { | |
526 | value_ = val; | |
527 | has_value_ = true; | |
528 | } | |
529 | void set_end(Iterator const& it) | |
530 | { | |
531 | end_ = it; | |
532 | } | |
533 | bool has_value() const { return has_value_; } | |
534 | void reset_value() { has_value_ = false; } | |
535 | ||
536 | protected: | |
537 | semantic_actions_type const& actions_; | |
538 | Iterator hold_; // iterator needed to support lex::more() | |
539 | Iterator end_; // iterator pointing to end of matched token | |
540 | mutable token_value_type value_; // token value to use | |
541 | mutable bool has_value_; // 'true' if value_ is valid | |
542 | bool has_hold_; // 'true' if hold_ is valid | |
543 | ||
544 | private: | |
545 | // silence MSVC warning C4512: assignment operator could not be generated | |
546 | data& operator= (data const&); | |
547 | }; | |
548 | } | |
549 | }}}} | |
550 | ||
551 | #endif | |
552 |