1 // Copyright (c) 2001-2011 Hartmut Kaiser
3 // Distributed under the Boost Software License, Version 1.0. (See accompanying
4 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 #if !defined(BOOST_SPIRIT_LEX_LEXER_FUNCTOR_DATA_JUN_10_2009_0954AM)
7 #define BOOST_SPIRIT_LEX_LEXER_FUNCTOR_DATA_JUN_10_2009_0954AM
13 #include <boost/spirit/home/qi/detail/assign_to.hpp>
14 #include <boost/spirit/home/support/detail/lexer/generator.hpp>
15 #include <boost/spirit/home/support/detail/lexer/rules.hpp>
16 #include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
17 #include <boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp>
18 #include <boost/spirit/home/lex/lexer/lexertl/semantic_action_data.hpp>
19 #include <boost/spirit/home/lex/lexer/lexertl/wrap_action.hpp>
20 #include <boost/mpl/bool.hpp>
21 #include <boost/optional.hpp>
23 namespace boost { namespace spirit { namespace lex { namespace lexertl
27 ///////////////////////////////////////////////////////////////////////
// Primary template of the lexer functor's shared data. It is only declared
// here; every usable definition is one of the partial specializations that
// follow, chosen by the HasActors / HasState (and TokenValue) arguments.
28 template <typename Iterator, typename HasActors, typename HasState
29 , typename TokenValue>
30 class data; // no default specialization
32 ///////////////////////////////////////////////////////////////////////
33 // neither supports state, nor actors
// Specialization chosen when both HasActors and HasState are mpl::false_.
// It is constructed from the iterators delimiting the input and keeps
// references to the lexer state machine and rules plus the beginning-of-line
// flag. Every entry point that only makes sense with semantic actions
// (less, more, lookahead) asserts, and the state related functions are
// trivial.
34 template <typename Iterator, typename TokenValue>
35 class data<Iterator, mpl::false_, mpl::false_, TokenValue>
39 boost::detail::iterator_traits<Iterator>::value_type
// The token value exposed by this specialization is a plain iterator range;
// the action related types are all unused_type.
43 typedef Iterator base_iterator_type;
44 typedef iterator_range<Iterator> token_value_type;
45 typedef token_value_type get_value_type;
46 typedef std::size_t state_type;
47 typedef char_type const* state_name_type;
48 typedef unused_type semantic_actions_type;
49 typedef detail::wrap_action<unused_type, Iterator, data, std::size_t>
52 typedef unused_type next_token_functor;
53 typedef unused_type get_state_name_type;
55 // initialize the shared data
// data_ supplies the state machine and the rules (bound by reference, see
// the member declarations), first/last delimit the input. bol_ starts out
// as the state machine's _seen_BOL_assertion flag.
56 template <typename IterData>
57 data (IterData const& data_, Iterator& first, Iterator const& last)
58 : first_(first), last_(last)
59 , state_machine_(data_.state_machine_)
60 , rules_(data_.rules_)
61 , bol_(data_.state_machine_.data()._seen_BOL_assertion) {}
63 // The following functions are used by the implementation of the
64 // placeholder '_state'.
// Setting a state name is a usage error for this specialization: on
// compilers other than gcc, instantiating this function trips the static
// assertion below.
65 template <typename Char>
66 void set_state_name (Char const*)
68 // some (random) versions of gcc instantiate this function even if it's not
69 // needed leading to false static asserts
70 #if !defined(__GNUC__)
71 // If you see a compile time assertion below you're probably
72 // using a token type not supporting lexer states (the 3rd
73 // template parameter of the token is mpl::false_), but your
74 // code uses state changes anyways.
75 BOOST_STATIC_ASSERT(false);
// Without state support the reported state name is always the rules'
// initial state.
78 char_type const* get_state_name() const { return rules_.initial(); }
79 std::size_t get_state_id (char_type const*) const
84 // The function get_eoi() is used by the implementation of the
85 // placeholder '_eoi'.
// Returns the end-of-input iterator (last_) passed to the constructor.
86 Iterator const& get_eoi() const { return last_; }
88 // The function less() is used by the implementation of the support
89 // function lex::less(). Its functionality is equivalent to flex'
90 // function yyless(): it returns an iterator positioned to the
91 // nth input character beyond the current start iterator (i.e. by
92 // assigning the return value to the placeholder '_end' it is
93 // possible to return all but the first n characters of the current
94 // token back to the input stream).
96 // This function does nothing as long as no semantic actions are used.
98 Iterator const& less(Iterator const& it, int)
100 // The following assertion fires most likely because you are
101 // using lexer semantic actions without using the actor_lexer
102 // as the base class for your token definition class.
103 BOOST_ASSERT(false &&
104 "Are you using lexer semantic actions without using the "
105 "actor_lexer base?");
109 // The function more() is used by the implementation of the support
110 // function lex::more(). Its functionality is equivalent to flex'
111 // function yymore(): it tells the lexer that the next time it
112 // matches a rule, the corresponding token should be appended onto
113 // the current token value rather than replacing it.
115 // These functions do nothing as long as no semantic actions are used.
119 // The following assertion fires most likely because you are
120 // using lexer semantic actions without using the actor_lexer
121 // as the base class for your token definition class.
122 BOOST_ASSERT(false &&
123 "Are you using lexer semantic actions without using the "
124 "actor_lexer base?");
// In this specialization adjust_start() always reports false and
// revert_adjust_start() has nothing to undo.
126 bool adjust_start() { return false; }
127 void revert_adjust_start() {}
129 // The function lookahead() is used by the implementation of the
130 // support function lex::lookahead. It can be used to implement
131 // lookahead for lexer engines not supporting constructs like flex'
132 // a/b (match a, but only when followed by b):
134 // This function does nothing as long as no semantic actions are used.
136 bool lookahead(std::size_t, std::size_t /*state*/ = std::size_t(~0))
138 // The following assertion fires most likely because you are
139 // using lexer semantic actions without using the actor_lexer
140 // as the base class for your token definition class.
141 BOOST_ASSERT(false &&
142 "Are you using lexer semantic actions without using the "
143 "actor_lexer base?");
147 // the functions next, invoke_actions, and get_state are used by
148 // the functor implementation below
150 // The function next() tries to match the next token from the
151 // underlying input sequence.
// The match is delegated to basic_iterator_tokeniser<Iterator>::next, which
// is handed the state machine, the bol_ flag, end and last_.
152 std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol)
156 typedef basic_iterator_tokeniser<Iterator> tokenizer;
157 return tokenizer::next(state_machine_, bol_, end, last_
161 // nothing to invoke, so this is empty
162 BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t
163 , std::size_t, std::size_t, Iterator const&)
165 return pass_flags::pass_normal; // always accept
// No lexer state is tracked here: the state id is always 0 and requests to
// change it are ignored.
168 std::size_t get_state() const { return 0; }
169 void set_state(std::size_t) {}
// The end iterator passed in is ignored by this specialization.
171 void set_end(Iterator const& /*it*/) {}
// Accessors for the iterators supplied to the constructor.
173 Iterator& get_first() { return first_; }
174 Iterator const& get_first() const { return first_; }
175 Iterator const& get_last() const { return last_; }
// The value is built on demand as the range [first_, last_).
177 iterator_range<Iterator> get_value() const
179 return iterator_range<Iterator>(first_, last_);
// No explicitly assigned value is ever stored, so has_value() is always
// false and reset_value() has nothing to clear.
181 bool has_value() const { return false; }
182 void reset_value() {}
// Overwrites the stored beginning-of-line flag.
184 void reset_bol(bool bol) { bol_ = bol; }
// State machine and rules are held by const reference; they are bound in the
// constructor to the corresponding members of its data_ argument.
190 boost::lexer::basic_state_machine<char_type> const& state_machine_;
191 boost::lexer::basic_rules<char_type> const& rules_;
193 bool bol_; // helper storing whether last character was \n
196 // silence MSVC warning C4512: assignment operator could not be generated
// Only a declaration is given on this line.
197 data& operator= (data const&);
200 ///////////////////////////////////////////////////////////////////////
201 // doesn't support lexer semantic actions, but supports state
// Specialization chosen when HasActors is mpl::false_ and HasState is
// mpl::true_. It derives from the stateless specialization and replaces the
// state related functions and next() so that the current state id (state_)
// takes part in tokenizing.
202 template <typename Iterator, typename TokenValue>
203 class data<Iterator, mpl::false_, mpl::true_, TokenValue>
204 : public data<Iterator, mpl::false_, mpl::false_, TokenValue>
207 typedef data<Iterator, mpl::false_, mpl::false_, TokenValue> base_type;
208 typedef typename base_type::char_type char_type;
// The state and action related types are taken over from the base class
// unchanged.
211 typedef Iterator base_iterator_type;
212 typedef iterator_range<Iterator> token_value_type;
213 typedef token_value_type get_value_type;
214 typedef typename base_type::state_type state_type;
215 typedef typename base_type::state_name_type state_name_type;
216 typedef typename base_type::semantic_actions_type
217 semantic_actions_type;
219 // initialize the shared data
// All three arguments are forwarded to the base class constructor.
220 template <typename IterData>
221 data (IterData const& data_, Iterator& first, Iterator const& last)
222 : base_type(data_, first, last)
225 // The following functions are used by the implementation of the
226 // placeholder '_state'.
// The state name is translated to its numeric id through the rules object;
// an id of boost::lexer::npos means the name is unknown, which is asserted
// against and additionally checked at run time.
227 void set_state_name (char_type const* new_state)
229 std::size_t state_id = this->rules_.state(new_state);
231 // If the following assertion fires you've probably been using
232 // a lexer state name which was not defined in your token
// definition.
234 BOOST_ASSERT(state_id != boost::lexer::npos);
236 if (state_id != boost::lexer::npos)
// Maps the current state id (state_) back to its name via the rules object.
239 char_type const* get_state_name() const
241 return this->rules_.state(state_);
// Maps a state name to its numeric id via the rules object.
243 std::size_t get_state_id (char_type const* state) const
245 return this->rules_.state(state);
248 // the functions next() and get_state() are used by the functor
249 // implementation below
251 // The function next() tries to match the next token from the
252 // underlying input sequence.
// prev_bol receives the beginning-of-line flag as it was before the match;
// the tokenizer is then called with the current state_ and the bol_ member.
253 std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol)
255 prev_bol = this->bol_;
257 typedef basic_iterator_tokeniser<Iterator> tokenizer;
258 return tokenizer::next(this->state_machine_, state_,
259 this->bol_, end, this->get_eoi(), unique_id);
// get_state() hands out a mutable reference to the stored state id, so
// callers can change the state through it as well as through set_state().
262 std::size_t& get_state() { return state_; }
263 void set_state(std::size_t state) { state_ = state; }
269 // silence MSVC warning C4512: assignment operator could not be generated
// Only a declaration is given on this line.
270 data& operator= (data const&);
273 ///////////////////////////////////////////////////////////////////////
274 // does support lexer semantic actions, may support state
// Specialization chosen when HasActors is mpl::true_. It derives from the
// action-less specialization for the same HasState and adds the semantic
// actions, a stored token value of type TokenValue, and the iterators
// needed by lex::less(), lex::more() and lex::lookahead().
275 template <typename Iterator, typename HasState, typename TokenValue>
276 class data<Iterator, mpl::true_, HasState, TokenValue>
277 : public data<Iterator, mpl::false_, HasState, TokenValue>
280 typedef semantic_actions<Iterator, HasState, data>
281 semantic_actions_type;
284 typedef data<Iterator, mpl::false_, HasState, TokenValue> base_type;
285 typedef typename base_type::char_type char_type;
286 typedef typename semantic_actions_type::functor_wrapper_type
287 functor_wrapper_type;
// Unlike the action-less specializations, the token value type is the
// user supplied TokenValue and get_value() hands it out by const reference.
290 typedef Iterator base_iterator_type;
291 typedef TokenValue token_value_type;
292 typedef TokenValue const& get_value_type;
293 typedef typename base_type::state_type state_type;
294 typedef typename base_type::state_name_type state_name_type;
296 typedef detail::wrap_action<functor_wrapper_type
297 , Iterator, data, std::size_t> wrap_action_type;
// Forwards the arguments to the base class, binds actions_ to
// data_.actions_, default-constructs hold_, initializes value_ from the
// empty range [last, last) and clears both flags. end_ does not appear in
// this initializer list.
299 template <typename IterData>
300 data (IterData const& data_, Iterator& first, Iterator const& last)
301 : base_type(data_, first, last)
302 , actions_(data_.actions_), hold_()
303 , value_(iterator_range<Iterator>(last, last))
304 , has_value_(false), has_hold_(false) {}
306 // invoke attached semantic actions, if defined
// The call is forwarded to actions_ with *this as the context argument; id
// and end are taken by non-const reference.
307 BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state
308 , std::size_t& id, std::size_t unique_id, Iterator& end)
310 return actions_.invoke_actions(state, id, unique_id, end, *this);
313 // The function less() is used by the implementation of the support
314 // function lex::less(). Its functionality is equivalent to flex'
315 // function yyless(): it returns an iterator positioned to the
316 // nth input character beyond the current start iterator (i.e. by
317 // assigning the return value to the placeholder '_end' it is
318 // possible to return all but the first n characters of the current
319 // token back to the input stream).
// 'it' is first reset to the start of the current token.
320 Iterator const& less(Iterator& it, int n)
322 it = this->get_first();
327 // The function more() is used by the implementation of the support
328 // function lex::more(). Its functionality is equivalent to flex'
329 // function yymore(): it tells the lexer that the next time it
330 // matches a rule, the corresponding token should be appended onto
331 // the current token value rather than replacing it.
// The current token start is saved in hold_.
334 hold_ = this->get_first();
338 // The function lookahead() is used by the implementation of the
339 // support function lex::lookahead. It can be used to implement
340 // lookahead for lexer engines not supporting constructs like flex'
341 // a/b (match a, but only when followed by b)
// Returns whether the id reported by tokenizer::next equals the requested
// id. The tokenizer is given a local copy of bol_ and the by-value state;
// when state is left at its default std::size_t(~0), the current state_
// is used instead.
342 bool lookahead(std::size_t id, std::size_t state = std::size_t(~0))
345 std::size_t unique_id = boost::lexer::npos;
346 bool bol = this->bol_;
348 if (std::size_t(~0) == state)
349 state = this->state_;
351 typedef basic_iterator_tokeniser<Iterator> tokenizer;
352 return id == tokenizer::next(this->state_machine_, state,
353 bol, end, this->get_eoi(), unique_id);
356 // The adjust_start() and revert_adjust_start() are helper
357 // functions needed to implement the functionality required for
358 // lex::more(). It is called from the functor body below.
// Both helpers exchange the current token start with hold_, so the second
// swap undoes the first.
364 std::swap(this->get_first(), hold_);
368 void revert_adjust_start()
370 // this will be called only if adjust_start above returned true
371 std::swap(this->get_first(), hold_);
// value_ and has_value_ are declared mutable, which is what allows this
// const function to assign value_ from the matched range
// [get_first(), end_).
375 TokenValue const& get_value() const
378 value_ = iterator_range<Iterator>(this->get_first(), end_);
383 template <typename Value>
384 void set_value(Value const& val)
389 void set_end(Iterator const& it)
// has_value_ records whether value_ is valid (see its declaration);
// reset_value() only clears the flag and leaves value_ untouched.
393 bool has_value() const { return has_value_; }
394 void reset_value() { has_value_ = false; }
// The semantic actions are held by const reference, bound in the
// constructor to data_.actions_.
397 semantic_actions_type const& actions_;
398 Iterator hold_; // iterator needed to support lex::more()
399 Iterator end_; // iterator pointing to end of matched token
400 mutable TokenValue value_; // token value to use
401 mutable bool has_value_; // 'true' if value_ is valid
402 bool has_hold_; // 'true' if hold_ is valid
405 // silence MSVC warning C4512: assignment operator could not be generated
// Only a declaration is given on this line.
406 data& operator= (data const&);
409 ///////////////////////////////////////////////////////////////////////
410 // does support lexer semantic actions, may support state, is used for
411 // position_token exposing exactly one type
// Specialization chosen when HasActors is mpl::true_ and the token value is
// a boost::optional<TokenValue>. Its members mirror those of the
// specialization for a plain TokenValue, except that the stored value is
// a boost::optional<TokenValue> filled in through
// spirit::traits::assign_to.
412 template <typename Iterator, typename HasState, typename TokenValue>
413 class data<Iterator, mpl::true_, HasState, boost::optional<TokenValue> >
414 : public data<Iterator, mpl::false_, HasState, TokenValue>
417 typedef semantic_actions<Iterator, HasState, data>
418 semantic_actions_type;
421 typedef data<Iterator, mpl::false_, HasState, TokenValue> base_type;
422 typedef typename base_type::char_type char_type;
423 typedef typename semantic_actions_type::functor_wrapper_type
424 functor_wrapper_type;
// The token value type is the optional itself and get_value() hands it out
// by const reference.
427 typedef Iterator base_iterator_type;
428 typedef boost::optional<TokenValue> token_value_type;
429 typedef boost::optional<TokenValue> const& get_value_type;
430 typedef typename base_type::state_type state_type;
431 typedef typename base_type::state_name_type state_name_type;
433 typedef detail::wrap_action<functor_wrapper_type
434 , Iterator, data, std::size_t> wrap_action_type;
// Forwards the arguments to the base class, binds actions_ to
// data_.actions_, default-constructs hold_ and clears both flags. value_ is
// then assigned from the range [first, last) through assign_to. end_ does
// not appear in this initializer list.
436 template <typename IterData>
437 data (IterData const& data_, Iterator& first, Iterator const& last)
438 : base_type(data_, first, last)
439 , actions_(data_.actions_), hold_()
440 , has_value_(false), has_hold_(false)
442 spirit::traits::assign_to(first, last, value_);
446 // invoke attached semantic actions, if defined
// The call is forwarded to actions_ with *this as the context argument; id
// and end are taken by non-const reference.
447 BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state
448 , std::size_t& id, std::size_t unique_id, Iterator& end)
450 return actions_.invoke_actions(state, id, unique_id, end, *this);
453 // The function less() is used by the implementation of the support
454 // function lex::less(). Its functionality is equivalent to flex'
455 // function yyless(): it returns an iterator positioned to the
456 // nth input character beyond the current start iterator (i.e. by
457 // assigning the return value to the placeholder '_end' it is
458 // possible to return all but the first n characters of the current
459 // token back to the input stream).
// 'it' is first reset to the start of the current token.
460 Iterator const& less(Iterator& it, int n)
462 it = this->get_first();
467 // The function more() is used by the implementation of the support
468 // function lex::more(). Its functionality is equivalent to flex'
469 // function yymore(): it tells the lexer that the next time it
470 // matches a rule, the corresponding token should be appended onto
471 // the current token value rather than replacing it.
// The current token start is saved in hold_.
474 hold_ = this->get_first();
478 // The function lookahead() is used by the implementation of the
479 // support function lex::lookahead. It can be used to implement
480 // lookahead for lexer engines not supporting constructs like flex'
481 // a/b (match a, but only when followed by b)
// Returns whether the id reported by tokenizer::next equals the requested
// id. The tokenizer is given a local copy of bol_ and the by-value state;
// when state is left at its default std::size_t(~0), the current state_
// is used instead.
482 bool lookahead(std::size_t id, std::size_t state = std::size_t(~0))
485 std::size_t unique_id = boost::lexer::npos;
486 bool bol = this->bol_;
488 if (std::size_t(~0) == state)
489 state = this->state_;
491 typedef basic_iterator_tokeniser<Iterator> tokenizer;
492 return id == tokenizer::next(this->state_machine_, state,
493 bol, end, this->get_eoi(), unique_id);
496 // The adjust_start() and revert_adjust_start() are helper
497 // functions needed to implement the functionality required for
498 // lex::more(). It is called from the functor body below.
// Both helpers exchange the current token start with hold_, so the second
// swap undoes the first.
504 std::swap(this->get_first(), hold_);
508 void revert_adjust_start()
510 // this will be called only if adjust_start above returned true
511 std::swap(this->get_first(), hold_);
// value_ and has_value_ are declared mutable, which is what allows this
// const function to fill value_ from the matched range
// [get_first(), end_) through assign_to.
515 token_value_type const& get_value() const
518 spirit::traits::assign_to(this->get_first(), end_, value_);
523 template <typename Value>
524 void set_value(Value const& val)
529 void set_end(Iterator const& it)
// has_value_ records whether value_ is valid (see its declaration);
// reset_value() only clears the flag and leaves value_ untouched.
533 bool has_value() const { return has_value_; }
534 void reset_value() { has_value_ = false; }
// The semantic actions are held by const reference, bound in the
// constructor to data_.actions_.
537 semantic_actions_type const& actions_;
538 Iterator hold_; // iterator needed to support lex::more()
539 Iterator end_; // iterator pointing to end of matched token
540 mutable token_value_type value_; // token value to use
541 mutable bool has_value_; // 'true' if value_ is valid
542 bool has_hold_; // 'true' if hold_ is valid
545 // silence MSVC warning C4512: assignment operator could not be generated
// Only a declaration is given on this line.
546 data& operator= (data const&);