1 // Copyright (c) 2001-2011 Hartmut Kaiser
3 // Distributed under the Boost Software License, Version 1.0. (See accompanying
4 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 #if !defined(BOOST_SPIRIT_LEX_LEXER_STATIC_FUNCTOR_DATA_FEB_10_2008_0755PM)
7 #define BOOST_SPIRIT_LEX_LEXER_STATIC_FUNCTOR_DATA_FEB_10_2008_0755PM
13 #include <boost/spirit/home/support/detail/lexer/generator.hpp>
14 #include <boost/spirit/home/support/detail/lexer/rules.hpp>
15 #include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
16 #include <boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp>
17 #include <boost/spirit/home/lex/lexer/lexertl/semantic_action_data.hpp>
18 #include <boost/spirit/home/lex/lexer/lexertl/wrap_action.hpp>
19 #include <boost/mpl/bool.hpp>
20 #include <boost/algorithm/string/predicate.hpp>
22 namespace boost { namespace spirit { namespace lex { namespace lexertl
26 ///////////////////////////////////////////////////////////////////////
// Translates the lexer state name 'state' into its numeric id.
//
// state:     name of the state to look up
// f:         callable mapping a state id to that state's name; f(i) is
//            compared against 'state' using boost::algorithm::equals
// numstates: number of ids to probe, the loop covers [0, numstates)
//
// boost::lexer::npos is the result for a name none of the probed ids
// carries.
27 template <typename Char, typename F>
28 inline std::size_t get_state_id(Char const* state, F f
29 , std::size_t numstates)
31 for (std::size_t i = 0; i < numstates; ++i)
33 if (boost::algorithm::equals(f(i), state))
36 return boost::lexer::npos;
39 ///////////////////////////////////////////////////////////////////////
// Primary template of static_data. It is only declared here; the partial
// specializations below supply the definitions, selected by the HasActors
// and HasState flags (mpl::true_ or mpl::false_) and, for one of them, by
// TokenValue being a boost::optional.
40 template <typename Iterator, typename HasActors, typename HasState
41 , typename TokenValue>
42 class static_data; // declaration only, no generic definition
44 ///////////////////////////////////////////////////////////////////////
45 // supports neither lexer states nor semantic actions (actors)
//
// Keeps the input range (first_, last_), the two tokenizer function
// pointers (next_token_, get_state_name_) and the begin-of-line flag bol_.
// The entry points that only make sense together with semantic actions
// (less, more, lookahead) merely trigger an assertion in this class.
46 template <typename Iterator, typename TokenValue>
47 class static_data<Iterator, mpl::false_, mpl::false_, TokenValue>
51 boost::detail::iterator_traits<Iterator>::value_type
55 typedef Iterator base_iterator_type;
56 typedef iterator_range<Iterator> token_value_type;
57 typedef token_value_type get_value_type;
58 typedef std::size_t state_type;
59 typedef char_type const* state_name_type;
60 typedef unused_type semantic_actions_type;
61 typedef detail::wrap_action<unused_type, Iterator, static_data
62 , std::size_t> wrap_action_type;
// Function pointer types of the tokenizer driven by this class:
// next_token_functor matches one token (see next() below),
// get_state_name_type maps a state id to the name of that state.
64 typedef std::size_t (*next_token_functor)(std::size_t&,
65 bool&, Iterator&, Iterator const&, std::size_t&);
66 typedef char_type const* (*get_state_name_type)(std::size_t);
68 // initialize the shared data
// 'data' supplies the tokenizer function (next_) and the state name
// function (get_state_name_); 'first' and 'last' delimit the input.
69 template <typename IterData>
70 static_data (IterData const& data, Iterator& first
71 , Iterator const& last)
72 : first_(first), last_(last)
73 , next_token_(data.next_)
74 , get_state_name_(data.get_state_name_)
77 // The following functions are used by the implementation of the
78 // placeholder '_state'.
79 template <typename Char>
80 void set_state_name (Char const*)
82 // Some (random) versions of gcc instantiate this function even if it's
83 // not needed, leading to false static asserts.
84 #if !defined(__GNUC__)
85 // If you see a compile time assertion below you're probably
86 // using a token type not supporting lexer states (the 3rd
87 // template parameter of the token is mpl::false_), but your
88 // code uses state changes anyway.
89 BOOST_STATIC_ASSERT(false);
// Without state support the name of state 0 is always reported.
92 char_type const* get_state_name() const
94 return get_state_name_(0);
96 std::size_t get_state_id(char_type const*) const
101 // The function get_eoi() is used by the implementation of the
102 // placeholder '_eoi'.
103 Iterator const& get_eoi() const { return last_; }
105 // The function less() is used by the implementation of the support
106 // function lex::less(). Its functionality is equivalent to flex'
107 // function yyless(): it returns an iterator positioned to the
108 // nth input character beyond the current start iterator (i.e. by
109 // assigning the return value to the placeholder '_end' it is
110 // possible to return all but the first n characters of the current
111 // token back to the input stream).
113 // This function does nothing as long as no semantic actions are used.
115 Iterator const& less(Iterator const& it, int)
117 // The following assertion fires most likely because you are
118 // using lexer semantic actions without using the actor_lexer
119 // as the base class for your token definition class.
120 BOOST_ASSERT(false &&
121 "Are you using lexer semantic actions without using the "
122 "actor_lexer base?");
126 // The function more() is used by the implementation of the support
127 // function lex::more(). Its functionality is equivalent to flex'
128 // function yymore(): it tells the lexer that the next time it
129 // matches a rule, the corresponding token should be appended onto
130 // the current token value rather than replacing it.
132 // These functions do nothing as long as no semantic actions are used.
136 // The following assertion fires most likely because you are
137 // using lexer semantic actions without using the actor_lexer
138 // as the base class for your token definition class.
139 BOOST_ASSERT(false &&
140 "Are you using lexer semantic actions without using the "
141 "actor_lexer base?");
143 bool adjust_start() { return false; }
144 void revert_adjust_start() {}
146 // The function lookahead() is used by the implementation of the
147 // support function lex::lookahead. It can be used to implement
148 // lookahead for lexer engines not supporting constructs like flex'
149 // a/b (match a, but only when followed by b):
151 // This function does nothing as long as no semantic actions are used.
153 bool lookahead(std::size_t, std::size_t /*state*/ = std::size_t(~0))
155 // The following assertion fires most likely because you are
156 // using lexer semantic actions without using the actor_lexer
157 // as the base class for your token definition class.
158 BOOST_ASSERT(false &&
159 "Are you using lexer semantic actions without using the "
160 "actor_lexer base?");
164 // the functions next, invoke_actions, and get_state are used by
165 // the functor implementation below
167 // The function next() tries to match the next token from the
168 // underlying input sequence.
169 std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol)
// no state support: the tokenizer is always run in state 0
173 std::size_t state = 0;
174 return next_token_(state, bol_, end, last_, unique_id);
177 // nothing to invoke, so this is empty
178 BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t
179 , std::size_t, std::size_t, Iterator const&)
181 return pass_flags::pass_normal; // always accept
// the only state known to this class is state 0, set_state() is a no-op
184 std::size_t get_state() const { return 0; }
185 void set_state(std::size_t) {}
// no end-of-token position is tracked here, the argument is ignored
187 void set_end(Iterator const& it) {}
189 Iterator& get_first() { return first_; }
190 Iterator const& get_first() const { return first_; }
191 Iterator const& get_last() const { return last_; }
// the value exposed by this class is always the range [first_, last_)
193 iterator_range<Iterator> get_value() const
195 return iterator_range<Iterator>(first_, last_);
// no separate token value is stored, so there is nothing to report or reset
197 bool has_value() const { return false; }
198 void reset_value() {}
200 void reset_bol(bool bol) { bol_ = bol; }
// tokenizer entry points copied from the 'data' constructor argument
206 next_token_functor next_token_;
207 get_state_name_type get_state_name_;
209 bool bol_; // helper storing whether last character was \n
212 // silence MSVC warning C4512: assignment operator could not be generated
213 static_data& operator= (static_data const&);
216 ///////////////////////////////////////////////////////////////////////
217 // supports lexer states, but no lexer semantic actions
//
// Extends the stateless variant by the current lexer state (state_) and
// the number of known states (num_states_), and redefines the state
// related functions on top of these.
218 template <typename Iterator, typename TokenValue>
219 class static_data<Iterator, mpl::false_, mpl::true_, TokenValue>
220 : public static_data<Iterator, mpl::false_, mpl::false_, TokenValue>
223 typedef static_data<Iterator, mpl::false_, mpl::false_, TokenValue> base_type;
224 typedef typename base_type::char_type char_type;
227 typedef Iterator base_iterator_type;
228 typedef iterator_range<Iterator> token_value_type;
229 typedef token_value_type get_value_type;
230 typedef typename base_type::state_type state_type;
231 typedef typename base_type::state_name_type state_name_type;
232 typedef typename base_type::semantic_actions_type
233 semantic_actions_type;
235 // initialize the shared data
// The lexer starts out in state 0; the number of states is taken from
// 'data'.
236 template <typename IterData>
237 static_data (IterData const& data, Iterator& first
238 , Iterator const& last)
239 : base_type(data, first, last), state_(0)
240 , num_states_(data.num_states_) {}
242 // The following functions are used by the implementation of the
243 // placeholder '_state'.
244 void set_state_name (char_type const* new_state)
// resolve the name to a state id using the state name function
246 std::size_t state_id = lexertl::detail::get_state_id(new_state
247 , this->get_state_name_, num_states_);
249 // if the following assertion fires you've probably been using
250 // a lexer state name which was not defined in your token definition
252 BOOST_ASSERT(state_id != boost::lexer::npos);
254 if (state_id != boost::lexer::npos)
// name of the state the lexer is currently in
257 char_type const* get_state_name() const
259 return this->get_state_name_(state_);
// id of the state called 'state', looked up among the num_states_ states
261 std::size_t get_state_id(char_type const* state) const
263 return lexertl::detail::get_state_id(state
264 , this->get_state_name_, num_states_);
267 // the functions next() and get_state() are used by the functor
268 // implementation below
270 // The function next() tries to match the next token from the
271 // underlying input sequence.
272 std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol)
// hand the begin-of-line flag back as it was before the tokenizer call,
// which receives bol_ by reference
274 prev_bol = this->bol_;
275 return this->next_token_(state_, this->bol_, end, this->last_
// get_state() hands out a reference to the current state
279 std::size_t& get_state() { return state_; }
280 void set_state(std::size_t state) { state_ = state; }
// number of lexer states, used as the bound of the state name lookups
284 std::size_t num_states_;
287 // silence MSVC warning C4512: assignment operator could not be generated
288 static_data& operator= (static_data const&);
291 ///////////////////////////////////////////////////////////////////////
292 // supports semantic actions (actors), with or without lexer states
//
// Builds on the variant without actors for the same HasState and adds the
// semantic actions to invoke (actions_), a token value of type TokenValue
// (value_) and the iterators needed by lex::less(), lex::more() and
// lex::lookahead().
293 template <typename Iterator, typename HasState, typename TokenValue>
294 class static_data<Iterator, mpl::true_, HasState, TokenValue>
295 : public static_data<Iterator, mpl::false_, HasState, TokenValue>
298 typedef semantic_actions<Iterator, HasState, static_data>
299 semantic_actions_type;
302 typedef static_data<Iterator, mpl::false_, HasState, TokenValue>
304 typedef typename base_type::char_type char_type;
305 typedef typename semantic_actions_type::functor_wrapper_type
306 functor_wrapper_type;
309 typedef Iterator base_iterator_type;
310 typedef TokenValue token_value_type;
311 typedef TokenValue const& get_value_type;
312 typedef typename base_type::state_type state_type;
313 typedef typename base_type::state_name_type state_name_type;
315 typedef detail::wrap_action<functor_wrapper_type
316 , Iterator, static_data, std::size_t> wrap_action_type;
// initialize the shared data; actions_ refers to the actions held by
// 'data', the token value starts out as the range [first, last)
318 template <typename IterData>
319 static_data (IterData const& data, Iterator& first
320 , Iterator const& last)
321 : base_type(data, first, last)
322 , actions_(data.actions_), hold_()
323 , value_(iterator_range<Iterator>(first, last))
328 // invoke attached semantic actions, if defined
329 BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state
330 , std::size_t& id, std::size_t unique_id, Iterator& end)
332 return actions_.invoke_actions(state, id, unique_id, end, *this);
335 // The function less() is used by the implementation of the support
336 // function lex::less(). Its functionality is equivalent to flex'
337 // function yyless(): it returns an iterator positioned to the
338 // nth input character beyond the current start iterator (i.e. by
339 // assigning the return value to the placeholder '_end' it is
340 // possible to return all but the first n characters of the current
341 // token back to the input stream).
342 Iterator const& less(Iterator& it, int n)
// start out from the beginning of the current token
344 it = this->get_first();
349 // The function more() is used by the implementation of the support
350 // function lex::more(). Its functionality is equivalent to flex'
351 // function yymore(): it tells the lexer that the next time it
352 // matches a rule, the corresponding token should be appended onto
353 // the current token value rather than replacing it.
// remember the start of the current token
356 hold_ = this->get_first();
360 // The function lookahead() is used by the implementation of the
361 // support function lex::lookahead. It can be used to implement
362 // lookahead for lexer engines not supporting constructs like flex'
363 // a/b (match a, but only when followed by b)
//
// The result is true if the tokenizer reports the token id 'id'. If no
// 'state' is passed (std::size_t(~0)), the current lexer state is used.
364 bool lookahead(std::size_t id, std::size_t state = std::size_t(~0))
367 std::size_t unique_id = boost::lexer::npos;
// the tokenizer takes bol and state by reference, so it is handed copies
// and the members of this object are left as they are
368 bool bol = this->bol_;
370 if (std::size_t(~0) == state)
371 state = this->state_;
373 return id == this->next_token_(
374 state, bol, end, this->get_eoi(), unique_id);
377 // The adjust_start() and revert_adjust_start() are helper
378 // functions needed to implement the functionality required for
379 // lex::more(). It is called from the functor body below.
// exchange the current start iterator with the one stored in hold_
385 std::swap(this->get_first(), hold_);
389 void revert_adjust_start()
391 // this will be called only if adjust_start above returned true
392 std::swap(this->get_first(), hold_);
// value_ and has_value_ are declared mutable, which allows this const
// function to assign the token value
396 TokenValue const& get_value() const
// the token value is the range from the token start up to end_
399 value_ = iterator_range<Iterator>(this->get_first(), end_);
404 template <typename Value>
405 void set_value(Value const& val)
410 void set_end(Iterator const& it)
414 bool has_value() const { return has_value_; }
415 void reset_value() { has_value_ = false; }
418 semantic_actions_type const& actions_;
419 Iterator hold_; // iterator needed to support lex::more()
420 Iterator end_; // iterator pointing to end of matched token
421 mutable TokenValue value_; // token value to use
422 mutable bool has_value_; // 'true' if value_ is valid
423 bool has_hold_; // 'true' if hold_ is valid
426 // silence MSVC warning C4512: assignment operator could not be generated
427 static_data& operator= (static_data const&);
430 ///////////////////////////////////////////////////////////////////////
431 // supports lexer semantic actions, with or without lexer states; is used
432 // for position_token exposing exactly one type
//
// Differs from the generic variant with actors in the token value: value_
// is a boost::optional<TokenValue> which is filled using
// spirit::traits::assign_to instead of being an iterator range.
433 template <typename Iterator, typename HasState, typename TokenValue>
434 class static_data<Iterator, mpl::true_, HasState, boost::optional<TokenValue> >
435 : public static_data<Iterator, mpl::false_, HasState, TokenValue>
438 typedef semantic_actions<Iterator, HasState, static_data>
439 semantic_actions_type;
442 typedef static_data<Iterator, mpl::false_, HasState, TokenValue>
444 typedef typename base_type::char_type char_type;
445 typedef typename semantic_actions_type::functor_wrapper_type
446 functor_wrapper_type;
449 typedef Iterator base_iterator_type;
450 typedef boost::optional<TokenValue> token_value_type;
451 typedef boost::optional<TokenValue> const& get_value_type;
452 typedef typename base_type::state_type state_type;
453 typedef typename base_type::state_name_type state_name_type;
455 typedef detail::wrap_action<functor_wrapper_type
456 , Iterator, static_data, std::size_t> wrap_action_type;
// initialize the shared data; actions_ refers to the actions held by
// 'data_', no value and no held iterator are marked as valid yet
458 template <typename IterData>
459 static_data (IterData const& data_, Iterator& first, Iterator const& last)
460 : base_type(data_, first, last)
461 , actions_(data_.actions_), hold_()
462 , has_value_(false), has_hold_(false)
// fill value_ from the range [first, last)
464 spirit::traits::assign_to(first, last, value_);
468 // invoke attached semantic actions, if defined
469 BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state
470 , std::size_t& id, std::size_t unique_id, Iterator& end)
472 return actions_.invoke_actions(state, id, unique_id, end, *this);
475 // The function less() is used by the implementation of the support
476 // function lex::less(). Its functionality is equivalent to flex'
477 // function yyless(): it returns an iterator positioned to the
478 // nth input character beyond the current start iterator (i.e. by
479 // assigning the return value to the placeholder '_end' it is
480 // possible to return all but the first n characters of the current
481 // token back to the input stream).
482 Iterator const& less(Iterator& it, int n)
// start out from the beginning of the current token
484 it = this->get_first();
489 // The function more() is used by the implementation of the support
490 // function lex::more(). Its functionality is equivalent to flex'
491 // function yymore(): it tells the lexer that the next time it
492 // matches a rule, the corresponding token should be appended onto
493 // the current token value rather than replacing it.
// remember the start of the current token
496 hold_ = this->get_first();
500 // The function lookahead() is used by the implementation of the
501 // support function lex::lookahead. It can be used to implement
502 // lookahead for lexer engines not supporting constructs like flex'
503 // a/b (match a, but only when followed by b)
//
// The result is true if the tokenizer reports the token id 'id'. If no
// 'state' is passed (std::size_t(~0)), the current lexer state is used.
504 bool lookahead(std::size_t id, std::size_t state = std::size_t(~0))
507 std::size_t unique_id = boost::lexer::npos;
// the tokenizer takes bol and state by reference, so it is handed copies
// and the members of this object are left as they are
508 bool bol = this->bol_;
510 if (std::size_t(~0) == state)
511 state = this->state_;
513 return id == this->next_token_(
514 state, bol, end, this->get_eoi(), unique_id);
517 // The adjust_start() and revert_adjust_start() are helper
518 // functions needed to implement the functionality required for
519 // lex::more(). It is called from the functor body below.
// exchange the current start iterator with the one stored in hold_
525 std::swap(this->get_first(), hold_);
529 void revert_adjust_start()
531 // this will be called only if adjust_start above returned true
532 std::swap(this->get_first(), hold_);
// NOTE(review): the declared return type is TokenValue const&, whereas
// get_value_type and value_ are based on boost::optional<TokenValue> -
// confirm that this difference is intended.
536 TokenValue const& get_value() const
// the token value is filled from the range from the token start up to end_
539 spirit::traits::assign_to(this->get_first(), end_, value_);
544 template <typename Value>
545 void set_value(Value const& val)
550 void set_end(Iterator const& it)
554 bool has_value() const { return has_value_; }
555 void reset_value() { has_value_ = false; }
558 semantic_actions_type const& actions_;
559 Iterator hold_; // iterator needed to support lex::more()
560 Iterator end_; // iterator pointing to end of matched token
561 mutable token_value_type value_; // token value to use
562 mutable bool has_value_; // 'true' if value_ is valid
563 bool has_hold_; // 'true' if hold_ is valid
566 // silence MSVC warning C4512: assignment operator could not be generated
567 static_data& operator= (static_data const&);