]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // Copyright (c) 2001-2011 Hartmut Kaiser |
2 | // | |
3 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | |
4 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
5 | ||
6 | #if !defined(BOOST_SPIRIT_LEX_LEXER_STATIC_FUNCTOR_DATA_FEB_10_2008_0755PM) | |
7 | #define BOOST_SPIRIT_LEX_LEXER_STATIC_FUNCTOR_DATA_FEB_10_2008_0755PM | |
8 | ||
9 | #if defined(_MSC_VER) | |
10 | #pragma once | |
11 | #endif | |
12 | ||
13 | #include <boost/spirit/home/support/detail/lexer/generator.hpp> | |
14 | #include <boost/spirit/home/support/detail/lexer/rules.hpp> | |
15 | #include <boost/spirit/home/support/detail/lexer/state_machine.hpp> | |
16 | #include <boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp> | |
17 | #include <boost/spirit/home/lex/lexer/lexertl/semantic_action_data.hpp> | |
18 | #include <boost/spirit/home/lex/lexer/lexertl/wrap_action.hpp> | |
20effc67 | 19 | #include <boost/spirit/home/support/assert_msg.hpp> |
7c673cae | 20 | #include <boost/mpl/bool.hpp> |
92f5a8d4 | 21 | #include <iterator> // for std::iterator_traits |
7c673cae FG |
22 | |
23 | namespace boost { namespace spirit { namespace lex { namespace lexertl | |
24 | { | |
25 | namespace detail | |
26 | { | |
27 | /////////////////////////////////////////////////////////////////////// | |
f67539c2 TL |
// Compares two zero-terminated character sequences for equality.
// Returns true only if both sequences contain the same characters and
// reach their terminating zero at the same position.
template <typename Char>
inline bool zstr_compare(Char const* s1, Char const* s2)
{
    for (;;)
    {
        // any difference (including one string ending early) is a mismatch
        if (*s1 != *s2)
            return false;

        // the characters are equal here, so a zero means both strings ended
        if (!*s1)
            return true;

        ++s1;
        ++s2;
    }
}
36 | ||
7c673cae FG |
37 | template <typename Char, typename F> |
38 | inline std::size_t get_state_id(Char const* state, F f | |
39 | , std::size_t numstates) | |
40 | { | |
41 | for (std::size_t i = 0; i < numstates; ++i) | |
42 | { | |
f67539c2 | 43 | if (zstr_compare(f(i), state)) |
7c673cae FG |
44 | return i; |
45 | } | |
46 | return boost::lexer::npos; | |
47 | } | |
48 | ||
49 | /////////////////////////////////////////////////////////////////////// | |
50 | template <typename Iterator, typename HasActors, typename HasState | |
51 | , typename TokenValue> | |
52 | class static_data; // no default specialization | |
53 | ||
54 | /////////////////////////////////////////////////////////////////////// | |
55 | // doesn't support no state and no actors | |
56 | template <typename Iterator, typename TokenValue> | |
57 | class static_data<Iterator, mpl::false_, mpl::false_, TokenValue> | |
58 | { | |
59 | protected: | |
60 | typedef typename | |
92f5a8d4 | 61 | std::iterator_traits<Iterator>::value_type |
7c673cae FG |
62 | char_type; |
63 | ||
64 | public: | |
65 | typedef Iterator base_iterator_type; | |
66 | typedef iterator_range<Iterator> token_value_type; | |
67 | typedef token_value_type get_value_type; | |
68 | typedef std::size_t state_type; | |
69 | typedef char_type const* state_name_type; | |
70 | typedef unused_type semantic_actions_type; | |
71 | typedef detail::wrap_action<unused_type, Iterator, static_data | |
72 | , std::size_t> wrap_action_type; | |
73 | ||
74 | typedef std::size_t (*next_token_functor)(std::size_t&, | |
75 | bool&, Iterator&, Iterator const&, std::size_t&); | |
76 | typedef char_type const* (*get_state_name_type)(std::size_t); | |
77 | ||
78 | // initialize the shared data | |
79 | template <typename IterData> | |
80 | static_data (IterData const& data, Iterator& first | |
81 | , Iterator const& last) | |
82 | : first_(first), last_(last) | |
83 | , next_token_(data.next_) | |
84 | , get_state_name_(data.get_state_name_) | |
85 | , bol_(data.bol_) {} | |
86 | ||
87 | // The following functions are used by the implementation of the | |
88 | // placeholder '_state'. | |
89 | template <typename Char> | |
90 | void set_state_name (Char const*) | |
91 | { | |
7c673cae FG |
92 | // If you see a compile time assertion below you're probably |
93 | // using a token type not supporting lexer states (the 3rd | |
94 | // template parameter of the token is mpl::false_), but your | |
95 | // code uses state changes anyways. | |
20effc67 TL |
96 | BOOST_SPIRIT_ASSERT_FAIL(Char, |
97 | tried_to_set_state_of_stateless_token, ()); | |
7c673cae FG |
98 | } |
99 | char_type const* get_state_name() const | |
100 | { | |
101 | return get_state_name_(0); | |
102 | } | |
103 | std::size_t get_state_id(char_type const*) const | |
104 | { | |
105 | return 0; | |
106 | } | |
107 | ||
108 | // The function get_eoi() is used by the implementation of the | |
109 | // placeholder '_eoi'. | |
110 | Iterator const& get_eoi() const { return last_; } | |
111 | ||
112 | // The function less() is used by the implementation of the support | |
113 | // function lex::less(). Its functionality is equivalent to flex' | |
114 | // function yyless(): it returns an iterator positioned to the | |
115 | // nth input character beyond the current start iterator (i.e. by | |
116 | // assigning the return value to the placeholder '_end' it is | |
117 | // possible to return all but the first n characters of the current | |
118 | // token back to the input stream. | |
119 | // | |
120 | // This function does nothing as long as no semantic actions are | |
121 | // used. | |
122 | Iterator const& less(Iterator const& it, int) | |
123 | { | |
124 | // The following assertion fires most likely because you are | |
125 | // using lexer semantic actions without using the actor_lexer | |
126 | // as the base class for your token definition class. | |
127 | BOOST_ASSERT(false && | |
128 | "Are you using lexer semantic actions without using the " | |
129 | "actor_lexer base?"); | |
130 | return it; | |
131 | } | |
132 | ||
133 | // The function more() is used by the implementation of the support | |
134 | // function lex::more(). Its functionality is equivalent to flex' | |
135 | // function yymore(): it tells the lexer that the next time it | |
136 | // matches a rule, the corresponding token should be appended onto | |
137 | // the current token value rather than replacing it. | |
138 | // | |
139 | // These functions do nothing as long as no semantic actions are | |
140 | // used. | |
141 | void more() | |
142 | { | |
143 | // The following assertion fires most likely because you are | |
144 | // using lexer semantic actions without using the actor_lexer | |
145 | // as the base class for your token definition class. | |
146 | BOOST_ASSERT(false && | |
147 | "Are you using lexer semantic actions without using the " | |
148 | "actor_lexer base?"); | |
149 | } | |
150 | bool adjust_start() { return false; } | |
151 | void revert_adjust_start() {} | |
152 | ||
153 | // The function lookahead() is used by the implementation of the | |
154 | // support function lex::lookahead. It can be used to implement | |
155 | // lookahead for lexer engines not supporting constructs like flex' | |
156 | // a/b (match a, but only when followed by b): | |
157 | // | |
158 | // This function does nothing as long as no semantic actions are | |
159 | // used. | |
160 | bool lookahead(std::size_t, std::size_t /*state*/ = std::size_t(~0)) | |
161 | { | |
162 | // The following assertion fires most likely because you are | |
163 | // using lexer semantic actions without using the actor_lexer | |
164 | // as the base class for your token definition class. | |
165 | BOOST_ASSERT(false && | |
166 | "Are you using lexer semantic actions without using the " | |
167 | "actor_lexer base?"); | |
168 | return false; | |
169 | } | |
170 | ||
171 | // the functions next, invoke_actions, and get_state are used by | |
172 | // the functor implementation below | |
173 | ||
174 | // The function next() tries to match the next token from the | |
175 | // underlying input sequence. | |
176 | std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol) | |
177 | { | |
178 | prev_bol = bol_; | |
179 | ||
180 | std::size_t state = 0; | |
181 | return next_token_(state, bol_, end, last_, unique_id); | |
182 | } | |
183 | ||
184 | // nothing to invoke, so this is empty | |
185 | BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t | |
186 | , std::size_t, std::size_t, Iterator const&) | |
187 | { | |
188 | return pass_flags::pass_normal; // always accept | |
189 | } | |
190 | ||
191 | std::size_t get_state() const { return 0; } | |
192 | void set_state(std::size_t) {} | |
193 | ||
20effc67 | 194 | void set_end(Iterator const&) {} |
7c673cae FG |
195 | |
196 | Iterator& get_first() { return first_; } | |
197 | Iterator const& get_first() const { return first_; } | |
198 | Iterator const& get_last() const { return last_; } | |
199 | ||
200 | iterator_range<Iterator> get_value() const | |
201 | { | |
202 | return iterator_range<Iterator>(first_, last_); | |
203 | } | |
204 | bool has_value() const { return false; } | |
205 | void reset_value() {} | |
206 | ||
207 | void reset_bol(bool bol) { bol_ = bol; } | |
208 | ||
209 | protected: | |
210 | Iterator& first_; | |
211 | Iterator last_; | |
212 | ||
213 | next_token_functor next_token_; | |
214 | get_state_name_type get_state_name_; | |
215 | ||
216 | bool bol_; // helper storing whether last character was \n | |
217 | ||
7c673cae | 218 | // silence MSVC warning C4512: assignment operator could not be generated |
92f5a8d4 | 219 | BOOST_DELETED_FUNCTION(static_data& operator= (static_data const&)) |
7c673cae FG |
220 | }; |
221 | ||
222 | /////////////////////////////////////////////////////////////////////// | |
223 | // doesn't support lexer semantic actions, but supports state | |
224 | template <typename Iterator, typename TokenValue> | |
225 | class static_data<Iterator, mpl::false_, mpl::true_, TokenValue> | |
226 | : public static_data<Iterator, mpl::false_, mpl::false_, TokenValue> | |
227 | { | |
228 | protected: | |
229 | typedef static_data<Iterator, mpl::false_, mpl::false_, TokenValue> base_type; | |
230 | typedef typename base_type::char_type char_type; | |
231 | ||
232 | public: | |
233 | typedef Iterator base_iterator_type; | |
234 | typedef iterator_range<Iterator> token_value_type; | |
235 | typedef token_value_type get_value_type; | |
236 | typedef typename base_type::state_type state_type; | |
237 | typedef typename base_type::state_name_type state_name_type; | |
238 | typedef typename base_type::semantic_actions_type | |
239 | semantic_actions_type; | |
240 | ||
241 | // initialize the shared data | |
242 | template <typename IterData> | |
243 | static_data (IterData const& data, Iterator& first | |
244 | , Iterator const& last) | |
245 | : base_type(data, first, last), state_(0) | |
246 | , num_states_(data.num_states_) {} | |
247 | ||
248 | // The following functions are used by the implementation of the | |
249 | // placeholder '_state'. | |
250 | void set_state_name (char_type const* new_state) | |
251 | { | |
252 | std::size_t state_id = lexertl::detail::get_state_id(new_state | |
253 | , this->get_state_name_, num_states_); | |
254 | ||
255 | // if the following assertion fires you've probably been using | |
256 | // a lexer state name which was not defined in your token | |
257 | // definition | |
258 | BOOST_ASSERT(state_id != boost::lexer::npos); | |
259 | ||
260 | if (state_id != boost::lexer::npos) | |
261 | state_ = state_id; | |
262 | } | |
263 | char_type const* get_state_name() const | |
264 | { | |
265 | return this->get_state_name_(state_); | |
266 | } | |
267 | std::size_t get_state_id(char_type const* state) const | |
268 | { | |
269 | return lexertl::detail::get_state_id(state | |
270 | , this->get_state_name_, num_states_); | |
271 | } | |
272 | ||
273 | // the functions next() and get_state() are used by the functor | |
274 | // implementation below | |
275 | ||
276 | // The function next() tries to match the next token from the | |
277 | // underlying input sequence. | |
278 | std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol) | |
279 | { | |
280 | prev_bol = this->bol_; | |
281 | return this->next_token_(state_, this->bol_, end, this->last_ | |
282 | , unique_id); | |
283 | } | |
284 | ||
285 | std::size_t& get_state() { return state_; } | |
286 | void set_state(std::size_t state) { state_ = state; } | |
287 | ||
288 | protected: | |
289 | std::size_t state_; | |
290 | std::size_t num_states_; | |
291 | ||
7c673cae | 292 | // silence MSVC warning C4512: assignment operator could not be generated |
92f5a8d4 | 293 | BOOST_DELETED_FUNCTION(static_data& operator= (static_data const&)) |
7c673cae FG |
294 | }; |
295 | ||
296 | /////////////////////////////////////////////////////////////////////// | |
297 | // does support actors, but may have no state | |
298 | template <typename Iterator, typename HasState, typename TokenValue> | |
299 | class static_data<Iterator, mpl::true_, HasState, TokenValue> | |
300 | : public static_data<Iterator, mpl::false_, HasState, TokenValue> | |
301 | { | |
302 | public: | |
303 | typedef semantic_actions<Iterator, HasState, static_data> | |
304 | semantic_actions_type; | |
305 | ||
306 | protected: | |
307 | typedef static_data<Iterator, mpl::false_, HasState, TokenValue> | |
308 | base_type; | |
309 | typedef typename base_type::char_type char_type; | |
310 | typedef typename semantic_actions_type::functor_wrapper_type | |
311 | functor_wrapper_type; | |
312 | ||
313 | public: | |
314 | typedef Iterator base_iterator_type; | |
315 | typedef TokenValue token_value_type; | |
316 | typedef TokenValue const& get_value_type; | |
317 | typedef typename base_type::state_type state_type; | |
318 | typedef typename base_type::state_name_type state_name_type; | |
319 | ||
320 | typedef detail::wrap_action<functor_wrapper_type | |
321 | , Iterator, static_data, std::size_t> wrap_action_type; | |
322 | ||
323 | template <typename IterData> | |
324 | static_data (IterData const& data, Iterator& first | |
325 | , Iterator const& last) | |
326 | : base_type(data, first, last) | |
327 | , actions_(data.actions_), hold_() | |
328 | , value_(iterator_range<Iterator>(first, last)) | |
329 | , has_value_(false) | |
330 | , has_hold_(false) | |
331 | {} | |
332 | ||
333 | // invoke attached semantic actions, if defined | |
334 | BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state | |
335 | , std::size_t& id, std::size_t unique_id, Iterator& end) | |
336 | { | |
337 | return actions_.invoke_actions(state, id, unique_id, end, *this); | |
338 | } | |
339 | ||
340 | // The function less() is used by the implementation of the support | |
341 | // function lex::less(). Its functionality is equivalent to flex' | |
342 | // function yyless(): it returns an iterator positioned to the | |
343 | // nth input character beyond the current start iterator (i.e. by | |
344 | // assigning the return value to the placeholder '_end' it is | |
345 | // possible to return all but the first n characters of the current | |
346 | // token back to the input stream). | |
347 | Iterator const& less(Iterator& it, int n) | |
348 | { | |
349 | it = this->get_first(); | |
350 | std::advance(it, n); | |
351 | return it; | |
352 | } | |
353 | ||
354 | // The function more() is used by the implementation of the support | |
355 | // function lex::more(). Its functionality is equivalent to flex' | |
356 | // function yymore(): it tells the lexer that the next time it | |
357 | // matches a rule, the corresponding token should be appended onto | |
358 | // the current token value rather than replacing it. | |
359 | void more() | |
360 | { | |
361 | hold_ = this->get_first(); | |
362 | has_hold_ = true; | |
363 | } | |
364 | ||
365 | // The function lookahead() is used by the implementation of the | |
366 | // support function lex::lookahead. It can be used to implement | |
367 | // lookahead for lexer engines not supporting constructs like flex' | |
368 | // a/b (match a, but only when followed by b) | |
369 | bool lookahead(std::size_t id, std::size_t state = std::size_t(~0)) | |
370 | { | |
371 | Iterator end = end_; | |
372 | std::size_t unique_id = boost::lexer::npos; | |
373 | bool bol = this->bol_; | |
374 | ||
375 | if (std::size_t(~0) == state) | |
376 | state = this->state_; | |
377 | ||
378 | return id == this->next_token_( | |
379 | state, bol, end, this->get_eoi(), unique_id); | |
380 | } | |
381 | ||
382 | // The adjust_start() and revert_adjust_start() are helper | |
383 | // functions needed to implement the functionality required for | |
384 | // lex::more(). It is called from the functor body below. | |
385 | bool adjust_start() | |
386 | { | |
387 | if (!has_hold_) | |
388 | return false; | |
389 | ||
390 | std::swap(this->get_first(), hold_); | |
391 | has_hold_ = false; | |
392 | return true; | |
393 | } | |
394 | void revert_adjust_start() | |
395 | { | |
396 | // this will be called only if adjust_start above returned true | |
397 | std::swap(this->get_first(), hold_); | |
398 | has_hold_ = true; | |
399 | } | |
400 | ||
401 | TokenValue const& get_value() const | |
402 | { | |
403 | if (!has_value_) { | |
404 | value_ = iterator_range<Iterator>(this->get_first(), end_); | |
405 | has_value_ = true; | |
406 | } | |
407 | return value_; | |
408 | } | |
409 | template <typename Value> | |
410 | void set_value(Value const& val) | |
411 | { | |
412 | value_ = val; | |
413 | has_value_ = true; | |
414 | } | |
415 | void set_end(Iterator const& it) | |
416 | { | |
417 | end_ = it; | |
418 | } | |
419 | bool has_value() const { return has_value_; } | |
420 | void reset_value() { has_value_ = false; } | |
421 | ||
422 | protected: | |
423 | semantic_actions_type const& actions_; | |
424 | Iterator hold_; // iterator needed to support lex::more() | |
425 | Iterator end_; // iterator pointing to end of matched token | |
426 | mutable TokenValue value_; // token value to use | |
427 | mutable bool has_value_; // 'true' if value_ is valid | |
428 | bool has_hold_; // 'true' if hold_ is valid | |
429 | ||
7c673cae | 430 | // silence MSVC warning C4512: assignment operator could not be generated |
92f5a8d4 | 431 | BOOST_DELETED_FUNCTION(static_data& operator= (static_data const&)) |
7c673cae FG |
432 | }; |
433 | ||
434 | /////////////////////////////////////////////////////////////////////// | |
435 | // does support lexer semantic actions, may support state, is used for | |
436 | // position_token exposing exactly one type | |
437 | template <typename Iterator, typename HasState, typename TokenValue> | |
438 | class static_data<Iterator, mpl::true_, HasState, boost::optional<TokenValue> > | |
439 | : public static_data<Iterator, mpl::false_, HasState, TokenValue> | |
440 | { | |
441 | public: | |
442 | typedef semantic_actions<Iterator, HasState, static_data> | |
443 | semantic_actions_type; | |
444 | ||
445 | protected: | |
446 | typedef static_data<Iterator, mpl::false_, HasState, TokenValue> | |
447 | base_type; | |
448 | typedef typename base_type::char_type char_type; | |
449 | typedef typename semantic_actions_type::functor_wrapper_type | |
450 | functor_wrapper_type; | |
451 | ||
452 | public: | |
453 | typedef Iterator base_iterator_type; | |
454 | typedef boost::optional<TokenValue> token_value_type; | |
455 | typedef boost::optional<TokenValue> const& get_value_type; | |
456 | typedef typename base_type::state_type state_type; | |
457 | typedef typename base_type::state_name_type state_name_type; | |
458 | ||
459 | typedef detail::wrap_action<functor_wrapper_type | |
460 | , Iterator, static_data, std::size_t> wrap_action_type; | |
461 | ||
462 | template <typename IterData> | |
463 | static_data (IterData const& data_, Iterator& first, Iterator const& last) | |
464 | : base_type(data_, first, last) | |
465 | , actions_(data_.actions_), hold_() | |
466 | , has_value_(false), has_hold_(false) | |
467 | { | |
468 | spirit::traits::assign_to(first, last, value_); | |
469 | has_value_ = true; | |
470 | } | |
471 | ||
472 | // invoke attached semantic actions, if defined | |
473 | BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state | |
474 | , std::size_t& id, std::size_t unique_id, Iterator& end) | |
475 | { | |
476 | return actions_.invoke_actions(state, id, unique_id, end, *this); | |
477 | } | |
478 | ||
479 | // The function less() is used by the implementation of the support | |
480 | // function lex::less(). Its functionality is equivalent to flex' | |
481 | // function yyless(): it returns an iterator positioned to the | |
482 | // nth input character beyond the current start iterator (i.e. by | |
483 | // assigning the return value to the placeholder '_end' it is | |
484 | // possible to return all but the first n characters of the current | |
485 | // token back to the input stream). | |
486 | Iterator const& less(Iterator& it, int n) | |
487 | { | |
488 | it = this->get_first(); | |
489 | std::advance(it, n); | |
490 | return it; | |
491 | } | |
492 | ||
493 | // The function more() is used by the implementation of the support | |
494 | // function lex::more(). Its functionality is equivalent to flex' | |
495 | // function yymore(): it tells the lexer that the next time it | |
496 | // matches a rule, the corresponding token should be appended onto | |
497 | // the current token value rather than replacing it. | |
498 | void more() | |
499 | { | |
500 | hold_ = this->get_first(); | |
501 | has_hold_ = true; | |
502 | } | |
503 | ||
504 | // The function lookahead() is used by the implementation of the | |
505 | // support function lex::lookahead. It can be used to implement | |
506 | // lookahead for lexer engines not supporting constructs like flex' | |
507 | // a/b (match a, but only when followed by b) | |
508 | bool lookahead(std::size_t id, std::size_t state = std::size_t(~0)) | |
509 | { | |
510 | Iterator end = end_; | |
511 | std::size_t unique_id = boost::lexer::npos; | |
512 | bool bol = this->bol_; | |
513 | ||
514 | if (std::size_t(~0) == state) | |
515 | state = this->state_; | |
516 | ||
517 | return id == this->next_token_( | |
518 | state, bol, end, this->get_eoi(), unique_id); | |
519 | } | |
520 | ||
521 | // The adjust_start() and revert_adjust_start() are helper | |
522 | // functions needed to implement the functionality required for | |
523 | // lex::more(). It is called from the functor body below. | |
524 | bool adjust_start() | |
525 | { | |
526 | if (!has_hold_) | |
527 | return false; | |
528 | ||
529 | std::swap(this->get_first(), hold_); | |
530 | has_hold_ = false; | |
531 | return true; | |
532 | } | |
533 | void revert_adjust_start() | |
534 | { | |
535 | // this will be called only if adjust_start above returned true | |
536 | std::swap(this->get_first(), hold_); | |
537 | has_hold_ = true; | |
538 | } | |
539 | ||
540 | TokenValue const& get_value() const | |
541 | { | |
542 | if (!has_value_) { | |
543 | spirit::traits::assign_to(this->get_first(), end_, value_); | |
544 | has_value_ = true; | |
545 | } | |
546 | return value_; | |
547 | } | |
548 | template <typename Value> | |
549 | void set_value(Value const& val) | |
550 | { | |
551 | value_ = val; | |
552 | has_value_ = true; | |
553 | } | |
554 | void set_end(Iterator const& it) | |
555 | { | |
556 | end_ = it; | |
557 | } | |
558 | bool has_value() const { return has_value_; } | |
559 | void reset_value() { has_value_ = false; } | |
560 | ||
561 | protected: | |
562 | semantic_actions_type const& actions_; | |
563 | Iterator hold_; // iterator needed to support lex::more() | |
564 | Iterator end_; // iterator pointing to end of matched token | |
565 | mutable token_value_type value_; // token value to use | |
566 | mutable bool has_value_; // 'true' if value_ is valid | |
567 | bool has_hold_; // 'true' if hold_ is valid | |
568 | ||
7c673cae | 569 | // silence MSVC warning C4512: assignment operator could not be generated |
92f5a8d4 | 570 | BOOST_DELETED_FUNCTION(static_data& operator= (static_data const&)) |
7c673cae FG |
571 | }; |
572 | } | |
573 | }}}} | |
574 | ||
575 | #endif |