]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // Copyright (c) 2001-2011 Hartmut Kaiser |
2 | // | |
3 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | |
4 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
5 | ||
6 | #if !defined(BOOST_SPIRIT_LEX_LEXER_STATIC_FUNCTOR_DATA_FEB_10_2008_0755PM) | |
7 | #define BOOST_SPIRIT_LEX_LEXER_STATIC_FUNCTOR_DATA_FEB_10_2008_0755PM | |
8 | ||
9 | #if defined(_MSC_VER) | |
10 | #pragma once | |
11 | #endif | |
12 | ||
13 | #include <boost/spirit/home/support/detail/lexer/generator.hpp> | |
14 | #include <boost/spirit/home/support/detail/lexer/rules.hpp> | |
15 | #include <boost/spirit/home/support/detail/lexer/state_machine.hpp> | |
16 | #include <boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp> | |
17 | #include <boost/spirit/home/lex/lexer/lexertl/semantic_action_data.hpp> | |
18 | #include <boost/spirit/home/lex/lexer/lexertl/wrap_action.hpp> | |
19 | #include <boost/mpl/bool.hpp> | |
92f5a8d4 | 20 | #include <iterator> // for std::iterator_traits |
7c673cae FG |
21 | |
22 | namespace boost { namespace spirit { namespace lex { namespace lexertl | |
23 | { | |
24 | namespace detail | |
25 | { | |
26 | /////////////////////////////////////////////////////////////////////// | |
f67539c2 TL |
// Compares two zero-terminated character sequences for equality.
//
// Returns true only if both sequences hold the same characters and end
// at the same position; a proper prefix of the other sequence does not
// compare equal.
template <typename Char>
inline bool zstr_compare(Char const* s1, Char const* s2)
{
    Char const* lhs = s1;
    Char const* rhs = s2;

    // walk both sequences in lock-step for as long as they agree
    while (*lhs == *rhs)
    {
        if (!*lhs)
            return true;    // reached both terminators simultaneously
        ++lhs;
        ++rhs;
    }
    return false;           // first mismatch (including a length mismatch)
}
35 | ||
7c673cae FG |
36 | template <typename Char, typename F> |
37 | inline std::size_t get_state_id(Char const* state, F f | |
38 | , std::size_t numstates) | |
39 | { | |
40 | for (std::size_t i = 0; i < numstates; ++i) | |
41 | { | |
f67539c2 | 42 | if (zstr_compare(f(i), state)) |
7c673cae FG |
43 | return i; |
44 | } | |
45 | return boost::lexer::npos; | |
46 | } | |
47 | ||
48 | /////////////////////////////////////////////////////////////////////// | |
49 | template <typename Iterator, typename HasActors, typename HasState | |
50 | , typename TokenValue> | |
51 | class static_data; // no default specialization | |
52 | ||
53 | /////////////////////////////////////////////////////////////////////// | |
54 | // supports neither lexer states nor lexer semantic actions (actors) | |
55 | template <typename Iterator, typename TokenValue> | |
56 | class static_data<Iterator, mpl::false_, mpl::false_, TokenValue> | |
57 | { | |
58 | protected: | |
59 | typedef typename | |
92f5a8d4 | 60 | std::iterator_traits<Iterator>::value_type |
7c673cae FG |
61 | char_type; |
62 | ||
63 | public: | |
64 | typedef Iterator base_iterator_type; | |
65 | typedef iterator_range<Iterator> token_value_type; | |
66 | typedef token_value_type get_value_type; | |
67 | typedef std::size_t state_type; | |
68 | typedef char_type const* state_name_type; | |
69 | typedef unused_type semantic_actions_type; | |
70 | typedef detail::wrap_action<unused_type, Iterator, static_data | |
71 | , std::size_t> wrap_action_type; | |
72 | ||
73 | typedef std::size_t (*next_token_functor)(std::size_t&, | |
74 | bool&, Iterator&, Iterator const&, std::size_t&); | |
75 | typedef char_type const* (*get_state_name_type)(std::size_t); | |
76 | ||
77 | // initialize the shared data | |
78 | template <typename IterData> | |
79 | static_data (IterData const& data, Iterator& first | |
80 | , Iterator const& last) | |
81 | : first_(first), last_(last) | |
82 | , next_token_(data.next_) | |
83 | , get_state_name_(data.get_state_name_) | |
84 | , bol_(data.bol_) {} | |
85 | ||
86 | // The following functions are used by the implementation of the | |
87 | // placeholder '_state'. | |
88 | template <typename Char> | |
89 | void set_state_name (Char const*) | |
90 | { | |
91 | // some (random) versions of gcc instantiate this function even if it's not | |
92 | // needed leading to false static asserts | |
93 | #if !defined(__GNUC__) | |
94 | // If you see a compile time assertion below you're probably | |
95 | // using a token type not supporting lexer states (the 3rd | |
96 | // template parameter of the token is mpl::false_), but your | |
97 | // code uses state changes anyways. | |
98 | BOOST_STATIC_ASSERT(false); | |
99 | #endif | |
100 | } | |
101 | char_type const* get_state_name() const | |
102 | { | |
103 | return get_state_name_(0); | |
104 | } | |
105 | std::size_t get_state_id(char_type const*) const | |
106 | { | |
107 | return 0; | |
108 | } | |
109 | ||
110 | // The function get_eoi() is used by the implementation of the | |
111 | // placeholder '_eoi'. | |
112 | Iterator const& get_eoi() const { return last_; } | |
113 | ||
114 | // The function less() is used by the implementation of the support | |
115 | // function lex::less(). Its functionality is equivalent to flex' | |
116 | // function yyless(): it returns an iterator positioned to the | |
117 | // nth input character beyond the current start iterator (i.e. by | |
118 | // assigning the return value to the placeholder '_end' it is | |
119 | // possible to return all but the first n characters of the current | |
120 | // token back to the input stream. | |
121 | // | |
122 | // This function does nothing as long as no semantic actions are | |
123 | // used. | |
124 | Iterator const& less(Iterator const& it, int) | |
125 | { | |
126 | // The following assertion fires most likely because you are | |
127 | // using lexer semantic actions without using the actor_lexer | |
128 | // as the base class for your token definition class. | |
129 | BOOST_ASSERT(false && | |
130 | "Are you using lexer semantic actions without using the " | |
131 | "actor_lexer base?"); | |
132 | return it; | |
133 | } | |
134 | ||
135 | // The function more() is used by the implementation of the support | |
136 | // function lex::more(). Its functionality is equivalent to flex' | |
137 | // function yymore(): it tells the lexer that the next time it | |
138 | // matches a rule, the corresponding token should be appended onto | |
139 | // the current token value rather than replacing it. | |
140 | // | |
141 | // These functions do nothing as long as no semantic actions are | |
142 | // used. | |
143 | void more() | |
144 | { | |
145 | // The following assertion fires most likely because you are | |
146 | // using lexer semantic actions without using the actor_lexer | |
147 | // as the base class for your token definition class. | |
148 | BOOST_ASSERT(false && | |
149 | "Are you using lexer semantic actions without using the " | |
150 | "actor_lexer base?"); | |
151 | } | |
152 | bool adjust_start() { return false; } | |
153 | void revert_adjust_start() {} | |
154 | ||
155 | // The function lookahead() is used by the implementation of the | |
156 | // support function lex::lookahead. It can be used to implement | |
157 | // lookahead for lexer engines not supporting constructs like flex' | |
158 | // a/b (match a, but only when followed by b): | |
159 | // | |
160 | // This function does nothing as long as no semantic actions are | |
161 | // used. | |
162 | bool lookahead(std::size_t, std::size_t /*state*/ = std::size_t(~0)) | |
163 | { | |
164 | // The following assertion fires most likely because you are | |
165 | // using lexer semantic actions without using the actor_lexer | |
166 | // as the base class for your token definition class. | |
167 | BOOST_ASSERT(false && | |
168 | "Are you using lexer semantic actions without using the " | |
169 | "actor_lexer base?"); | |
170 | return false; | |
171 | } | |
172 | ||
173 | // the functions next, invoke_actions, and get_state are used by | |
174 | // the functor implementation below | |
175 | ||
176 | // The function next() tries to match the next token from the | |
177 | // underlying input sequence. | |
178 | std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol) | |
179 | { | |
180 | prev_bol = bol_; | |
181 | ||
182 | std::size_t state = 0; | |
183 | return next_token_(state, bol_, end, last_, unique_id); | |
184 | } | |
185 | ||
186 | // nothing to invoke, so this is empty | |
187 | BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t | |
188 | , std::size_t, std::size_t, Iterator const&) | |
189 | { | |
190 | return pass_flags::pass_normal; // always accept | |
191 | } | |
192 | ||
193 | std::size_t get_state() const { return 0; } | |
194 | void set_state(std::size_t) {} | |
195 | ||
196 | void set_end(Iterator const& it) {} | |
197 | ||
198 | Iterator& get_first() { return first_; } | |
199 | Iterator const& get_first() const { return first_; } | |
200 | Iterator const& get_last() const { return last_; } | |
201 | ||
202 | iterator_range<Iterator> get_value() const | |
203 | { | |
204 | return iterator_range<Iterator>(first_, last_); | |
205 | } | |
206 | bool has_value() const { return false; } | |
207 | void reset_value() {} | |
208 | ||
209 | void reset_bol(bool bol) { bol_ = bol; } | |
210 | ||
211 | protected: | |
212 | Iterator& first_; | |
213 | Iterator last_; | |
214 | ||
215 | next_token_functor next_token_; | |
216 | get_state_name_type get_state_name_; | |
217 | ||
218 | bool bol_; // helper storing whether last character was \n | |
219 | ||
7c673cae | 220 | // silence MSVC warning C4512: assignment operator could not be generated |
92f5a8d4 | 221 | BOOST_DELETED_FUNCTION(static_data& operator= (static_data const&)) |
7c673cae FG |
222 | }; |
223 | ||
224 | /////////////////////////////////////////////////////////////////////// | |
225 | // doesn't support lexer semantic actions, but supports state | |
226 | template <typename Iterator, typename TokenValue> | |
227 | class static_data<Iterator, mpl::false_, mpl::true_, TokenValue> | |
228 | : public static_data<Iterator, mpl::false_, mpl::false_, TokenValue> | |
229 | { | |
230 | protected: | |
231 | typedef static_data<Iterator, mpl::false_, mpl::false_, TokenValue> base_type; | |
232 | typedef typename base_type::char_type char_type; | |
233 | ||
234 | public: | |
235 | typedef Iterator base_iterator_type; | |
236 | typedef iterator_range<Iterator> token_value_type; | |
237 | typedef token_value_type get_value_type; | |
238 | typedef typename base_type::state_type state_type; | |
239 | typedef typename base_type::state_name_type state_name_type; | |
240 | typedef typename base_type::semantic_actions_type | |
241 | semantic_actions_type; | |
242 | ||
243 | // initialize the shared data | |
244 | template <typename IterData> | |
245 | static_data (IterData const& data, Iterator& first | |
246 | , Iterator const& last) | |
247 | : base_type(data, first, last), state_(0) | |
248 | , num_states_(data.num_states_) {} | |
249 | ||
250 | // The following functions are used by the implementation of the | |
251 | // placeholder '_state'. | |
252 | void set_state_name (char_type const* new_state) | |
253 | { | |
254 | std::size_t state_id = lexertl::detail::get_state_id(new_state | |
255 | , this->get_state_name_, num_states_); | |
256 | ||
257 | // if the following assertion fires you've probably been using | |
258 | // a lexer state name which was not defined in your token | |
259 | // definition | |
260 | BOOST_ASSERT(state_id != boost::lexer::npos); | |
261 | ||
262 | if (state_id != boost::lexer::npos) | |
263 | state_ = state_id; | |
264 | } | |
265 | char_type const* get_state_name() const | |
266 | { | |
267 | return this->get_state_name_(state_); | |
268 | } | |
269 | std::size_t get_state_id(char_type const* state) const | |
270 | { | |
271 | return lexertl::detail::get_state_id(state | |
272 | , this->get_state_name_, num_states_); | |
273 | } | |
274 | ||
275 | // the functions next() and get_state() are used by the functor | |
276 | // implementation below | |
277 | ||
278 | // The function next() tries to match the next token from the | |
279 | // underlying input sequence. | |
280 | std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol) | |
281 | { | |
282 | prev_bol = this->bol_; | |
283 | return this->next_token_(state_, this->bol_, end, this->last_ | |
284 | , unique_id); | |
285 | } | |
286 | ||
287 | std::size_t& get_state() { return state_; } | |
288 | void set_state(std::size_t state) { state_ = state; } | |
289 | ||
290 | protected: | |
291 | std::size_t state_; | |
292 | std::size_t num_states_; | |
293 | ||
7c673cae | 294 | // silence MSVC warning C4512: assignment operator could not be generated |
92f5a8d4 | 295 | BOOST_DELETED_FUNCTION(static_data& operator= (static_data const&)) |
7c673cae FG |
296 | }; |
297 | ||
298 | /////////////////////////////////////////////////////////////////////// | |
299 | // does support actors, but may have no state | |
300 | template <typename Iterator, typename HasState, typename TokenValue> | |
301 | class static_data<Iterator, mpl::true_, HasState, TokenValue> | |
302 | : public static_data<Iterator, mpl::false_, HasState, TokenValue> | |
303 | { | |
304 | public: | |
305 | typedef semantic_actions<Iterator, HasState, static_data> | |
306 | semantic_actions_type; | |
307 | ||
308 | protected: | |
309 | typedef static_data<Iterator, mpl::false_, HasState, TokenValue> | |
310 | base_type; | |
311 | typedef typename base_type::char_type char_type; | |
312 | typedef typename semantic_actions_type::functor_wrapper_type | |
313 | functor_wrapper_type; | |
314 | ||
315 | public: | |
316 | typedef Iterator base_iterator_type; | |
317 | typedef TokenValue token_value_type; | |
318 | typedef TokenValue const& get_value_type; | |
319 | typedef typename base_type::state_type state_type; | |
320 | typedef typename base_type::state_name_type state_name_type; | |
321 | ||
322 | typedef detail::wrap_action<functor_wrapper_type | |
323 | , Iterator, static_data, std::size_t> wrap_action_type; | |
324 | ||
325 | template <typename IterData> | |
326 | static_data (IterData const& data, Iterator& first | |
327 | , Iterator const& last) | |
328 | : base_type(data, first, last) | |
329 | , actions_(data.actions_), hold_() | |
330 | , value_(iterator_range<Iterator>(first, last)) | |
331 | , has_value_(false) | |
332 | , has_hold_(false) | |
333 | {} | |
334 | ||
335 | // invoke attached semantic actions, if defined | |
336 | BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state | |
337 | , std::size_t& id, std::size_t unique_id, Iterator& end) | |
338 | { | |
339 | return actions_.invoke_actions(state, id, unique_id, end, *this); | |
340 | } | |
341 | ||
342 | // The function less() is used by the implementation of the support | |
343 | // function lex::less(). Its functionality is equivalent to flex' | |
344 | // function yyless(): it returns an iterator positioned to the | |
345 | // nth input character beyond the current start iterator (i.e. by | |
346 | // assigning the return value to the placeholder '_end' it is | |
347 | // possible to return all but the first n characters of the current | |
348 | // token back to the input stream). | |
349 | Iterator const& less(Iterator& it, int n) | |
350 | { | |
351 | it = this->get_first(); | |
352 | std::advance(it, n); | |
353 | return it; | |
354 | } | |
355 | ||
356 | // The function more() is used by the implementation of the support | |
357 | // function lex::more(). Its functionality is equivalent to flex' | |
358 | // function yymore(): it tells the lexer that the next time it | |
359 | // matches a rule, the corresponding token should be appended onto | |
360 | // the current token value rather than replacing it. | |
361 | void more() | |
362 | { | |
363 | hold_ = this->get_first(); | |
364 | has_hold_ = true; | |
365 | } | |
366 | ||
367 | // The function lookahead() is used by the implementation of the | |
368 | // support function lex::lookahead. It can be used to implement | |
369 | // lookahead for lexer engines not supporting constructs like flex' | |
370 | // a/b (match a, but only when followed by b) | |
371 | bool lookahead(std::size_t id, std::size_t state = std::size_t(~0)) | |
372 | { | |
373 | Iterator end = end_; | |
374 | std::size_t unique_id = boost::lexer::npos; | |
375 | bool bol = this->bol_; | |
376 | ||
377 | if (std::size_t(~0) == state) | |
378 | state = this->state_; | |
379 | ||
380 | return id == this->next_token_( | |
381 | state, bol, end, this->get_eoi(), unique_id); | |
382 | } | |
383 | ||
384 | // The adjust_start() and revert_adjust_start() are helper | |
385 | // functions needed to implement the functionality required for | |
386 | // lex::more(). It is called from the functor body below. | |
387 | bool adjust_start() | |
388 | { | |
389 | if (!has_hold_) | |
390 | return false; | |
391 | ||
392 | std::swap(this->get_first(), hold_); | |
393 | has_hold_ = false; | |
394 | return true; | |
395 | } | |
396 | void revert_adjust_start() | |
397 | { | |
398 | // this will be called only if adjust_start above returned true | |
399 | std::swap(this->get_first(), hold_); | |
400 | has_hold_ = true; | |
401 | } | |
402 | ||
403 | TokenValue const& get_value() const | |
404 | { | |
405 | if (!has_value_) { | |
406 | value_ = iterator_range<Iterator>(this->get_first(), end_); | |
407 | has_value_ = true; | |
408 | } | |
409 | return value_; | |
410 | } | |
411 | template <typename Value> | |
412 | void set_value(Value const& val) | |
413 | { | |
414 | value_ = val; | |
415 | has_value_ = true; | |
416 | } | |
417 | void set_end(Iterator const& it) | |
418 | { | |
419 | end_ = it; | |
420 | } | |
421 | bool has_value() const { return has_value_; } | |
422 | void reset_value() { has_value_ = false; } | |
423 | ||
424 | protected: | |
425 | semantic_actions_type const& actions_; | |
426 | Iterator hold_; // iterator needed to support lex::more() | |
427 | Iterator end_; // iterator pointing to end of matched token | |
428 | mutable TokenValue value_; // token value to use | |
429 | mutable bool has_value_; // 'true' if value_ is valid | |
430 | bool has_hold_; // 'true' if hold_ is valid | |
431 | ||
7c673cae | 432 | // silence MSVC warning C4512: assignment operator could not be generated |
92f5a8d4 | 433 | BOOST_DELETED_FUNCTION(static_data& operator= (static_data const&)) |
7c673cae FG |
434 | }; |
435 | ||
436 | /////////////////////////////////////////////////////////////////////// | |
437 | // does support lexer semantic actions, may support state, is used for | |
438 | // position_token exposing exactly one type | |
439 | template <typename Iterator, typename HasState, typename TokenValue> | |
440 | class static_data<Iterator, mpl::true_, HasState, boost::optional<TokenValue> > | |
441 | : public static_data<Iterator, mpl::false_, HasState, TokenValue> | |
442 | { | |
443 | public: | |
444 | typedef semantic_actions<Iterator, HasState, static_data> | |
445 | semantic_actions_type; | |
446 | ||
447 | protected: | |
448 | typedef static_data<Iterator, mpl::false_, HasState, TokenValue> | |
449 | base_type; | |
450 | typedef typename base_type::char_type char_type; | |
451 | typedef typename semantic_actions_type::functor_wrapper_type | |
452 | functor_wrapper_type; | |
453 | ||
454 | public: | |
455 | typedef Iterator base_iterator_type; | |
456 | typedef boost::optional<TokenValue> token_value_type; | |
457 | typedef boost::optional<TokenValue> const& get_value_type; | |
458 | typedef typename base_type::state_type state_type; | |
459 | typedef typename base_type::state_name_type state_name_type; | |
460 | ||
461 | typedef detail::wrap_action<functor_wrapper_type | |
462 | , Iterator, static_data, std::size_t> wrap_action_type; | |
463 | ||
464 | template <typename IterData> | |
465 | static_data (IterData const& data_, Iterator& first, Iterator const& last) | |
466 | : base_type(data_, first, last) | |
467 | , actions_(data_.actions_), hold_() | |
468 | , has_value_(false), has_hold_(false) | |
469 | { | |
470 | spirit::traits::assign_to(first, last, value_); | |
471 | has_value_ = true; | |
472 | } | |
473 | ||
474 | // invoke attached semantic actions, if defined | |
475 | BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state | |
476 | , std::size_t& id, std::size_t unique_id, Iterator& end) | |
477 | { | |
478 | return actions_.invoke_actions(state, id, unique_id, end, *this); | |
479 | } | |
480 | ||
481 | // The function less() is used by the implementation of the support | |
482 | // function lex::less(). Its functionality is equivalent to flex' | |
483 | // function yyless(): it returns an iterator positioned to the | |
484 | // nth input character beyond the current start iterator (i.e. by | |
485 | // assigning the return value to the placeholder '_end' it is | |
486 | // possible to return all but the first n characters of the current | |
487 | // token back to the input stream). | |
488 | Iterator const& less(Iterator& it, int n) | |
489 | { | |
490 | it = this->get_first(); | |
491 | std::advance(it, n); | |
492 | return it; | |
493 | } | |
494 | ||
495 | // The function more() is used by the implementation of the support | |
496 | // function lex::more(). Its functionality is equivalent to flex' | |
497 | // function yymore(): it tells the lexer that the next time it | |
498 | // matches a rule, the corresponding token should be appended onto | |
499 | // the current token value rather than replacing it. | |
500 | void more() | |
501 | { | |
502 | hold_ = this->get_first(); | |
503 | has_hold_ = true; | |
504 | } | |
505 | ||
506 | // The function lookahead() is used by the implementation of the | |
507 | // support function lex::lookahead. It can be used to implement | |
508 | // lookahead for lexer engines not supporting constructs like flex' | |
509 | // a/b (match a, but only when followed by b) | |
510 | bool lookahead(std::size_t id, std::size_t state = std::size_t(~0)) | |
511 | { | |
512 | Iterator end = end_; | |
513 | std::size_t unique_id = boost::lexer::npos; | |
514 | bool bol = this->bol_; | |
515 | ||
516 | if (std::size_t(~0) == state) | |
517 | state = this->state_; | |
518 | ||
519 | return id == this->next_token_( | |
520 | state, bol, end, this->get_eoi(), unique_id); | |
521 | } | |
522 | ||
523 | // The adjust_start() and revert_adjust_start() are helper | |
524 | // functions needed to implement the functionality required for | |
525 | // lex::more(). It is called from the functor body below. | |
526 | bool adjust_start() | |
527 | { | |
528 | if (!has_hold_) | |
529 | return false; | |
530 | ||
531 | std::swap(this->get_first(), hold_); | |
532 | has_hold_ = false; | |
533 | return true; | |
534 | } | |
535 | void revert_adjust_start() | |
536 | { | |
537 | // this will be called only if adjust_start above returned true | |
538 | std::swap(this->get_first(), hold_); | |
539 | has_hold_ = true; | |
540 | } | |
541 | ||
542 | TokenValue const& get_value() const | |
543 | { | |
544 | if (!has_value_) { | |
545 | spirit::traits::assign_to(this->get_first(), end_, value_); | |
546 | has_value_ = true; | |
547 | } | |
548 | return value_; | |
549 | } | |
550 | template <typename Value> | |
551 | void set_value(Value const& val) | |
552 | { | |
553 | value_ = val; | |
554 | has_value_ = true; | |
555 | } | |
556 | void set_end(Iterator const& it) | |
557 | { | |
558 | end_ = it; | |
559 | } | |
560 | bool has_value() const { return has_value_; } | |
561 | void reset_value() { has_value_ = false; } | |
562 | ||
563 | protected: | |
564 | semantic_actions_type const& actions_; | |
565 | Iterator hold_; // iterator needed to support lex::more() | |
566 | Iterator end_; // iterator pointing to end of matched token | |
567 | mutable token_value_type value_; // token value to use | |
568 | mutable bool has_value_; // 'true' if value_ is valid | |
569 | bool has_hold_; // 'true' if hold_ is valid | |
570 | ||
7c673cae | 571 | // silence MSVC warning C4512: assignment operator could not be generated |
92f5a8d4 | 572 | BOOST_DELETED_FUNCTION(static_data& operator= (static_data const&)) |
7c673cae FG |
573 | }; |
574 | } | |
575 | }}}} | |
576 | ||
577 | #endif |