]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // Copyright (c) 2001-2011 Hartmut Kaiser |
2 | // | |
3 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | |
4 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
5 | ||
6 | #if !defined(BOOST_SPIRIT_LEX_LEXER_FUNCTOR_NOV_18_2007_1112PM) | |
7 | #define BOOST_SPIRIT_LEX_LEXER_FUNCTOR_NOV_18_2007_1112PM | |
8 | ||
9 | #if defined(_MSC_VER) | |
10 | #pragma once | |
11 | #endif | |
12 | ||
13 | #include <boost/mpl/bool.hpp> | |
7c673cae FG |
14 | #include <boost/detail/workaround.hpp> |
15 | #include <boost/spirit/home/lex/lexer/pass_flags.hpp> | |
16 | #include <boost/assert.hpp> | |
92f5a8d4 | 17 | #include <iterator> // for std::iterator_traits |
7c673cae FG |
18 | |
19 | #if 0 != __COMO_VERSION__ || !BOOST_WORKAROUND(BOOST_MSVC, <= 1310) | |
20 | #define BOOST_SPIRIT_STATIC_EOF 1 | |
21 | #define BOOST_SPIRIT_EOF_PREFIX static | |
22 | #else | |
23 | #define BOOST_SPIRIT_EOF_PREFIX | |
24 | #endif | |
25 | ||
26 | namespace boost { namespace spirit { namespace lex { namespace lexertl | |
27 | { | |
28 | /////////////////////////////////////////////////////////////////////////// | |
29 | // | |
30 | // functor is a template usable as the functor object for the | |
31 | // multi_pass iterator, allowing a lexertl based dfa to be wrapped into an | |
32 | // iterator based interface. | |
33 | // | |
34 | // Token: the type of the tokens produced by this functor | |
35 | // this needs to expose a constructor with the following | |
36 | // prototype: | |
37 | // | |
38 | // Token(std::size_t id, std::size_t state, | |
39 | // Iterator start, Iterator end) | |
40 | // | |
41 | // where 'id' is the token id, 'state' is the lexer state | |
42 | // this token has been matched in, and 'start' and 'end' | |
43 | // mark the start and the end of the token with respect | |
44 | // to the underlying character stream. | |
45 | // FunctorData: | |
46 | // this is expected to encapsulate the shared part of the | |
47 | // functor (see lex/lexer/lexertl/functor_data.hpp for an | |
48 | // example and documentation). | |
49 | // Iterator: the type of the underlying iterator | |
50 | // SupportsActors: | |
51 | // this is expected to be an mpl::bool_; if mpl::true_, the | |
52 | // functor invokes functors which (optionally) have | |
53 | // been attached to the token definitions. | |
54 | // SupportsState: | |
55 | // this is expected to be an mpl::bool_; if mpl::true_, the | |
56 | // functor supports different lexer states, | |
57 | // otherwise no lexer state is supported. | |
58 | // | |
59 | /////////////////////////////////////////////////////////////////////////// | |
60 | template <typename Token | |
61 | , template <typename, typename, typename, typename> class FunctorData | |
62 | , typename Iterator = typename Token::iterator_type | |
63 | , typename SupportsActors = mpl::false_ | |
64 | , typename SupportsState = typename Token::has_state> | |
65 | class functor | |
66 | { | |
67 | public: | |
68 | typedef typename | |
92f5a8d4 | 69 | std::iterator_traits<Iterator>::value_type |
7c673cae FG |
70 | char_type; |
71 | ||
72 | private: | |
73 | // Needed by compilers that do not implement the resolution of DR45. For | |
74 | // reference, see | |
75 | // http://www.open-std.org/JTC1/SC22/WG21/docs/cwg_defects.html#45. | |
76 | typedef typename Token::token_value_type token_value_type; | |
77 | friend class FunctorData<Iterator, SupportsActors, SupportsState | |
78 | , token_value_type>; | |
79 | ||
80 | // Helper template assigning src to dst when it is destroyed (i.e. on scope exit); src is held by reference, so the value it has at that point is the one assigned | |
81 | template <typename T> | |
82 | struct assign_on_exit | |
83 | { | |
84 | assign_on_exit(T& dst, T const& src) | |
85 | : dst_(dst), src_(src) {} | |
86 | ||
87 | ~assign_on_exit() | |
88 | { | |
89 | dst_ = src_; | |
90 | } | |
91 | ||
92 | T& dst_; | |
93 | T const& src_; | |
94 | ||
7c673cae | 95 | // not assignable (reference members); the declaration below silences MSVC warning C4512: assignment operator could not be generated |
92f5a8d4 | 96 | BOOST_DELETED_FUNCTION(assign_on_exit& operator= (assign_on_exit const&)) |
7c673cae FG |
97 | }; |
98 | ||
99 | public: | |
b32b8144 | 100 | functor() {} |
7c673cae FG |
101 | |
102 | #if BOOST_WORKAROUND(BOOST_MSVC, <= 1310) | |
103 | // somehow VC7.1 needs this (meaningless) assignment operator; it copies nothing and only returns *this | |
104 | functor& operator=(functor const& rhs) | |
105 | { | |
106 | return *this; | |
107 | } | |
108 | #endif | |
109 | ||
110 | /////////////////////////////////////////////////////////////////////// | |
111 | // interface to the iterator_policies::split_functor_input policy | |
112 | typedef Token result_type; | |
113 | typedef functor unique; | |
114 | typedef FunctorData<Iterator, SupportsActors, SupportsState | |
115 | , token_value_type> shared; | |
116 | ||
117 | BOOST_SPIRIT_EOF_PREFIX result_type const eof; | |
118 | ||
119 | /////////////////////////////////////////////////////////////////////// | |
120 | typedef Iterator iterator_type; | |
121 | typedef typename shared::semantic_actions_type semantic_actions_type; | |
122 | typedef typename shared::next_token_functor next_token_functor; | |
123 | typedef typename shared::get_state_name_type get_state_name_type; | |
124 | ||
125 | // this is needed to wrap the semantic actions in a proper way | |
126 | typedef typename shared::wrap_action_type wrap_action_type; | |
127 | ||
128 | // get_next: matches the next token from the shared lexer data, stores it in 'result' and returns 'result'; yields eof at the end of the input and result_type(0) if nothing matched or a semantic action forced a failure without changing the state | |
129 | template <typename MultiPass> | |
130 | static result_type& get_next(MultiPass& mp, result_type& result) | |
131 | { | |
132 | typedef typename result_type::id_type id_type; | |
133 | ||
134 | shared& data = mp.shared()->ftor; | |
135 | for(;;) | |
136 | { | |
137 | if (data.get_first() == data.get_last()) | |
138 | #if defined(BOOST_SPIRIT_STATIC_EOF) | |
139 | return result = eof; | |
140 | #else | |
141 | return result = mp.ftor.eof; | |
142 | #endif | |
143 | ||
144 | data.reset_value(); | |
145 | Iterator end = data.get_first(); | |
146 | std::size_t unique_id = boost::lexer::npos; | |
147 | bool prev_bol = false; | |
148 | ||
149 | // remember the current state first, as the lexer matching might change it | |
150 | std::size_t state = data.get_state(); | |
151 | std::size_t id = data.next(end, unique_id, prev_bol); | |
152 | ||
153 | if (boost::lexer::npos == id) { // no match | |
154 | #if defined(BOOST_SPIRIT_LEXERTL_DEBUG) | |
155 | std::string next; | |
156 | Iterator it = data.get_first(); | |
157 | for (std::size_t i = 0; i < 10 && it != data.get_last(); ++it, ++i) | |
158 | next += *it; | |
159 | ||
160 | std::cerr << "Not matched, in state: " << state | |
161 | << ", lookahead: >" << next << "<" << std::endl; | |
162 | #endif | |
163 | return result = result_type(0); | |
164 | } | |
165 | else if (0 == id) { // EOF reached | |
166 | #if defined(BOOST_SPIRIT_STATIC_EOF) | |
167 | return result = eof; | |
168 | #else | |
169 | return result = mp.ftor.eof; | |
170 | #endif | |
171 | } | |
172 | ||
173 | #if defined(BOOST_SPIRIT_LEXERTL_DEBUG) | |
174 | { | |
175 | std::string next; | |
176 | Iterator it = end; | |
177 | for (std::size_t i = 0; i < 10 && it != data.get_last(); ++it, ++i) | |
178 | next += *it; | |
179 | ||
180 | std::cerr << "Matched: " << id << ", in state: " | |
181 | << state << ", string: >" | |
182 | << std::basic_string<char_type>(data.get_first(), end) << "<" | |
183 | << ", lookahead: >" << next << "<" << std::endl; | |
184 | if (data.get_state() != state) { | |
185 | std::cerr << "Switched to state: " | |
186 | << data.get_state() << std::endl; | |
187 | } | |
188 | } | |
189 | #endif | |
190 | // account for a possibly pending lex::more(), i.e. moving | |
191 | // data.first_ back to the start of the previously matched token. | |
192 | bool adjusted = data.adjust_start(); | |
193 | ||
194 | // set the end of the matched input sequence in the token data | |
195 | data.set_end(end); | |
196 | ||
197 | // invoke the attached semantic actions, if defined; these might change | |
198 | // state, id, data.first_, and/or end | |
199 | BOOST_SCOPED_ENUM(pass_flags) pass = | |
200 | data.invoke_actions(state, id, unique_id, end); | |
201 | ||
202 | if (data.has_value()) { | |
203 | // return the matched token, using the token value set earlier | |
204 | // via data.set_value(), and advance 'data.first_' past the | |
205 | // matched sequence (done by on_exit when this scope is left) | |
206 | assign_on_exit<Iterator> on_exit(data.get_first(), end); | |
207 | return result = result_type(id_type(id), state, data.get_value()); | |
208 | } | |
209 | else if (pass_flags::pass_normal == pass) { | |
210 | // return the matched token, advancing 'data.first_' past the | |
211 | // matched sequence (done by on_exit when this scope is left) | |
212 | assign_on_exit<Iterator> on_exit(data.get_first(), end); | |
213 | return result = result_type(id_type(id), state, data.get_first(), end); | |
214 | } | |
215 | else if (pass_flags::pass_fail == pass) { | |
216 | #if defined(BOOST_SPIRIT_LEXERTL_DEBUG) | |
217 | std::cerr << "Matching forced to fail" << std::endl; | |
218 | #endif | |
219 | // if data.first_ got adjusted above, revert this adjustment | |
220 | if (adjusted) | |
221 | data.revert_adjust_start(); | |
222 | ||
223 | // one of the semantic actions signaled no-match | |
224 | data.reset_bol(prev_bol); | |
225 | if (state != data.get_state()) | |
226 | continue; // retry matching if state has changed | |
227 | ||
228 | // if the state is unchanged, repeating the match wouldn't | |
229 | // move the input forward, causing an infinite loop | |
230 | return result = result_type(0); | |
231 | } | |
232 | ||
233 | #if defined(BOOST_SPIRIT_LEXERTL_DEBUG) | |
234 | std::cerr << "Token ignored, continuing matching" << std::endl; | |
235 | #endif | |
236 | // if this token needs to be ignored, just repeat the matching, | |
237 | // starting right after the current match | |
238 | data.get_first() = end; | |
239 | } | |
240 | } | |
241 | ||
242 | // set_state is propagated up to the iterator interface, allowing the | |
243 | // current lexer state to be manipulated through any of the exposed | |
244 | // iterators; it returns the state that was active before the call. | |
245 | template <typename MultiPass> | |
246 | static std::size_t set_state(MultiPass& mp, std::size_t state) | |
247 | { | |
248 | std::size_t oldstate = mp.shared()->ftor.get_state(); | |
249 | mp.shared()->ftor.set_state(state); | |
250 | ||
251 | #if defined(BOOST_SPIRIT_LEXERTL_DEBUG) | |
252 | std::cerr << "Switching state from: " << oldstate | |
253 | << " to: " << state | |
254 | << std::endl; | |
255 | #endif | |
256 | return oldstate; | |
257 | } | |
258 | ||
259 | template <typename MultiPass> | |
260 | static std::size_t get_state(MultiPass& mp) | |
261 | { | |
262 | return mp.shared()->ftor.get_state(); | |
263 | } | |
264 | ||
265 | template <typename MultiPass> | |
266 | static std::size_t | |
267 | map_state(MultiPass const& mp, char_type const* statename) | |
268 | { | |
269 | return mp.shared()->ftor.get_state_id(statename); | |
270 | } | |
271 | ||
272 | // destroy() is not needed by this functor, but it must be there; it does nothing | |
273 | template <typename MultiPass> | |
274 | static void destroy(MultiPass const&) {} | |
275 | }; | |
276 | ||
277 | #if defined(BOOST_SPIRIT_STATIC_EOF) | |
278 | /////////////////////////////////////////////////////////////////////////// | |
279 | // eof token: out-of-class definition of the static functor::eof member, initialized with a default constructed result_type; only compiled if BOOST_SPIRIT_STATIC_EOF is defined | |
280 | /////////////////////////////////////////////////////////////////////////// | |
281 | template <typename Token | |
282 | , template <typename, typename, typename, typename> class FunctorData | |
283 | , typename Iterator, typename SupportsActors, typename SupportsState> | |
284 | typename functor<Token, FunctorData, Iterator, SupportsActors, SupportsState>::result_type const | |
285 | functor<Token, FunctorData, Iterator, SupportsActors, SupportsState>::eof = | |
286 | typename functor<Token, FunctorData, Iterator, SupportsActors | |
287 | , SupportsState>::result_type(); | |
288 | #endif | |
289 | ||
290 | }}}} | |
291 | ||
292 | #undef BOOST_SPIRIT_EOF_PREFIX | |
293 | #undef BOOST_SPIRIT_STATIC_EOF | |
294 | ||
295 | #endif |