// Copyright (c) 2001-2011 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_17_2007_0139PM)
#define BOOST_SPIRIT_LEX_LEXER_MAR_17_2007_0139PM

#if defined(_MSC_VER)
#pragma once
#endif

#include <iosfwd>

#include <boost/spirit/home/support/detail/lexer/generator.hpp>
#include <boost/spirit/home/support/detail/lexer/rules.hpp>
#include <boost/spirit/home/support/detail/lexer/consts.hpp>
#include <boost/spirit/home/support/unused.hpp>

#include <boost/spirit/home/lex/lexer/lexertl/token.hpp>
#include <boost/spirit/home/lex/lexer/lexertl/functor.hpp>
#include <boost/spirit/home/lex/lexer/lexertl/functor_data.hpp>
#include <boost/spirit/home/lex/lexer/lexertl/iterator.hpp>
#if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
#include <boost/spirit/home/support/detail/lexer/debug.hpp>
#endif

#include <boost/foreach.hpp>

#include <iterator> // for std::iterator_traits
namespace boost { namespace spirit { namespace lex { namespace lexertl
{
    ///////////////////////////////////////////////////////////////////////////
    namespace detail
    {
        ///////////////////////////////////////////////////////////////////////
        // The must_escape function checks if the given character value needs
        // to be preceded by a backslash character to disable its special
        // meaning in the context of a regular expression.
        ///////////////////////////////////////////////////////////////////////
        template <typename Char>
        inline bool must_escape(Char c)
        {
            // FIXME: more needed?
            switch (c) {
            case '+': case '/': case '*': case '?':
            case '|':
            case '(': case ')':
            case '[': case ']':
            case '{': case '}':
            case '.':
            case '^': case '$':
            case '\\':
            case '"':
                return true;

            default:
                break;
            }
            return false;
        }

        ///////////////////////////////////////////////////////////////////////
        // The escape function returns the string representation of the given
        // character value, possibly escaped with a backslash character, so
        // that it can be used safely in a regular expression definition.
        ///////////////////////////////////////////////////////////////////////
        template <typename Char>
        inline std::basic_string<Char> escape(Char ch)
        {
            std::basic_string<Char> result(1, ch);
            if (detail::must_escape(ch))
            {
                typedef typename std::basic_string<Char>::size_type size_type;
                result.insert((size_type)0, 1, '\\');
            }
            return result;
        }

        ///////////////////////////////////////////////////////////////////////
        // The map_flags function maps the Spirit.Lex match_flags passed to
        // the lexer to the corresponding lexertl regex_flags.
        ///////////////////////////////////////////////////////////////////////
        inline boost::lexer::regex_flags map_flags(unsigned int flags)
        {
            unsigned int retval = boost::lexer::none;
            if (flags & match_flags::match_not_dot_newline)
                retval |= boost::lexer::dot_not_newline;
            if (flags & match_flags::match_icase)
                retval |= boost::lexer::icase;

            return boost::lexer::regex_flags(retval);
        }
    }

    ///////////////////////////////////////////////////////////////////////////
    template <typename Lexer, typename F>
    bool generate_static(Lexer const&
      , std::basic_ostream<typename Lexer::char_type>&
      , typename Lexer::char_type const*, F);

    ///////////////////////////////////////////////////////////////////////////
    //
    //  Every lexer type to be used as a lexer for Spirit has to conform to
    //  the following public interface:
    //
    //    typedefs:
    //        iterator_type   The type of the iterator exposed by this lexer.
    //        token_type      The type of the tokens returned from the exposed
    //                        iterators.
    //
    //    functions:
    //        default constructor
    //                        Since lexers are instantiated as base classes
    //                        only, it might be a good idea to make this
    //                        constructor protected.
    //        begin, end      Return a pair of iterators which, when
    //                        dereferenced, return the sequence of tokens
    //                        recognized in the input stream given as the
    //                        parameters to the begin() function.
    //        add_token       Should add the definition of a token to be
    //                        recognized by this lexer.
    //        clear           Should delete all current token definitions
    //                        associated with the given state of this lexer
    //                        object.
    //
    //    template parameters:
    //        Iterator        The type of the iterator used to access the
    //                        underlying character stream.
    //        Token           The type of the tokens to be returned from the
    //                        exposed token iterator.
    //        Functor         The type of the InputPolicy to use to instantiate
    //                        the multi_pass iterator type to be used as the
    //                        token iterator (returned from begin()/end()).
    //
    ///////////////////////////////////////////////////////////////////////////

    ///////////////////////////////////////////////////////////////////////////
    //
    //  The lexer class is an implementation of a Spirit.Lex lexer on
    //  top of Ben Hanson's lexertl library as outlined above (for more
    //  information about lexertl see http://www.benhanson.net/lexertl.html).
    //
    //  This class is supposed to be used as the first and only template
    //  parameter while instantiating instances of a lex::lexer class.
    //
    ///////////////////////////////////////////////////////////////////////////
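    ///////////////////////////////////////////////////////////////////////////
    //
    //  A minimal usage sketch (illustrative only; 'example_tokens' and the
    //  numeric token ids are made up for this example and are not part of
    //  this header):
    //
    //      typedef lexertl::token<char const*> token_type;
    //      typedef lexertl::lexer<token_type> lexer_type;
    //
    //      template <typename Lexer>
    //      struct example_tokens : lex::lexer<Lexer>
    //      {
    //          example_tokens()
    //          {
    //              // register two token definitions with hypothetical ids
    //              this->self.add
    //                  ("[a-zA-Z_][a-zA-Z0-9_]*", 1)
    //                  ("[0-9]+", 2);
    //          }
    //      };
    //
    //      example_tokens<lexer_type> tokens;
    //
    ///////////////////////////////////////////////////////////////////////////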
    template <typename Token = token<>
      , typename Iterator = typename Token::iterator_type
      , typename Functor = functor<Token, lexertl::detail::data, Iterator> >
    class lexer
    {
    private:
        // safe-bool idiom used by operator safe_bool() below
        struct dummy { void true_() {} };
        typedef void (dummy::*safe_bool)();

        // special state id addressing all lexer states at once
        static std::size_t const all_states_id = static_cast<std::size_t>(-2);

    public:
        // returns whether the DFA has been initialized
        operator safe_bool() const
            { return initialized_dfa_ ? &dummy::true_ : 0; }

        typedef typename std::iterator_traits<Iterator>::value_type char_type;
        typedef std::basic_string<char_type> string_type;

        typedef boost::lexer::basic_rules<char_type> basic_rules_type;

        //  Every lexer type to be used as a lexer for Spirit has to conform to
        //  a public interface.
        typedef Token token_type;
        typedef typename Token::id_type id_type;
        typedef iterator<Functor> iterator_type;

    private:
        // this type is purely used for the iterator_type construction below
        struct iterator_data_type
        {
            typedef typename Functor::semantic_actions_type semantic_actions_type;

            iterator_data_type(
                    boost::lexer::basic_state_machine<char_type> const& sm
                  , boost::lexer::basic_rules<char_type> const& rules
                  , semantic_actions_type const& actions)
              : state_machine_(sm), rules_(rules), actions_(actions)
            {}

            boost::lexer::basic_state_machine<char_type> const& state_machine_;
            boost::lexer::basic_rules<char_type> const& rules_;
            semantic_actions_type const& actions_;

            // silence MSVC warning C4512: assignment operator could not be generated
            BOOST_DELETED_FUNCTION(iterator_data_type& operator= (iterator_data_type const&))
        };

    public:
        //  Return the start iterator usable for iterating over the generated
        //  tokens.
        iterator_type begin(Iterator& first, Iterator const& last
          , char_type const* initial_state = 0) const
        {
            if (!init_dfa())    // never minimize DFA for dynamic lexers
                return iterator_type();

            iterator_data_type iterator_data(state_machine_, rules_, actions_);
            return iterator_type(iterator_data, first, last, initial_state);
        }

        //  Return the end iterator usable to stop iterating over the generated
        //  tokens.
        iterator_type end() const
        {
            return iterator_type();
        }
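
        //  Illustrative only: the token iterators returned from begin()/end()
        //  are typically consumed as in the Spirit.Lex examples; 'lex',
        //  'first' and 'last' below stand for a fully defined lexer object
        //  and the input range:
        //
        //      iterator_type iter = lex.begin(first, last);
        //      iterator_type end = lex.end();
        //      while (iter != end && token_is_valid(*iter)) {
        //          std::cout << (*iter).id() << std::endl;
        //          ++iter;
        //      }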

    protected:
        //  Lexer instances can be created by means of a derived class only.
        lexer(unsigned int flags)
          : flags_(detail::map_flags(flags))
          , rules_(flags_)
          , initialized_dfa_(false)
        {}

    public:
        // interface for token definition management
        std::size_t add_token(char_type const* state, char_type tokendef,
            std::size_t token_id, char_type const* targetstate)
        {
            add_state(state);
            initialized_dfa_ = false;
            if (state == all_states())
                return rules_.add(state, detail::escape(tokendef), token_id, rules_.dot());

            if (0 == targetstate)
                targetstate = state;
            else
                add_state(targetstate);
            return rules_.add(state, detail::escape(tokendef), token_id, targetstate);
        }
        std::size_t add_token(char_type const* state, string_type const& tokendef,
            std::size_t token_id, char_type const* targetstate)
        {
            add_state(state);
            initialized_dfa_ = false;
            if (state == all_states())
                return rules_.add(state, tokendef, token_id, rules_.dot());

            if (0 == targetstate)
                targetstate = state;
            else
                add_state(targetstate);
            return rules_.add(state, tokendef, token_id, targetstate);
        }
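
        //  Note: these add_token overloads are normally not called directly;
        //  the lex::lexer<> front end invokes them when token definitions are
        //  associated with the lexer, e.g. (token id made up for
        //  illustration):
        //
        //      this->self.add("[0-9]+", ID_INTEGER);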

        // interface for pattern definition management
        void add_pattern (char_type const* state, string_type const& name,
            string_type const& patterndef)
        {
            add_state(state);
            rules_.add_macro(name.c_str(), patterndef);
            initialized_dfa_ = false;
        }
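
        //  Illustrative only: patterns ("macros") are usually registered
        //  through the lex::lexer<> front end and then referenced from token
        //  definitions by name, for instance:
        //
        //      this->self.add_pattern("WORD", "[a-zA-Z]+");
        //      lex::token_def<> word("{WORD}");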

        boost::lexer::rules const& get_rules() const { return rules_; }

        void clear(char_type const* state)
        {
            std::size_t s = rules_.state(state);
            if (boost::lexer::npos != s)
                rules_.clear(state);
            initialized_dfa_ = false;
        }
        std::size_t add_state(char_type const* state)
        {
            if (state == all_states())
                return all_states_id;

            std::size_t stateid = rules_.state(state);
            if (boost::lexer::npos == stateid) {
                stateid = rules_.add_state(state);
                initialized_dfa_ = false;
            }
            return stateid;
        }
        string_type initial_state() const
        {
            return string_type(rules_.initial());
        }
        string_type all_states() const
        {
            return string_type(rules_.all_states());
        }

        //  Register a semantic action with the given id
        template <typename F>
        void add_action(std::size_t unique_id, std::size_t state, F act)
        {
            // If you get an error here stating that add_action is not a member
            // of fusion::unused_type, then you probably have semantic actions
            // attached to at least one token in the lexer definition without
            // using lex::lexertl::actor_lexer<> as its base class.
            typedef typename Functor::wrap_action_type wrapper_type;
            if (state == all_states_id) {
                // add the action to all known states
                typedef typename
                    basic_rules_type::string_size_t_map::value_type
                state_type;

                std::size_t states = rules_.statemap().size();
                BOOST_FOREACH(state_type const& s, rules_.statemap()) {
                    for (std::size_t j = 0; j < states; ++j)
                        actions_.add_action(unique_id + j, s.second, wrapper_type::call(act));
                }
            }
            else {
                actions_.add_action(unique_id, state, wrapper_type::call(act));
            }
        }
//         template <typename F>
//         void add_action(std::size_t unique_id, char_type const* state, F act)
//         {
//             typedef typename Functor::wrap_action_type wrapper_type;
//             actions_.add_action(unique_id, add_state(state), wrapper_type::call(act));
//         }

        // We do not minimize the state machine by default anymore because
        // Ben said: "If you can afford to generate a lexer at runtime, there
        // is little point in calling minimise."
        // Go figure.
        bool init_dfa(bool minimize = false) const
        {
            if (!initialized_dfa_) {
                state_machine_.clear();
                typedef boost::lexer::basic_generator<char_type> generator;
                generator::build (rules_, state_machine_);
                if (minimize)
                    generator::minimise (state_machine_);

#if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
                boost::lexer::debug::dump(state_machine_, std::cerr);
#endif
                initialized_dfa_ = true;

//                 // release memory held by rules description
//                 basic_rules_type rules;
//                 rules.init_state_info(rules_);    // preserve states
//                 std::swap(rules, rules_);
            }
            return true;
        }

    private:
        // lexertl specific data
        mutable boost::lexer::basic_state_machine<char_type> state_machine_;
        boost::lexer::regex_flags flags_;
        /*mutable*/ basic_rules_type rules_;

        typename Functor::semantic_actions_type actions_;
        mutable bool initialized_dfa_;

        // generator functions must be able to access members directly
        template <typename Lexer, typename F>
        friend bool generate_static(Lexer const&
          , std::basic_ostream<typename Lexer::char_type>&
          , typename Lexer::char_type const*, F);
    };

    ///////////////////////////////////////////////////////////////////////////
    //
    //  The actor_lexer class is another implementation of a Spirit.Lex
    //  lexer on top of Ben Hanson's lexertl library as outlined above (for
    //  more information about lexertl see
    //  http://www.benhanson.net/lexertl.html).
    //
    //  The only difference from the lexer class above is that token_def
    //  definitions may have semantic (lexer) actions attached while being
    //  defined:
    //
    //      int w;
    //      token_def word = "[^ \t\n]+";
    //      self = word[++ref(w)];        // see example: word_count_lexer
    //
    //  This class is supposed to be used as the first and only template
    //  parameter while instantiating instances of a lex::lexer class.
    //
    ///////////////////////////////////////////////////////////////////////////
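    ///////////////////////////////////////////////////////////////////////////
    //
    //  A somewhat fuller sketch along the lines of the word_count_lexer
    //  example (illustrative only; 'word_count_tokens' and its members are
    //  made up here and not part of this header):
    //
    //      template <typename Lexer>
    //      struct word_count_tokens : lex::lexer<Lexer>
    //      {
    //          word_count_tokens()
    //            : w(0), word("[^ \t\n]+"), eol("\n"), any(".")
    //          {
    //              using boost::phoenix::ref;
    //              this->self = word[++ref(w)] | eol | any;
    //          }
    //          std::size_t w;
    //          lex::token_def<> word, eol, any;
    //      };
    //
    //  To actually invoke the semantic action attached to 'word' the lexer
    //  has to be instantiated with actor_lexer as its base, for instance as
    //  word_count_tokens<lexertl::actor_lexer<> >.
    //
    ///////////////////////////////////////////////////////////////////////////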
    template <typename Token = token<>
      , typename Iterator = typename Token::iterator_type
      , typename Functor = functor<Token, lexertl::detail::data, Iterator, mpl::true_> >
    class actor_lexer : public lexer<Token, Iterator, Functor>
    {
    protected:
        //  Lexer instances can be created by means of a derived class only.
        actor_lexer(unsigned int flags)
          : lexer<Token, Iterator, Functor>(flags) {}
    };

}}}}

#endif