]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // Copyright (c) 2001-2011 Hartmut Kaiser |
2 | // | |
3 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | |
4 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
5 | ||
6 | #if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM) | |
7 | #define BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM | |
8 | ||
9 | #if defined(_MSC_VER) | |
10 | #pragma once | |
11 | #endif | |
12 | ||
13 | #include <boost/spirit/home/support/info.hpp> | |
14 | #include <boost/spirit/home/qi/skip_over.hpp> | |
15 | #include <boost/spirit/home/qi/parser.hpp> | |
16 | #include <boost/spirit/home/qi/detail/assign_to.hpp> | |
17 | #include <boost/spirit/home/lex/reference.hpp> | |
18 | #include <boost/spirit/home/lex/meta_compiler.hpp> | |
19 | #include <boost/spirit/home/lex/lexer_type.hpp> | |
20 | #include <boost/spirit/home/lex/lexer/token_def.hpp> | |
21 | #include <boost/assert.hpp> | |
22 | #include <boost/noncopyable.hpp> | |
7c673cae FG |
23 | #include <boost/fusion/include/vector.hpp> |
24 | #include <boost/mpl/assert.hpp> | |
f67539c2 TL |
25 | #include <boost/proto/extends.hpp> |
26 | #include <boost/proto/traits.hpp> | |
20effc67 | 27 | #include <boost/range/iterator_range_core.hpp> |
92f5a8d4 | 28 | #include <iterator> // for std::iterator_traits |
7c673cae FG |
29 | #include <string> |
30 | ||
31 | namespace boost { namespace spirit { namespace lex | |
32 | { | |
33 | /////////////////////////////////////////////////////////////////////////// | |
34 | namespace detail | |
35 | { | |
36 | /////////////////////////////////////////////////////////////////////// | |
37 | template <typename LexerDef> | |
38 | struct lexer_def_ | |
39 | : proto::extends< | |
40 | typename proto::terminal< | |
41 | lex::reference<lexer_def_<LexerDef> const> | |
42 | >::type | |
43 | , lexer_def_<LexerDef> > | |
44 | , qi::parser<lexer_def_<LexerDef> > | |
45 | , lex::lexer_type<lexer_def_<LexerDef> > | |
46 | { | |
47 | private: | |
48 | // avoid warnings about using 'this' in constructor | |
49 | lexer_def_& this_() { return *this; } | |
50 | ||
51 | typedef typename LexerDef::char_type char_type; | |
52 | typedef typename LexerDef::string_type string_type; | |
53 | typedef typename LexerDef::id_type id_type; | |
54 | ||
55 | typedef lex::reference<lexer_def_ const> reference_; | |
56 | typedef typename proto::terminal<reference_>::type terminal_type; | |
57 | typedef proto::extends<terminal_type, lexer_def_> proto_base_type; | |
58 | ||
59 | reference_ alias() const | |
60 | { | |
61 | return reference_(*this); | |
62 | } | |
63 | ||
64 | public: | |
65 | // Qi interface: metafunction calculating parser attribute type | |
66 | template <typename Context, typename Iterator> | |
67 | struct attribute | |
68 | { | |
69 | // the return value of a token set contains the matched token | |
70 | // id, and the corresponding pair of iterators | |
71 | typedef typename Iterator::base_iterator_type iterator_type; | |
72 | typedef | |
73 | fusion::vector2<id_type, iterator_range<iterator_type> > | |
74 | type; | |
75 | }; | |
76 | ||
77 | // Qi interface: parse functionality | |
78 | template <typename Iterator, typename Context | |
79 | , typename Skipper, typename Attribute> | |
80 | bool parse(Iterator& first, Iterator const& last | |
81 | , Context& /*context*/, Skipper const& skipper | |
82 | , Attribute& attr) const | |
83 | { | |
84 | qi::skip_over(first, last, skipper); // always do a pre-skip | |
85 | ||
86 | if (first != last) { | |
87 | typedef typename | |
92f5a8d4 | 88 | std::iterator_traits<Iterator>::value_type |
7c673cae FG |
89 | token_type; |
90 | ||
91 | token_type const& t = *first; | |
92 | if (token_is_valid(t) && t.state() == first.get_state()) { | |
93 | // any of the token definitions matched | |
94 | spirit::traits::assign_to(t, attr); | |
95 | ++first; | |
96 | return true; | |
97 | } | |
98 | } | |
99 | return false; | |
100 | } | |
101 | ||
102 | // Qi interface: 'what' functionality | |
103 | template <typename Context> | |
104 | info what(Context& /*context*/) const | |
105 | { | |
106 | return info("lexer"); | |
107 | } | |
108 | ||
109 | private: | |
110 | // allow to use the lexer.self.add("regex1", id1)("regex2", id2); | |
111 | // syntax | |
112 | struct adder | |
113 | { | |
114 | adder(lexer_def_& def_) | |
115 | : def(def_) {} | |
116 | ||
117 | // Add a token definition based on a single character as given | |
118 | // by the first parameter, the second parameter allows to | |
119 | // specify the token id to use for the new token. If no token | |
120 | // id is given the character code is used. | |
121 | adder const& operator()(char_type c | |
122 | , id_type token_id = id_type()) const | |
123 | { | |
124 | if (id_type() == token_id) | |
125 | token_id = static_cast<id_type>(c); | |
126 | def.def.add_token (def.state.c_str(), c, token_id | |
127 | , def.targetstate.empty() ? 0 : def.targetstate.c_str()); | |
128 | return *this; | |
129 | } | |
130 | ||
131 | // Add a token definition based on a character sequence as | |
132 | // given by the first parameter, the second parameter allows to | |
133 | // specify the token id to use for the new token. If no token | |
134 | // id is given this function will generate a unique id to be | |
135 | // used as the token's id. | |
136 | adder const& operator()(string_type const& s | |
137 | , id_type token_id = id_type()) const | |
138 | { | |
139 | if (id_type() == token_id) | |
140 | token_id = def.def.get_next_id(); | |
141 | def.def.add_token (def.state.c_str(), s, token_id | |
142 | , def.targetstate.empty() ? 0 : def.targetstate.c_str()); | |
143 | return *this; | |
144 | } | |
145 | ||
146 | template <typename Attribute> | |
147 | adder const& operator()( | |
148 | token_def<Attribute, char_type, id_type>& tokdef | |
149 | , id_type token_id = id_type()) const | |
150 | { | |
151 | // make sure we have a token id | |
152 | if (id_type() == token_id) { | |
153 | if (id_type() == tokdef.id()) { | |
154 | token_id = def.def.get_next_id(); | |
155 | tokdef.id(token_id); | |
156 | } | |
157 | else { | |
158 | token_id = tokdef.id(); | |
159 | } | |
160 | } | |
161 | else { | |
162 | // the following assertion makes sure that the token_def | |
163 | // instance has not been assigned a different id earlier | |
164 | BOOST_ASSERT(id_type() == tokdef.id() | |
165 | || token_id == tokdef.id()); | |
166 | tokdef.id(token_id); | |
167 | } | |
168 | ||
169 | def.define(tokdef); | |
170 | return *this; | |
171 | } | |
172 | ||
173 | // template <typename F> | |
174 | // adder const& operator()(char_type c, id_type token_id, F act) const | |
175 | // { | |
176 | // if (id_type() == token_id) | |
177 | // token_id = def.def.get_next_id(); | |
178 | // std::size_t unique_id = | |
179 | // def.def.add_token (def.state.c_str(), s, token_id); | |
180 | // def.def.add_action(unique_id, def.state.c_str(), act); | |
181 | // return *this; | |
182 | // } | |
183 | ||
184 | lexer_def_& def; | |
185 | ||
7c673cae | 186 | // silence MSVC warning C4512: assignment operator could not be generated |
92f5a8d4 | 187 | BOOST_DELETED_FUNCTION(adder& operator= (adder const&)) |
7c673cae FG |
188 | }; |
189 | friend struct adder; | |
190 | ||
191 | // allow to use lexer.self.add_pattern("pattern1", "regex1")(...); | |
192 | // syntax | |
193 | struct pattern_adder | |
194 | { | |
195 | pattern_adder(lexer_def_& def_) | |
196 | : def(def_) {} | |
197 | ||
198 | pattern_adder const& operator()(string_type const& p | |
199 | , string_type const& s) const | |
200 | { | |
201 | def.def.add_pattern (def.state.c_str(), p, s); | |
202 | return *this; | |
203 | } | |
204 | ||
205 | lexer_def_& def; | |
206 | ||
7c673cae | 207 | // silence MSVC warning C4512: assignment operator could not be generated |
92f5a8d4 | 208 | BOOST_DELETED_FUNCTION(pattern_adder& operator= (pattern_adder const&)) |
7c673cae FG |
209 | }; |
210 | friend struct pattern_adder; | |
211 | ||
212 | private: | |
213 | // Helper function to invoke the necessary 2 step compilation | |
214 | // process on token definition expressions | |
215 | template <typename TokenExpr> | |
216 | void compile2pass(TokenExpr const& expr) | |
217 | { | |
218 | expr.collect(def, state, targetstate); | |
219 | expr.add_actions(def); | |
220 | } | |
221 | ||
222 | public: | |
223 | /////////////////////////////////////////////////////////////////// | |
224 | template <typename Expr> | |
225 | void define(Expr const& expr) | |
226 | { | |
227 | compile2pass(compile<lex::domain>(expr)); | |
228 | } | |
229 | ||
230 | lexer_def_(LexerDef& def_, string_type const& state_ | |
231 | , string_type const& targetstate_ = string_type()) | |
232 | : proto_base_type(terminal_type::make(alias())) | |
233 | , add(this_()), add_pattern(this_()), def(def_) | |
234 | , state(state_), targetstate(targetstate_) | |
235 | {} | |
236 | ||
237 | // allow to switch states | |
238 | lexer_def_ operator()(char_type const* state) const | |
239 | { | |
240 | return lexer_def_(def, state); | |
241 | } | |
242 | lexer_def_ operator()(char_type const* state | |
243 | , char_type const* targetstate) const | |
244 | { | |
245 | return lexer_def_(def, state, targetstate); | |
246 | } | |
247 | lexer_def_ operator()(string_type const& state | |
248 | , string_type const& targetstate = string_type()) const | |
249 | { | |
250 | return lexer_def_(def, state, targetstate); | |
251 | } | |
252 | ||
253 | // allow to assign a token definition expression | |
254 | template <typename Expr> | |
255 | lexer_def_& operator= (Expr const& xpr) | |
256 | { | |
257 | // Report invalid expression error as early as possible. | |
258 | // If you got an error_invalid_expression error message here, | |
259 | // then the expression (expr) is not a valid spirit lex | |
260 | // expression. | |
261 | BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); | |
262 | ||
263 | def.clear(state.c_str()); | |
264 | define(xpr); | |
265 | return *this; | |
266 | } | |
267 | ||
268 | // explicitly tell the lexer that the given state will be defined | |
269 | // (useful in conjunction with "*") | |
270 | std::size_t add_state(char_type const* state = 0) | |
271 | { | |
272 | return def.add_state(state ? state : def.initial_state().c_str()); | |
273 | } | |
274 | ||
275 | adder add; | |
276 | pattern_adder add_pattern; | |
277 | ||
278 | private: | |
279 | LexerDef& def; | |
280 | string_type state; | |
281 | string_type targetstate; | |
282 | ||
7c673cae | 283 | // silence MSVC warning C4512: assignment operator could not be generated |
92f5a8d4 | 284 | BOOST_DELETED_FUNCTION(lexer_def_& operator= (lexer_def_ const&)) |
7c673cae FG |
285 | }; |
286 | ||
287 | #if defined(BOOST_NO_CXX11_RVALUE_REFERENCES) | |
288 | // allow to assign a token definition expression | |
289 | template <typename LexerDef, typename Expr> | |
290 | inline lexer_def_<LexerDef>& | |
291 | operator+= (lexer_def_<LexerDef>& lexdef, Expr& xpr) | |
292 | { | |
293 | // Report invalid expression error as early as possible. | |
294 | // If you got an error_invalid_expression error message here, | |
295 | // then the expression (expr) is not a valid spirit lex | |
296 | // expression. | |
297 | BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); | |
298 | ||
299 | lexdef.define(xpr); | |
300 | return lexdef; | |
301 | } | |
302 | #else | |
303 | // allow to assign a token definition expression | |
304 | template <typename LexerDef, typename Expr> | |
305 | inline lexer_def_<LexerDef>& | |
306 | operator+= (lexer_def_<LexerDef>& lexdef, Expr&& xpr) | |
307 | { | |
308 | // Report invalid expression error as early as possible. | |
309 | // If you got an error_invalid_expression error message here, | |
310 | // then the expression (expr) is not a valid spirit lex | |
311 | // expression. | |
312 | BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); | |
313 | ||
314 | lexdef.define(xpr); | |
315 | return lexdef; | |
316 | } | |
317 | #endif | |
318 | ||
319 | template <typename LexerDef, typename Expr> | |
320 | inline lexer_def_<LexerDef>& | |
321 | operator+= (lexer_def_<LexerDef>& lexdef, Expr const& xpr) | |
322 | { | |
323 | // Report invalid expression error as early as possible. | |
324 | // If you got an error_invalid_expression error message here, | |
325 | // then the expression (expr) is not a valid spirit lex | |
326 | // expression. | |
327 | BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); | |
328 | ||
329 | lexdef.define(xpr); | |
330 | return lexdef; | |
331 | } | |
332 | } | |
333 | ||
334 | /////////////////////////////////////////////////////////////////////////// | |
335 | // The match_flags flags are used to influence different matching | |
336 | // modes of the lexer | |
struct match_flags
{
    // Each non-default flag occupies its own bit.
    enum enum_type
    {
        // no flags
        match_default = 0,
        // the regex '.' doesn't match newlines
        match_not_dot_newline = 1 << 0,
        // all matching operations are case insensitive
        match_icase = 1 << 1
    };
};
346 | ||
347 | /////////////////////////////////////////////////////////////////////////// | |
348 | // This represents a lexer object | |
349 | /////////////////////////////////////////////////////////////////////////// | |
350 | ||
351 | /////////////////////////////////////////////////////////////////////////// | |
352 | // This is the first token id automatically assigned by the library | |
353 | // if needed | |
enum tokenids
{
    // first automatically assigned token id: 65536 (0x10000)
    min_token_id = 1 << 16
};
358 | ||
359 | template <typename Lexer> | |
360 | class lexer : public Lexer | |
361 | { | |
362 | private: | |
363 | // avoid warnings about using 'this' in constructor | |
364 | lexer& this_() { return *this; } | |
365 | ||
366 | std::size_t next_token_id; // has to be an integral type | |
367 | ||
368 | public: | |
369 | typedef Lexer lexer_type; | |
370 | typedef typename Lexer::id_type id_type; | |
371 | typedef typename Lexer::char_type char_type; | |
372 | typedef typename Lexer::iterator_type iterator_type; | |
373 | typedef lexer base_type; | |
374 | ||
375 | typedef detail::lexer_def_<lexer> lexer_def; | |
376 | typedef std::basic_string<char_type> string_type; | |
377 | ||
92f5a8d4 TL |
378 | // if `id_type` was specified but `first_id` is not provided |
379 | // the `min_token_id` value may be out of range for `id_type`, | |
380 | // but it will be a problem only if unique ids feature is in use. | |
381 | lexer(unsigned int flags = match_flags::match_default) | |
382 | : lexer_type(flags) | |
383 | , next_token_id(min_token_id) | |
384 | , self(this_(), lexer_type::initial_state()) | |
385 | {} | |
386 | ||
387 | lexer(unsigned int flags, id_type first_id) | |
7c673cae FG |
388 | : lexer_type(flags) |
389 | , next_token_id(first_id) | |
390 | , self(this_(), lexer_type::initial_state()) | |
391 | {} | |
392 | ||
393 | // access iterator interface | |
394 | template <typename Iterator> | |
395 | iterator_type begin(Iterator& first, Iterator const& last | |
396 | , char_type const* initial_state = 0) const | |
397 | { return this->lexer_type::begin(first, last, initial_state); } | |
398 | iterator_type end() const | |
399 | { return this->lexer_type::end(); } | |
400 | ||
401 | std::size_t map_state(char_type const* state) | |
402 | { return this->lexer_type::add_state(state); } | |
403 | ||
404 | // create a unique token id | |
405 | id_type get_next_id() { return id_type(next_token_id++); } | |
406 | ||
407 | lexer_def self; // allow for easy token definition | |
408 | }; | |
409 | ||
410 | }}} | |
411 | ||
412 | #endif |