]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // Copyright (c) 2001-2011 Hartmut Kaiser |
2 | // | |
3 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | |
4 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
5 | ||
6 | #if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM) | |
7 | #define BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM | |
8 | ||
9 | #if defined(_MSC_VER) | |
10 | #pragma once | |
11 | #endif | |
12 | ||
13 | #include <boost/spirit/home/support/info.hpp> | |
14 | #include <boost/spirit/home/qi/skip_over.hpp> | |
15 | #include <boost/spirit/home/qi/parser.hpp> | |
16 | #include <boost/spirit/home/qi/detail/assign_to.hpp> | |
17 | #include <boost/spirit/home/lex/reference.hpp> | |
18 | #include <boost/spirit/home/lex/meta_compiler.hpp> | |
19 | #include <boost/spirit/home/lex/lexer_type.hpp> | |
20 | #include <boost/spirit/home/lex/lexer/token_def.hpp> | |
21 | #include <boost/assert.hpp> | |
22 | #include <boost/noncopyable.hpp> | |
23 | #include <boost/detail/iterator.hpp> | |
24 | #include <boost/fusion/include/vector.hpp> | |
25 | #include <boost/mpl/assert.hpp> | |
26 | #include <boost/range/iterator_range.hpp> | |
27 | #include <string> | |
28 | ||
29 | namespace boost { namespace spirit { namespace lex | |
30 | { | |
31 | /////////////////////////////////////////////////////////////////////////// | |
32 | namespace detail | |
33 | { | |
34 | /////////////////////////////////////////////////////////////////////// | |
35 | template <typename LexerDef> | |
36 | struct lexer_def_ | |
37 | : proto::extends< | |
38 | typename proto::terminal< | |
39 | lex::reference<lexer_def_<LexerDef> const> | |
40 | >::type | |
41 | , lexer_def_<LexerDef> > | |
42 | , qi::parser<lexer_def_<LexerDef> > | |
43 | , lex::lexer_type<lexer_def_<LexerDef> > | |
44 | { | |
45 | private: | |
46 | // avoid warnings about using 'this' in constructor | |
47 | lexer_def_& this_() { return *this; } | |
48 | ||
49 | typedef typename LexerDef::char_type char_type; | |
50 | typedef typename LexerDef::string_type string_type; | |
51 | typedef typename LexerDef::id_type id_type; | |
52 | ||
53 | typedef lex::reference<lexer_def_ const> reference_; | |
54 | typedef typename proto::terminal<reference_>::type terminal_type; | |
55 | typedef proto::extends<terminal_type, lexer_def_> proto_base_type; | |
56 | ||
57 | reference_ alias() const | |
58 | { | |
59 | return reference_(*this); | |
60 | } | |
61 | ||
62 | public: | |
63 | // Qi interface: metafunction calculating parser attribute type | |
64 | template <typename Context, typename Iterator> | |
65 | struct attribute | |
66 | { | |
67 | // the return value of a token set contains the matched token | |
68 | // id, and the corresponding pair of iterators | |
69 | typedef typename Iterator::base_iterator_type iterator_type; | |
70 | typedef | |
71 | fusion::vector2<id_type, iterator_range<iterator_type> > | |
72 | type; | |
73 | }; | |
74 | ||
75 | // Qi interface: parse functionality | |
76 | template <typename Iterator, typename Context | |
77 | , typename Skipper, typename Attribute> | |
78 | bool parse(Iterator& first, Iterator const& last | |
79 | , Context& /*context*/, Skipper const& skipper | |
80 | , Attribute& attr) const | |
81 | { | |
82 | qi::skip_over(first, last, skipper); // always do a pre-skip | |
83 | ||
84 | if (first != last) { | |
85 | typedef typename | |
86 | boost::detail::iterator_traits<Iterator>::value_type | |
87 | token_type; | |
88 | ||
89 | token_type const& t = *first; | |
90 | if (token_is_valid(t) && t.state() == first.get_state()) { | |
91 | // any of the token definitions matched | |
92 | spirit::traits::assign_to(t, attr); | |
93 | ++first; | |
94 | return true; | |
95 | } | |
96 | } | |
97 | return false; | |
98 | } | |
99 | ||
100 | // Qi interface: 'what' functionality | |
101 | template <typename Context> | |
102 | info what(Context& /*context*/) const | |
103 | { | |
104 | return info("lexer"); | |
105 | } | |
106 | ||
107 | private: | |
108 | // allow to use the lexer.self.add("regex1", id1)("regex2", id2); | |
109 | // syntax | |
110 | struct adder | |
111 | { | |
112 | adder(lexer_def_& def_) | |
113 | : def(def_) {} | |
114 | ||
115 | // Add a token definition based on a single character as given | |
116 | // by the first parameter, the second parameter allows to | |
117 | // specify the token id to use for the new token. If no token | |
118 | // id is given the character code is used. | |
119 | adder const& operator()(char_type c | |
120 | , id_type token_id = id_type()) const | |
121 | { | |
122 | if (id_type() == token_id) | |
123 | token_id = static_cast<id_type>(c); | |
124 | def.def.add_token (def.state.c_str(), c, token_id | |
125 | , def.targetstate.empty() ? 0 : def.targetstate.c_str()); | |
126 | return *this; | |
127 | } | |
128 | ||
129 | // Add a token definition based on a character sequence as | |
130 | // given by the first parameter, the second parameter allows to | |
131 | // specify the token id to use for the new token. If no token | |
132 | // id is given this function will generate a unique id to be | |
133 | // used as the token's id. | |
134 | adder const& operator()(string_type const& s | |
135 | , id_type token_id = id_type()) const | |
136 | { | |
137 | if (id_type() == token_id) | |
138 | token_id = def.def.get_next_id(); | |
139 | def.def.add_token (def.state.c_str(), s, token_id | |
140 | , def.targetstate.empty() ? 0 : def.targetstate.c_str()); | |
141 | return *this; | |
142 | } | |
143 | ||
144 | template <typename Attribute> | |
145 | adder const& operator()( | |
146 | token_def<Attribute, char_type, id_type>& tokdef | |
147 | , id_type token_id = id_type()) const | |
148 | { | |
149 | // make sure we have a token id | |
150 | if (id_type() == token_id) { | |
151 | if (id_type() == tokdef.id()) { | |
152 | token_id = def.def.get_next_id(); | |
153 | tokdef.id(token_id); | |
154 | } | |
155 | else { | |
156 | token_id = tokdef.id(); | |
157 | } | |
158 | } | |
159 | else { | |
160 | // the following assertion makes sure that the token_def | |
161 | // instance has not been assigned a different id earlier | |
162 | BOOST_ASSERT(id_type() == tokdef.id() | |
163 | || token_id == tokdef.id()); | |
164 | tokdef.id(token_id); | |
165 | } | |
166 | ||
167 | def.define(tokdef); | |
168 | return *this; | |
169 | } | |
170 | ||
171 | // template <typename F> | |
172 | // adder const& operator()(char_type c, id_type token_id, F act) const | |
173 | // { | |
174 | // if (id_type() == token_id) | |
175 | // token_id = def.def.get_next_id(); | |
176 | // std::size_t unique_id = | |
177 | // def.def.add_token (def.state.c_str(), s, token_id); | |
178 | // def.def.add_action(unique_id, def.state.c_str(), act); | |
179 | // return *this; | |
180 | // } | |
181 | ||
182 | lexer_def_& def; | |
183 | ||
184 | private: | |
185 | // silence MSVC warning C4512: assignment operator could not be generated | |
186 | adder& operator= (adder const&); | |
187 | }; | |
188 | friend struct adder; | |
189 | ||
190 | // allow to use lexer.self.add_pattern("pattern1", "regex1")(...); | |
191 | // syntax | |
192 | struct pattern_adder | |
193 | { | |
194 | pattern_adder(lexer_def_& def_) | |
195 | : def(def_) {} | |
196 | ||
197 | pattern_adder const& operator()(string_type const& p | |
198 | , string_type const& s) const | |
199 | { | |
200 | def.def.add_pattern (def.state.c_str(), p, s); | |
201 | return *this; | |
202 | } | |
203 | ||
204 | lexer_def_& def; | |
205 | ||
206 | private: | |
207 | // silence MSVC warning C4512: assignment operator could not be generated | |
208 | pattern_adder& operator= (pattern_adder const&); | |
209 | }; | |
210 | friend struct pattern_adder; | |
211 | ||
212 | private: | |
213 | // Helper function to invoke the necessary 2 step compilation | |
214 | // process on token definition expressions | |
215 | template <typename TokenExpr> | |
216 | void compile2pass(TokenExpr const& expr) | |
217 | { | |
218 | expr.collect(def, state, targetstate); | |
219 | expr.add_actions(def); | |
220 | } | |
221 | ||
222 | public: | |
223 | /////////////////////////////////////////////////////////////////// | |
224 | template <typename Expr> | |
225 | void define(Expr const& expr) | |
226 | { | |
227 | compile2pass(compile<lex::domain>(expr)); | |
228 | } | |
229 | ||
230 | lexer_def_(LexerDef& def_, string_type const& state_ | |
231 | , string_type const& targetstate_ = string_type()) | |
232 | : proto_base_type(terminal_type::make(alias())) | |
233 | , add(this_()), add_pattern(this_()), def(def_) | |
234 | , state(state_), targetstate(targetstate_) | |
235 | {} | |
236 | ||
237 | // allow to switch states | |
238 | lexer_def_ operator()(char_type const* state) const | |
239 | { | |
240 | return lexer_def_(def, state); | |
241 | } | |
242 | lexer_def_ operator()(char_type const* state | |
243 | , char_type const* targetstate) const | |
244 | { | |
245 | return lexer_def_(def, state, targetstate); | |
246 | } | |
247 | lexer_def_ operator()(string_type const& state | |
248 | , string_type const& targetstate = string_type()) const | |
249 | { | |
250 | return lexer_def_(def, state, targetstate); | |
251 | } | |
252 | ||
253 | // allow to assign a token definition expression | |
254 | template <typename Expr> | |
255 | lexer_def_& operator= (Expr const& xpr) | |
256 | { | |
257 | // Report invalid expression error as early as possible. | |
258 | // If you got an error_invalid_expression error message here, | |
259 | // then the expression (expr) is not a valid spirit lex | |
260 | // expression. | |
261 | BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); | |
262 | ||
263 | def.clear(state.c_str()); | |
264 | define(xpr); | |
265 | return *this; | |
266 | } | |
267 | ||
268 | // explicitly tell the lexer that the given state will be defined | |
269 | // (useful in conjunction with "*") | |
270 | std::size_t add_state(char_type const* state = 0) | |
271 | { | |
272 | return def.add_state(state ? state : def.initial_state().c_str()); | |
273 | } | |
274 | ||
275 | adder add; | |
276 | pattern_adder add_pattern; | |
277 | ||
278 | private: | |
279 | LexerDef& def; | |
280 | string_type state; | |
281 | string_type targetstate; | |
282 | ||
283 | private: | |
284 | // silence MSVC warning C4512: assignment operator could not be generated | |
285 | lexer_def_& operator= (lexer_def_ const&); | |
286 | }; | |
287 | ||
288 | #if defined(BOOST_NO_CXX11_RVALUE_REFERENCES) | |
289 | // allow to assign a token definition expression | |
290 | template <typename LexerDef, typename Expr> | |
291 | inline lexer_def_<LexerDef>& | |
292 | operator+= (lexer_def_<LexerDef>& lexdef, Expr& xpr) | |
293 | { | |
294 | // Report invalid expression error as early as possible. | |
295 | // If you got an error_invalid_expression error message here, | |
296 | // then the expression (expr) is not a valid spirit lex | |
297 | // expression. | |
298 | BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); | |
299 | ||
300 | lexdef.define(xpr); | |
301 | return lexdef; | |
302 | } | |
303 | #else | |
304 | // allow to assign a token definition expression | |
305 | template <typename LexerDef, typename Expr> | |
306 | inline lexer_def_<LexerDef>& | |
307 | operator+= (lexer_def_<LexerDef>& lexdef, Expr&& xpr) | |
308 | { | |
309 | // Report invalid expression error as early as possible. | |
310 | // If you got an error_invalid_expression error message here, | |
311 | // then the expression (expr) is not a valid spirit lex | |
312 | // expression. | |
313 | BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); | |
314 | ||
315 | lexdef.define(xpr); | |
316 | return lexdef; | |
317 | } | |
318 | #endif | |
319 | ||
320 | template <typename LexerDef, typename Expr> | |
321 | inline lexer_def_<LexerDef>& | |
322 | operator+= (lexer_def_<LexerDef>& lexdef, Expr const& xpr) | |
323 | { | |
324 | // Report invalid expression error as early as possible. | |
325 | // If you got an error_invalid_expression error message here, | |
326 | // then the expression (expr) is not a valid spirit lex | |
327 | // expression. | |
328 | BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); | |
329 | ||
330 | lexdef.define(xpr); | |
331 | return lexdef; | |
332 | } | |
333 | } | |
334 | ||
///////////////////////////////////////////////////////////////////////////
// match_flags groups the flags selecting the matching mode of the lexer.
// The enumerators are scoped inside the struct and are therefore spelled
// match_flags::match_default, match_flags::match_icase, ...
struct match_flags
{
    enum enum_type
    {
        // no flags
        match_default = 0x0,

        // the regex '.' doesn't match newlines
        match_not_dot_newline = 0x1,

        // all matching operations are case insensitive
        match_icase = 0x2
    };
};
347 | ||
348 | /////////////////////////////////////////////////////////////////////////// | |
349 | // This represents a lexer object | |
350 | /////////////////////////////////////////////////////////////////////////// | |
351 | ||
///////////////////////////////////////////////////////////////////////////
// min_token_id is the first token id the library assigns automatically
// when a token definition does not come with an explicit id.
enum tokenids
{
    min_token_id = 0x10000
};
359 | ||
360 | template <typename Lexer> | |
361 | class lexer : public Lexer | |
362 | { | |
363 | private: | |
364 | // avoid warnings about using 'this' in constructor | |
365 | lexer& this_() { return *this; } | |
366 | ||
367 | std::size_t next_token_id; // has to be an integral type | |
368 | ||
369 | public: | |
370 | typedef Lexer lexer_type; | |
371 | typedef typename Lexer::id_type id_type; | |
372 | typedef typename Lexer::char_type char_type; | |
373 | typedef typename Lexer::iterator_type iterator_type; | |
374 | typedef lexer base_type; | |
375 | ||
376 | typedef detail::lexer_def_<lexer> lexer_def; | |
377 | typedef std::basic_string<char_type> string_type; | |
378 | ||
379 | lexer(unsigned int flags = match_flags::match_default | |
380 | , id_type first_id = id_type(min_token_id)) | |
381 | : lexer_type(flags) | |
382 | , next_token_id(first_id) | |
383 | , self(this_(), lexer_type::initial_state()) | |
384 | {} | |
385 | ||
386 | // access iterator interface | |
387 | template <typename Iterator> | |
388 | iterator_type begin(Iterator& first, Iterator const& last | |
389 | , char_type const* initial_state = 0) const | |
390 | { return this->lexer_type::begin(first, last, initial_state); } | |
391 | iterator_type end() const | |
392 | { return this->lexer_type::end(); } | |
393 | ||
394 | std::size_t map_state(char_type const* state) | |
395 | { return this->lexer_type::add_state(state); } | |
396 | ||
397 | // create a unique token id | |
398 | id_type get_next_id() { return id_type(next_token_id++); } | |
399 | ||
400 | lexer_def self; // allow for easy token definition | |
401 | }; | |
402 | ||
403 | }}} | |
404 | ||
405 | #endif |