]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /*============================================================================= |
2 | Boost.Wave: A Standard compliant C++ preprocessor library | |
3 | ||
4 | Re2C based C++ lexer | |
5 | ||
6 | http://www.boost.org/ | |
7 | ||
8 | Copyright (c) 2001-2012 Hartmut Kaiser. Distributed under the Boost | |
9 | Software License, Version 1.0. (See accompanying file | |
10 | LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
11 | =============================================================================*/ | |
12 | ||
20effc67 TL |
13 | #if !defined(BOOST_CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED) |
14 | #define BOOST_CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED | |
7c673cae FG |
15 | |
16 | #include <string> | |
17 | #include <cstdio> | |
18 | #include <cstdarg> | |
19 | #if defined(BOOST_SPIRIT_DEBUG) | |
20 | #include <iostream> | |
21 | #endif // defined(BOOST_SPIRIT_DEBUG) | |
22 | ||
23 | #include <boost/concept_check.hpp> | |
24 | #include <boost/assert.hpp> | |
25 | #include <boost/spirit/include/classic_core.hpp> | |
26 | ||
27 | #include <boost/wave/wave_config.hpp> | |
28 | #include <boost/wave/language_support.hpp> | |
29 | #include <boost/wave/token_ids.hpp> | |
30 | #include <boost/wave/util/file_position.hpp> | |
31 | #include <boost/wave/cpplexer/validate_universal_char.hpp> | |
32 | #include <boost/wave/cpplexer/cpplexer_exceptions.hpp> | |
33 | #include <boost/wave/cpplexer/token_cache.hpp> | |
34 | #include <boost/wave/cpplexer/convert_trigraphs.hpp> | |
35 | ||
36 | #include <boost/wave/cpplexer/cpp_lex_interface.hpp> | |
37 | #include <boost/wave/cpplexer/re2clex/scanner.hpp> | |
38 | #include <boost/wave/cpplexer/re2clex/cpp_re.hpp> | |
39 | #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 | |
40 | #include <boost/wave/cpplexer/detect_include_guards.hpp> | |
41 | #endif | |
42 | ||
43 | #include <boost/wave/cpplexer/cpp_lex_interface_generator.hpp> | |
44 | ||
45 | // this must occur after all of the includes and before any code appears | |
46 | #ifdef BOOST_HAS_ABI_HEADERS | |
47 | #include BOOST_ABI_PREFIX | |
48 | #endif | |
49 | ||
50 | /////////////////////////////////////////////////////////////////////////////// | |
51 | namespace boost { | |
52 | namespace wave { | |
53 | namespace cpplexer { | |
54 | namespace re2clex { | |
55 | ||
56 | /////////////////////////////////////////////////////////////////////////////// | |
57 | // | |
58 | // encapsulation of the re2c based cpp lexer | |
59 | // | |
60 | /////////////////////////////////////////////////////////////////////////////// | |
61 | ||
62 | template <typename IteratorT, | |
63 | typename PositionT = boost::wave::util::file_position_type, | |
64 | typename TokenT = lex_token<PositionT> > | |
65 | class lexer | |
66 | { | |
67 | public: | |
68 | typedef TokenT token_type; | |
69 | typedef typename token_type::string_type string_type; | |
70 | ||
71 | lexer(IteratorT const &first, IteratorT const &last, | |
72 | PositionT const &pos, boost::wave::language_support language_); | |
73 | ~lexer(); | |
74 | ||
75 | token_type& get(token_type&); | |
76 | void set_position(PositionT const &pos) | |
77 | { | |
78 | // set position has to change the file name and line number only | |
79 | filename = pos.get_file(); | |
80 | scanner.line = pos.get_line(); | |
81 | // scanner.column = scanner.curr_column = pos.get_column(); | |
82 | scanner.file_name = filename.c_str(); | |
83 | } | |
84 | #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 | |
85 | bool has_include_guards(std::string& guard_name) const | |
86 | { | |
87 | return guards.detected(guard_name); | |
88 | } | |
89 | #endif | |
90 | ||
20effc67 | 91 | // error reporting from the re2c generated lexer |
11fdf7f2 | 92 | static int report_error(Scanner<IteratorT> const* s, int code, char const *, ...); |
7c673cae FG |
93 | |
94 | private: | |
95 | static char const *tok_names[]; | |
96 | ||
11fdf7f2 | 97 | Scanner<IteratorT> scanner; |
7c673cae FG |
98 | string_type filename; |
99 | string_type value; | |
100 | bool at_eof; | |
101 | boost::wave::language_support language; | |
102 | #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 | |
103 | include_guards<token_type> guards; | |
104 | #endif | |
105 | ||
106 | #if BOOST_WAVE_SUPPORT_THREADING == 0 | |
107 | static token_cache<string_type> const cache; | |
108 | #else | |
109 | token_cache<string_type> const cache; | |
110 | #endif | |
111 | }; | |
112 | ||
113 | /////////////////////////////////////////////////////////////////////////////// | |
114 | // initialize cpp lexer | |
115 | template <typename IteratorT, typename PositionT, typename TokenT> | |
116 | inline | |
117 | lexer<IteratorT, PositionT, TokenT>::lexer(IteratorT const &first, | |
118 | IteratorT const &last, PositionT const &pos, | |
119 | boost::wave::language_support language_) | |
11fdf7f2 TL |
120 | : scanner(first, last), |
121 | filename(pos.get_file()), at_eof(false), language(language_) | |
7c673cae FG |
122 | #if BOOST_WAVE_SUPPORT_THREADING != 0 |
123 | , cache() | |
124 | #endif | |
125 | { | |
126 | using namespace std; // some systems have memset in std | |
7c673cae FG |
127 | scanner.line = pos.get_line(); |
128 | scanner.column = scanner.curr_column = pos.get_column(); | |
129 | scanner.error_proc = report_error; | |
130 | scanner.file_name = filename.c_str(); | |
131 | ||
132 | #if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0 | |
133 | scanner.enable_ms_extensions = true; | |
134 | #else | |
135 | scanner.enable_ms_extensions = false; | |
136 | #endif | |
137 | ||
138 | #if BOOST_WAVE_SUPPORT_VARIADICS_PLACEMARKERS != 0 | |
139 | scanner.act_in_c99_mode = boost::wave::need_c99(language_); | |
140 | #endif | |
141 | ||
142 | #if BOOST_WAVE_SUPPORT_IMPORT_KEYWORD != 0 | |
143 | scanner.enable_import_keyword = !boost::wave::need_c99(language_); | |
144 | #else | |
145 | scanner.enable_import_keyword = false; | |
146 | #endif | |
147 | ||
148 | scanner.detect_pp_numbers = boost::wave::need_prefer_pp_numbers(language_); | |
149 | scanner.single_line_only = boost::wave::need_single_line(language_); | |
150 | ||
151 | #if BOOST_WAVE_SUPPORT_CPP0X != 0 | |
152 | scanner.act_in_cpp0x_mode = boost::wave::need_cpp0x(language_); | |
153 | #else | |
154 | scanner.act_in_cpp0x_mode = false; | |
155 | #endif | |
20effc67 TL |
156 | |
157 | #if BOOST_WAVE_SUPPORT_CPP2A != 0 | |
158 | scanner.act_in_cpp2a_mode = boost::wave::need_cpp2a(language_); | |
159 | scanner.act_in_cpp0x_mode = boost::wave::need_cpp2a(language_) | |
160 | || boost::wave::need_cpp0x(language_); | |
161 | #else | |
162 | scanner.act_in_cpp2a_mode = false; | |
163 | #endif | |
7c673cae FG |
164 | } |
165 | ||
166 | template <typename IteratorT, typename PositionT, typename TokenT> | |
167 | inline | |
168 | lexer<IteratorT, PositionT, TokenT>::~lexer() | |
169 | { | |
170 | using namespace std; // some systems have free in std | |
7c673cae FG |
171 | free(scanner.bot); |
172 | } | |
173 | ||
174 | /////////////////////////////////////////////////////////////////////////////// | |
175 | // get the next token from the input stream | |
176 | template <typename IteratorT, typename PositionT, typename TokenT> | |
177 | inline TokenT& | |
178 | lexer<IteratorT, PositionT, TokenT>::get(TokenT& result) | |
179 | { | |
180 | if (at_eof) | |
181 | return result = token_type(); // return T_EOI | |
182 | ||
183 | std::size_t actline = scanner.line; | |
184 | token_id id = token_id(scan(&scanner)); | |
185 | ||
b32b8144 | 186 | switch (id) { |
7c673cae FG |
187 | case T_IDENTIFIER: |
188 | // test identifier characters for validity (throws if invalid chars found) | |
189 | value = string_type((char const *)scanner.tok, | |
190 | scanner.cur-scanner.tok); | |
191 | if (!boost::wave::need_no_character_validation(language)) | |
192 | impl::validate_identifier_name(value, actline, scanner.column, filename); | |
193 | break; | |
194 | ||
195 | case T_STRINGLIT: | |
196 | case T_CHARLIT: | |
197 | case T_RAWSTRINGLIT: | |
198 | // test literal characters for validity (throws if invalid chars found) | |
199 | value = string_type((char const *)scanner.tok, | |
200 | scanner.cur-scanner.tok); | |
201 | if (boost::wave::need_convert_trigraphs(language)) | |
202 | value = impl::convert_trigraphs(value); | |
203 | if (!boost::wave::need_no_character_validation(language)) | |
204 | impl::validate_literal(value, actline, scanner.column, filename); | |
205 | break; | |
206 | ||
7c673cae FG |
207 | case T_PP_HHEADER: |
208 | case T_PP_QHEADER: | |
209 | case T_PP_INCLUDE: | |
210 | // convert to the corresponding ..._next token, if appropriate | |
211 | { | |
212 | value = string_type((char const *)scanner.tok, | |
213 | scanner.cur-scanner.tok); | |
214 | ||
f67539c2 | 215 | #if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0 |
7c673cae FG |
216 | // Skip '#' and whitespace and see whether we find an 'include_next' here. |
217 | typename string_type::size_type start = value.find("include"); | |
218 | if (value.compare(start, 12, "include_next", 12) == 0) | |
219 | id = token_id(id | AltTokenType); | |
f67539c2 | 220 | #endif |
7c673cae FG |
221 | break; |
222 | } | |
7c673cae FG |
223 | |
224 | case T_LONGINTLIT: // supported in C++11, C99 and long_long mode | |
225 | value = string_type((char const *)scanner.tok, | |
226 | scanner.cur-scanner.tok); | |
227 | if (!boost::wave::need_long_long(language)) { | |
228 | // syntax error: not allowed in C++ mode | |
229 | BOOST_WAVE_LEXER_THROW(lexing_exception, invalid_long_long_literal, | |
230 | value.c_str(), actline, scanner.column, filename.c_str()); | |
231 | } | |
232 | break; | |
233 | ||
234 | case T_OCTALINT: | |
235 | case T_DECIMALINT: | |
236 | case T_HEXAINT: | |
237 | case T_INTLIT: | |
238 | case T_FLOATLIT: | |
239 | case T_FIXEDPOINTLIT: | |
240 | case T_CCOMMENT: | |
241 | case T_CPPCOMMENT: | |
242 | case T_SPACE: | |
243 | case T_SPACE2: | |
244 | case T_ANY: | |
245 | case T_PP_NUMBER: | |
246 | value = string_type((char const *)scanner.tok, | |
247 | scanner.cur-scanner.tok); | |
248 | break; | |
249 | ||
250 | case T_EOF: | |
20effc67 TL |
251 | // T_EOF is returned as a valid token, the next call will return T_EOI, |
252 | // i.e. the actual end of input | |
7c673cae FG |
253 | at_eof = true; |
254 | value.clear(); | |
255 | break; | |
256 | ||
257 | case T_OR_TRIGRAPH: | |
258 | case T_XOR_TRIGRAPH: | |
259 | case T_LEFTBRACE_TRIGRAPH: | |
260 | case T_RIGHTBRACE_TRIGRAPH: | |
261 | case T_LEFTBRACKET_TRIGRAPH: | |
262 | case T_RIGHTBRACKET_TRIGRAPH: | |
263 | case T_COMPL_TRIGRAPH: | |
264 | case T_POUND_TRIGRAPH: | |
265 | if (boost::wave::need_convert_trigraphs(language)) { | |
266 | value = cache.get_token_value(BASEID_FROM_TOKEN(id)); | |
267 | } | |
268 | else { | |
269 | value = string_type((char const *)scanner.tok, | |
270 | scanner.cur-scanner.tok); | |
271 | } | |
272 | break; | |
273 | ||
274 | case T_ANY_TRIGRAPH: | |
275 | if (boost::wave::need_convert_trigraphs(language)) { | |
276 | value = impl::convert_trigraph( | |
277 | string_type((char const *)scanner.tok)); | |
278 | } | |
279 | else { | |
280 | value = string_type((char const *)scanner.tok, | |
281 | scanner.cur-scanner.tok); | |
282 | } | |
283 | break; | |
284 | ||
285 | default: | |
286 | if (CATEGORY_FROM_TOKEN(id) != EXTCATEGORY_FROM_TOKEN(id) || | |
287 | IS_CATEGORY(id, UnknownTokenType)) | |
288 | { | |
289 | value = string_type((char const *)scanner.tok, | |
290 | scanner.cur-scanner.tok); | |
291 | } | |
292 | else { | |
293 | value = cache.get_token_value(id); | |
294 | } | |
295 | break; | |
296 | } | |
297 | ||
298 | // std::cerr << boost::wave::get_token_name(id) << ": " << value << std::endl; | |
299 | ||
300 | // the re2c lexer reports the new line number for newline tokens | |
301 | result = token_type(id, value, PositionT(filename, actline, scanner.column)); | |
302 | ||
303 | #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 | |
304 | return guards.detect_guard(result); | |
305 | #else | |
306 | return result; | |
307 | #endif | |
308 | } | |
309 | ||
310 | template <typename IteratorT, typename PositionT, typename TokenT> | |
311 | inline int | |
11fdf7f2 | 312 | lexer<IteratorT, PositionT, TokenT>::report_error(Scanner<IteratorT> const *s, int errcode, |
7c673cae FG |
313 | char const *msg, ...) |
314 | { | |
315 | BOOST_ASSERT(0 != s); | |
316 | BOOST_ASSERT(0 != msg); | |
317 | ||
318 | using namespace std; // some system have vsprintf in namespace std | |
319 | ||
320 | char buffer[200]; // should be large enough | |
321 | va_list params; | |
322 | va_start(params, msg); | |
323 | vsprintf(buffer, msg, params); | |
324 | va_end(params); | |
325 | ||
326 | BOOST_WAVE_LEXER_THROW_VAR(lexing_exception, errcode, buffer, s->line, | |
327 | s->column, s->file_name); | |
20effc67 | 328 | // BOOST_UNREACHABLE_RETURN(0); |
7c673cae FG |
329 | return 0; |
330 | } | |
331 | ||
332 | /////////////////////////////////////////////////////////////////////////////// | |
333 | // | |
334 | // lex_functor | |
335 | // | |
336 | /////////////////////////////////////////////////////////////////////////////// | |
337 | ||
338 | template <typename IteratorT, | |
339 | typename PositionT = boost::wave::util::file_position_type, | |
340 | typename TokenT = typename lexer<IteratorT, PositionT>::token_type> | |
341 | class lex_functor | |
342 | : public lex_input_interface_generator<TokenT> | |
343 | { | |
344 | public: | |
345 | typedef TokenT token_type; | |
346 | ||
347 | lex_functor(IteratorT const &first, IteratorT const &last, | |
348 | PositionT const &pos, boost::wave::language_support language) | |
349 | : re2c_lexer(first, last, pos, language) | |
350 | {} | |
351 | virtual ~lex_functor() {} | |
352 | ||
20effc67 TL |
353 | // get the next token from the input stream |
354 | token_type& get(token_type& result) BOOST_OVERRIDE { return re2c_lexer.get(result); } | |
355 | void set_position(PositionT const &pos) BOOST_OVERRIDE { re2c_lexer.set_position(pos); } | |
7c673cae | 356 | #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 |
20effc67 | 357 | bool has_include_guards(std::string& guard_name) const BOOST_OVERRIDE |
7c673cae FG |
358 | { return re2c_lexer.has_include_guards(guard_name); } |
359 | #endif | |
360 | ||
361 | private: | |
362 | lexer<IteratorT, PositionT, TokenT> re2c_lexer; | |
363 | }; | |
364 | ||
365 | #if BOOST_WAVE_SUPPORT_THREADING == 0 | |
366 | /////////////////////////////////////////////////////////////////////////////// | |
367 | template <typename IteratorT, typename PositionT, typename TokenT> | |
368 | token_cache<typename lexer<IteratorT, PositionT, TokenT>::string_type> const | |
369 | lexer<IteratorT, PositionT, TokenT>::cache = | |
370 | token_cache<typename lexer<IteratorT, PositionT, TokenT>::string_type>(); | |
371 | #endif | |
372 | ||
373 | } // namespace re2clex | |
374 | ||
375 | /////////////////////////////////////////////////////////////////////////////// | |
376 | // | |
377 | // The new_lexer_gen<>::new_lexer function (declared in cpp_lex_interface.hpp) | |
378 | // should be defined inline, if the lex_functor shouldn't be instantiated | |
379 | // separately from the lex_iterator. | |
380 | // | |
381 | // Separate (explicit) instantiation helps to reduce compilation time. | |
382 | // | |
383 | /////////////////////////////////////////////////////////////////////////////// | |
384 | ||
385 | #if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0 | |
386 | #define BOOST_WAVE_RE2C_NEW_LEXER_INLINE | |
387 | #else | |
388 | #define BOOST_WAVE_RE2C_NEW_LEXER_INLINE inline | |
389 | #endif | |
390 | ||
391 | /////////////////////////////////////////////////////////////////////////////// | |
392 | // | |
393 | // The 'new_lexer' function allows the opaque generation of a new lexer object. | |
394 | // It is coupled to the iterator type, which allows decoupling the | |
395 | // lexer/iterator configurations at compile time. | |
396 | // | |
397 | // This function is declared inside the cpp_lex_token.hpp file, which is | |
398 | // referenced by the source file calling the lexer and the source file, which | |
399 | // instantiates the lex_functor. But it is defined here, so it will be | |
400 | // instantiated only while compiling the source file, which instantiates the | |
401 | // lex_functor. While the cpp_re2c_token.hpp file may be included everywhere, | |
402 | // this file (cpp_re2c_lexer.hpp) should be included only once. This | |
403 | // decouples the lexer interface from the lexer implementation and reduces | |
404 | // compilation time. | |
405 | // | |
406 | /////////////////////////////////////////////////////////////////////////////// | |
407 | ||
408 | template <typename IteratorT, typename PositionT, typename TokenT> | |
409 | BOOST_WAVE_RE2C_NEW_LEXER_INLINE | |
410 | lex_input_interface<TokenT> * | |
411 | new_lexer_gen<IteratorT, PositionT, TokenT>::new_lexer(IteratorT const &first, | |
412 | IteratorT const &last, PositionT const &pos, | |
413 | boost::wave::language_support language) | |
414 | { | |
415 | using re2clex::lex_functor; | |
416 | return new lex_functor<IteratorT, PositionT, TokenT>(first, last, pos, language); | |
417 | } | |
418 | ||
419 | #undef BOOST_WAVE_RE2C_NEW_LEXER_INLINE | |
420 | ||
421 | /////////////////////////////////////////////////////////////////////////////// | |
422 | } // namespace cpplexer | |
423 | } // namespace wave | |
424 | } // namespace boost | |
425 | ||
426 | // the suffix header occurs after all of the code | |
427 | #ifdef BOOST_HAS_ABI_HEADERS | |
428 | #include BOOST_ABI_SUFFIX | |
429 | #endif | |
430 | ||
20effc67 | 431 | #endif // !defined(BOOST_CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED) |