1 /*=============================================================================
2 Boost.Wave: A Standard compliant C++ preprocessor library
4 SLex (Spirit Lex) based C++ lexer
8 Copyright (c) 2001-2012 Hartmut Kaiser. Distributed under the Boost
9 Software License, Version 1.0. (See accompanying file
10 LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
11 =============================================================================*/
13 #if !defined(SLEX_LEXER_HPP_5E8E1DF0_BB41_4938_B7E5_A4BB68222FF6_INCLUDED)
14 #define SLEX_LEXER_HPP_5E8E1DF0_BB41_4938_B7E5_A4BB68222FF6_INCLUDED
17 #if defined(BOOST_SPIRIT_DEBUG)
19 #endif // defined(BOOST_SPIRIT_DEBUG)
21 #include <boost/assert.hpp>
22 #include <boost/spirit/include/classic_core.hpp>
24 #include <boost/wave/wave_config.hpp>
25 #include <boost/wave/language_support.hpp>
26 #include <boost/wave/token_ids.hpp>
27 #include <boost/wave/util/file_position.hpp>
28 #include <boost/wave/util/time_conversion_helper.hpp>
29 #include <boost/wave/cpplexer/validate_universal_char.hpp>
30 #include <boost/wave/cpplexer/convert_trigraphs.hpp>
31 #include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
32 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
33 #include <boost/wave/cpplexer/detect_include_guards.hpp>
35 #include <boost/wave/cpplexer/cpp_lex_interface.hpp>
37 #include "../slex_interface.hpp"
38 #include "../slex_token.hpp"
39 #include "../slex_iterator.hpp"
41 #include "lexer.hpp" // "spirit/lexer.hpp"
43 ///////////////////////////////////////////////////////////////////////////////
50 ///////////////////////////////////////////////////////////////////////////////
51 // The following numbers are the array sizes of the token regex's which we
52 // need to specify to make the CW compiler happy (at least up to V9.5).
// Each size must be at least the number of initializers in the matching
// table, counting the closing { token_id(0) } sentinel entry: the
// registration loops in init_dfa() stop at the first entry whose tokenid
// is 0.
53 #if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
// size of init_data including the MS specific entries
54 #define INIT_DATA_SIZE 175
// size of init_data for the other configuration; the difference of 17
// matches the number of MSEXT_* entries visible in init_data
56 #define INIT_DATA_SIZE 158
// size of init_data_cpp (C++ only patterns)
58 #define INIT_DATA_CPP_SIZE 15
// size of init_data_pp_number (one pattern plus the sentinel)
59 #define INIT_DATA_PP_NUMBER_SIZE 2
// size of init_data_cpp0x (fourteen patterns plus the sentinel)
60 #define INIT_DATA_CPP0X_SIZE 15
62 ///////////////////////////////////////////////////////////////////////////////
64 // encapsulation of the boost::spirit::classic::slex based cpp lexer
66 ///////////////////////////////////////////////////////////////////////////////
68 ///////////////////////////////////////////////////////////////////////////////
69 // The following lexer_base class was necessary to workaround a CodeWarrior
70 // bug (at least up to CW V9.5).
// It derives from boost::spirit::classic::lexer instantiated over a
// boost::wave::util::position_iterator and declares the record type used by
// the token tables (token id, regex, callback, lexer state).
71 template <typename IteratorT, typename PositionT>
73 : public boost::spirit::classic::lexer<
74 boost::wave::util::position_iterator<IteratorT, PositionT> >
77 typedef boost::wave::util::position_iterator<IteratorT, PositionT>
79 typedef typename std::iterator_traits<IteratorT>::value_type char_type;
80 typedef boost::spirit::classic::lexer<iterator_type> base_type;
84 // initialization data (regular expressions for the token definitions)
// The four members below are passed, in the order regex, id, callback,
// state, to register_regex() by lexer::init_dfa().
86 token_id tokenid; // id registered for the regex; 0 marks the end of a table
87 char_type const *tokenregex; // regular expression matching the token
88 typename base_type::callback_t tokencb; // associated callback function (0 if none)
89 unsigned int lexerstate; // lexer state the regex is registered for
93 ///////////////////////////////////////////////////////////////////////////////
// The slex based lexer: a lexer_base that owns the static token tables and
// knows how to register them with the underlying DFA (see init_dfa).
94 template <typename IteratorT, typename PositionT>
96 : public lexer_base<IteratorT, PositionT>
99 typedef boost::wave::cpplexer::slex_token<PositionT> token_type;
// register the token regexes appropriate for the given language mode
101 void init_dfa(boost::wave::language_support language);
103 // get time of last compilation
// (init_lexer passes this value to load() and save() of the DFA file)
104 static std::time_t get_compilation_time()
105 { return compilation_time.get_time(); }
107 // helper for calculation of the time of last compilation
// (defined further down from __DATE__ and __TIME__)
108 static boost::wave::util::time_conversion_helper compilation_time;
111 typedef lexer_base<IteratorT, PositionT> base_type;
// Token tables; each one is terminated by an entry whose tokenid is 0.
113 static typename base_type::lexer_data const init_data[INIT_DATA_SIZE]; // common patterns
114 static typename base_type::lexer_data const init_data_cpp[INIT_DATA_CPP_SIZE]; // C++ only patterns
115 static typename base_type::lexer_data const init_data_pp_number[INIT_DATA_PP_NUMBER_SIZE]; // pp-number only patterns
116 static typename base_type::lexer_data const init_data_cpp0x[INIT_DATA_CPP0X_SIZE]; // C++0X only patterns
119 ///////////////////////////////////////////////////////////////////////////////
120 // data required for initialization of the lexer (token definitions)
// The macros below expand to adjacent string literals, which the compiler
// concatenates into the regular expressions used by the TOKEN_DATA tables.
// Q(...) and OR are helper macros defined outside the lines visible here
// (presumably quoting and alternation -- TODO confirm).
// TRI(c): pattern for a trigraph, i.e. two quoted question marks followed by c
123 #define TRI(c) Q("?") Q("?") c
125 // definition of some sub-token regexps to simplify the regex definitions
126 #define BLANK "[ \\t]"
128 Q("/") Q("*") "[^*]*" Q("*") "+" "(" "[^/*][^*]*" Q("*") "+" ")*" Q("/")
// any run of blanks and C comments; used between the pound sign and the
// directive name in the PP_* token definitions
130 #define PPSPACE "(" BLANK OR CCOMMENT ")*"
132 #define OCTALDIGIT "[0-7]"
133 #define DIGIT "[0-9]"
134 #define HEXDIGIT "[0-9a-fA-F]"
135 #define OPTSIGN "[-+]?"
// exponent introducer with a mandatory sign; only used by PP_NUMBERDEF
136 #define EXPSTART "[eE]" "[-+]"
// complete exponent of a floating point literal (sign optional)
137 #define EXPONENT "(" "[eE]" OPTSIGN "[0-9]+" ")"
138 #define NONDIGIT "[a-zA-Z_]"
141 "(" "(0x|0X)" HEXDIGIT "+" OR "0" OCTALDIGIT "*" OR "[1-9]" DIGIT "*" ")"
// u/U and l/L in either order, each optional after the first
143 #define INTEGER_SUFFIX "(" "[uU][lL]?|[lL][uU]?" ")"
// long long suffixes; the MS extension variant has further alternatives on
// lines that are not visible here
144 #if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
145 #define LONGINTEGER_SUFFIX "(" "[uU]" "(" "[lL][lL]" ")" OR \
146 "(" "[lL][lL]" ")" "[uU]" "?" OR \
150 #define LONGINTEGER_SUFFIX "(" "[uU]" "(" "[lL][lL]" ")" OR \
151 "(" "[lL][lL]" ")" "[uU]" "?" ")"
153 #define FLOAT_SUFFIX "(" "[fF][lL]?" OR "[lL][fF]?" ")"
// optional wide prefix of character and string literals
154 #define CHAR_SPEC "L?"
// prefixes u, U and u8 used by the C++0x literal patterns
155 #define EXTCHAR_SPEC "(" "[uU]" OR "u8" ")"
// a backslash, written either directly or as its trigraph
157 #define BACKSLASH "(" Q("\\") OR TRI(Q("/")) ")"
158 #define ESCAPESEQ "(" BACKSLASH "(" \
161 "x" HEXDIGIT "+" OR \
162 OCTALDIGIT OCTALDIGIT "?" OCTALDIGIT "?" \
164 #define HEXQUAD "(" HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT ")"
165 #define UNIVERSALCHAR "(" BACKSLASH "(" \
167 "U" HEXQUAD HEXQUAD \
170 #define POUNDDEF "(" "#" OR TRI("=") OR Q("%:") ")"
// line ending: LF, CR or CR LF
171 #define NEWLINEDEF "(" "\n" OR "\r" OR "\r\n" ")"
// directive name accepted by the PP_QHEADER / PP_HHEADER / PP_INCLUDE
// patterns; include_next is only accepted when the feature is enabled
173 #if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
174 #define INCLUDEDEF "(include|include_next)"
176 #define INCLUDEDEF "include"
// pp-number: optional dot, one digit, then any run of digits, non-digits,
// signed exponent starts or dots
179 #define PP_NUMBERDEF Q(".") "?" DIGIT "(" DIGIT OR NONDIGIT OR EXPSTART OR Q(".") ")*"
181 ///////////////////////////////////////////////////////////////////////////////
182 // lexer state constants
183 #define LEXER_STATE_NORMAL 0
184 #define LEXER_STATE_PP 1
// number of lexer states handed to the base lexer by the lexer_base
// constructor
// NOTE(review): LEXER_STATE_PP (1) is not smaller than NUM_LEXER_STATES (1)
// and neither TOKEN_DATA macro below uses it -- confirm that it is
// intentionally unused.
186 #define NUM_LEXER_STATES 1
188 // helper for initializing token data
// TOKEN_DATA(id, regex) yields a lexer_data initializer for token T_<id>
// with no callback; TOKEN_DATA_EX additionally takes the callback. Both
// register the regex for LEXER_STATE_NORMAL.
189 #define TOKEN_DATA(id, regex) \
190 { T_##id, regex, 0, LEXER_STATE_NORMAL } \
193 #define TOKEN_DATA_EX(id, regex, callback) \
194 { T_##id, regex, callback, LEXER_STATE_NORMAL } \
197 ///////////////////////////////////////////////////////////////////////////////
198 // common C++/C99 token definitions
// Every entry pairs a token id with the regular expression recognizing it.
// init_dfa() registers this table last, after the optional pp-number, C++
// only and C++0x tables, and walks it until it reaches the entry whose
// tokenid is 0.
199 template <typename IteratorT, typename PositionT>
200 typename lexer_base<IteratorT, PositionT>::lexer_data const
201 lexer<IteratorT, PositionT>::init_data[INIT_DATA_SIZE] =
// operators and punctuators, including digraph (_ALT) and trigraph spellings
203 TOKEN_DATA(AND, "&"),
204 TOKEN_DATA(ANDAND, "&&"),
205 TOKEN_DATA(ASSIGN, "="),
206 TOKEN_DATA(ANDASSIGN, "&="),
207 TOKEN_DATA(OR, Q("|")),
208 TOKEN_DATA(OR_TRIGRAPH, TRI("!")),
209 TOKEN_DATA(ORASSIGN, Q("|=")),
210 TOKEN_DATA(ORASSIGN_TRIGRAPH, TRI("!=")),
211 TOKEN_DATA(XOR, Q("^")),
212 TOKEN_DATA(XOR_TRIGRAPH, TRI("'")),
213 TOKEN_DATA(XORASSIGN, Q("^=")),
214 TOKEN_DATA(XORASSIGN_TRIGRAPH, TRI("'=")),
215 TOKEN_DATA(COMMA, ","),
216 TOKEN_DATA(COLON, ":"),
217 TOKEN_DATA(DIVIDEASSIGN, Q("/=")),
218 TOKEN_DATA(DIVIDE, Q("/")),
219 TOKEN_DATA(DOT, Q(".")),
220 TOKEN_DATA(ELLIPSIS, Q(".") Q(".") Q(".")),
221 TOKEN_DATA(EQUAL, "=="),
222 TOKEN_DATA(GREATER, ">"),
223 TOKEN_DATA(GREATEREQUAL, ">="),
224 TOKEN_DATA(LEFTBRACE, Q("{")),
225 TOKEN_DATA(LEFTBRACE_ALT, "<" Q("%")),
226 TOKEN_DATA(LEFTBRACE_TRIGRAPH, TRI("<")),
227 TOKEN_DATA(LESS, "<"),
228 TOKEN_DATA(LESSEQUAL, "<="),
229 TOKEN_DATA(LEFTPAREN, Q("(")),
230 TOKEN_DATA(LEFTBRACKET, Q("[")),
231 TOKEN_DATA(LEFTBRACKET_ALT, "<:"),
232 TOKEN_DATA(LEFTBRACKET_TRIGRAPH, TRI(Q("("))),
233 TOKEN_DATA(MINUS, Q("-")),
234 TOKEN_DATA(MINUSASSIGN, Q("-=")),
235 TOKEN_DATA(MINUSMINUS, Q("-") Q("-")),
236 TOKEN_DATA(PERCENT, Q("%")),
237 TOKEN_DATA(PERCENTASSIGN, Q("%=")),
238 TOKEN_DATA(NOT, "!"),
239 TOKEN_DATA(NOTEQUAL, "!="),
240 TOKEN_DATA(OROR, Q("|") Q("|")),
241 TOKEN_DATA(OROR_TRIGRAPH, TRI("!") Q("|") OR Q("|") TRI("!") OR TRI("!") TRI("!")),
242 TOKEN_DATA(PLUS, Q("+")),
243 TOKEN_DATA(PLUSASSIGN, Q("+=")),
244 TOKEN_DATA(PLUSPLUS, Q("+") Q("+")),
245 TOKEN_DATA(ARROW, Q("->")),
246 TOKEN_DATA(QUESTION_MARK, Q("?")),
247 TOKEN_DATA(RIGHTBRACE, Q("}")),
248 TOKEN_DATA(RIGHTBRACE_ALT, Q("%>")),
249 TOKEN_DATA(RIGHTBRACE_TRIGRAPH, TRI(">")),
250 TOKEN_DATA(RIGHTPAREN, Q(")")),
251 TOKEN_DATA(RIGHTBRACKET, Q("]")),
252 TOKEN_DATA(RIGHTBRACKET_ALT, ":>"),
253 TOKEN_DATA(RIGHTBRACKET_TRIGRAPH, TRI(Q(")"))),
254 TOKEN_DATA(SEMICOLON, ";"),
255 TOKEN_DATA(SHIFTLEFT, "<<"),
256 TOKEN_DATA(SHIFTLEFTASSIGN, "<<="),
257 TOKEN_DATA(SHIFTRIGHT, ">>"),
258 TOKEN_DATA(SHIFTRIGHTASSIGN, ">>="),
259 TOKEN_DATA(STAR, Q("*")),
260 TOKEN_DATA(COMPL, Q("~")),
261 TOKEN_DATA(COMPL_TRIGRAPH, TRI("-")),
262 TOKEN_DATA(STARASSIGN, Q("*=")),
// keywords
263 TOKEN_DATA(ASM, "asm"),
264 TOKEN_DATA(AUTO, "auto"),
265 TOKEN_DATA(BOOL, "bool"),
266 TOKEN_DATA(FALSE, "false"),
267 TOKEN_DATA(TRUE, "true"),
268 TOKEN_DATA(BREAK, "break"),
269 TOKEN_DATA(CASE, "case"),
270 TOKEN_DATA(CATCH, "catch"),
271 TOKEN_DATA(CHAR, "char"),
272 TOKEN_DATA(CLASS, "class"),
273 TOKEN_DATA(CONST, "const"),
274 TOKEN_DATA(CONSTCAST, "const_cast"),
275 TOKEN_DATA(CONTINUE, "continue"),
276 TOKEN_DATA(DEFAULT, "default"),
277 TOKEN_DATA(DELETE, "delete"),
278 TOKEN_DATA(DO, "do"),
279 TOKEN_DATA(DOUBLE, "double"),
280 TOKEN_DATA(DYNAMICCAST, "dynamic_cast"),
281 TOKEN_DATA(ELSE, "else"),
282 TOKEN_DATA(ENUM, "enum"),
283 TOKEN_DATA(EXPLICIT, "explicit"),
284 TOKEN_DATA(EXPORT, "export"),
285 TOKEN_DATA(EXTERN, "extern"),
286 TOKEN_DATA(FLOAT, "float"),
287 TOKEN_DATA(FOR, "for"),
288 TOKEN_DATA(FRIEND, "friend"),
289 TOKEN_DATA(GOTO, "goto"),
290 TOKEN_DATA(IF, "if"),
291 TOKEN_DATA(INLINE, "inline"),
292 TOKEN_DATA(INT, "int"),
293 TOKEN_DATA(LONG, "long"),
294 TOKEN_DATA(MUTABLE, "mutable"),
295 TOKEN_DATA(NAMESPACE, "namespace"),
296 TOKEN_DATA(NEW, "new"),
297 TOKEN_DATA(OPERATOR, "operator"),
298 TOKEN_DATA(PRIVATE, "private"),
299 TOKEN_DATA(PROTECTED, "protected"),
300 TOKEN_DATA(PUBLIC, "public"),
301 TOKEN_DATA(REGISTER, "register"),
302 TOKEN_DATA(REINTERPRETCAST, "reinterpret_cast"),
303 TOKEN_DATA(RETURN, "return"),
304 TOKEN_DATA(SHORT, "short"),
305 TOKEN_DATA(SIGNED, "signed"),
306 TOKEN_DATA(SIZEOF, "sizeof"),
307 TOKEN_DATA(STATIC, "static"),
308 TOKEN_DATA(STATICCAST, "static_cast"),
309 TOKEN_DATA(STRUCT, "struct"),
310 TOKEN_DATA(SWITCH, "switch"),
311 TOKEN_DATA(TEMPLATE, "template"),
312 TOKEN_DATA(THIS, "this"),
313 TOKEN_DATA(THROW, "throw"),
314 TOKEN_DATA(TRY, "try"),
315 TOKEN_DATA(TYPEDEF, "typedef"),
316 TOKEN_DATA(TYPEID, "typeid"),
317 TOKEN_DATA(TYPENAME, "typename"),
318 TOKEN_DATA(UNION, "union"),
319 TOKEN_DATA(UNSIGNED, "unsigned"),
320 TOKEN_DATA(USING, "using"),
321 TOKEN_DATA(VIRTUAL, "virtual"),
322 TOKEN_DATA(VOID, "void"),
323 TOKEN_DATA(VOLATILE, "volatile"),
324 TOKEN_DATA(WCHART, "wchar_t"),
325 TOKEN_DATA(WHILE, "while"),
// preprocessor directives: pound sign (plain, trigraph or digraph),
// optional blanks and C comments, then the directive name
326 TOKEN_DATA(PP_DEFINE, POUNDDEF PPSPACE "define"),
327 TOKEN_DATA(PP_IF, POUNDDEF PPSPACE "if"),
328 TOKEN_DATA(PP_IFDEF, POUNDDEF PPSPACE "ifdef"),
329 TOKEN_DATA(PP_IFNDEF, POUNDDEF PPSPACE "ifndef"),
330 TOKEN_DATA(PP_ELSE, POUNDDEF PPSPACE "else"),
331 TOKEN_DATA(PP_ELIF, POUNDDEF PPSPACE "elif"),
332 TOKEN_DATA(PP_ENDIF, POUNDDEF PPSPACE "endif"),
333 TOKEN_DATA(PP_ERROR, POUNDDEF PPSPACE "error"),
334 TOKEN_DATA(PP_QHEADER, POUNDDEF PPSPACE \
335 INCLUDEDEF PPSPACE Q("\"") "[^\\n\\r\"]+" Q("\"")),
336 TOKEN_DATA(PP_HHEADER, POUNDDEF PPSPACE \
337 INCLUDEDEF PPSPACE "<" "[^\\n\\r>]+" ">"),
338 TOKEN_DATA(PP_INCLUDE, POUNDDEF PPSPACE \
340 TOKEN_DATA(PP_LINE, POUNDDEF PPSPACE "line"),
341 TOKEN_DATA(PP_PRAGMA, POUNDDEF PPSPACE "pragma"),
342 TOKEN_DATA(PP_UNDEF, POUNDDEF PPSPACE "undef"),
343 TOKEN_DATA(PP_WARNING, POUNDDEF PPSPACE "warning"),
// MS specific keywords and directives (17 entries, matching the difference
// between the two INIT_DATA_SIZE values)
344 #if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
345 TOKEN_DATA(MSEXT_INT8, "__int8"),
346 TOKEN_DATA(MSEXT_INT16, "__int16"),
347 TOKEN_DATA(MSEXT_INT32, "__int32"),
348 TOKEN_DATA(MSEXT_INT64, "__int64"),
349 TOKEN_DATA(MSEXT_BASED, "_?" "_based"),
350 TOKEN_DATA(MSEXT_DECLSPEC, "_?" "_declspec"),
351 TOKEN_DATA(MSEXT_CDECL, "_?" "_cdecl"),
352 TOKEN_DATA(MSEXT_FASTCALL, "_?" "_fastcall"),
353 TOKEN_DATA(MSEXT_STDCALL, "_?" "_stdcall"),
354 TOKEN_DATA(MSEXT_TRY , "__try"),
355 TOKEN_DATA(MSEXT_EXCEPT, "__except"),
356 TOKEN_DATA(MSEXT_FINALLY, "__finally"),
357 TOKEN_DATA(MSEXT_LEAVE, "__leave"),
358 TOKEN_DATA(MSEXT_INLINE, "_?" "_inline"),
359 TOKEN_DATA(MSEXT_ASM, "_?" "_asm"),
360 TOKEN_DATA(MSEXT_PP_REGION, POUNDDEF PPSPACE "region"),
361 TOKEN_DATA(MSEXT_PP_ENDREGION, POUNDDEF PPSPACE "endregion"),
362 #endif // BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
// literals, comments, identifiers and whitespace
363 // TOKEN_DATA(OCTALINT, "0" OCTALDIGIT "*" INTEGER_SUFFIX "?"),
364 // TOKEN_DATA(DECIMALINT, "[1-9]" DIGIT "*" INTEGER_SUFFIX "?"),
365 // TOKEN_DATA(HEXAINT, "(0x|0X)" HEXDIGIT "+" INTEGER_SUFFIX "?"),
// LONGINTLIT needs a long long suffix; slex_functor::get() rejects the
// token unless need_long_long(language) holds
366 TOKEN_DATA(LONGINTLIT, INTEGER LONGINTEGER_SUFFIX),
367 TOKEN_DATA(INTLIT, INTEGER INTEGER_SUFFIX "?"),
369 "(" DIGIT "*" Q(".") DIGIT "+" OR DIGIT "+" Q(".") ")"
370 EXPONENT "?" FLOAT_SUFFIX "?" OR
371 DIGIT "+" EXPONENT FLOAT_SUFFIX "?"),
372 TOKEN_DATA(CCOMMENT, CCOMMENT),
373 TOKEN_DATA(CPPCOMMENT, Q("/") Q("/[^\\n\\r]*") NEWLINEDEF ),
374 TOKEN_DATA(CHARLIT, CHAR_SPEC "'"
375 "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\n\\r\\\\']" ")+" "'"),
376 TOKEN_DATA(STRINGLIT, CHAR_SPEC Q("\"")
377 "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\n\\r\\\\\"]" ")*" Q("\"")),
// the strict lexer variant does not accept the dollar sign in identifiers
378 #if BOOST_WAVE_USE_STRICT_LEXER != 0
379 TOKEN_DATA(IDENTIFIER, "([a-zA-Z_]" OR UNIVERSALCHAR ")([a-zA-Z0-9_]" OR UNIVERSALCHAR ")*"),
381 TOKEN_DATA(IDENTIFIER, "([a-zA-Z_$]" OR UNIVERSALCHAR ")([a-zA-Z0-9_$]" OR UNIVERSALCHAR ")*"),
383 TOKEN_DATA(SPACE, "[ \t\v\f]+"),
384 // TOKEN_DATA(SPACE2, "[\\v\\f]+"),
// NOTE(review): CONTLINE only covers backslash followed by LF, while
// NEWLINEDEF also accepts CR and CR LF -- confirm this is intended.
385 TOKEN_DATA(CONTLINE, Q("\\") "\n"),
386 TOKEN_DATA(NEWLINE, NEWLINEDEF),
387 TOKEN_DATA(POUND_POUND, "##"),
388 TOKEN_DATA(POUND_POUND_ALT, Q("%:") Q("%:")),
389 TOKEN_DATA(POUND_POUND_TRIGRAPH, TRI("=") TRI("=")),
390 TOKEN_DATA(POUND, "#"),
391 TOKEN_DATA(POUND_ALT, Q("%:")),
392 TOKEN_DATA(POUND_TRIGRAPH, TRI("=")),
393 TOKEN_DATA(ANY_TRIGRAPH, TRI(Q("/"))),
394 TOKEN_DATA(ANY, "."), // this should be the last recognized token
395 { token_id(0) } // this should be the last entry
398 ///////////////////////////////////////////////////////////////////////////////
399 // C++ only token definitions
// init_dfa() registers this table only when need_c99(language) is false.
// NOTE(review): with BOOST_WAVE_SUPPORT_IMPORT_KEYWORD enabled the entries
// visible here add up to 16 initializers (15 tokens plus the sentinel),
// one more than INIT_DATA_CPP_SIZE (15) -- confirm the size macro against
// that configuration.
400 template <typename IteratorT, typename PositionT>
401 typename lexer_base<IteratorT, PositionT>::lexer_data const
402 lexer<IteratorT, PositionT>::init_data_cpp[INIT_DATA_CPP_SIZE] =
// alternative operator spellings
404 TOKEN_DATA(AND_ALT, "bitand"),
405 TOKEN_DATA(ANDASSIGN_ALT, "and_eq"),
406 TOKEN_DATA(ANDAND_ALT, "and"),
407 TOKEN_DATA(OR_ALT, "bitor"),
408 TOKEN_DATA(ORASSIGN_ALT, "or_eq"),
409 TOKEN_DATA(OROR_ALT, "or"),
410 TOKEN_DATA(XORASSIGN_ALT, "xor_eq"),
411 TOKEN_DATA(XOR_ALT, "xor"),
412 TOKEN_DATA(NOTEQUAL_ALT, "not_eq"),
413 TOKEN_DATA(NOT_ALT, "not"),
414 TOKEN_DATA(COMPL_ALT, "compl"),
415 #if BOOST_WAVE_SUPPORT_IMPORT_KEYWORD != 0
416 TOKEN_DATA(IMPORT, "import"),
// operators that exist in C++ only
418 TOKEN_DATA(ARROWSTAR, Q("->") Q("*")),
419 TOKEN_DATA(DOTSTAR, Q(".") Q("*")),
420 TOKEN_DATA(COLON_COLON, "::"),
421 { token_id(0) } // this should be the last entry
424 ///////////////////////////////////////////////////////////////////////////////
425 // pp-number only token definitions
// init_dfa() registers this table first, and only when
// need_prefer_pp_numbers(language) is true.
426 template <typename IteratorT, typename PositionT>
427 typename lexer_base<IteratorT, PositionT>::lexer_data const
428 lexer<IteratorT, PositionT>::init_data_pp_number[INIT_DATA_PP_NUMBER_SIZE] =
430 TOKEN_DATA(PP_NUMBER, PP_NUMBERDEF),
431 { token_id(0) } // this should be the last entry
434 ///////////////////////////////////////////////////////////////////////////////
435 // C++0x only token definitions
// init_dfa() registers this table only when BOOST_WAVE_SUPPORT_CPP0X is
// enabled and need_cpp0x(language) is true.
// The T_EXT* ids below are the plain literal ids with the AltTokenType bit
// set, so that TOKEN_DATA(EXT..., ...) can name them; slex_functor::get()
// clears that bit again before the token is returned (the visible code does
// so for T_EXTRAWSTRINGLIT; presumably also for the other two -- the
// corresponding case labels are not visible here).
437 #define T_EXTCHARLIT token_id(T_CHARLIT|AltTokenType)
438 #define T_EXTSTRINGLIT token_id(T_STRINGLIT|AltTokenType)
439 #define T_EXTRAWSTRINGLIT token_id(T_RAWSTRINGLIT|AltTokenType)
441 template <typename IteratorT, typename PositionT>
442 typename lexer_base<IteratorT, PositionT>::lexer_data const
443 lexer<IteratorT, PositionT>::init_data_cpp0x[INIT_DATA_CPP0X_SIZE] =
// character and string literals with a u, U or u8 prefix
445 TOKEN_DATA(EXTCHARLIT, EXTCHAR_SPEC "'"
446 "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\n\\r\\\\']" ")+" "'"),
447 TOKEN_DATA(EXTSTRINGLIT, EXTCHAR_SPEC Q("\"")
448 "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\n\\r\\\\\"]" ")*" Q("\"")),
// raw string literals; unlike the patterns above these allow line breaks
// inside the literal
449 TOKEN_DATA(RAWSTRINGLIT, CHAR_SPEC "R" Q("\"")
450 "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\\\\"]" ")*" Q("\"")),
451 TOKEN_DATA(EXTRAWSTRINGLIT, EXTCHAR_SPEC "R" Q("\"")
452 "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\\\\"]" ")*" Q("\"")),
// keywords added by C++0x
453 TOKEN_DATA(ALIGNAS, "alignas"),
454 TOKEN_DATA(ALIGNOF, "alignof"),
455 TOKEN_DATA(CHAR16_T, "char16_t"),
456 TOKEN_DATA(CHAR32_T, "char32_t"),
457 TOKEN_DATA(CONSTEXPR, "constexpr"),
458 TOKEN_DATA(DECLTYPE, "decltype"),
459 TOKEN_DATA(NOEXCEPT, "noexcept"),
460 TOKEN_DATA(NULLPTR, "nullptr"),
461 TOKEN_DATA(STATICASSERT, "static_assert"),
// the keyword is spelled with an underscore; the previous pattern
// (without the underscore) never matched the real keyword
462 TOKEN_DATA(THREADLOCAL, "thread_local"),
463 { token_id(0) } // this should be the last entry
466 ///////////////////////////////////////////////////////////////////////////////
467 // undefine macros, required for regular expression definitions
479 #undef LONGINTEGER_SUFFIX
480 #undef INTEGER_SUFFIX
497 ///////////////////////////////////////////////////////////////////////////////
498 // initialize cpp lexer with token data
// The constructor itself only forwards NUM_LEXER_STATES to the
// boost::spirit::classic::lexer base; the token data is registered later
// by lexer::init_dfa().
499 template <typename IteratorT, typename PositionT>
501 lexer_base<IteratorT, PositionT>::lexer_base()
502 : base_type(NUM_LEXER_STATES)
// Register the token regexes that apply to the language mode `lang`.
// The tables are registered in this order: pp-number (if preferred),
// C++ only (unless in C99 mode), C++0x (if enabled and requested), and
// finally the common table. Each loop stops at the table entry whose
// tokenid is 0.
506 template <typename IteratorT, typename PositionT>
508 lexer<IteratorT, PositionT>::init_dfa(boost::wave::language_support lang)
// an already compiled DFA is left alone (the statement guarded by this
// test is not visible here; presumably an early return)
510 if (this->has_compiled_dfa())
513 // if pp-numbers should be preferred, insert the corresponding rule first
514 if (boost::wave::need_prefer_pp_numbers(lang)) {
515 for (int j = 0; 0 != init_data_pp_number[j].tokenid; ++j) {
516 this->register_regex(init_data_pp_number[j].tokenregex,
517 init_data_pp_number[j].tokenid, init_data_pp_number[j].tokencb,
518 init_data_pp_number[j].lexerstate);
522 // if in C99 mode, some of the keywords are not valid
523 if (!boost::wave::need_c99(lang)) {
524 for (int j = 0; 0 != init_data_cpp[j].tokenid; ++j) {
525 this->register_regex(init_data_cpp[j].tokenregex,
526 init_data_cpp[j].tokenid, init_data_cpp[j].tokencb,
527 init_data_cpp[j].lexerstate);
531 // if in C++0x mode, add all new keywords
532 #if BOOST_WAVE_SUPPORT_CPP0X != 0
533 if (boost::wave::need_cpp0x(lang)) {
534 for (int j = 0; 0 != init_data_cpp0x[j].tokenid; ++j) {
535 this->register_regex(init_data_cpp0x[j].tokenregex,
536 init_data_cpp0x[j].tokenid, init_data_cpp0x[j].tokencb,
537 init_data_cpp0x[j].lexerstate);
// the common C++/C99 patterns are always registered, after all others
542 for (int i = 0; 0 != init_data[i].tokenid; ++i) {
543 this->register_regex(init_data[i].tokenregex, init_data[i].tokenid,
544 init_data[i].tokencb, init_data[i].lexerstate);
548 ///////////////////////////////////////////////////////////////////////////////
549 // get time of last compilation of this file
// The helper is constructed from the __DATE__ and __TIME__ strings of the
// including translation unit; init_lexer() passes the resulting time to
// load() and save() of the DFA file.
550 template <typename IteratorT, typename PositionT>
551 boost::wave::util::time_conversion_helper
552 lexer<IteratorT, PositionT>::compilation_time(__DATE__ " " __TIME__);
554 ///////////////////////////////////////////////////////////////////////////////
557 ///////////////////////////////////////////////////////////////////////////////
// Prepare `lexer` for use: register the token regexes for `language` and
// then either load the DFA from the file wave_slex_lexer.dfa or, if that is
// not possible (or force_reinit is set), rebuild it and write the file.
// The file name is a relative path, so it is resolved against the current
// working directory of the process.
// NOTE(review): the test on has_compiled_dfa() below returns before
// force_reinit is looked at, so force_reinit has no effect once a DFA is
// compiled -- confirm that this is intended.
559 template <typename IteratorT, typename PositionT>
561 init_lexer (lexer::lexer<IteratorT, PositionT> &lexer,
562 boost::wave::language_support language, bool force_reinit = false)
564 if (lexer.has_compiled_dfa())
565 return; // nothing to do
// try to open a previously saved DFA
573 ifstream dfa_in("wave_slex_lexer.dfa", ios::in|ios::binary);
575 lexer.init_dfa(language);
// load() receives the compilation time of this code next to the stream;
// if loading fails the branch below is taken
576 if (force_reinit || !dfa_in.is_open() ||
577 !lexer.load (dfa_in, (long)lexer.get_compilation_time()))
579 #if defined(BOOST_SPIRIT_DEBUG)
580 cerr << "Compiling regular expressions for slex ...";
581 #endif // defined(BOOST_SPIRIT_DEBUG)
// write the DFA back; the file is truncated and a failure to open it is
// silently ignored
586 ofstream dfa_out ("wave_slex_lexer.dfa", ios::out|ios::binary|ios::trunc);
588 if (dfa_out.is_open())
589 lexer.save (dfa_out, (long)lexer.get_compilation_time());
591 #if defined(BOOST_SPIRIT_DEBUG)
592 cerr << " Done." << endl;
593 #endif // defined(BOOST_SPIRIT_DEBUG)
597 ///////////////////////////////////////////////////////////////////////////////
601 ///////////////////////////////////////////////////////////////////////////////
// slex_functor: token source built on the slex lexer. It keeps a position
// iterator over the input and hands out one token per call to get().
// The lexer object is a static member, i.e. shared by all slex_functor
// objects of the same instantiation.
// NOTE(review): the shared lexer is initialized from the constructor and no
// synchronization is visible here -- confirm how concurrent construction
// is handled.
603 template <typename IteratorT, typename PositionT = wave::util::file_position_type>
605 : public slex_input_interface<
606 typename lexer::lexer<IteratorT, PositionT>::token_type
611 typedef boost::wave::util::position_iterator<IteratorT, PositionT>
613 typedef typename std::iterator_traits<IteratorT>::value_type char_type;
614 typedef BOOST_WAVE_STRINGTYPE string_type;
615 typedef typename lexer::lexer<IteratorT, PositionT>::token_type token_type;
// set up the position iterator over [first_, last_) starting at pos_ and
// remember the language mode
617 slex_functor(IteratorT const &first_, IteratorT const &last_,
618 PositionT const &pos_, boost::wave::language_support language_)
619 : first(first_, last_, pos_), language(language_), at_eof(false)
621 // initialize lexer dfa tables
622 init_lexer(lexer, language_);
624 virtual ~slex_functor() {}
626 // get the next token from the input stream
// The token is stored in `result`, which is also what gets returned.
627 token_type& get(token_type& result)
631 // generate and return the next token
633 PositionT pos = first.get_position(); // begin of token position
634 token_id id = token_id(lexer.next_token(first, last, &value));
// next_token() reporting -1 is mapped to T_EOF
636 if ((token_id)(-1) == id)
637 id = T_EOF; // end of input reached
// working copy of the matched text; the branches below may rewrite it
639 string_type token_val(value.c_str());
641 if (boost::wave::need_emit_contnewlines(language) ||
644 // The cast should avoid spurious warnings about missing case labels
645 // for the other token ids.
648 // test identifier characters for validity (throws if
649 // invalid chars found)
650 if (!boost::wave::need_no_character_validation(language)) {
651 using boost::wave::cpplexer::impl::validate_identifier_name;
652 validate_identifier_name(token_val,
653 pos.get_line(), pos.get_column(), pos.get_file());
// the extended literal ids carry the AltTokenType bit only inside the
// lexer tables; it is cleared here (further case labels of this group
// are presumably on lines not visible here)
659 case T_EXTRAWSTRINGLIT:
660 id = token_id(id & ~AltTokenType);
666 // test literal characters for validity (throws if invalid
668 if (boost::wave::need_convert_trigraphs(language)) {
669 using boost::wave::cpplexer::impl::convert_trigraphs;
670 token_val = convert_trigraphs(token_val);
672 if (!boost::wave::need_no_character_validation(language)) {
673 using boost::wave::cpplexer::impl::validate_literal;
674 validate_literal(token_val,
675 pos.get_line(), pos.get_column(), pos.get_file());
679 case T_LONGINTLIT: // supported in C99 and long_long mode
680 if (!boost::wave::need_long_long(language)) {
681 // syntax error: not allowed in C++ mode
682 BOOST_WAVE_LEXER_THROW(
683 boost::wave::cpplexer::lexing_exception,
684 invalid_long_long_literal, value.c_str(),
685 pos.get_line(), pos.get_column(),
686 pos.get_file().c_str());
690 #if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
694 // convert to the corresponding ..._next token, if appropriate
696 // Skip '#' and whitespace and see whether we find an
697 // 'include_next' here.
// the directive name is located by searching the matched text; the
// AltTokenType bit marks the include_next flavour of the token
698 typename string_type::size_type start = value.find("include");
699 if (0 == value.compare(start, 12, "include_next", 12))
700 id = token_id(id | AltTokenType);
703 #endif // BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
706 // T_EOF is returned as a valid token, the next call will
707 // return T_EOI, i.e. the actual end of input
// single trigraph tokens are replaced by their plain spelling when
// trigraph conversion is requested
714 case T_LEFTBRACE_TRIGRAPH:
715 case T_RIGHTBRACE_TRIGRAPH:
716 case T_LEFTBRACKET_TRIGRAPH:
717 case T_RIGHTBRACKET_TRIGRAPH:
718 case T_COMPL_TRIGRAPH:
719 case T_POUND_TRIGRAPH:
721 if (boost::wave::need_convert_trigraphs(language))
723 using boost::wave::cpplexer::impl::convert_trigraph;
724 token_val = convert_trigraph(token_val);
// build the token from the possibly adjusted id and text and the start
// position; with pragma once support it is passed through the include
// guard detection first
729 result = token_type(id, token_val, pos);
730 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
731 return guards.detect_guard(result);
737 // skip the T_CONTLINE token
740 return result = token_type(); // return T_EOI
743 void set_position(PositionT const &pos)
745 // set position has to change the file name and line number only
746 first.get_position().set_file(pos.get_file());
747 first.get_position().set_line(pos.get_line());
750 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
// forwards to the include guard detector, passing guard_name through
751 bool has_include_guards(std::string& guard_name) const
752 { return guards.detected(guard_name); }
758 boost::wave::language_support language;
759 static lexer::lexer<IteratorT, PositionT> lexer; // needed only once
763 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
764 include_guards<token_type> guards;
// definition of the static lexer member shared by all functor objects
768 template <typename IteratorT, typename PositionT>
769 lexer::lexer<IteratorT, PositionT> slex_functor<IteratorT, PositionT>::lexer;
772 #undef T_EXTSTRINGLIT
773 #undef T_EXTRAWSTRINGLIT
775 ///////////////////////////////////////////////////////////////////////////////
777 // The 'new_lexer' function allows the opaque generation of a new lexer object.
778 // It is coupled to the iterator type to allow to decouple the lexer/iterator
779 // configurations at compile time.
781 // This function is declared inside the cpp_slex_token.hpp file, which is
782 // referenced by the source file calling the lexer and the source file, which
783 // instantiates the lex_functor. But it is defined here, so it will be
784 // instantiated only while compiling the source file, which instantiates the
785 // lex_functor. While the cpp_slex_token.hpp file may be included everywhere,
786 // this file (cpp_slex_lexer.hpp) should be included only once. This allows
787 // to decouple the lexer interface from the lexer implementation and reduces
790 ///////////////////////////////////////////////////////////////////////////////
792 ///////////////////////////////////////////////////////////////////////////////
794 // The new_lexer_gen<>::new_lexer function (declared in cpp_slex_token.hpp)
795 // should be defined inline, if the lex_functor shouldn't be instantiated
796 // separately from the lex_iterator.
798 // Separate (explicit) instantiation helps to reduce compilation time.
800 ///////////////////////////////////////////////////////////////////////////////
// BOOST_WAVE_SLEX_NEW_LEXER_INLINE expands to nothing when the lexer is
// instantiated separately and to `inline` otherwise; it is undefined again
// right after the function below.
802 #if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0
803 #define BOOST_WAVE_SLEX_NEW_LEXER_INLINE
805 #define BOOST_WAVE_SLEX_NEW_LEXER_INLINE inline
// Create a new slex_functor for the input range [first, last) starting at
// position pos and return it through the lex_input_interface pointer.
// The object is allocated with `new`; presumably the caller takes ownership
// -- TODO confirm against the declaration of new_lexer_gen.
808 template <typename IteratorT, typename PositionT>
809 BOOST_WAVE_SLEX_NEW_LEXER_INLINE
810 lex_input_interface<slex_token<PositionT> > *
811 new_lexer_gen<IteratorT, PositionT>::new_lexer(IteratorT const &first,
812 IteratorT const &last, PositionT const &pos,
813 boost::wave::language_support language)
815 return new slex_functor<IteratorT, PositionT>(first, last, pos,
819 #undef BOOST_WAVE_SLEX_NEW_LEXER_INLINE
821 ///////////////////////////////////////////////////////////////////////////////
823 } // namespace cpplexer
827 #endif // !defined(SLEX_LEXER_HPP_5E8E1DF0_BB41_4938_B7E5_A4BB68222FF6_INCLUDED)