]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/libs/wave/samples/cpp_tokens/slex/cpp_slex_lexer.hpp
import quincy beta 17.1.0
[ceph.git] / ceph / src / boost / libs / wave / samples / cpp_tokens / slex / cpp_slex_lexer.hpp
CommitLineData
7c673cae
FG
1/*=============================================================================
2 Boost.Wave: A Standard compliant C++ preprocessor library
3
4 SLex (Spirit Lex) based C++ lexer
b32b8144 5
7c673cae
FG
6 http://www.boost.org/
7
b32b8144
FG
8 Copyright (c) 2001-2012 Hartmut Kaiser. Distributed under the Boost
9 Software License, Version 1.0. (See accompanying file
7c673cae
FG
10 LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
11=============================================================================*/
12
20effc67
TL
13#if !defined(BOOST_SLEX_LEXER_HPP_5E8E1DF0_BB41_4938_B7E5_A4BB68222FF6_INCLUDED)
14#define BOOST_SLEX_LEXER_HPP_5E8E1DF0_BB41_4938_B7E5_A4BB68222FF6_INCLUDED
7c673cae
FG
15
#include <fstream>
#include <string>
#if defined(BOOST_SPIRIT_DEBUG)
#include <iostream>
#endif // defined(BOOST_SPIRIT_DEBUG)
20
21#include <boost/assert.hpp>
22#include <boost/spirit/include/classic_core.hpp>
23
24#include <boost/wave/wave_config.hpp>
25#include <boost/wave/language_support.hpp>
26#include <boost/wave/token_ids.hpp>
27#include <boost/wave/util/file_position.hpp>
28#include <boost/wave/util/time_conversion_helper.hpp>
29#include <boost/wave/cpplexer/validate_universal_char.hpp>
30#include <boost/wave/cpplexer/convert_trigraphs.hpp>
31#include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
32#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
33#include <boost/wave/cpplexer/detect_include_guards.hpp>
34#endif
35#include <boost/wave/cpplexer/cpp_lex_interface.hpp>
36
37#include "../slex_interface.hpp"
38#include "../slex_token.hpp"
39#include "../slex_iterator.hpp"
40
41#include "lexer.hpp" // "spirit/lexer.hpp"
42
43///////////////////////////////////////////////////////////////////////////////
44namespace boost {
45namespace wave {
46namespace cpplexer {
47namespace slex {
48namespace lexer {
49
///////////////////////////////////////////////////////////////////////////////
// The following numbers are the array sizes of the token regex tables defined
// further down in this file. They need to be specified explicitly to make the
// CW compiler happy (at least up to V9.5).
//
// Every size includes the terminating '{ token_id(0) }' entry and has to be
// kept in sync with the conditionally compiled entries of its table.
#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
#define INIT_DATA_SIZE              175
#else
#define INIT_DATA_SIZE              158
#endif
// init_data_cpp holds one additional entry (T_IMPORT) if the import keyword
// is supported; a fixed size of 15 would be one element too small then.
#if BOOST_WAVE_SUPPORT_IMPORT_KEYWORD != 0
#define INIT_DATA_CPP_SIZE          16
#else
#define INIT_DATA_CPP_SIZE          15
#endif
#define INIT_DATA_PP_NUMBER_SIZE    2
#define INIT_DATA_CPP0X_SIZE        15
#define INIT_DATA_CPP2A_SIZE        10
7c673cae
FG
62
63///////////////////////////////////////////////////////////////////////////////
b32b8144 64//
7c673cae
FG
65// encapsulation of the boost::spirit::classic::slex based cpp lexer
66//
67///////////////////////////////////////////////////////////////////////////////
68
69///////////////////////////////////////////////////////////////////////////////
b32b8144 70// The following lexer_base class was necessary to workaround a CodeWarrior
7c673cae
FG
71// bug (at least up to CW V9.5).
72template <typename IteratorT, typename PositionT>
b32b8144 73class lexer_base
7c673cae
FG
74: public boost::spirit::classic::lexer<
75 boost::wave::util::position_iterator<IteratorT, PositionT> >
76{
77protected:
b32b8144 78 typedef boost::wave::util::position_iterator<IteratorT, PositionT>
7c673cae
FG
79 iterator_type;
80 typedef typename std::iterator_traits<IteratorT>::value_type char_type;
81 typedef boost::spirit::classic::lexer<iterator_type> base_type;
82
83 lexer_base();
84
85// initialization data (regular expressions for the token definitions)
86 struct lexer_data {
87 token_id tokenid; // token data
88 char_type const *tokenregex; // associated token to match
89 typename base_type::callback_t tokencb; // associated callback function
90 unsigned int lexerstate; // valid for lexer state
91 };
92};
93
94///////////////////////////////////////////////////////////////////////////////
95template <typename IteratorT, typename PositionT>
b32b8144 96class lexer
7c673cae
FG
97: public lexer_base<IteratorT, PositionT>
98{
99public:
100 typedef boost::wave::cpplexer::slex_token<PositionT> token_type;
b32b8144 101
7c673cae
FG
102 void init_dfa(boost::wave::language_support language);
103
104// get time of last compilation
b32b8144 105 static std::time_t get_compilation_time()
7c673cae
FG
106 { return compilation_time.get_time(); }
107
108// helper for calculation of the time of last compilation
109 static boost::wave::util::time_conversion_helper compilation_time;
110
111private:
112 typedef lexer_base<IteratorT, PositionT> base_type;
113
114 static typename base_type::lexer_data const init_data[INIT_DATA_SIZE]; // common patterns
115 static typename base_type::lexer_data const init_data_cpp[INIT_DATA_CPP_SIZE]; // C++ only patterns
116 static typename base_type::lexer_data const init_data_pp_number[INIT_DATA_PP_NUMBER_SIZE]; // pp-number only patterns
117 static typename base_type::lexer_data const init_data_cpp0x[INIT_DATA_CPP0X_SIZE]; // C++0X only patterns
20effc67 118 static typename base_type::lexer_data const init_data_cpp2a[INIT_DATA_CPP2A_SIZE]; // C++2A only patterns
7c673cae
FG
119};
120
121///////////////////////////////////////////////////////////////////////////////
122// data required for initialization of the lexer (token definitions)
//
// All macros below expand to adjacent string literals, which the compiler
// concatenates into one regular expression string.
//   OR      the regex alternation operator
//   Q(c)    c prefixed with a (regex level) backslash
//   TRI(c)  two escaped question marks followed by c, i.e. the regex for the
//           trigraph spelled ??c
123#define OR "|"
124#define Q(c) "\\" c
125#define TRI(c) Q("?") Q("?") c
126
127// definition of some sub-token regexps to simplify the regex definitions
128#define BLANK "[ \\t]"
129#define CCOMMENT \
130 Q("/") Q("*") "[^*]*" Q("*") "+" "(" "[^/*][^*]*" Q("*") "+" ")*" Q("/")
b32b8144 131
7c673cae
FG
// PPSPACE: any sequence of blanks and C comments (may be empty)
132#define PPSPACE "(" BLANK OR CCOMMENT ")*"
133
134#define OCTALDIGIT "[0-7]"
135#define DIGIT "[0-9]"
136#define HEXDIGIT "[0-9a-fA-F]"
137#define OPTSIGN "[-+]?"
138#define EXPSTART "[eE]" "[-+]"
139#define EXPONENT "(" "[eE]" OPTSIGN "[0-9]+" ")"
140#define NONDIGIT "[a-zA-Z_]"
141
// INTEGER: hexadecimal, octal or decimal integer without any suffix
142#define INTEGER \
143 "(" "(0x|0X)" HEXDIGIT "+" OR "0" OCTALDIGIT "*" OR "[1-9]" DIGIT "*" ")"
b32b8144 144
7c673cae
FG
145#define INTEGER_SUFFIX "(" "[uU][lL]?|[lL][uU]?" ")"
// LONGINTEGER_SUFFIX: long long suffixes, additionally 'i64' if the MS
// extensions are enabled
146#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
20effc67
TL
147#define LONGINTEGER_SUFFIX "(" "[uU]" "(" "ll" OR "LL" ")" OR \
148 "(" "ll" OR "LL" ")" "[uU]" "?" OR \
7c673cae 149 "i64" \
b32b8144 150 ")"
7c673cae 151#else
20effc67
TL
152#define LONGINTEGER_SUFFIX "(" "[uU]" "(" "ll" OR "LL" ")" OR \
153 "(" "ll" OR "LL" ")" "[uU]" "?" ")"
7c673cae
FG
154#endif
155#define FLOAT_SUFFIX "(" "[fF][lL]?" OR "[lL][fF]?" ")"
// literal prefixes: CHAR_SPEC is the optional 'L', EXTCHAR_SPEC one of
// 'u', 'U' or 'u8'
156#define CHAR_SPEC "L?"
157#define EXTCHAR_SPEC "(" "[uU]" OR "u8" ")"
158
// BACKSLASH: a backslash, either spelled directly or as the trigraph ??/
159#define BACKSLASH "(" Q("\\") OR TRI(Q("/")) ")"
160#define ESCAPESEQ "(" BACKSLASH "(" \
161 "[abfnrtv?'\"]" OR \
162 BACKSLASH OR \
163 "x" HEXDIGIT "+" OR \
164 OCTALDIGIT OCTALDIGIT "?" OCTALDIGIT "?" \
165 "))"
166#define HEXQUAD "(" HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT ")"
// UNIVERSALCHAR: backslash followed by 'u' and 4 hex digits or by 'U' and
// 8 hex digits
167#define UNIVERSALCHAR "(" BACKSLASH "(" \
168 "u" HEXQUAD OR \
169 "U" HEXQUAD HEXQUAD \
b32b8144 170 "))"
7c673cae
FG
171
// POUNDDEF: '#' in its plain, trigraph (??=) and digraph (%:) spelling
172#define POUNDDEF "(" "#" OR TRI("=") OR Q("%:") ")"
173#define NEWLINEDEF "(" "\n" OR "\r" OR "\r\n" ")"
174
175#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
176#define INCLUDEDEF "(include|include_next)"
177#else
178#define INCLUDEDEF "include"
179#endif
180
181#define PP_NUMBERDEF Q(".") "?" DIGIT "(" DIGIT OR NONDIGIT OR EXPSTART OR Q(".") ")*"
182
183///////////////////////////////////////////////////////////////////////////////
184// lexer state constants
185#define LEXER_STATE_NORMAL 0
186#define LEXER_STATE_PP 1
187
// number of lexer states passed to the constructor of the slex engine; both
// TOKEN_DATA macros below register their rule for LEXER_STATE_NORMAL
188#define NUM_LEXER_STATES 1
189
190// helper for initializing token data
// TOKEN_DATA(id, regex) expands to the initializer of one lexer_data entry:
// token id T_<id>, the regex, no callback (0) and the normal lexer state.
// TOKEN_DATA_EX additionally takes the callback to store in the entry.
191#define TOKEN_DATA(id, regex) \
192 { T_##id, regex, 0, LEXER_STATE_NORMAL } \
193 /**/
194
195#define TOKEN_DATA_EX(id, regex, callback) \
196 { T_##id, regex, callback, LEXER_STATE_NORMAL } \
197 /**/
198
199///////////////////////////////////////////////////////////////////////////////
200// common C++/C99 token definitions
//
// Each entry associates a token id with the regular expression recognizing
// this token. lexer::init_dfa() registers these rules after the rules of the
// language specific tables. The table has to hold exactly INIT_DATA_SIZE
// entries, the last of which is the terminating entry with the token id 0.
201template <typename IteratorT, typename PositionT>
b32b8144
FG
202typename lexer_base<IteratorT, PositionT>::lexer_data const
203lexer<IteratorT, PositionT>::init_data[INIT_DATA_SIZE] =
7c673cae
FG
204{
// operators and punctuation (including the digraph and trigraph spellings)
205 TOKEN_DATA(AND, "&"),
206 TOKEN_DATA(ANDAND, "&&"),
207 TOKEN_DATA(ASSIGN, "="),
208 TOKEN_DATA(ANDASSIGN, "&="),
209 TOKEN_DATA(OR, Q("|")),
210 TOKEN_DATA(OR_TRIGRAPH, TRI("!")),
211 TOKEN_DATA(ORASSIGN, Q("|=")),
212 TOKEN_DATA(ORASSIGN_TRIGRAPH, TRI("!=")),
213 TOKEN_DATA(XOR, Q("^")),
214 TOKEN_DATA(XOR_TRIGRAPH, TRI("'")),
215 TOKEN_DATA(XORASSIGN, Q("^=")),
216 TOKEN_DATA(XORASSIGN_TRIGRAPH, TRI("'=")),
217 TOKEN_DATA(COMMA, ","),
218 TOKEN_DATA(COLON, ":"),
219 TOKEN_DATA(DIVIDEASSIGN, Q("/=")),
220 TOKEN_DATA(DIVIDE, Q("/")),
221 TOKEN_DATA(DOT, Q(".")),
222 TOKEN_DATA(ELLIPSIS, Q(".") Q(".") Q(".")),
223 TOKEN_DATA(EQUAL, "=="),
224 TOKEN_DATA(GREATER, ">"),
225 TOKEN_DATA(GREATEREQUAL, ">="),
226 TOKEN_DATA(LEFTBRACE, Q("{")),
227 TOKEN_DATA(LEFTBRACE_ALT, "<" Q("%")),
228 TOKEN_DATA(LEFTBRACE_TRIGRAPH, TRI("<")),
229 TOKEN_DATA(LESS, "<"),
230 TOKEN_DATA(LESSEQUAL, "<="),
231 TOKEN_DATA(LEFTPAREN, Q("(")),
232 TOKEN_DATA(LEFTBRACKET, Q("[")),
233 TOKEN_DATA(LEFTBRACKET_ALT, "<:"),
234 TOKEN_DATA(LEFTBRACKET_TRIGRAPH, TRI(Q("("))),
235 TOKEN_DATA(MINUS, Q("-")),
236 TOKEN_DATA(MINUSASSIGN, Q("-=")),
237 TOKEN_DATA(MINUSMINUS, Q("-") Q("-")),
238 TOKEN_DATA(PERCENT, Q("%")),
239 TOKEN_DATA(PERCENTASSIGN, Q("%=")),
240 TOKEN_DATA(NOT, "!"),
241 TOKEN_DATA(NOTEQUAL, "!="),
242 TOKEN_DATA(OROR, Q("|") Q("|")),
243 TOKEN_DATA(OROR_TRIGRAPH, TRI("!") Q("|") OR Q("|") TRI("!") OR TRI("!") TRI("!")),
244 TOKEN_DATA(PLUS, Q("+")),
245 TOKEN_DATA(PLUSASSIGN, Q("+=")),
246 TOKEN_DATA(PLUSPLUS, Q("+") Q("+")),
247 TOKEN_DATA(ARROW, Q("->")),
248 TOKEN_DATA(QUESTION_MARK, Q("?")),
249 TOKEN_DATA(RIGHTBRACE, Q("}")),
250 TOKEN_DATA(RIGHTBRACE_ALT, Q("%>")),
251 TOKEN_DATA(RIGHTBRACE_TRIGRAPH, TRI(">")),
252 TOKEN_DATA(RIGHTPAREN, Q(")")),
253 TOKEN_DATA(RIGHTBRACKET, Q("]")),
254 TOKEN_DATA(RIGHTBRACKET_ALT, ":>"),
255 TOKEN_DATA(RIGHTBRACKET_TRIGRAPH, TRI(Q(")"))),
256 TOKEN_DATA(SEMICOLON, ";"),
257 TOKEN_DATA(SHIFTLEFT, "<<"),
258 TOKEN_DATA(SHIFTLEFTASSIGN, "<<="),
259 TOKEN_DATA(SHIFTRIGHT, ">>"),
260 TOKEN_DATA(SHIFTRIGHTASSIGN, ">>="),
261 TOKEN_DATA(STAR, Q("*")),
262 TOKEN_DATA(COMPL, Q("~")),
263 TOKEN_DATA(COMPL_TRIGRAPH, TRI("-")),
264 TOKEN_DATA(STARASSIGN, Q("*=")),
// keywords
265 TOKEN_DATA(ASM, "asm"),
266 TOKEN_DATA(AUTO, "auto"),
267 TOKEN_DATA(BOOL, "bool"),
268 TOKEN_DATA(FALSE, "false"),
269 TOKEN_DATA(TRUE, "true"),
270 TOKEN_DATA(BREAK, "break"),
271 TOKEN_DATA(CASE, "case"),
272 TOKEN_DATA(CATCH, "catch"),
273 TOKEN_DATA(CHAR, "char"),
274 TOKEN_DATA(CLASS, "class"),
275 TOKEN_DATA(CONST, "const"),
276 TOKEN_DATA(CONSTCAST, "const_cast"),
277 TOKEN_DATA(CONTINUE, "continue"),
278 TOKEN_DATA(DEFAULT, "default"),
279 TOKEN_DATA(DELETE, "delete"),
280 TOKEN_DATA(DO, "do"),
281 TOKEN_DATA(DOUBLE, "double"),
282 TOKEN_DATA(DYNAMICCAST, "dynamic_cast"),
283 TOKEN_DATA(ELSE, "else"),
284 TOKEN_DATA(ENUM, "enum"),
285 TOKEN_DATA(EXPLICIT, "explicit"),
286 TOKEN_DATA(EXPORT, "export"),
287 TOKEN_DATA(EXTERN, "extern"),
288 TOKEN_DATA(FLOAT, "float"),
289 TOKEN_DATA(FOR, "for"),
290 TOKEN_DATA(FRIEND, "friend"),
291 TOKEN_DATA(GOTO, "goto"),
292 TOKEN_DATA(IF, "if"),
293 TOKEN_DATA(INLINE, "inline"),
294 TOKEN_DATA(INT, "int"),
295 TOKEN_DATA(LONG, "long"),
296 TOKEN_DATA(MUTABLE, "mutable"),
297 TOKEN_DATA(NAMESPACE, "namespace"),
298 TOKEN_DATA(NEW, "new"),
299 TOKEN_DATA(OPERATOR, "operator"),
300 TOKEN_DATA(PRIVATE, "private"),
301 TOKEN_DATA(PROTECTED, "protected"),
302 TOKEN_DATA(PUBLIC, "public"),
303 TOKEN_DATA(REGISTER, "register"),
304 TOKEN_DATA(REINTERPRETCAST, "reinterpret_cast"),
305 TOKEN_DATA(RETURN, "return"),
306 TOKEN_DATA(SHORT, "short"),
307 TOKEN_DATA(SIGNED, "signed"),
308 TOKEN_DATA(SIZEOF, "sizeof"),
309 TOKEN_DATA(STATIC, "static"),
310 TOKEN_DATA(STATICCAST, "static_cast"),
311 TOKEN_DATA(STRUCT, "struct"),
312 TOKEN_DATA(SWITCH, "switch"),
313 TOKEN_DATA(TEMPLATE, "template"),
314 TOKEN_DATA(THIS, "this"),
315 TOKEN_DATA(THROW, "throw"),
316 TOKEN_DATA(TRY, "try"),
317 TOKEN_DATA(TYPEDEF, "typedef"),
318 TOKEN_DATA(TYPEID, "typeid"),
319 TOKEN_DATA(TYPENAME, "typename"),
320 TOKEN_DATA(UNION, "union"),
321 TOKEN_DATA(UNSIGNED, "unsigned"),
322 TOKEN_DATA(USING, "using"),
323 TOKEN_DATA(VIRTUAL, "virtual"),
324 TOKEN_DATA(VOID, "void"),
325 TOKEN_DATA(VOLATILE, "volatile"),
326 TOKEN_DATA(WCHART, "wchar_t"),
327 TOKEN_DATA(WHILE, "while"),
// preprocessor directives: the pound sign, optional blanks/C comments and
// the directive name form one token
328 TOKEN_DATA(PP_DEFINE, POUNDDEF PPSPACE "define"),
329 TOKEN_DATA(PP_IF, POUNDDEF PPSPACE "if"),
330 TOKEN_DATA(PP_IFDEF, POUNDDEF PPSPACE "ifdef"),
331 TOKEN_DATA(PP_IFNDEF, POUNDDEF PPSPACE "ifndef"),
332 TOKEN_DATA(PP_ELSE, POUNDDEF PPSPACE "else"),
333 TOKEN_DATA(PP_ELIF, POUNDDEF PPSPACE "elif"),
334 TOKEN_DATA(PP_ENDIF, POUNDDEF PPSPACE "endif"),
335 TOKEN_DATA(PP_ERROR, POUNDDEF PPSPACE "error"),
336 TOKEN_DATA(PP_QHEADER, POUNDDEF PPSPACE \
337 INCLUDEDEF PPSPACE Q("\"") "[^\\n\\r\"]+" Q("\"")),
338 TOKEN_DATA(PP_HHEADER, POUNDDEF PPSPACE \
339 INCLUDEDEF PPSPACE "<" "[^\\n\\r>]+" ">"),
340 TOKEN_DATA(PP_INCLUDE, POUNDDEF PPSPACE \
341 INCLUDEDEF PPSPACE),
342 TOKEN_DATA(PP_LINE, POUNDDEF PPSPACE "line"),
343 TOKEN_DATA(PP_PRAGMA, POUNDDEF PPSPACE "pragma"),
344 TOKEN_DATA(PP_UNDEF, POUNDDEF PPSPACE "undef"),
345 TOKEN_DATA(PP_WARNING, POUNDDEF PPSPACE "warning"),
// MS specific keywords and directives (17 entries, the difference between
// the two values of INIT_DATA_SIZE)
346#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
347 TOKEN_DATA(MSEXT_INT8, "__int8"),
348 TOKEN_DATA(MSEXT_INT16, "__int16"),
349 TOKEN_DATA(MSEXT_INT32, "__int32"),
350 TOKEN_DATA(MSEXT_INT64, "__int64"),
351 TOKEN_DATA(MSEXT_BASED, "_?" "_based"),
352 TOKEN_DATA(MSEXT_DECLSPEC, "_?" "_declspec"),
353 TOKEN_DATA(MSEXT_CDECL, "_?" "_cdecl"),
354 TOKEN_DATA(MSEXT_FASTCALL, "_?" "_fastcall"),
355 TOKEN_DATA(MSEXT_STDCALL, "_?" "_stdcall"),
356 TOKEN_DATA(MSEXT_TRY , "__try"),
357 TOKEN_DATA(MSEXT_EXCEPT, "__except"),
358 TOKEN_DATA(MSEXT_FINALLY, "__finally"),
359 TOKEN_DATA(MSEXT_LEAVE, "__leave"),
360 TOKEN_DATA(MSEXT_INLINE, "_?" "_inline"),
361 TOKEN_DATA(MSEXT_ASM, "_?" "_asm"),
362 TOKEN_DATA(MSEXT_PP_REGION, POUNDDEF PPSPACE "region"),
363 TOKEN_DATA(MSEXT_PP_ENDREGION, POUNDDEF PPSPACE "endregion"),
364#endif // BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
365// TOKEN_DATA(OCTALINT, "0" OCTALDIGIT "*" INTEGER_SUFFIX "?"),
366// TOKEN_DATA(DECIMALINT, "[1-9]" DIGIT "*" INTEGER_SUFFIX "?"),
367// TOKEN_DATA(HEXAINT, "(0x|0X)" HEXDIGIT "+" INTEGER_SUFFIX "?"),
// literals, comments, identifiers, whitespace and the remaining tokens
368 TOKEN_DATA(LONGINTLIT, INTEGER LONGINTEGER_SUFFIX),
369 TOKEN_DATA(INTLIT, INTEGER INTEGER_SUFFIX "?"),
b32b8144
FG
370 TOKEN_DATA(FLOATLIT,
371 "(" DIGIT "*" Q(".") DIGIT "+" OR DIGIT "+" Q(".") ")"
7c673cae
FG
372 EXPONENT "?" FLOAT_SUFFIX "?" OR
373 DIGIT "+" EXPONENT FLOAT_SUFFIX "?"),
374 TOKEN_DATA(CCOMMENT, CCOMMENT),
// note: the C++ comment rule includes the terminating newline
375 TOKEN_DATA(CPPCOMMENT, Q("/") Q("/[^\\n\\r]*") NEWLINEDEF ),
b32b8144 376 TOKEN_DATA(CHARLIT, CHAR_SPEC "'"
7c673cae 377 "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\n\\r\\\\']" ")+" "'"),
b32b8144 378 TOKEN_DATA(STRINGLIT, CHAR_SPEC Q("\"")
7c673cae
FG
379 "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\n\\r\\\\\"]" ")*" Q("\"")),
// the non-strict lexer additionally accepts '$' inside of identifiers
380#if BOOST_WAVE_USE_STRICT_LEXER != 0
381 TOKEN_DATA(IDENTIFIER, "([a-zA-Z_]" OR UNIVERSALCHAR ")([a-zA-Z0-9_]" OR UNIVERSALCHAR ")*"),
382#else
383 TOKEN_DATA(IDENTIFIER, "([a-zA-Z_$]" OR UNIVERSALCHAR ")([a-zA-Z0-9_$]" OR UNIVERSALCHAR ")*"),
384#endif
385 TOKEN_DATA(SPACE, "[ \t\v\f]+"),
386// TOKEN_DATA(SPACE2, "[\\v\\f]+"),
b32b8144 387 TOKEN_DATA(CONTLINE, Q("\\") "\n"),
7c673cae
FG
388 TOKEN_DATA(NEWLINE, NEWLINEDEF),
389 TOKEN_DATA(POUND_POUND, "##"),
390 TOKEN_DATA(POUND_POUND_ALT, Q("%:") Q("%:")),
391 TOKEN_DATA(POUND_POUND_TRIGRAPH, TRI("=") TRI("=")),
392 TOKEN_DATA(POUND, "#"),
393 TOKEN_DATA(POUND_ALT, Q("%:")),
394 TOKEN_DATA(POUND_TRIGRAPH, TRI("=")),
395 TOKEN_DATA(ANY_TRIGRAPH, TRI(Q("/"))),
396 TOKEN_DATA(ANY, "."), // this should be the last recognized token
397 { token_id(0) } // this should be the last entry
398};
399
400///////////////////////////////////////////////////////////////////////////////
401// C++ only token definitions
//
// lexer::init_dfa() registers these rules unless need_c99() holds for the
// requested language. The number of entries (including the terminating
// entry) has to match INIT_DATA_CPP_SIZE; note that the T_IMPORT entry is
// compiled conditionally.
402template <typename IteratorT, typename PositionT>
b32b8144
FG
403typename lexer_base<IteratorT, PositionT>::lexer_data const
404lexer<IteratorT, PositionT>::init_data_cpp[INIT_DATA_CPP_SIZE] =
7c673cae
FG
405{
// alternative operator spellings
406 TOKEN_DATA(AND_ALT, "bitand"),
407 TOKEN_DATA(ANDASSIGN_ALT, "and_eq"),
408 TOKEN_DATA(ANDAND_ALT, "and"),
409 TOKEN_DATA(OR_ALT, "bitor"),
410 TOKEN_DATA(ORASSIGN_ALT, "or_eq"),
411 TOKEN_DATA(OROR_ALT, "or"),
412 TOKEN_DATA(XORASSIGN_ALT, "xor_eq"),
413 TOKEN_DATA(XOR_ALT, "xor"),
414 TOKEN_DATA(NOTEQUAL_ALT, "not_eq"),
415 TOKEN_DATA(NOT_ALT, "not"),
416 TOKEN_DATA(COMPL_ALT, "compl"),
417#if BOOST_WAVE_SUPPORT_IMPORT_KEYWORD != 0
418 TOKEN_DATA(IMPORT, "import"),
419#endif
// operators not available in C
420 TOKEN_DATA(ARROWSTAR, Q("->") Q("*")),
421 TOKEN_DATA(DOTSTAR, Q(".") Q("*")),
422 TOKEN_DATA(COLON_COLON, "::"),
423 { token_id(0) } // this should be the last entry
424};
425
426///////////////////////////////////////////////////////////////////////////////
427// pp-number only token definitions
//
// lexer::init_dfa() registers this rule before all other rules, and only if
// need_prefer_pp_numbers() holds for the requested language.
428template <typename IteratorT, typename PositionT>
b32b8144
FG
429typename lexer_base<IteratorT, PositionT>::lexer_data const
430lexer<IteratorT, PositionT>::init_data_pp_number[INIT_DATA_PP_NUMBER_SIZE] =
7c673cae
FG
431{
432 TOKEN_DATA(PP_NUMBER, PP_NUMBERDEF),
433 { token_id(0) } // this should be the last entry
434};
435
436///////////////////////////////////////////////////////////////////////////////
20effc67 437// C++11 only token definitions
7c673cae
FG
438
// Pseudo token ids for the literals carrying a 'u', 'U' or 'u8' prefix: the
// id of the corresponding plain literal with the AltTokenType flag added.
// slex_functor::get() removes this flag again before it builds the token.
439#define T_EXTCHARLIT token_id(T_CHARLIT|AltTokenType)
440#define T_EXTSTRINGLIT token_id(T_STRINGLIT|AltTokenType)
441#define T_EXTRAWSTRINGLIT token_id(T_RAWSTRINGLIT|AltTokenType)
442
// lexer::init_dfa() registers these rules if BOOST_WAVE_SUPPORT_CPP0X is
// enabled and either need_cpp0x() or need_cpp2a() holds for the requested
// language.
443template <typename IteratorT, typename PositionT>
b32b8144
FG
444typename lexer_base<IteratorT, PositionT>::lexer_data const
445lexer<IteratorT, PositionT>::init_data_cpp0x[INIT_DATA_CPP0X_SIZE] =
7c673cae 446{
b32b8144 447 TOKEN_DATA(EXTCHARLIT, EXTCHAR_SPEC "'"
7c673cae 448 "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\n\\r\\\\']" ")+" "'"),
b32b8144 449 TOKEN_DATA(EXTSTRINGLIT, EXTCHAR_SPEC Q("\"")
7c673cae 450 "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\n\\r\\\\\"]" ")*" Q("\"")),
b32b8144 451 TOKEN_DATA(RAWSTRINGLIT, CHAR_SPEC "R" Q("\"")
7c673cae 452 "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\\\\"]" ")*" Q("\"")),
b32b8144 453 TOKEN_DATA(EXTRAWSTRINGLIT, EXTCHAR_SPEC "R" Q("\"")
7c673cae
FG
454 "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\\\\"]" ")*" Q("\"")),
// keywords
455 TOKEN_DATA(ALIGNAS, "alignas"),
456 TOKEN_DATA(ALIGNOF, "alignof"),
457 TOKEN_DATA(CHAR16_T, "char16_t"),
458 TOKEN_DATA(CHAR32_T, "char32_t"),
459 TOKEN_DATA(CONSTEXPR, "constexpr"),
460 TOKEN_DATA(DECLTYPE, "decltype"),
461 TOKEN_DATA(NOEXCEPT, "noexcept"),
462 TOKEN_DATA(NULLPTR, "nullptr"),
463 TOKEN_DATA(STATICASSERT, "static_assert"),
20effc67 464 TOKEN_DATA(THREADLOCAL, "thread_local"),
7c673cae
FG
465 { token_id(0) } // this should be the last entry
466};
467
20effc67
TL
468///////////////////////////////////////////////////////////////////////////////
469// C++20 only token definitions
//
// lexer::init_dfa() registers these rules if BOOST_WAVE_SUPPORT_CPP2A is
// enabled and need_cpp2a() holds for the requested language.
470
471template <typename IteratorT, typename PositionT>
472typename lexer_base<IteratorT, PositionT>::lexer_data const
473lexer<IteratorT, PositionT>::init_data_cpp2a[INIT_DATA_CPP2A_SIZE] =
474{
475 TOKEN_DATA(CHAR8_T, "char8_t"),
476 TOKEN_DATA(CONCEPT, "concept"),
477 TOKEN_DATA(CONSTEVAL, "consteval"),
478 TOKEN_DATA(CONSTINIT, "constinit"),
479 TOKEN_DATA(CO_AWAIT, "co_await"),
480 TOKEN_DATA(CO_RETURN, "co_return"),
481 TOKEN_DATA(CO_YIELD, "co_yield"),
482 TOKEN_DATA(REQUIRES, "requires"),
483 TOKEN_DATA(SPACESHIP, "<=>"),
484
485 { token_id(0) } // this should be the last entry
486};
487
488
7c673cae
FG
///////////////////////////////////////////////////////////////////////////////
// undefine macros, required for regular expression definitions
//
// All helper macros used to build the regular expressions are removed again,
// so none of these (very generic) names leaks into the code including this
// header. NUM_LEXER_STATES is still needed by the lexer_base constructor and
// is left defined.
#undef INCLUDEDEF
#undef NEWLINEDEF
#undef POUNDDEF
#undef CCOMMENT
#undef PPSPACE
#undef BLANK
#undef DIGIT
#undef OCTALDIGIT
#undef HEXDIGIT
#undef NONDIGIT
#undef OPTSIGN
#undef EXPSTART
#undef EXPONENT
#undef LONGINTEGER_SUFFIX
#undef INTEGER_SUFFIX
#undef INTEGER
#undef FLOAT_SUFFIX
#undef CHAR_SPEC
#undef EXTCHAR_SPEC
#undef BACKSLASH
#undef ESCAPESEQ
#undef HEXQUAD
#undef UNIVERSALCHAR
#undef PP_NUMBERDEF

#undef Q
#undef TRI
#undef OR

#undef TOKEN_DATA
#undef TOKEN_DATA_EX
519
520///////////////////////////////////////////////////////////////////////////////
521// initialize cpp lexer with token data
522template <typename IteratorT, typename PositionT>
523inline
b32b8144 524lexer_base<IteratorT, PositionT>::lexer_base()
7c673cae
FG
525: base_type(NUM_LEXER_STATES)
526{
527}
528
529template <typename IteratorT, typename PositionT>
530inline void
531lexer<IteratorT, PositionT>::init_dfa(boost::wave::language_support lang)
532{
533 if (this->has_compiled_dfa())
534 return;
535
536// if pp-numbers should be preferred, insert the corresponding rule first
537 if (boost::wave::need_prefer_pp_numbers(lang)) {
538 for (int j = 0; 0 != init_data_pp_number[j].tokenid; ++j) {
b32b8144
FG
539 this->register_regex(init_data_pp_number[j].tokenregex,
540 init_data_pp_number[j].tokenid, init_data_pp_number[j].tokencb,
7c673cae
FG
541 init_data_pp_number[j].lexerstate);
542 }
543 }
b32b8144
FG
544
545// if in C99 mode, some of the keywords are not valid
7c673cae
FG
546 if (!boost::wave::need_c99(lang)) {
547 for (int j = 0; 0 != init_data_cpp[j].tokenid; ++j) {
b32b8144
FG
548 this->register_regex(init_data_cpp[j].tokenregex,
549 init_data_cpp[j].tokenid, init_data_cpp[j].tokencb,
7c673cae
FG
550 init_data_cpp[j].lexerstate);
551 }
552 }
b32b8144 553
20effc67 554// if in C++0x mode, add appropriate keywords
7c673cae 555#if BOOST_WAVE_SUPPORT_CPP0X != 0
20effc67 556 if (boost::wave::need_cpp0x(lang) || boost::wave::need_cpp2a(lang)) {
7c673cae 557 for (int j = 0; 0 != init_data_cpp0x[j].tokenid; ++j) {
b32b8144
FG
558 this->register_regex(init_data_cpp0x[j].tokenregex,
559 init_data_cpp0x[j].tokenid, init_data_cpp0x[j].tokencb,
7c673cae
FG
560 init_data_cpp0x[j].lexerstate);
561 }
562 }
563#endif
564
20effc67
TL
565 // if in C++2a mode, add those keywords
566#if BOOST_WAVE_SUPPORT_CPP2A != 0
567 if (wave::need_cpp2a(lang)) {
568 for (int j = 0; 0 != init_data_cpp2a[j].tokenid; ++j) {
569 this->register_regex(init_data_cpp2a[j].tokenregex,
570 init_data_cpp2a[j].tokenid,
571 init_data_cpp2a[j].tokencb,
572 init_data_cpp2a[j].lexerstate);
573 }
574 }
575#endif
576
7c673cae 577 for (int i = 0; 0 != init_data[i].tokenid; ++i) {
b32b8144 578 this->register_regex(init_data[i].tokenregex, init_data[i].tokenid,
7c673cae
FG
579 init_data[i].tokencb, init_data[i].lexerstate);
580 }
581}
582
583///////////////////////////////////////////////////////////////////////////////
584// get time of last compilation of this file
//
// The helper is constructed from the __DATE__ and __TIME__ strings as seen by
// the translation unit instantiating this template. init_lexer() passes the
// time returned by get_compilation_time() to the load() and save() functions
// of the lexer.
585template <typename IteratorT, typename PositionT>
b32b8144 586boost::wave::util::time_conversion_helper
7c673cae
FG
587 lexer<IteratorT, PositionT>::compilation_time(__DATE__ " " __TIME__);
588
589///////////////////////////////////////////////////////////////////////////////
590} // namespace lexer
591
592///////////////////////////////////////////////////////////////////////////////
b32b8144 593//
7c673cae 594template <typename IteratorT, typename PositionT>
b32b8144
FG
595inline void
596init_lexer (lexer::lexer<IteratorT, PositionT> &lexer,
7c673cae
FG
597 boost::wave::language_support language, bool force_reinit = false)
598{
599 if (lexer.has_compiled_dfa())
600 return; // nothing to do
b32b8144 601
7c673cae
FG
602 using std::ifstream;
603 using std::ofstream;
604 using std::ios;
605 using std::cerr;
606 using std::endl;
b32b8144 607
7c673cae
FG
608ifstream dfa_in("wave_slex_lexer.dfa", ios::in|ios::binary);
609
610 lexer.init_dfa(language);
611 if (force_reinit || !dfa_in.is_open() ||
612 !lexer.load (dfa_in, (long)lexer.get_compilation_time()))
613 {
614#if defined(BOOST_SPIRIT_DEBUG)
615 cerr << "Compiling regular expressions for slex ...";
616#endif // defined(BOOST_SPIRIT_DEBUG)
617
618 dfa_in.close();
619 lexer.create_dfa();
620
621 ofstream dfa_out ("wave_slex_lexer.dfa", ios::out|ios::binary|ios::trunc);
622
623 if (dfa_out.is_open())
624 lexer.save (dfa_out, (long)lexer.get_compilation_time());
625
626#if defined(BOOST_SPIRIT_DEBUG)
627 cerr << " Done." << endl;
628#endif // defined(BOOST_SPIRIT_DEBUG)
629 }
630}
631
632///////////////////////////////////////////////////////////////////////////////
b32b8144 633//
7c673cae
FG
634// lex_functor
635//
636///////////////////////////////////////////////////////////////////////////////
637
638template <typename IteratorT, typename PositionT = wave::util::file_position_type>
b32b8144 639class slex_functor
7c673cae
FG
640: public slex_input_interface<
641 typename lexer::lexer<IteratorT, PositionT>::token_type
642 >
643{
644public:
645
646 typedef boost::wave::util::position_iterator<IteratorT, PositionT>
647 iterator_type;
648 typedef typename std::iterator_traits<IteratorT>::value_type char_type;
649 typedef BOOST_WAVE_STRINGTYPE string_type;
650 typedef typename lexer::lexer<IteratorT, PositionT>::token_type token_type;
651
b32b8144 652 slex_functor(IteratorT const &first_, IteratorT const &last_,
7c673cae
FG
653 PositionT const &pos_, boost::wave::language_support language_)
654 : first(first_, last_, pos_), language(language_), at_eof(false)
655 {
656 // initialize lexer dfa tables
b32b8144 657 init_lexer(lexer, language_);
7c673cae
FG
658 }
659 virtual ~slex_functor() {}
660
661// get the next token from the input stream
20effc67 662 token_type& get(token_type& result) BOOST_OVERRIDE
7c673cae
FG
663 {
664 if (!at_eof) {
665 do {
666 // generate and return the next token
667 std::string value;
668 PositionT pos = first.get_position(); // begin of token position
669 token_id id = token_id(lexer.next_token(first, last, &value));
670
671 if ((token_id)(-1) == id)
672 id = T_EOF; // end of input reached
673
674 string_type token_val(value.c_str());
675
676 if (boost::wave::need_emit_contnewlines(language) ||
b32b8144 677 T_CONTLINE != id)
7c673cae 678 {
b32b8144 679 // The cast should avoid spurious warnings about missing case labels
7c673cae 680 // for the other token ids's.
b32b8144 681 switch (id) {
7c673cae 682 case T_IDENTIFIER:
b32b8144 683 // test identifier characters for validity (throws if
7c673cae
FG
684 // invalid chars found)
685 if (!boost::wave::need_no_character_validation(language)) {
686 using boost::wave::cpplexer::impl::validate_identifier_name;
b32b8144
FG
687 validate_identifier_name(token_val,
688 pos.get_line(), pos.get_column(), pos.get_file());
7c673cae
FG
689 }
690 break;
691
692 case T_EXTCHARLIT:
693 case T_EXTSTRINGLIT:
694 case T_EXTRAWSTRINGLIT:
695 id = token_id(id & ~AltTokenType);
696 BOOST_FALLTHROUGH;
697
698 case T_CHARLIT:
699 case T_STRINGLIT:
700 case T_RAWSTRINGLIT:
b32b8144 701 // test literal characters for validity (throws if invalid
7c673cae
FG
702 // chars found)
703 if (boost::wave::need_convert_trigraphs(language)) {
704 using boost::wave::cpplexer::impl::convert_trigraphs;
b32b8144 705 token_val = convert_trigraphs(token_val);
7c673cae
FG
706 }
707 if (!boost::wave::need_no_character_validation(language)) {
708 using boost::wave::cpplexer::impl::validate_literal;
b32b8144
FG
709 validate_literal(token_val,
710 pos.get_line(), pos.get_column(), pos.get_file());
7c673cae
FG
711 }
712 break;
713
714 case T_LONGINTLIT: // supported in C99 and long_long mode
715 if (!boost::wave::need_long_long(language)) {
716 // syntax error: not allowed in C++ mode
717 BOOST_WAVE_LEXER_THROW(
b32b8144
FG
718 boost::wave::cpplexer::lexing_exception,
719 invalid_long_long_literal, value.c_str(),
720 pos.get_line(), pos.get_column(),
7c673cae
FG
721 pos.get_file().c_str());
722 }
723 break;
724
7c673cae
FG
725 case T_PP_HHEADER:
726 case T_PP_QHEADER:
727 case T_PP_INCLUDE:
728 // convert to the corresponding ..._next token, if appropriate
729 {
f67539c2 730#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
b32b8144 731 // Skip '#' and whitespace and see whether we find an
7c673cae
FG
732 // 'include_next' here.
733 typename string_type::size_type start = value.find("include");
734 if (0 == value.compare(start, 12, "include_next", 12))
735 id = token_id(id | AltTokenType);
f67539c2 736#endif // BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
7c673cae
FG
737 break;
738 }
7c673cae
FG
739
740 case T_EOF:
b32b8144 741 // T_EOF is returned as a valid token, the next call will
7c673cae
FG
742 // return T_EOI, i.e. the actual end of input
743 at_eof = true;
744 token_val.clear();
745 break;
746
747 case T_OR_TRIGRAPH:
748 case T_XOR_TRIGRAPH:
749 case T_LEFTBRACE_TRIGRAPH:
750 case T_RIGHTBRACE_TRIGRAPH:
751 case T_LEFTBRACKET_TRIGRAPH:
752 case T_RIGHTBRACKET_TRIGRAPH:
753 case T_COMPL_TRIGRAPH:
754 case T_POUND_TRIGRAPH:
755 case T_ANY_TRIGRAPH:
756 if (boost::wave::need_convert_trigraphs(language))
757 {
758 using boost::wave::cpplexer::impl::convert_trigraph;
759 token_val = convert_trigraph(token_val);
760 }
761 break;
762 }
763
764 result = token_type(id, token_val, pos);
765#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
766 return guards.detect_guard(result);
767#else
768 return result;
769#endif
770 }
771
772 // skip the T_CONTLINE token
773 } while (true);
774 }
775 return result = token_type(); // return T_EOI
776 }
777
20effc67 778 void set_position(PositionT const &pos) BOOST_OVERRIDE
b32b8144 779 {
7c673cae 780 // set position has to change the file name and line number only
b32b8144
FG
781 first.get_position().set_file(pos.get_file());
782 first.get_position().set_line(pos.get_line());
7c673cae
FG
783 }
784
785#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
20effc67 786 bool has_include_guards(std::string& guard_name) const BOOST_OVERRIDE
7c673cae
FG
787 { return guards.detected(guard_name); }
788#endif
789
790private:
791 iterator_type first;
792 iterator_type last;
793 boost::wave::language_support language;
794 static lexer::lexer<IteratorT, PositionT> lexer; // needed only once
b32b8144 795
7c673cae
FG
796 bool at_eof;
797
798#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
799 include_guards<token_type> guards;
800#endif
801};
802
// definition of the static lexer object shared by all slex_functor instances
// of one template instantiation (it gets initialized by init_lexer(), called
// from the slex_functor constructor)
803template <typename IteratorT, typename PositionT>
804lexer::lexer<IteratorT, PositionT> slex_functor<IteratorT, PositionT>::lexer;
805
b32b8144
FG
// the pseudo token ids are not needed beyond this point
806#undef T_EXTCHARLIT
807#undef T_EXTSTRINGLIT
7c673cae
FG
808#undef T_EXTRAWSTRINGLIT
809
810///////////////////////////////////////////////////////////////////////////////
811//
// The 'new_lexer' function allows the opaque generation of a new lexer object.
// It is coupled to the iterator type, which makes it possible to decouple the
// lexer/iterator configurations at compile time.
815//
b32b8144 816// This function is declared inside the cpp_slex_token.hpp file, which is
7c673cae 817// referenced by the source file calling the lexer and the source file, which
b32b8144
FG
818// instantiates the lex_functor. But it is defined here, so it will be
819// instantiated only while compiling the source file, which instantiates the
7c673cae
FG
820// lex_functor. While the cpp_slex_token.hpp file may be included everywhere,
821// this file (cpp_slex_lexer.hpp) should be included only once. This allows
b32b8144 822// to decouple the lexer interface from the lexer implementation and reduces
7c673cae
FG
823// compilation time.
824//
825///////////////////////////////////////////////////////////////////////////////
826
827///////////////////////////////////////////////////////////////////////////////
b32b8144 828//
7c673cae 829// The new_lexer_gen<>::new_lexer function (declared in cpp_slex_token.hpp)
b32b8144 830// should be defined inline, if the lex_functor shouldn't be instantiated
7c673cae
FG
831// separately from the lex_iterator.
832//
833// Separate (explicit) instantiation helps to reduce compilation time.
834//
835///////////////////////////////////////////////////////////////////////////////
836
837#if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0
838#define BOOST_WAVE_SLEX_NEW_LEXER_INLINE
839#else
840#define BOOST_WAVE_SLEX_NEW_LEXER_INLINE inline
b32b8144 841#endif
7c673cae
FG
842
843template <typename IteratorT, typename PositionT>
844BOOST_WAVE_SLEX_NEW_LEXER_INLINE
845lex_input_interface<slex_token<PositionT> > *
846new_lexer_gen<IteratorT, PositionT>::new_lexer(IteratorT const &first,
b32b8144 847 IteratorT const &last, PositionT const &pos,
7c673cae
FG
848 boost::wave::language_support language)
849{
b32b8144 850 return new slex_functor<IteratorT, PositionT>(first, last, pos,
7c673cae
FG
851 language);
852}
853
854#undef BOOST_WAVE_SLEX_NEW_LEXER_INLINE
855
856///////////////////////////////////////////////////////////////////////////////
857} // namespace slex
858} // namespace cpplexer
859} // namespace wave
860} // namespace boost
b32b8144 861
20effc67 862#endif // !defined(BOOST_SLEX_LEXER_HPP_5E8E1DF0_BB41_4938_B7E5_A4BB68222FF6_INCLUDED)