]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/libs/wave/samples/token_statistics/xlex/xlex_lexer.hpp
import quincy beta 17.1.0
[ceph.git] / ceph / src / boost / libs / wave / samples / token_statistics / xlex / xlex_lexer.hpp
CommitLineData
7c673cae
FG
1/*=============================================================================
2 Boost.Wave: A Standard compliant C++ preprocessor library
3
4 Xpressive based C++ lexer
5
6 http://www.boost.org/
7
8 Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost
9 Software License, Version 1.0. (See accompanying file
10 LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
11=============================================================================*/
12
20effc67
TL
13#if !defined(BOOST_XLEX_LEXER_HPP)
14#define BOOST_XLEX_LEXER_HPP
7c673cae
FG
15
16#include <string>
17#include <cstdio>
18#include <cstdarg>
19#if defined(BOOST_SPIRIT_DEBUG)
20#include <iostream>
21#endif // defined(BOOST_SPIRIT_DEBUG)
22
23#include <boost/concept_check.hpp>
24#include <boost/assert.hpp>
25#include <boost/spirit/include/classic_core.hpp>
26
27#include <boost/wave/token_ids.hpp>
28#include <boost/wave/language_support.hpp>
29#include <boost/wave/util/file_position.hpp>
30#include <boost/wave/cpplexer/validate_universal_char.hpp>
31#include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
32#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
33#include <boost/wave/cpplexer/detect_include_guards.hpp>
34#endif
35#include <boost/wave/cpplexer/cpp_lex_interface.hpp>
36
37// reuse the default token type
20effc67 38#include "../xlex_interface.hpp"
7c673cae
FG
39
40// include the xpressive headers
41#include "xpressive_lexer.hpp"
42
43///////////////////////////////////////////////////////////////////////////////
44namespace boost {
45namespace wave {
46namespace cpplexer {
47namespace xlex {
48namespace lexer {
49
50///////////////////////////////////////////////////////////////////////////////
51//
52// encapsulation of the xpressive based C++ lexer
53//
54///////////////////////////////////////////////////////////////////////////////
55
56template <
57 typename Iterator,
58 typename Position = boost::wave::util::file_position_type
59>
60class lexer
61{
62public:
63 typedef char char_type;
64 typedef boost::wave::cpplexer::lex_token<Position> token_type;
65 typedef typename token_type::string_type string_type;
66
67 lexer(Iterator const &first, Iterator const &last,
68 Position const &pos, boost::wave::language_support language);
69 ~lexer() {}
70
71 token_type& get(token_type& t);
72 void set_position(Position const &pos)
73 {
7c673cae
FG
74 filename = pos.get_file();
75 line = pos.get_line();
20effc67 76 column = pos.get_column();
7c673cae
FG
77 }
78
79#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
80 bool has_include_guards(std::string& guard_name) const
81 { return guards.detected(guard_name); }
82#endif
83
84private:
85 typedef xpressive_lexer<Iterator, token_id> lexer_type;
86 typedef typename lexer_type::callback_type callback_type;
87
88 lexer_type xlexer;
89 Iterator first;
90 Iterator last;
91
92 string_type filename;
93 int line;
20effc67 94 int column;
7c673cae
FG
95 bool at_eof;
96 boost::wave::language_support language;
97
98// initialization data (regular expressions for the token definitions)
99 struct lexer_data {
100 token_id tokenid; // token data
101 char_type const *tokenregex; // associated token to match
102 callback_type tokencb; // associated callback function
103 };
104
105 static lexer_data const init_data[]; // common patterns
106 static lexer_data const init_data_cpp[]; // C++ only patterns
20effc67
TL
107 static lexer_data const init_data_cpp0x[]; // C++11 only patterns
108 static lexer_data const init_data_cpp2a[]; // C++20 only patterns
7c673cae
FG
109
110#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
111 boost::wave::cpplexer::include_guards<token_type> guards;
112#endif
113};
114
115///////////////////////////////////////////////////////////////////////////////
116// helper for initializing token data
// TOKEN_DATA(id, regex): table entry { token id, regex, no callback }.
#define TOKEN_DATA(id, regex) \
        { id, regex, 0 }

// TOKEN_DATA_EX(id, regex, callback): table entry with an explicit callback.
#define TOKEN_DATA_EX(id, regex, callback) \
        { id, regex, callback }
122
123///////////////////////////////////////////////////////////////////////////////
124// data required for initialization of the lexer (token definitions)
// Building blocks for the token regular expressions. All macros expand to
// adjacent string literals, which the compiler concatenates into one pattern.
#define OR                  "|"                 // regex alternation
#define Q(c)                "\\" c              // escape 'c' with a backslash
#define TRI(c)              Q("?") Q("?") c     // trigraph: escaped "??" + c

// definition of some subtoken regexps to simplify the regex definitions
#define BLANK               "[ \t\v\f]"
#define CCOMMENT            Q("/") Q("*") ".*?" Q("*") Q("/")

#define PPSPACE             "(" BLANK OR CCOMMENT ")*"

#define OCTALDIGIT          "[0-7]"
#define DIGIT               "[0-9]"
#define HEXDIGIT            "[0-9a-fA-F]"
#define SIGN                "[-+]?"
#define EXPONENT            "(" "[eE]" SIGN "[0-9]+" ")"

#define INTEGER             "(" \
                                "(0x|0X)" HEXDIGIT "+" OR \
                                "0" OCTALDIGIT "*" OR \
                                "[1-9]" DIGIT "*" \
                            ")"

#define INTEGER_SUFFIX      "(" "[uU][lL]?|[lL][uU]?" ")"
#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
// with MS extensions enabled the suffix "i64" is accepted as well
#define LONGINTEGER_SUFFIX  "(" "[uU]" "(" "ll" OR "LL" ")" OR \
                                "(" "ll" OR "LL" ")" "[uU]" "?" OR \
                                "i64" \
                            ")"
#else
#define LONGINTEGER_SUFFIX  "(" "[uU]" "(" "ll" OR "LL" ")" OR \
                                "(" "ll" OR "LL" ")" "[uU]" "?" ")"
#endif
#define FLOAT_SUFFIX        "(" "[fF][lL]?|[lL][fF]?" ")"
#define CHAR_SPEC           "L?"
#define EXTCHAR_SPEC        "(" "[uU]" OR "u8" ")"

// a backslash, either written literally or as the trigraph "??/"
#define BACKSLASH           "(" Q("\\") OR TRI(Q("/")) ")"
#define ESCAPESEQ           BACKSLASH "(" \
                                "[abfnrtv?'\"]" OR \
                                BACKSLASH OR \
                                "x" HEXDIGIT "+" OR \
                                OCTALDIGIT OCTALDIGIT "?" OCTALDIGIT "?" \
                            ")"
#define HEXQUAD             HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
#define UNIVERSALCHAR       BACKSLASH "(" \
                                "u" HEXQUAD OR \
                                "U" HEXQUAD HEXQUAD \
                            ")"

// '#', its trigraph "??=" or its digraph "%:"
#define POUNDDEF            "(" "#" OR TRI("=") OR Q("%:") ")"
#define NEWLINEDEF          "(" "\n" OR "\r\n" OR "\r" ")"

#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
#define INCLUDEDEF          "(include_next|include)"
#else
#define INCLUDEDEF          "include"
#endif
182
183///////////////////////////////////////////////////////////////////////////////
184// common C++/C99 token definitions
185template <typename Iterator, typename Position>
186typename lexer<Iterator, Position>::lexer_data const
187lexer<Iterator, Position>::init_data[] =
188{
189 TOKEN_DATA(T_CCOMMENT, CCOMMENT),
190 TOKEN_DATA(T_CPPCOMMENT, Q("/") Q("/.*?") NEWLINEDEF ),
191 TOKEN_DATA(T_CHARLIT, CHAR_SPEC "'"
192 "(" ESCAPESEQ OR "[^\n\r']" OR UNIVERSALCHAR ")+" "'"),
193 TOKEN_DATA(T_STRINGLIT, CHAR_SPEC Q("\"")
194 "(" ESCAPESEQ OR "[^\n\r\"]" OR UNIVERSALCHAR ")*" Q("\"")),
195 TOKEN_DATA(T_ANDAND, "&&"),
196 TOKEN_DATA(T_ANDASSIGN, "&="),
197 TOKEN_DATA(T_AND, "&"),
198 TOKEN_DATA(T_EQUAL, "=="),
199 TOKEN_DATA(T_ASSIGN, "="),
200 TOKEN_DATA(T_ORASSIGN, Q("|=")),
201 TOKEN_DATA(T_ORASSIGN_TRIGRAPH, TRI("!=")),
202 TOKEN_DATA(T_OROR, Q("|") Q("|")),
203 TOKEN_DATA(T_OROR_TRIGRAPH, TRI("!") Q("|") OR Q("|") TRI("!") OR TRI("!") TRI("!")),
204 TOKEN_DATA(T_OR, Q("|")),
205 TOKEN_DATA(T_OR_TRIGRAPH, TRI("!")),
206 TOKEN_DATA(T_XORASSIGN, Q("^=")),
207 TOKEN_DATA(T_XORASSIGN_TRIGRAPH, TRI("'=")),
208 TOKEN_DATA(T_XOR, Q("^")),
209 TOKEN_DATA(T_XOR_TRIGRAPH, TRI("'")),
210 TOKEN_DATA(T_COMMA, ","),
211 TOKEN_DATA(T_RIGHTBRACKET_ALT, ":>"),
212 TOKEN_DATA(T_COLON, ":"),
213 TOKEN_DATA(T_DIVIDEASSIGN, Q("/=")),
214 TOKEN_DATA(T_DIVIDE, Q("/")),
215 TOKEN_DATA(T_ELLIPSIS, Q(".") Q(".") Q(".")),
216 TOKEN_DATA(T_SHIFTRIGHTASSIGN, ">>="),
217 TOKEN_DATA(T_SHIFTRIGHT, ">>"),
218 TOKEN_DATA(T_GREATEREQUAL, ">="),
219 TOKEN_DATA(T_GREATER, ">"),
220 TOKEN_DATA(T_LEFTBRACE, Q("{")),
221 TOKEN_DATA(T_SHIFTLEFTASSIGN, "<<="),
222 TOKEN_DATA(T_SHIFTLEFT, "<<"),
223 TOKEN_DATA(T_LEFTBRACE_ALT, "<" Q("%")),
224 TOKEN_DATA(T_LESSEQUAL, "<="),
225 TOKEN_DATA(T_LEFTBRACKET_ALT, "<:"),
226 TOKEN_DATA(T_LESS, "<"),
227 TOKEN_DATA(T_LEFTBRACE_TRIGRAPH, TRI("<")),
228 TOKEN_DATA(T_LEFTPAREN, Q("(")),
229 TOKEN_DATA(T_LEFTBRACKET, Q("[")),
230 TOKEN_DATA(T_LEFTBRACKET_TRIGRAPH, TRI(Q("("))),
231 TOKEN_DATA(T_MINUSMINUS, Q("-") Q("-")),
232 TOKEN_DATA(T_MINUSASSIGN, Q("-=")),
233 TOKEN_DATA(T_ARROW, Q("->")),
234 TOKEN_DATA(T_MINUS, Q("-")),
235 TOKEN_DATA(T_POUND_POUND_ALT, Q("%:") Q("%:")),
236 TOKEN_DATA(T_PERCENTASSIGN, Q("%=")),
237 TOKEN_DATA(T_RIGHTBRACE_ALT, Q("%>")),
238 TOKEN_DATA(T_POUND_ALT, Q("%:")),
239 TOKEN_DATA(T_PERCENT, Q("%")),
240 TOKEN_DATA(T_NOTEQUAL, "!="),
241 TOKEN_DATA(T_NOT, "!"),
242 TOKEN_DATA(T_PLUSASSIGN, Q("+=")),
243 TOKEN_DATA(T_PLUSPLUS, Q("+") Q("+")),
244 TOKEN_DATA(T_PLUS, Q("+")),
245 TOKEN_DATA(T_RIGHTBRACE, Q("}")),
246 TOKEN_DATA(T_RIGHTBRACE_TRIGRAPH, TRI(">")),
247 TOKEN_DATA(T_RIGHTPAREN, Q(")")),
248 TOKEN_DATA(T_RIGHTBRACKET, Q("]")),
249 TOKEN_DATA(T_RIGHTBRACKET_TRIGRAPH, TRI(Q(")"))),
250 TOKEN_DATA(T_SEMICOLON, ";"),
251 TOKEN_DATA(T_STARASSIGN, Q("*=")),
252 TOKEN_DATA(T_STAR, Q("*")),
253 TOKEN_DATA(T_COMPL, Q("~")),
254 TOKEN_DATA(T_COMPL_TRIGRAPH, TRI("-")),
255 TOKEN_DATA(T_ASM, "asm"),
256 TOKEN_DATA(T_AUTO, "auto"),
257 TOKEN_DATA(T_BOOL, "bool"),
258 TOKEN_DATA(T_FALSE, "false"),
259 TOKEN_DATA(T_TRUE, "true"),
260 TOKEN_DATA(T_BREAK, "break"),
261 TOKEN_DATA(T_CASE, "case"),
262 TOKEN_DATA(T_CATCH, "catch"),
263 TOKEN_DATA(T_CHAR, "char"),
264 TOKEN_DATA(T_CLASS, "class"),
265 TOKEN_DATA(T_CONSTCAST, "const_cast"),
266 TOKEN_DATA(T_CONST, "const"),
267 TOKEN_DATA(T_CONTINUE, "continue"),
268 TOKEN_DATA(T_DEFAULT, "default"),
269 TOKEN_DATA(T_DELETE, "delete"),
270 TOKEN_DATA(T_DOUBLE, "double"),
271 TOKEN_DATA(T_DO, "do"),
272 TOKEN_DATA(T_DYNAMICCAST, "dynamic_cast"),
273 TOKEN_DATA(T_ELSE, "else"),
274 TOKEN_DATA(T_ENUM, "enum"),
275 TOKEN_DATA(T_EXPLICIT, "explicit"),
276 TOKEN_DATA(T_EXPORT, "export"),
277 TOKEN_DATA(T_EXTERN, "extern"),
278 TOKEN_DATA(T_FLOAT, "float"),
279 TOKEN_DATA(T_FOR, "for"),
280 TOKEN_DATA(T_FRIEND, "friend"),
281 TOKEN_DATA(T_GOTO, "goto"),
282 TOKEN_DATA(T_IF, "if"),
283 TOKEN_DATA(T_INLINE, "inline"),
284 TOKEN_DATA(T_INT, "int"),
285 TOKEN_DATA(T_LONG, "long"),
286 TOKEN_DATA(T_MUTABLE, "mutable"),
287 TOKEN_DATA(T_NAMESPACE, "namespace"),
288 TOKEN_DATA(T_NEW, "new"),
289 TOKEN_DATA(T_OPERATOR, "operator"),
290 TOKEN_DATA(T_PRIVATE, "private"),
291 TOKEN_DATA(T_PROTECTED, "protected"),
292 TOKEN_DATA(T_PUBLIC, "public"),
293 TOKEN_DATA(T_REGISTER, "register"),
294 TOKEN_DATA(T_REINTERPRETCAST, "reinterpret_cast"),
295 TOKEN_DATA(T_RETURN, "return"),
296 TOKEN_DATA(T_SHORT, "short"),
297 TOKEN_DATA(T_SIGNED, "signed"),
298 TOKEN_DATA(T_SIZEOF, "sizeof"),
299 TOKEN_DATA(T_STATICCAST, "static_cast"),
300 TOKEN_DATA(T_STATIC, "static"),
301 TOKEN_DATA(T_STRUCT, "struct"),
302 TOKEN_DATA(T_SWITCH, "switch"),
303 TOKEN_DATA(T_TEMPLATE, "template"),
304 TOKEN_DATA(T_THIS, "this"),
305 TOKEN_DATA(T_THROW, "throw"),
306 TOKEN_DATA(T_TRY, "try"),
307 TOKEN_DATA(T_TYPEDEF, "typedef"),
308 TOKEN_DATA(T_TYPEID, "typeid"),
309 TOKEN_DATA(T_TYPENAME, "typename"),
310 TOKEN_DATA(T_UNION, "union"),
311 TOKEN_DATA(T_UNSIGNED, "unsigned"),
312 TOKEN_DATA(T_USING, "using"),
313 TOKEN_DATA(T_VIRTUAL, "virtual"),
314 TOKEN_DATA(T_VOID, "void"),
315 TOKEN_DATA(T_VOLATILE, "volatile"),
316 TOKEN_DATA(T_WCHART, "wchar_t"),
317 TOKEN_DATA(T_WHILE, "while"),
318#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
319 TOKEN_DATA(T_MSEXT_INT8, "__int8"),
320 TOKEN_DATA(T_MSEXT_INT16, "__int16"),
321 TOKEN_DATA(T_MSEXT_INT32, "__int32"),
322 TOKEN_DATA(T_MSEXT_INT64, "__int64"),
323 TOKEN_DATA(T_MSEXT_BASED, "_?" "_based"),
324 TOKEN_DATA(T_MSEXT_DECLSPEC, "_?" "_declspec"),
325 TOKEN_DATA(T_MSEXT_CDECL, "_?" "_cdecl"),
326 TOKEN_DATA(T_MSEXT_FASTCALL, "_?" "_fastcall"),
327 TOKEN_DATA(T_MSEXT_STDCALL, "_?" "_stdcall"),
328 TOKEN_DATA(T_MSEXT_TRY , "__try"),
329 TOKEN_DATA(T_MSEXT_EXCEPT, "__except"),
330 TOKEN_DATA(T_MSEXT_FINALLY, "__finally"),
331 TOKEN_DATA(T_MSEXT_LEAVE, "__leave"),
332 TOKEN_DATA(T_MSEXT_INLINE, "_?" "_inline"),
333 TOKEN_DATA(T_MSEXT_ASM, "_?" "_asm"),
334 TOKEN_DATA(T_MSEXT_PP_REGION, POUNDDEF PPSPACE "region"),
335 TOKEN_DATA(T_MSEXT_PP_ENDREGION, POUNDDEF PPSPACE "endregion"),
336#endif // BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
337 TOKEN_DATA(T_PP_DEFINE, POUNDDEF PPSPACE "define"),
338 TOKEN_DATA(T_PP_IFDEF, POUNDDEF PPSPACE "ifdef"),
339 TOKEN_DATA(T_PP_IFNDEF, POUNDDEF PPSPACE "ifndef"),
340 TOKEN_DATA(T_PP_IF, POUNDDEF PPSPACE "if"),
341 TOKEN_DATA(T_PP_ELSE, POUNDDEF PPSPACE "else"),
342 TOKEN_DATA(T_PP_ELIF, POUNDDEF PPSPACE "elif"),
343 TOKEN_DATA(T_PP_ENDIF, POUNDDEF PPSPACE "endif"),
344 TOKEN_DATA(T_PP_ERROR, POUNDDEF PPSPACE "error"),
345 TOKEN_DATA(T_PP_QHEADER, POUNDDEF PPSPACE \
346 INCLUDEDEF PPSPACE Q("\"") "[^\n\r\"]+" Q("\"")),
347 TOKEN_DATA(T_PP_HHEADER, POUNDDEF PPSPACE \
348 INCLUDEDEF PPSPACE "<" "[^\n\r>]+" ">"),
349 TOKEN_DATA(T_PP_INCLUDE, POUNDDEF PPSPACE \
350 INCLUDEDEF PPSPACE),
351 TOKEN_DATA(T_PP_LINE, POUNDDEF PPSPACE "line"),
352 TOKEN_DATA(T_PP_PRAGMA, POUNDDEF PPSPACE "pragma"),
353 TOKEN_DATA(T_PP_UNDEF, POUNDDEF PPSPACE "undef"),
354 TOKEN_DATA(T_PP_WARNING, POUNDDEF PPSPACE "warning"),
355 TOKEN_DATA(T_FLOATLIT,
356 "(" DIGIT "*" Q(".") DIGIT "+" OR DIGIT "+" Q(".") ")"
357 EXPONENT "?" FLOAT_SUFFIX "?" OR
358 DIGIT "+" EXPONENT FLOAT_SUFFIX "?"),
359 TOKEN_DATA(T_LONGINTLIT, INTEGER LONGINTEGER_SUFFIX),
360 TOKEN_DATA(T_INTLIT, INTEGER INTEGER_SUFFIX "?"),
361#if BOOST_WAVE_USE_STRICT_LEXER != 0
362 TOKEN_DATA(T_IDENTIFIER, "([a-zA-Z_]" OR UNIVERSALCHAR ")([a-zA-Z0-9_]" OR UNIVERSALCHAR ")*"),
363#else
364 TOKEN_DATA(T_IDENTIFIER, "([a-zA-Z_$]" OR UNIVERSALCHAR ")([a-zA-Z0-9_$]" OR UNIVERSALCHAR ")*"),
365#endif
366 TOKEN_DATA(T_SPACE, BLANK "+"),
7c673cae
FG
367 TOKEN_DATA(T_CONTLINE, Q("\\") "\n"),
368 TOKEN_DATA(T_NEWLINE, NEWLINEDEF),
369 TOKEN_DATA(T_POUND_POUND, "##"),
370 TOKEN_DATA(T_POUND_POUND_TRIGRAPH, TRI("=") TRI("=")),
371 TOKEN_DATA(T_POUND, "#"),
372 TOKEN_DATA(T_POUND_TRIGRAPH, TRI("=")),
373 TOKEN_DATA(T_ANY_TRIGRAPH, TRI(Q("/"))),
374 TOKEN_DATA(T_QUESTION_MARK, Q("?")),
375 TOKEN_DATA(T_DOT, Q(".")),
376 TOKEN_DATA(T_ANY, "."),
377 { token_id(0) } // this should be the last entry
378};
379
380///////////////////////////////////////////////////////////////////////////////
381// C++ only token definitions
382template <typename Iterator, typename Position>
383typename lexer<Iterator, Position>::lexer_data const
384lexer<Iterator, Position>::init_data_cpp[] =
385{
386 TOKEN_DATA(T_AND_ALT, "bitand"),
387 TOKEN_DATA(T_ANDASSIGN_ALT, "and_eq"),
388 TOKEN_DATA(T_ANDAND_ALT, "and"),
389 TOKEN_DATA(T_OR_ALT, "bitor"),
390 TOKEN_DATA(T_ORASSIGN_ALT, "or_eq"),
391 TOKEN_DATA(T_OROR_ALT, "or"),
392 TOKEN_DATA(T_XORASSIGN_ALT, "xor_eq"),
393 TOKEN_DATA(T_XOR_ALT, "xor"),
394 TOKEN_DATA(T_NOTEQUAL_ALT, "not_eq"),
395 TOKEN_DATA(T_NOT_ALT, "not"),
396 TOKEN_DATA(T_COMPL_ALT, "compl"),
397 TOKEN_DATA(T_ARROWSTAR, Q("->") Q("*")),
398 TOKEN_DATA(T_DOTSTAR, Q(".") Q("*")),
399 TOKEN_DATA(T_COLON_COLON, "::"),
400 { token_id(0) } // this should be the last entry
401};
402
20effc67
TL
403///////////////////////////////////////////////////////////////////////////////
404// C++11 only token definitions
405#define T_EXTCHARLIT token_id(T_CHARLIT|AltTokenType)
406#define T_EXTSTRINGLIT token_id(T_STRINGLIT|AltTokenType)
407#define T_EXTRAWSTRINGLIT token_id(T_RAWSTRINGLIT|AltTokenType)
408
409template <typename Iterator, typename Position>
410typename lexer<Iterator, Position>::lexer_data const
411lexer<Iterator, Position>::init_data_cpp0x[] =
412{
413 TOKEN_DATA(T_EXTCHARLIT, EXTCHAR_SPEC "'"
414 "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\n\\r\\\\']" ")+" "'"),
415 TOKEN_DATA(T_EXTSTRINGLIT, EXTCHAR_SPEC Q("\"")
416 "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\n\\r\\\\\"]" ")*" Q("\"")),
417 TOKEN_DATA(T_RAWSTRINGLIT, CHAR_SPEC "R" Q("\"")
418 "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\\\\"]" ")*" Q("\"")),
419 TOKEN_DATA(T_EXTRAWSTRINGLIT, EXTCHAR_SPEC "R" Q("\"")
420 "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\\\\"]" ")*" Q("\"")),
421 TOKEN_DATA(T_ALIGNAS, "alignas"),
422 TOKEN_DATA(T_ALIGNOF, "alignof"),
423 TOKEN_DATA(T_CHAR16_T, "char16_t"),
424 TOKEN_DATA(T_CHAR32_T, "char32_t"),
425 TOKEN_DATA(T_CONSTEXPR, "constexpr"),
426 TOKEN_DATA(T_DECLTYPE, "decltype"),
427 TOKEN_DATA(T_NOEXCEPT, "noexcept"),
428 TOKEN_DATA(T_NULLPTR, "nullptr"),
429 TOKEN_DATA(T_STATICASSERT, "static_assert"),
430 TOKEN_DATA(T_THREADLOCAL, "thread_local"),
431 { token_id(0) } // this should be the last entry
432};
433
434///////////////////////////////////////////////////////////////////////////////
435// C++20 only token definitions
436
437template <typename Iterator, typename Position>
438typename lexer<Iterator, Position>::lexer_data const
439lexer<Iterator, Position>::init_data_cpp2a[] =
440{
441 TOKEN_DATA(T_CHAR8_T, "char8_t"),
442 TOKEN_DATA(T_CONCEPT, "concept"),
443 TOKEN_DATA(T_CONSTEVAL, "consteval"),
444 TOKEN_DATA(T_CONSTINIT, "constinit"),
445 TOKEN_DATA(T_CO_AWAIT, "co_await"),
446 TOKEN_DATA(T_CO_RETURN, "co_return"),
447 TOKEN_DATA(T_CO_YIELD, "co_yield"),
448 TOKEN_DATA(T_REQUIRES, "requires"),
449 TOKEN_DATA(T_SPACESHIP, "<=>"),
450 { token_id(0) } // this should be the last entry
451};
452
7c673cae
FG
453///////////////////////////////////////////////////////////////////////////////
454// undefine macros, required for regular expression definitions
// Remove every helper macro defined above so that none of them leaks into
// translation units including this header. BLANK and NEWLINEDEF are part of
// that set as well.
#undef INCLUDEDEF
#undef POUNDDEF
#undef NEWLINEDEF
#undef BLANK
#undef CCOMMENT
#undef PPSPACE
#undef DIGIT
#undef OCTALDIGIT
#undef HEXDIGIT
#undef SIGN
#undef EXPONENT
#undef LONGINTEGER_SUFFIX
#undef INTEGER_SUFFIX
#undef INTEGER
#undef FLOAT_SUFFIX
#undef CHAR_SPEC
#undef EXTCHAR_SPEC
#undef BACKSLASH
#undef ESCAPESEQ
#undef HEXQUAD
#undef UNIVERSALCHAR

#undef Q
#undef TRI
#undef OR

#undef TOKEN_DATA
#undef TOKEN_DATA_EX

#undef T_EXTCHARLIT
#undef T_EXTSTRINGLIT
#undef T_EXTRAWSTRINGLIT
7c673cae
FG
485///////////////////////////////////////////////////////////////////////////////
486// initialize cpp lexer
487template <typename Iterator, typename Position>
488inline
489lexer<Iterator, Position>::lexer(Iterator const &first,
490 Iterator const &last, Position const &pos,
491 boost::wave::language_support language)
492: first(first), last(last),
20effc67
TL
493 filename(pos.get_file()), line(pos.get_line()), column(pos.get_column()),
494 at_eof(false), language(language)
7c673cae
FG
495{
496// if in C99 mode, some of the keywords/operators are not valid
497 if (!boost::wave::need_c99(language)) {
498 for (int j = 0; 0 != init_data_cpp[j].tokenid; ++j) {
499 xlexer.register_regex(init_data_cpp[j].tokenregex,
500 init_data_cpp[j].tokenid, init_data_cpp[j].tokencb);
501 }
502 }
503
20effc67
TL
504#if BOOST_WAVE_SUPPORT_CPP0X != 0
505 if (boost::wave::need_cpp0x(language) || boost::wave::need_cpp2a(language)) {
506 for (int j = 0; 0 != init_data_cpp0x[j].tokenid; ++j) {
507 xlexer.register_regex(init_data_cpp0x[j].tokenregex,
508 init_data_cpp0x[j].tokenid, init_data_cpp[j].tokencb);
509 }
510 }
511#endif
512
513#if BOOST_WAVE_SUPPORT_CPP2A != 0
514 if (boost::wave::need_cpp2a(language) || boost::wave::need_cpp2a(language)) {
515 for (int j = 0; 0 != init_data_cpp2a[j].tokenid; ++j) {
516 xlexer.register_regex(init_data_cpp2a[j].tokenregex,
517 init_data_cpp2a[j].tokenid, init_data_cpp[j].tokencb);
518 }
519 }
520#endif
521
7c673cae
FG
522// tokens valid for C++ and C99
523 for (int i = 0; 0 != init_data[i].tokenid; ++i) {
524 xlexer.register_regex(init_data[i].tokenregex, init_data[i].tokenid,
525 init_data[i].tokencb);
526 }
527}
528
529///////////////////////////////////////////////////////////////////////////////
530// get the next token from the input stream
531template <typename Iterator, typename Position>
532inline boost::wave::cpplexer::lex_token<Position>&
533lexer<Iterator, Position>::get(boost::wave::cpplexer::lex_token<Position>& t)
534{
535 using namespace boost::wave; // to import token ids to this scope
536
537 if (at_eof)
538 return t = cpplexer::lex_token<Position>(); // return T_EOI
539
540 std::string tokval;
541 token_id id = xlexer.next_token(first, last, tokval);
542 string_type value = tokval.c_str();
543
544 if ((token_id)(-1) == id)
545 id = T_EOF; // end of input reached
546
547 if (T_IDENTIFIER == id) {
548 // test identifier characters for validity (throws if invalid chars found)
549 if (!boost::wave::need_no_character_validation(language)) {
550 cpplexer::impl::validate_identifier_name(value, line, -1, filename);
551 }
552 }
553 else if (T_STRINGLIT == id || T_CHARLIT == id) {
554 // test literal characters for validity (throws if invalid chars found)
555 if (!boost::wave::need_no_character_validation(language)) {
556 cpplexer::impl::validate_literal(value, line, -1, filename);
557 }
558 }
559 else if (T_EOF == id) {
560 // T_EOF is returned as a valid token, the next call will return T_EOI,
561 // i.e. the actual end of input
562 at_eof = true;
563 value.clear();
564 }
20effc67
TL
565 else if (T_NEWLINE == id) {
566 ++line;
567 column = 1;
568 } else {
569 column += value.size();
570 }
7c673cae
FG
571
572#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
20effc67 573 cpplexer::lex_token<Position> tok(id, value, Position(filename, line, column));
7c673cae
FG
574 return t = guards.detect_guard(tok);
575#else
576 return t = cpplexer::lex_token<Position>(id, value,
20effc67 577 Position(filename, line, column));
7c673cae
FG
578#endif
579}
580
581///////////////////////////////////////////////////////////////////////////////
582//
583// lex_functor
584//
585///////////////////////////////////////////////////////////////////////////////
586template <
587 typename Iterator,
588 typename Position = boost::wave::util::file_position_type
589>
590class xlex_functor
591: public xlex_input_interface<typename lexer<Iterator, Position>::token_type>
592{
593public:
594
595 typedef typename lexer<Iterator, Position>::token_type token_type;
596
597 xlex_functor(Iterator const &first, Iterator const &last,
598 Position const &pos, boost::wave::language_support language)
599 : lexer_(first, last, pos, language)
600 {}
601 virtual ~xlex_functor() {}
602
603// get the next token from the input stream
20effc67
TL
604 token_type& get(token_type& t) BOOST_OVERRIDE { return lexer_.get(t); }
605 void set_position(Position const &pos) BOOST_OVERRIDE { lexer_.set_position(pos); }
7c673cae
FG
606
607#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
20effc67 608 bool has_include_guards(std::string& guard_name) const BOOST_OVERRIDE
7c673cae
FG
609 { return lexer_.has_include_guards(guard_name); }
610#endif
611
612private:
613 lexer<Iterator, Position> lexer_;
614};
615
616} // namespace lexer
617
618///////////////////////////////////////////////////////////////////////////////
619//
620// The new_lexer_gen<>::new_lexer function (declared in cpp_slex_token.hpp)
621// should be defined inline, if the lex_functor shouldn't be instantiated
622// separately from the lex_iterator.
623//
624// Separate (explicit) instantiation helps to reduce compilation time.
625//
626///////////////////////////////////////////////////////////////////////////////
627
628#if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0
629#define BOOST_WAVE_XLEX_NEW_LEXER_INLINE
630#else
631#define BOOST_WAVE_XLEX_NEW_LEXER_INLINE inline
632#endif
633
634///////////////////////////////////////////////////////////////////////////////
635//
636// The 'new_lexer' function allows the opaque generation of a new lexer object.
637// It is coupled to the iterator type to allow to decouple the lexer/iterator
638// configurations at compile time.
639//
640// This function is declared inside the xlex_interface.hpp file, which is
641// referenced by the source file calling the lexer and the source file, which
642// instantiates the lex_functor. But it is defined here, so it will be
643// instantiated only while compiling the source file, which instantiates the
644// lex_functor. While the xlex_interface.hpp file may be included everywhere,
645// this file (xlex_lexer.hpp) should be included only once. This allows
646// to decouple the lexer interface from the lexer implementation and reduces
647// compilation time.
648//
649///////////////////////////////////////////////////////////////////////////////
650
651template <typename Iterator, typename Position>
652BOOST_WAVE_XLEX_NEW_LEXER_INLINE
653lex_input_interface<boost::wave::cpplexer::lex_token<Position> > *
654new_lexer_gen<Iterator, Position>::new_lexer(Iterator const &first,
655 Iterator const &last, Position const &pos,
656 wave::language_support language)
657{
658 return new lexer::xlex_functor<Iterator, Position>(
659 first, last, pos, language);
660}
661
662#undef BOOST_WAVE_XLEX_NEW_LEXER_INLINE
663
664///////////////////////////////////////////////////////////////////////////////
665} // namespace xlex
666} // namespace cpplexer
667} // namespace wave
668} // namespace boost
669
20effc67 670#endif // !defined(BOOST_XLEX_LEXER_HPP)