]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /*============================================================================= |
2 | Boost.Wave: A Standard compliant C++ preprocessor library | |
3 | ||
4 | Xpressive based C++ lexer | |
5 | ||
6 | http://www.boost.org/ | |
7 | ||
8 | Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost | |
9 | Software License, Version 1.0. (See accompanying file | |
10 | LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
11 | =============================================================================*/ | |
12 | ||
20effc67 TL |
13 | #if !defined(BOOST_XLEX_LEXER_HPP) |
14 | #define BOOST_XLEX_LEXER_HPP | |
7c673cae FG |
15 | |
16 | #include <string> | |
17 | #include <cstdio> | |
18 | #include <cstdarg> | |
19 | #if defined(BOOST_SPIRIT_DEBUG) | |
20 | #include <iostream> | |
21 | #endif // defined(BOOST_SPIRIT_DEBUG) | |
22 | ||
23 | #include <boost/concept_check.hpp> | |
24 | #include <boost/assert.hpp> | |
25 | #include <boost/spirit/include/classic_core.hpp> | |
26 | ||
27 | #include <boost/wave/token_ids.hpp> | |
28 | #include <boost/wave/language_support.hpp> | |
29 | #include <boost/wave/util/file_position.hpp> | |
30 | #include <boost/wave/cpplexer/validate_universal_char.hpp> | |
31 | #include <boost/wave/cpplexer/cpplexer_exceptions.hpp> | |
32 | #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 | |
33 | #include <boost/wave/cpplexer/detect_include_guards.hpp> | |
34 | #endif | |
35 | #include <boost/wave/cpplexer/cpp_lex_interface.hpp> | |
36 | ||
37 | // reuse the default token type | |
20effc67 | 38 | #include "../xlex_interface.hpp" |
7c673cae FG |
39 | |
40 | // include the xpressive headers | |
41 | #include "xpressive_lexer.hpp" | |
42 | ||
43 | /////////////////////////////////////////////////////////////////////////////// | |
44 | namespace boost { | |
45 | namespace wave { | |
46 | namespace cpplexer { | |
47 | namespace xlex { | |
48 | namespace lexer { | |
49 | ||
50 | /////////////////////////////////////////////////////////////////////////////// | |
51 | // | |
52 | // encapsulation of the xpressive based C++ lexer | |
53 | // | |
54 | /////////////////////////////////////////////////////////////////////////////// | |
55 | ||
56 | template < | |
57 | typename Iterator, | |
58 | typename Position = boost::wave::util::file_position_type | |
59 | > | |
60 | class lexer | |
61 | { | |
62 | public: | |
63 | typedef char char_type; | |
64 | typedef boost::wave::cpplexer::lex_token<Position> token_type; | |
65 | typedef typename token_type::string_type string_type; | |
66 | ||
67 | lexer(Iterator const &first, Iterator const &last, | |
68 | Position const &pos, boost::wave::language_support language); | |
69 | ~lexer() {} | |
70 | ||
71 | token_type& get(token_type& t); | |
72 | void set_position(Position const &pos) | |
73 | { | |
7c673cae FG |
74 | filename = pos.get_file(); |
75 | line = pos.get_line(); | |
20effc67 | 76 | column = pos.get_column(); |
7c673cae FG |
77 | } |
78 | ||
79 | #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 | |
80 | bool has_include_guards(std::string& guard_name) const | |
81 | { return guards.detected(guard_name); } | |
82 | #endif | |
83 | ||
84 | private: | |
85 | typedef xpressive_lexer<Iterator, token_id> lexer_type; | |
86 | typedef typename lexer_type::callback_type callback_type; | |
87 | ||
88 | lexer_type xlexer; | |
89 | Iterator first; | |
90 | Iterator last; | |
91 | ||
92 | string_type filename; | |
93 | int line; | |
20effc67 | 94 | int column; |
7c673cae FG |
95 | bool at_eof; |
96 | boost::wave::language_support language; | |
97 | ||
98 | // initialization data (regular expressions for the token definitions) | |
99 | struct lexer_data { | |
100 | token_id tokenid; // token data | |
101 | char_type const *tokenregex; // associated token to match | |
102 | callback_type tokencb; // associated callback function | |
103 | }; | |
104 | ||
105 | static lexer_data const init_data[]; // common patterns | |
106 | static lexer_data const init_data_cpp[]; // C++ only patterns | |
20effc67 TL |
107 | static lexer_data const init_data_cpp0x[]; // C++11 only patterns |
108 | static lexer_data const init_data_cpp2a[]; // C++20 only patterns | |
7c673cae FG |
109 | |
110 | #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 | |
111 | boost::wave::cpplexer::include_guards<token_type> guards; | |
112 | #endif | |
113 | }; | |
114 | ||
115 | /////////////////////////////////////////////////////////////////////////////// | |
116 | // helper for initializing token data | |
// Builders for one row of a lexer_data table: { token id, regex, callback }.
// TOKEN_DATA leaves the callback empty, TOKEN_DATA_EX takes it explicitly.
#define TOKEN_DATA(id, regex)               { id, regex, 0 }
#define TOKEN_DATA_EX(id, regex, callback)  { id, regex, callback }

///////////////////////////////////////////////////////////////////////////////
//  Building blocks for the regular expressions. All of them are plain string
//  literals which are glued together by literal concatenation.
#define OR          "|"                     // alternation
#define Q(c)        "\\" c                  // escape 'c' for the regex engine
#define TRI(c)      Q("?") Q("?") c         // trigraph prefix '??' followed by c

// horizontal whitespace and C style comments
#define BLANK       "[ \t\v\f]"
#define CCOMMENT    Q("/") Q("*") ".*?" Q("*") Q("/")

// whitespace allowed between '#' and the directive name
#define PPSPACE     "(" BLANK OR CCOMMENT ")*"

// digits, sign and exponent of numeric literals
#define OCTALDIGIT  "[0-7]"
#define DIGIT       "[0-9]"
#define HEXDIGIT    "[0-9a-fA-F]"
#define SIGN        "[-+]?"
#define EXPONENT    "(" "[eE]" SIGN "[0-9]+" ")"

// hexadecimal, octal or decimal integer (without suffix)
#define INTEGER     "("                                 \
                        "(0x|0X)" HEXDIGIT "+" OR       \
                        "0" OCTALDIGIT "*" OR           \
                        "[1-9]" DIGIT "*"               \
                    ")"

// literal suffixes
#define INTEGER_SUFFIX      "(" "[uU][lL]?|[lL][uU]?" ")"
#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
// the MS extensions additionally accept the 'i64' suffix
#define LONGINTEGER_SUFFIX  "("                                     \
                                "[uU]" "(" "ll" OR "LL" ")" OR      \
                                "(" "ll" OR "LL" ")" "[uU]" "?" OR  \
                                "i64"                               \
                            ")"
#else
#define LONGINTEGER_SUFFIX  "("                                     \
                                "[uU]" "(" "ll" OR "LL" ")" OR      \
                                "(" "ll" OR "LL" ")" "[uU]" "?"     \
                            ")"
#endif
#define FLOAT_SUFFIX        "(" "[fF][lL]?|[lL][fF]?" ")"

// encoding prefixes of character and string literals
#define CHAR_SPEC           "L?"
#define EXTCHAR_SPEC        "(" "[uU]" OR "u8" ")"

// a backslash, either written directly or as the trigraph '??/'
#define BACKSLASH           "(" Q("\\") OR TRI(Q("/")) ")"

// escape sequences inside of character and string literals
#define ESCAPESEQ           BACKSLASH "("                                   \
                                "[abfnrtv?'\"]" OR                          \
                                BACKSLASH OR                                \
                                "x" HEXDIGIT "+" OR                         \
                                OCTALDIGIT OCTALDIGIT "?" OCTALDIGIT "?"    \
                            ")"

// universal character names: \uXXXX and \UXXXXXXXX
#define HEXQUAD             HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
#define UNIVERSALCHAR       BACKSLASH "("               \
                                "u" HEXQUAD OR          \
                                "U" HEXQUAD HEXQUAD     \
                            ")"

// the '#' introducing a directive ('#', '??=' or '%:') and line endings
#define POUNDDEF            "(" "#" OR TRI("=") OR Q("%:") ")"
#define NEWLINEDEF          "(" "\n" OR "\r\n" OR "\r" ")"

// name of the include directive(s)
#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
#define INCLUDEDEF          "(include_next|include)"
#else
#define INCLUDEDEF          "include"
#endif
182 | ||
183 | /////////////////////////////////////////////////////////////////////////////// | |
184 | // common C++/C99 token definitions | |
185 | template <typename Iterator, typename Position> | |
186 | typename lexer<Iterator, Position>::lexer_data const | |
187 | lexer<Iterator, Position>::init_data[] = | |
188 | { | |
189 | TOKEN_DATA(T_CCOMMENT, CCOMMENT), | |
190 | TOKEN_DATA(T_CPPCOMMENT, Q("/") Q("/.*?") NEWLINEDEF ), | |
191 | TOKEN_DATA(T_CHARLIT, CHAR_SPEC "'" | |
192 | "(" ESCAPESEQ OR "[^\n\r']" OR UNIVERSALCHAR ")+" "'"), | |
193 | TOKEN_DATA(T_STRINGLIT, CHAR_SPEC Q("\"") | |
194 | "(" ESCAPESEQ OR "[^\n\r\"]" OR UNIVERSALCHAR ")*" Q("\"")), | |
195 | TOKEN_DATA(T_ANDAND, "&&"), | |
196 | TOKEN_DATA(T_ANDASSIGN, "&="), | |
197 | TOKEN_DATA(T_AND, "&"), | |
198 | TOKEN_DATA(T_EQUAL, "=="), | |
199 | TOKEN_DATA(T_ASSIGN, "="), | |
200 | TOKEN_DATA(T_ORASSIGN, Q("|=")), | |
201 | TOKEN_DATA(T_ORASSIGN_TRIGRAPH, TRI("!=")), | |
202 | TOKEN_DATA(T_OROR, Q("|") Q("|")), | |
203 | TOKEN_DATA(T_OROR_TRIGRAPH, TRI("!") Q("|") OR Q("|") TRI("!") OR TRI("!") TRI("!")), | |
204 | TOKEN_DATA(T_OR, Q("|")), | |
205 | TOKEN_DATA(T_OR_TRIGRAPH, TRI("!")), | |
206 | TOKEN_DATA(T_XORASSIGN, Q("^=")), | |
207 | TOKEN_DATA(T_XORASSIGN_TRIGRAPH, TRI("'=")), | |
208 | TOKEN_DATA(T_XOR, Q("^")), | |
209 | TOKEN_DATA(T_XOR_TRIGRAPH, TRI("'")), | |
210 | TOKEN_DATA(T_COMMA, ","), | |
211 | TOKEN_DATA(T_RIGHTBRACKET_ALT, ":>"), | |
212 | TOKEN_DATA(T_COLON, ":"), | |
213 | TOKEN_DATA(T_DIVIDEASSIGN, Q("/=")), | |
214 | TOKEN_DATA(T_DIVIDE, Q("/")), | |
215 | TOKEN_DATA(T_ELLIPSIS, Q(".") Q(".") Q(".")), | |
216 | TOKEN_DATA(T_SHIFTRIGHTASSIGN, ">>="), | |
217 | TOKEN_DATA(T_SHIFTRIGHT, ">>"), | |
218 | TOKEN_DATA(T_GREATEREQUAL, ">="), | |
219 | TOKEN_DATA(T_GREATER, ">"), | |
220 | TOKEN_DATA(T_LEFTBRACE, Q("{")), | |
221 | TOKEN_DATA(T_SHIFTLEFTASSIGN, "<<="), | |
222 | TOKEN_DATA(T_SHIFTLEFT, "<<"), | |
223 | TOKEN_DATA(T_LEFTBRACE_ALT, "<" Q("%")), | |
224 | TOKEN_DATA(T_LESSEQUAL, "<="), | |
225 | TOKEN_DATA(T_LEFTBRACKET_ALT, "<:"), | |
226 | TOKEN_DATA(T_LESS, "<"), | |
227 | TOKEN_DATA(T_LEFTBRACE_TRIGRAPH, TRI("<")), | |
228 | TOKEN_DATA(T_LEFTPAREN, Q("(")), | |
229 | TOKEN_DATA(T_LEFTBRACKET, Q("[")), | |
230 | TOKEN_DATA(T_LEFTBRACKET_TRIGRAPH, TRI(Q("("))), | |
231 | TOKEN_DATA(T_MINUSMINUS, Q("-") Q("-")), | |
232 | TOKEN_DATA(T_MINUSASSIGN, Q("-=")), | |
233 | TOKEN_DATA(T_ARROW, Q("->")), | |
234 | TOKEN_DATA(T_MINUS, Q("-")), | |
235 | TOKEN_DATA(T_POUND_POUND_ALT, Q("%:") Q("%:")), | |
236 | TOKEN_DATA(T_PERCENTASSIGN, Q("%=")), | |
237 | TOKEN_DATA(T_RIGHTBRACE_ALT, Q("%>")), | |
238 | TOKEN_DATA(T_POUND_ALT, Q("%:")), | |
239 | TOKEN_DATA(T_PERCENT, Q("%")), | |
240 | TOKEN_DATA(T_NOTEQUAL, "!="), | |
241 | TOKEN_DATA(T_NOT, "!"), | |
242 | TOKEN_DATA(T_PLUSASSIGN, Q("+=")), | |
243 | TOKEN_DATA(T_PLUSPLUS, Q("+") Q("+")), | |
244 | TOKEN_DATA(T_PLUS, Q("+")), | |
245 | TOKEN_DATA(T_RIGHTBRACE, Q("}")), | |
246 | TOKEN_DATA(T_RIGHTBRACE_TRIGRAPH, TRI(">")), | |
247 | TOKEN_DATA(T_RIGHTPAREN, Q(")")), | |
248 | TOKEN_DATA(T_RIGHTBRACKET, Q("]")), | |
249 | TOKEN_DATA(T_RIGHTBRACKET_TRIGRAPH, TRI(Q(")"))), | |
250 | TOKEN_DATA(T_SEMICOLON, ";"), | |
251 | TOKEN_DATA(T_STARASSIGN, Q("*=")), | |
252 | TOKEN_DATA(T_STAR, Q("*")), | |
253 | TOKEN_DATA(T_COMPL, Q("~")), | |
254 | TOKEN_DATA(T_COMPL_TRIGRAPH, TRI("-")), | |
255 | TOKEN_DATA(T_ASM, "asm"), | |
256 | TOKEN_DATA(T_AUTO, "auto"), | |
257 | TOKEN_DATA(T_BOOL, "bool"), | |
258 | TOKEN_DATA(T_FALSE, "false"), | |
259 | TOKEN_DATA(T_TRUE, "true"), | |
260 | TOKEN_DATA(T_BREAK, "break"), | |
261 | TOKEN_DATA(T_CASE, "case"), | |
262 | TOKEN_DATA(T_CATCH, "catch"), | |
263 | TOKEN_DATA(T_CHAR, "char"), | |
264 | TOKEN_DATA(T_CLASS, "class"), | |
265 | TOKEN_DATA(T_CONSTCAST, "const_cast"), | |
266 | TOKEN_DATA(T_CONST, "const"), | |
267 | TOKEN_DATA(T_CONTINUE, "continue"), | |
268 | TOKEN_DATA(T_DEFAULT, "default"), | |
269 | TOKEN_DATA(T_DELETE, "delete"), | |
270 | TOKEN_DATA(T_DOUBLE, "double"), | |
271 | TOKEN_DATA(T_DO, "do"), | |
272 | TOKEN_DATA(T_DYNAMICCAST, "dynamic_cast"), | |
273 | TOKEN_DATA(T_ELSE, "else"), | |
274 | TOKEN_DATA(T_ENUM, "enum"), | |
275 | TOKEN_DATA(T_EXPLICIT, "explicit"), | |
276 | TOKEN_DATA(T_EXPORT, "export"), | |
277 | TOKEN_DATA(T_EXTERN, "extern"), | |
278 | TOKEN_DATA(T_FLOAT, "float"), | |
279 | TOKEN_DATA(T_FOR, "for"), | |
280 | TOKEN_DATA(T_FRIEND, "friend"), | |
281 | TOKEN_DATA(T_GOTO, "goto"), | |
282 | TOKEN_DATA(T_IF, "if"), | |
283 | TOKEN_DATA(T_INLINE, "inline"), | |
284 | TOKEN_DATA(T_INT, "int"), | |
285 | TOKEN_DATA(T_LONG, "long"), | |
286 | TOKEN_DATA(T_MUTABLE, "mutable"), | |
287 | TOKEN_DATA(T_NAMESPACE, "namespace"), | |
288 | TOKEN_DATA(T_NEW, "new"), | |
289 | TOKEN_DATA(T_OPERATOR, "operator"), | |
290 | TOKEN_DATA(T_PRIVATE, "private"), | |
291 | TOKEN_DATA(T_PROTECTED, "protected"), | |
292 | TOKEN_DATA(T_PUBLIC, "public"), | |
293 | TOKEN_DATA(T_REGISTER, "register"), | |
294 | TOKEN_DATA(T_REINTERPRETCAST, "reinterpret_cast"), | |
295 | TOKEN_DATA(T_RETURN, "return"), | |
296 | TOKEN_DATA(T_SHORT, "short"), | |
297 | TOKEN_DATA(T_SIGNED, "signed"), | |
298 | TOKEN_DATA(T_SIZEOF, "sizeof"), | |
299 | TOKEN_DATA(T_STATICCAST, "static_cast"), | |
300 | TOKEN_DATA(T_STATIC, "static"), | |
301 | TOKEN_DATA(T_STRUCT, "struct"), | |
302 | TOKEN_DATA(T_SWITCH, "switch"), | |
303 | TOKEN_DATA(T_TEMPLATE, "template"), | |
304 | TOKEN_DATA(T_THIS, "this"), | |
305 | TOKEN_DATA(T_THROW, "throw"), | |
306 | TOKEN_DATA(T_TRY, "try"), | |
307 | TOKEN_DATA(T_TYPEDEF, "typedef"), | |
308 | TOKEN_DATA(T_TYPEID, "typeid"), | |
309 | TOKEN_DATA(T_TYPENAME, "typename"), | |
310 | TOKEN_DATA(T_UNION, "union"), | |
311 | TOKEN_DATA(T_UNSIGNED, "unsigned"), | |
312 | TOKEN_DATA(T_USING, "using"), | |
313 | TOKEN_DATA(T_VIRTUAL, "virtual"), | |
314 | TOKEN_DATA(T_VOID, "void"), | |
315 | TOKEN_DATA(T_VOLATILE, "volatile"), | |
316 | TOKEN_DATA(T_WCHART, "wchar_t"), | |
317 | TOKEN_DATA(T_WHILE, "while"), | |
318 | #if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0 | |
319 | TOKEN_DATA(T_MSEXT_INT8, "__int8"), | |
320 | TOKEN_DATA(T_MSEXT_INT16, "__int16"), | |
321 | TOKEN_DATA(T_MSEXT_INT32, "__int32"), | |
322 | TOKEN_DATA(T_MSEXT_INT64, "__int64"), | |
323 | TOKEN_DATA(T_MSEXT_BASED, "_?" "_based"), | |
324 | TOKEN_DATA(T_MSEXT_DECLSPEC, "_?" "_declspec"), | |
325 | TOKEN_DATA(T_MSEXT_CDECL, "_?" "_cdecl"), | |
326 | TOKEN_DATA(T_MSEXT_FASTCALL, "_?" "_fastcall"), | |
327 | TOKEN_DATA(T_MSEXT_STDCALL, "_?" "_stdcall"), | |
328 | TOKEN_DATA(T_MSEXT_TRY , "__try"), | |
329 | TOKEN_DATA(T_MSEXT_EXCEPT, "__except"), | |
330 | TOKEN_DATA(T_MSEXT_FINALLY, "__finally"), | |
331 | TOKEN_DATA(T_MSEXT_LEAVE, "__leave"), | |
332 | TOKEN_DATA(T_MSEXT_INLINE, "_?" "_inline"), | |
333 | TOKEN_DATA(T_MSEXT_ASM, "_?" "_asm"), | |
334 | TOKEN_DATA(T_MSEXT_PP_REGION, POUNDDEF PPSPACE "region"), | |
335 | TOKEN_DATA(T_MSEXT_PP_ENDREGION, POUNDDEF PPSPACE "endregion"), | |
336 | #endif // BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0 | |
337 | TOKEN_DATA(T_PP_DEFINE, POUNDDEF PPSPACE "define"), | |
338 | TOKEN_DATA(T_PP_IFDEF, POUNDDEF PPSPACE "ifdef"), | |
339 | TOKEN_DATA(T_PP_IFNDEF, POUNDDEF PPSPACE "ifndef"), | |
340 | TOKEN_DATA(T_PP_IF, POUNDDEF PPSPACE "if"), | |
341 | TOKEN_DATA(T_PP_ELSE, POUNDDEF PPSPACE "else"), | |
342 | TOKEN_DATA(T_PP_ELIF, POUNDDEF PPSPACE "elif"), | |
343 | TOKEN_DATA(T_PP_ENDIF, POUNDDEF PPSPACE "endif"), | |
344 | TOKEN_DATA(T_PP_ERROR, POUNDDEF PPSPACE "error"), | |
345 | TOKEN_DATA(T_PP_QHEADER, POUNDDEF PPSPACE \ | |
346 | INCLUDEDEF PPSPACE Q("\"") "[^\n\r\"]+" Q("\"")), | |
347 | TOKEN_DATA(T_PP_HHEADER, POUNDDEF PPSPACE \ | |
348 | INCLUDEDEF PPSPACE "<" "[^\n\r>]+" ">"), | |
349 | TOKEN_DATA(T_PP_INCLUDE, POUNDDEF PPSPACE \ | |
350 | INCLUDEDEF PPSPACE), | |
351 | TOKEN_DATA(T_PP_LINE, POUNDDEF PPSPACE "line"), | |
352 | TOKEN_DATA(T_PP_PRAGMA, POUNDDEF PPSPACE "pragma"), | |
353 | TOKEN_DATA(T_PP_UNDEF, POUNDDEF PPSPACE "undef"), | |
354 | TOKEN_DATA(T_PP_WARNING, POUNDDEF PPSPACE "warning"), | |
355 | TOKEN_DATA(T_FLOATLIT, | |
356 | "(" DIGIT "*" Q(".") DIGIT "+" OR DIGIT "+" Q(".") ")" | |
357 | EXPONENT "?" FLOAT_SUFFIX "?" OR | |
358 | DIGIT "+" EXPONENT FLOAT_SUFFIX "?"), | |
359 | TOKEN_DATA(T_LONGINTLIT, INTEGER LONGINTEGER_SUFFIX), | |
360 | TOKEN_DATA(T_INTLIT, INTEGER INTEGER_SUFFIX "?"), | |
361 | #if BOOST_WAVE_USE_STRICT_LEXER != 0 | |
362 | TOKEN_DATA(T_IDENTIFIER, "([a-zA-Z_]" OR UNIVERSALCHAR ")([a-zA-Z0-9_]" OR UNIVERSALCHAR ")*"), | |
363 | #else | |
364 | TOKEN_DATA(T_IDENTIFIER, "([a-zA-Z_$]" OR UNIVERSALCHAR ")([a-zA-Z0-9_$]" OR UNIVERSALCHAR ")*"), | |
365 | #endif | |
366 | TOKEN_DATA(T_SPACE, BLANK "+"), | |
7c673cae FG |
367 | TOKEN_DATA(T_CONTLINE, Q("\\") "\n"), |
368 | TOKEN_DATA(T_NEWLINE, NEWLINEDEF), | |
369 | TOKEN_DATA(T_POUND_POUND, "##"), | |
370 | TOKEN_DATA(T_POUND_POUND_TRIGRAPH, TRI("=") TRI("=")), | |
371 | TOKEN_DATA(T_POUND, "#"), | |
372 | TOKEN_DATA(T_POUND_TRIGRAPH, TRI("=")), | |
373 | TOKEN_DATA(T_ANY_TRIGRAPH, TRI(Q("/"))), | |
374 | TOKEN_DATA(T_QUESTION_MARK, Q("?")), | |
375 | TOKEN_DATA(T_DOT, Q(".")), | |
376 | TOKEN_DATA(T_ANY, "."), | |
377 | { token_id(0) } // this should be the last entry | |
378 | }; | |
379 | ||
380 | /////////////////////////////////////////////////////////////////////////////// | |
381 | // C++ only token definitions | |
382 | template <typename Iterator, typename Position> | |
383 | typename lexer<Iterator, Position>::lexer_data const | |
384 | lexer<Iterator, Position>::init_data_cpp[] = | |
385 | { | |
386 | TOKEN_DATA(T_AND_ALT, "bitand"), | |
387 | TOKEN_DATA(T_ANDASSIGN_ALT, "and_eq"), | |
388 | TOKEN_DATA(T_ANDAND_ALT, "and"), | |
389 | TOKEN_DATA(T_OR_ALT, "bitor"), | |
390 | TOKEN_DATA(T_ORASSIGN_ALT, "or_eq"), | |
391 | TOKEN_DATA(T_OROR_ALT, "or"), | |
392 | TOKEN_DATA(T_XORASSIGN_ALT, "xor_eq"), | |
393 | TOKEN_DATA(T_XOR_ALT, "xor"), | |
394 | TOKEN_DATA(T_NOTEQUAL_ALT, "not_eq"), | |
395 | TOKEN_DATA(T_NOT_ALT, "not"), | |
396 | TOKEN_DATA(T_COMPL_ALT, "compl"), | |
397 | TOKEN_DATA(T_ARROWSTAR, Q("->") Q("*")), | |
398 | TOKEN_DATA(T_DOTSTAR, Q(".") Q("*")), | |
399 | TOKEN_DATA(T_COLON_COLON, "::"), | |
400 | { token_id(0) } // this should be the last entry | |
401 | }; | |
402 | ||
20effc67 TL |
403 | /////////////////////////////////////////////////////////////////////////////// |
404 | // C++11 only token definitions | |
405 | #define T_EXTCHARLIT token_id(T_CHARLIT|AltTokenType) | |
406 | #define T_EXTSTRINGLIT token_id(T_STRINGLIT|AltTokenType) | |
407 | #define T_EXTRAWSTRINGLIT token_id(T_RAWSTRINGLIT|AltTokenType) | |
408 | ||
409 | template <typename Iterator, typename Position> | |
410 | typename lexer<Iterator, Position>::lexer_data const | |
411 | lexer<Iterator, Position>::init_data_cpp0x[] = | |
412 | { | |
413 | TOKEN_DATA(T_EXTCHARLIT, EXTCHAR_SPEC "'" | |
414 | "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\n\\r\\\\']" ")+" "'"), | |
415 | TOKEN_DATA(T_EXTSTRINGLIT, EXTCHAR_SPEC Q("\"") | |
416 | "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\n\\r\\\\\"]" ")*" Q("\"")), | |
417 | TOKEN_DATA(T_RAWSTRINGLIT, CHAR_SPEC "R" Q("\"") | |
418 | "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\\\\"]" ")*" Q("\"")), | |
419 | TOKEN_DATA(T_EXTRAWSTRINGLIT, EXTCHAR_SPEC "R" Q("\"") | |
420 | "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\\\\"]" ")*" Q("\"")), | |
421 | TOKEN_DATA(T_ALIGNAS, "alignas"), | |
422 | TOKEN_DATA(T_ALIGNOF, "alignof"), | |
423 | TOKEN_DATA(T_CHAR16_T, "char16_t"), | |
424 | TOKEN_DATA(T_CHAR32_T, "char32_t"), | |
425 | TOKEN_DATA(T_CONSTEXPR, "constexpr"), | |
426 | TOKEN_DATA(T_DECLTYPE, "decltype"), | |
427 | TOKEN_DATA(T_NOEXCEPT, "noexcept"), | |
428 | TOKEN_DATA(T_NULLPTR, "nullptr"), | |
429 | TOKEN_DATA(T_STATICASSERT, "static_assert"), | |
430 | TOKEN_DATA(T_THREADLOCAL, "thread_local"), | |
431 | { token_id(0) } // this should be the last entry | |
432 | }; | |
433 | ||
434 | /////////////////////////////////////////////////////////////////////////////// | |
435 | // C++20 only token definitions | |
436 | ||
437 | template <typename Iterator, typename Position> | |
438 | typename lexer<Iterator, Position>::lexer_data const | |
439 | lexer<Iterator, Position>::init_data_cpp2a[] = | |
440 | { | |
441 | TOKEN_DATA(T_CHAR8_T, "char8_t"), | |
442 | TOKEN_DATA(T_CONCEPT, "concept"), | |
443 | TOKEN_DATA(T_CONSTEVAL, "consteval"), | |
444 | TOKEN_DATA(T_CONSTINIT, "constinit"), | |
445 | TOKEN_DATA(T_CO_AWAIT, "co_await"), | |
446 | TOKEN_DATA(T_CO_RETURN, "co_return"), | |
447 | TOKEN_DATA(T_CO_YIELD, "co_yield"), | |
448 | TOKEN_DATA(T_REQUIRES, "requires"), | |
449 | TOKEN_DATA(T_SPACESHIP, "<=>"), | |
450 | { token_id(0) } // this should be the last entry | |
451 | }; | |
452 | ||
7c673cae FG |
453 | /////////////////////////////////////////////////////////////////////////////// |
454 | // undefine macros, required for regular expression definitions | |
// Remove all helper macros introduced above for building the token tables,
// so that none of these very generic names (OR, Q, DIGIT, BLANK, ...) leaks
// into the code including this header.

// sub-expressions of the regular expressions
#undef INCLUDEDEF
#undef NEWLINEDEF           // was previously left defined
#undef POUNDDEF
#undef BLANK                // was previously left defined
#undef CCOMMENT
#undef PPSPACE
#undef DIGIT
#undef OCTALDIGIT
#undef HEXDIGIT
#undef SIGN
#undef EXPONENT
#undef LONGINTEGER_SUFFIX
#undef INTEGER_SUFFIX
#undef INTEGER
#undef FLOAT_SUFFIX
#undef CHAR_SPEC
#undef EXTCHAR_SPEC
#undef BACKSLASH
#undef ESCAPESEQ
#undef HEXQUAD
#undef UNIVERSALCHAR

// regex primitives
#undef Q
#undef TRI
#undef OR

// table row builders
#undef TOKEN_DATA
#undef TOKEN_DATA_EX

// token id shortcuts of the C++11 table
#undef T_EXTCHARLIT
#undef T_EXTSTRINGLIT
#undef T_EXTRAWSTRINGLIT
7c673cae FG |
485 | /////////////////////////////////////////////////////////////////////////////// |
486 | // initialize cpp lexer | |
487 | template <typename Iterator, typename Position> | |
488 | inline | |
489 | lexer<Iterator, Position>::lexer(Iterator const &first, | |
490 | Iterator const &last, Position const &pos, | |
491 | boost::wave::language_support language) | |
492 | : first(first), last(last), | |
20effc67 TL |
493 | filename(pos.get_file()), line(pos.get_line()), column(pos.get_column()), |
494 | at_eof(false), language(language) | |
7c673cae FG |
495 | { |
496 | // if in C99 mode, some of the keywords/operators are not valid | |
497 | if (!boost::wave::need_c99(language)) { | |
498 | for (int j = 0; 0 != init_data_cpp[j].tokenid; ++j) { | |
499 | xlexer.register_regex(init_data_cpp[j].tokenregex, | |
500 | init_data_cpp[j].tokenid, init_data_cpp[j].tokencb); | |
501 | } | |
502 | } | |
503 | ||
20effc67 TL |
504 | #if BOOST_WAVE_SUPPORT_CPP0X != 0 |
505 | if (boost::wave::need_cpp0x(language) || boost::wave::need_cpp2a(language)) { | |
506 | for (int j = 0; 0 != init_data_cpp0x[j].tokenid; ++j) { | |
507 | xlexer.register_regex(init_data_cpp0x[j].tokenregex, | |
508 | init_data_cpp0x[j].tokenid, init_data_cpp[j].tokencb); | |
509 | } | |
510 | } | |
511 | #endif | |
512 | ||
513 | #if BOOST_WAVE_SUPPORT_CPP2A != 0 | |
514 | if (boost::wave::need_cpp2a(language) || boost::wave::need_cpp2a(language)) { | |
515 | for (int j = 0; 0 != init_data_cpp2a[j].tokenid; ++j) { | |
516 | xlexer.register_regex(init_data_cpp2a[j].tokenregex, | |
517 | init_data_cpp2a[j].tokenid, init_data_cpp[j].tokencb); | |
518 | } | |
519 | } | |
520 | #endif | |
521 | ||
7c673cae FG |
522 | // tokens valid for C++ and C99 |
523 | for (int i = 0; 0 != init_data[i].tokenid; ++i) { | |
524 | xlexer.register_regex(init_data[i].tokenregex, init_data[i].tokenid, | |
525 | init_data[i].tokencb); | |
526 | } | |
527 | } | |
528 | ||
529 | /////////////////////////////////////////////////////////////////////////////// | |
530 | // get the next token from the input stream | |
531 | template <typename Iterator, typename Position> | |
532 | inline boost::wave::cpplexer::lex_token<Position>& | |
533 | lexer<Iterator, Position>::get(boost::wave::cpplexer::lex_token<Position>& t) | |
534 | { | |
535 | using namespace boost::wave; // to import token ids to this scope | |
536 | ||
537 | if (at_eof) | |
538 | return t = cpplexer::lex_token<Position>(); // return T_EOI | |
539 | ||
540 | std::string tokval; | |
541 | token_id id = xlexer.next_token(first, last, tokval); | |
542 | string_type value = tokval.c_str(); | |
543 | ||
544 | if ((token_id)(-1) == id) | |
545 | id = T_EOF; // end of input reached | |
546 | ||
547 | if (T_IDENTIFIER == id) { | |
548 | // test identifier characters for validity (throws if invalid chars found) | |
549 | if (!boost::wave::need_no_character_validation(language)) { | |
550 | cpplexer::impl::validate_identifier_name(value, line, -1, filename); | |
551 | } | |
552 | } | |
553 | else if (T_STRINGLIT == id || T_CHARLIT == id) { | |
554 | // test literal characters for validity (throws if invalid chars found) | |
555 | if (!boost::wave::need_no_character_validation(language)) { | |
556 | cpplexer::impl::validate_literal(value, line, -1, filename); | |
557 | } | |
558 | } | |
559 | else if (T_EOF == id) { | |
560 | // T_EOF is returned as a valid token, the next call will return T_EOI, | |
561 | // i.e. the actual end of input | |
562 | at_eof = true; | |
563 | value.clear(); | |
564 | } | |
20effc67 TL |
565 | else if (T_NEWLINE == id) { |
566 | ++line; | |
567 | column = 1; | |
568 | } else { | |
569 | column += value.size(); | |
570 | } | |
7c673cae FG |
571 | |
572 | #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 | |
20effc67 | 573 | cpplexer::lex_token<Position> tok(id, value, Position(filename, line, column)); |
7c673cae FG |
574 | return t = guards.detect_guard(tok); |
575 | #else | |
576 | return t = cpplexer::lex_token<Position>(id, value, | |
20effc67 | 577 | Position(filename, line, column)); |
7c673cae FG |
578 | #endif |
579 | } | |
580 | ||
581 | /////////////////////////////////////////////////////////////////////////////// | |
582 | // | |
583 | // xlex_functor | |
584 | // | |
585 | /////////////////////////////////////////////////////////////////////////////// | |
586 | template < | |
587 | typename Iterator, | |
588 | typename Position = boost::wave::util::file_position_type | |
589 | > | |
590 | class xlex_functor | |
591 | : public xlex_input_interface<typename lexer<Iterator, Position>::token_type> | |
592 | { | |
593 | public: | |
594 | ||
595 | typedef typename lexer<Iterator, Position>::token_type token_type; | |
596 | ||
597 | xlex_functor(Iterator const &first, Iterator const &last, | |
598 | Position const &pos, boost::wave::language_support language) | |
599 | : lexer_(first, last, pos, language) | |
600 | {} | |
601 | virtual ~xlex_functor() {} | |
602 | ||
603 | // get the next token from the input stream | |
20effc67 TL |
604 | token_type& get(token_type& t) BOOST_OVERRIDE { return lexer_.get(t); } |
605 | void set_position(Position const &pos) BOOST_OVERRIDE { lexer_.set_position(pos); } | |
7c673cae FG |
606 | |
607 | #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 | |
20effc67 | 608 | bool has_include_guards(std::string& guard_name) const BOOST_OVERRIDE |
7c673cae FG |
609 | { return lexer_.has_include_guards(guard_name); } |
610 | #endif | |
611 | ||
612 | private: | |
613 | lexer<Iterator, Position> lexer_; | |
614 | }; | |
615 | ||
616 | } // namespace lexer | |
617 | ||
618 | /////////////////////////////////////////////////////////////////////////////// | |
619 | // | |
620 | // The new_lexer_gen<>::new_lexer function (declared in xlex_interface.hpp) | |
621 | // should be defined inline, if the lex_functor shouldn't be instantiated | |
622 | // separately from the lex_iterator. | |
623 | // | |
624 | // Separate (explicit) instantiation helps to reduce compilation time. | |
625 | // | |
626 | /////////////////////////////////////////////////////////////////////////////// | |
627 | ||
// new_lexer is declared 'inline' unless the lexer gets instantiated
// separately (BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0).
#if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION == 0
#define BOOST_WAVE_XLEX_NEW_LEXER_INLINE inline
#else
#define BOOST_WAVE_XLEX_NEW_LEXER_INLINE
#endif
633 | ||
634 | /////////////////////////////////////////////////////////////////////////////// | |
635 | // | |
636 | // The 'new_lexer' function allows the opaque generation of a new lexer object. | |
637 | // It is coupled to the iterator type to allow to decouple the lexer/iterator | |
638 | // configurations at compile time. | |
639 | // | |
640 | // This function is declared inside the xlex_interface.hpp file, which is | |
641 | // referenced by the source file calling the lexer and the source file, which | |
642 | // instantiates the lex_functor. But it is defined here, so it will be | |
643 | // instantiated only while compiling the source file, which instantiates the | |
644 | // lex_functor. While the xlex_interface.hpp file may be included everywhere, | |
645 | // this file (xlex_lexer.hpp) should be included only once. This allows | |
646 | // to decouple the lexer interface from the lexer implementation and reduces | |
647 | // compilation time. | |
648 | // | |
649 | /////////////////////////////////////////////////////////////////////////////// | |
650 | ||
651 | template <typename Iterator, typename Position> | |
652 | BOOST_WAVE_XLEX_NEW_LEXER_INLINE | |
653 | lex_input_interface<boost::wave::cpplexer::lex_token<Position> > * | |
654 | new_lexer_gen<Iterator, Position>::new_lexer(Iterator const &first, | |
655 | Iterator const &last, Position const &pos, | |
656 | wave::language_support language) | |
657 | { | |
658 | return new lexer::xlex_functor<Iterator, Position>( | |
659 | first, last, pos, language); | |
660 | } | |
661 | ||
662 | #undef BOOST_WAVE_XLEX_NEW_LEXER_INLINE | |
663 | ||
664 | /////////////////////////////////////////////////////////////////////////////// | |
665 | } // namespace xlex | |
666 | } // namespace cpplexer | |
667 | } // namespace wave | |
668 | } // namespace boost | |
669 | ||
20effc67 | 670 | #endif // !defined(BOOST_XLEX_LEXER_HPP) |