/*=============================================================================
    Boost.Wave: A Standard compliant C++ preprocessor library
    http://www.boost.org/

    Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost
    Software License, Version 1.0. (See accompanying file
    LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/

#if !defined(BOOST_WAVE_LEXERTL_LEXER_HPP_INCLUDED)
#define BOOST_WAVE_LEXERTL_LEXER_HPP_INCLUDED

#include <fstream>

#include <boost/iterator/iterator_traits.hpp>

#include <boost/wave/wave_config.hpp>
#include <boost/wave/language_support.hpp>
#include <boost/wave/token_ids.hpp>
#include <boost/wave/util/time_conversion_helper.hpp>

#include <boost/wave/cpplexer/validate_universal_char.hpp>
#include <boost/wave/cpplexer/convert_trigraphs.hpp>
#include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
#include <boost/wave/cpplexer/detect_include_guards.hpp>
#endif

#include "wave_lexertl_config.hpp"
#include "../lexertl_iterator.hpp"

#if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES != 0
#include "wave_lexertl_tables.hpp"
#else
#include <boost/spirit/home/support/detail/lexer/generator.hpp>
#include <boost/spirit/home/support/detail/lexer/rules.hpp>
#include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
#include <boost/spirit/home/support/detail/lexer/consts.hpp>
//#include "lexertl/examples/serialise.hpp"
#if BOOST_WAVE_LEXERTL_GENERATE_CPP_CODE != 0
#include <boost/spirit/home/support/detail/lexer/generate_cpp.hpp>
#endif
#endif

///////////////////////////////////////////////////////////////////////////////
namespace boost { namespace wave { namespace cpplexer { namespace lexertl
{

#if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
///////////////////////////////////////////////////////////////////////////////
// The following numbers are the array sizes of the token regexes, which we
// need to specify explicitly to keep the CW compiler happy (at least up to V9.5).
#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
#define INIT_DATA_SIZE 176
#else
#define INIT_DATA_SIZE 159
#endif
#define INIT_DATA_CPP_SIZE 15
#define INIT_DATA_PP_NUMBER_SIZE 2
#define INIT_DATA_CPP0X_SIZE 15
#define INIT_DATA_CPP2A_SIZE 10
#define INIT_MACRO_DATA_SIZE 28
#endif // #if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0

// this is just a hack to have a unique token id not otherwise used by Wave
#define T_ANYCTRL T_LAST_TOKEN_ID

///////////////////////////////////////////////////////////////////////////////
namespace lexer
{

///////////////////////////////////////////////////////////////////////////////
// this is the wrapper for the lexertl lexer library
template <typename Iterator, typename Position>
class lexertl
{
private:
    typedef BOOST_WAVE_STRINGTYPE string_type;
    typedef typename boost::iterators::iterator_value<Iterator>::type
        char_type;

public:
    wave::token_id next_token(Iterator &first, Iterator const &last,
        string_type& token_value);

#if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES != 0
    lexertl() {}
    void init_dfa(wave::language_support lang, Position const& pos,
        bool force_reinit = false) {}
    bool is_initialized() const { return true; }
#else
    lexertl() : has_compiled_dfa_(false) {}
    bool init_dfa(wave::language_support lang, Position const& pos,
        bool force_reinit = false);
    bool is_initialized() const { return has_compiled_dfa_; }

    // get time of last compilation
    static std::time_t get_compilation_time()
        { return compilation_time.get_time(); }

    bool load (std::istream& instrm);
    bool save (std::ostream& outstrm);

private:
    boost::lexer::state_machine state_machine_;
    bool has_compiled_dfa_;

    // initialization data (regular expressions for the token definitions)
    struct lexer_macro_data {
        char_type const *name;      // macro name
        char_type const *macro;     // associated macro definition
    };
    static lexer_macro_data const init_macro_data[INIT_MACRO_DATA_SIZE];    // macro patterns

    struct lexer_data {
        token_id tokenid;               // token id
        char_type const *tokenregex;    // regex matching this token
    };
    static lexer_data const init_data[INIT_DATA_SIZE];                      // common patterns
    static lexer_data const init_data_cpp[INIT_DATA_CPP_SIZE];              // C++ only patterns
    static lexer_data const init_data_pp_number[INIT_DATA_PP_NUMBER_SIZE];  // pp-number only patterns
    static lexer_data const init_data_cpp0x[INIT_DATA_CPP0X_SIZE];          // C++0X only patterns
    static lexer_data const init_data_cpp2a[INIT_DATA_CPP2A_SIZE];          // C++2A only patterns

    // helper for calculation of the time of last compilation
    static boost::wave::util::time_conversion_helper compilation_time;
#endif // #if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
};
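
///////////////////////////////////////////////////////////////////////////////
// Usage sketch (illustration only, not part of the sample): the wrapper above
// is driven by handing it a pair of character iterators and collecting tokens
// until T_EOF. The concrete iterator and position types below are assumptions
// chosen just for this example; lexertl_functor further down wires this up
// for real.
//
//     lexertl<std::string::const_iterator,
//         boost::wave::util::file_position_type> lex;
//     lex.init_dfa(boost::wave::support_cpp,
//         boost::wave::util::file_position_type("<input>"));
//
//     std::string const input("int i = 0;\n");
//     std::string::const_iterator it = input.begin();
//     BOOST_WAVE_STRINGTYPE value;
//     for (;;) {
//         boost::wave::token_id id = lex.next_token(it, input.end(), value);
//         if (boost::wave::T_EOF == id)
//             break;
//         // ... use id and value
//     }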

#if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
///////////////////////////////////////////////////////////////////////////////
// get time of last compilation of this file
template <typename IteratorT, typename PositionT>
boost::wave::util::time_conversion_helper
    lexertl<IteratorT, PositionT>::compilation_time(__DATE__ " " __TIME__);

///////////////////////////////////////////////////////////////////////////////
// token regex definitions

// helper for initializing token data and macro definitions
#define Q(c) "\\" c
#define TRI(c) "{TRI}" c
#define OR "|"
#define MACRO_DATA(name, macro) { name, macro }
#define TOKEN_DATA(id, regex) { id, regex }
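
// A note on the helpers above (illustrative): Q("|=") expands to the adjacent
// string literals "\\" "|=", i.e. the regex text \|= (a quoted operator);
// TRI("=") expands to "{TRI}=", the {TRI} lexertl macro (which matches "??")
// followed by '='; and OR is simply the regex alternation bar. MACRO_DATA and
// TOKEN_DATA merely build the aggregate initializers used in the tables below.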

// lexertl macro definitions
template <typename Iterator, typename Position>
typename lexertl<Iterator, Position>::lexer_macro_data const
lexertl<Iterator, Position>::init_macro_data[INIT_MACRO_DATA_SIZE] =
{
    MACRO_DATA("ANY", "[\t\v\f\r\n\\040-\\377]"),
    MACRO_DATA("ANYCTRL", "[\\000-\\037]"),
    MACRO_DATA("TRI", "\\?\\?"),
    MACRO_DATA("BLANK", "[ \t\v\f]"),
    MACRO_DATA("CCOMMENT", "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"),
    MACRO_DATA("PPSPACE", "(" "{BLANK}" OR "{CCOMMENT}" ")*"),
    MACRO_DATA("OCTALDIGIT", "[0-7]"),
    MACRO_DATA("DIGIT", "[0-9]"),
    MACRO_DATA("HEXDIGIT", "[0-9a-fA-F]"),
    MACRO_DATA("OPTSIGN", "[-+]?"),
    MACRO_DATA("EXPSTART", "[eE][-+]"),
    MACRO_DATA("EXPONENT", "([eE]{OPTSIGN}{DIGIT}+)"),
    MACRO_DATA("NONDIGIT", "[a-zA-Z_]"),
    MACRO_DATA("INTEGER", "(" "(0x|0X){HEXDIGIT}+" OR "0{OCTALDIGIT}*" OR "[1-9]{DIGIT}*" ")"),
    MACRO_DATA("INTEGER_SUFFIX", "(" "[uU][lL]?" OR "[lL][uU]?" ")"),
#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
    MACRO_DATA("LONGINTEGER_SUFFIX", "([uU](ll|LL)|(ll|LL)[uU]?|i64)"),
#else
    MACRO_DATA("LONGINTEGER_SUFFIX", "([uU](ll|LL)|(ll|LL)[uU]?)"),
#endif
    MACRO_DATA("FLOAT_SUFFIX", "(" "[fF][lL]?" OR "[lL][fF]?" ")"),
    MACRO_DATA("CHAR_SPEC", "L?"),
    MACRO_DATA("EXTCHAR_SPEC", "(" "[uU]" OR "u8" ")"),
    MACRO_DATA("BACKSLASH", "(" Q("\\") OR TRI(Q("/")) ")"),
    MACRO_DATA("ESCAPESEQ", "{BACKSLASH}([abfnrtv?'\"]|{BACKSLASH}|x{HEXDIGIT}+|{OCTALDIGIT}{1,3})"),
    MACRO_DATA("HEXQUAD", "{HEXDIGIT}{4}"),
    MACRO_DATA("UNIVERSALCHAR", "{BACKSLASH}(u{HEXQUAD}|U{HEXQUAD}{2})"),
    MACRO_DATA("POUNDDEF", "(" "#" OR TRI("=") OR Q("%:") ")"),
    MACRO_DATA("NEWLINEDEF", "(" "\\n" OR "\\r" OR "\\r\\n" ")"),
#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
    MACRO_DATA("INCLUDEDEF", "(include|include_next)"),
#else
    MACRO_DATA("INCLUDEDEF", "include"),
#endif
    MACRO_DATA("PP_NUMBERDEF", "\\.?{DIGIT}({DIGIT}|{NONDIGIT}|{EXPSTART}|\\.)*"),
    MACRO_DATA(NULL, NULL)      // should be the last entry
};

// common C++/C99 token definitions
template <typename Iterator, typename Position>
typename lexertl<Iterator, Position>::lexer_data const
lexertl<Iterator, Position>::init_data[INIT_DATA_SIZE] =
{
    TOKEN_DATA(T_AND, "&"),
    TOKEN_DATA(T_ANDAND, "&&"),
    TOKEN_DATA(T_ASSIGN, "="),
    TOKEN_DATA(T_ANDASSIGN, "&="),
    TOKEN_DATA(T_OR, Q("|")),
    TOKEN_DATA(T_OR_TRIGRAPH, "{TRI}!"),
    TOKEN_DATA(T_ORASSIGN, Q("|=")),
    TOKEN_DATA(T_ORASSIGN_TRIGRAPH, "{TRI}!="),
    TOKEN_DATA(T_XOR, Q("^")),
    TOKEN_DATA(T_XOR_TRIGRAPH, "{TRI}'"),
    TOKEN_DATA(T_XORASSIGN, Q("^=")),
    TOKEN_DATA(T_XORASSIGN_TRIGRAPH, "{TRI}'="),
    TOKEN_DATA(T_COMMA, ","),
    TOKEN_DATA(T_COLON, ":"),
    TOKEN_DATA(T_DIVIDEASSIGN, Q("/=")),
    TOKEN_DATA(T_DIVIDE, Q("/")),
    TOKEN_DATA(T_DOT, Q(".")),
    TOKEN_DATA(T_ELLIPSIS, Q(".") "{3}"),
    TOKEN_DATA(T_EQUAL, "=="),
    TOKEN_DATA(T_GREATER, ">"),
    TOKEN_DATA(T_GREATEREQUAL, ">="),
    TOKEN_DATA(T_LEFTBRACE, Q("{")),
    TOKEN_DATA(T_LEFTBRACE_ALT, "<" Q("%")),
    TOKEN_DATA(T_LEFTBRACE_TRIGRAPH, "{TRI}<"),
    TOKEN_DATA(T_LESS, "<"),
    TOKEN_DATA(T_LESSEQUAL, "<="),
    TOKEN_DATA(T_LEFTPAREN, Q("(")),
    TOKEN_DATA(T_LEFTBRACKET, Q("[")),
    TOKEN_DATA(T_LEFTBRACKET_ALT, "<:"),
    TOKEN_DATA(T_LEFTBRACKET_TRIGRAPH, "{TRI}" Q("(")),
    TOKEN_DATA(T_MINUS, Q("-")),
    TOKEN_DATA(T_MINUSASSIGN, Q("-=")),
    TOKEN_DATA(T_MINUSMINUS, Q("-") "{2}"),
    TOKEN_DATA(T_PERCENT, Q("%")),
    TOKEN_DATA(T_PERCENTASSIGN, Q("%=")),
    TOKEN_DATA(T_NOT, "!"),
    TOKEN_DATA(T_NOTEQUAL, "!="),
    TOKEN_DATA(T_OROR, Q("|") "{2}"),
    TOKEN_DATA(T_OROR_TRIGRAPH, "{TRI}!\\||\\|{TRI}!|{TRI}!{TRI}!"),
    TOKEN_DATA(T_PLUS, Q("+")),
    TOKEN_DATA(T_PLUSASSIGN, Q("+=")),
    TOKEN_DATA(T_PLUSPLUS, Q("+") "{2}"),
    TOKEN_DATA(T_ARROW, Q("->")),
    TOKEN_DATA(T_QUESTION_MARK, Q("?")),
    TOKEN_DATA(T_RIGHTBRACE, Q("}")),
    TOKEN_DATA(T_RIGHTBRACE_ALT, Q("%>")),
    TOKEN_DATA(T_RIGHTBRACE_TRIGRAPH, "{TRI}>"),
    TOKEN_DATA(T_RIGHTPAREN, Q(")")),
    TOKEN_DATA(T_RIGHTBRACKET, Q("]")),
    TOKEN_DATA(T_RIGHTBRACKET_ALT, ":>"),
    TOKEN_DATA(T_RIGHTBRACKET_TRIGRAPH, "{TRI}" Q(")")),
    TOKEN_DATA(T_SEMICOLON, ";"),
    TOKEN_DATA(T_SHIFTLEFT, "<<"),
    TOKEN_DATA(T_SHIFTLEFTASSIGN, "<<="),
    TOKEN_DATA(T_SHIFTRIGHT, ">>"),
    TOKEN_DATA(T_SHIFTRIGHTASSIGN, ">>="),
    TOKEN_DATA(T_STAR, Q("*")),
    TOKEN_DATA(T_COMPL, Q("~")),
    TOKEN_DATA(T_COMPL_TRIGRAPH, "{TRI}-"),
    TOKEN_DATA(T_STARASSIGN, Q("*=")),
    TOKEN_DATA(T_ASM, "asm"),
    TOKEN_DATA(T_AUTO, "auto"),
    TOKEN_DATA(T_BOOL, "bool"),
    TOKEN_DATA(T_FALSE, "false"),
    TOKEN_DATA(T_TRUE, "true"),
    TOKEN_DATA(T_BREAK, "break"),
    TOKEN_DATA(T_CASE, "case"),
    TOKEN_DATA(T_CATCH, "catch"),
    TOKEN_DATA(T_CHAR, "char"),
    TOKEN_DATA(T_CLASS, "class"),
    TOKEN_DATA(T_CONST, "const"),
    TOKEN_DATA(T_CONSTCAST, "const_cast"),
    TOKEN_DATA(T_CONTINUE, "continue"),
    TOKEN_DATA(T_DEFAULT, "default"),
    TOKEN_DATA(T_DELETE, "delete"),
    TOKEN_DATA(T_DO, "do"),
    TOKEN_DATA(T_DOUBLE, "double"),
    TOKEN_DATA(T_DYNAMICCAST, "dynamic_cast"),
    TOKEN_DATA(T_ELSE, "else"),
    TOKEN_DATA(T_ENUM, "enum"),
    TOKEN_DATA(T_EXPLICIT, "explicit"),
    TOKEN_DATA(T_EXPORT, "export"),
    TOKEN_DATA(T_EXTERN, "extern"),
    TOKEN_DATA(T_FLOAT, "float"),
    TOKEN_DATA(T_FOR, "for"),
    TOKEN_DATA(T_FRIEND, "friend"),
    TOKEN_DATA(T_GOTO, "goto"),
    TOKEN_DATA(T_IF, "if"),
    TOKEN_DATA(T_INLINE, "inline"),
    TOKEN_DATA(T_INT, "int"),
    TOKEN_DATA(T_LONG, "long"),
    TOKEN_DATA(T_MUTABLE, "mutable"),
    TOKEN_DATA(T_NAMESPACE, "namespace"),
    TOKEN_DATA(T_NEW, "new"),
    TOKEN_DATA(T_OPERATOR, "operator"),
    TOKEN_DATA(T_PRIVATE, "private"),
    TOKEN_DATA(T_PROTECTED, "protected"),
    TOKEN_DATA(T_PUBLIC, "public"),
    TOKEN_DATA(T_REGISTER, "register"),
    TOKEN_DATA(T_REINTERPRETCAST, "reinterpret_cast"),
    TOKEN_DATA(T_RETURN, "return"),
    TOKEN_DATA(T_SHORT, "short"),
    TOKEN_DATA(T_SIGNED, "signed"),
    TOKEN_DATA(T_SIZEOF, "sizeof"),
    TOKEN_DATA(T_STATIC, "static"),
    TOKEN_DATA(T_STATICCAST, "static_cast"),
    TOKEN_DATA(T_STRUCT, "struct"),
    TOKEN_DATA(T_SWITCH, "switch"),
    TOKEN_DATA(T_TEMPLATE, "template"),
    TOKEN_DATA(T_THIS, "this"),
    TOKEN_DATA(T_THROW, "throw"),
    TOKEN_DATA(T_TRY, "try"),
    TOKEN_DATA(T_TYPEDEF, "typedef"),
    TOKEN_DATA(T_TYPEID, "typeid"),
    TOKEN_DATA(T_TYPENAME, "typename"),
    TOKEN_DATA(T_UNION, "union"),
    TOKEN_DATA(T_UNSIGNED, "unsigned"),
    TOKEN_DATA(T_USING, "using"),
    TOKEN_DATA(T_VIRTUAL, "virtual"),
    TOKEN_DATA(T_VOID, "void"),
    TOKEN_DATA(T_VOLATILE, "volatile"),
    TOKEN_DATA(T_WCHART, "wchar_t"),
    TOKEN_DATA(T_WHILE, "while"),
    TOKEN_DATA(T_PP_DEFINE, "{POUNDDEF}{PPSPACE}define"),
    TOKEN_DATA(T_PP_IF, "{POUNDDEF}{PPSPACE}if"),
    TOKEN_DATA(T_PP_IFDEF, "{POUNDDEF}{PPSPACE}ifdef"),
    TOKEN_DATA(T_PP_IFNDEF, "{POUNDDEF}{PPSPACE}ifndef"),
    TOKEN_DATA(T_PP_ELSE, "{POUNDDEF}{PPSPACE}else"),
    TOKEN_DATA(T_PP_ELIF, "{POUNDDEF}{PPSPACE}elif"),
    TOKEN_DATA(T_PP_ENDIF, "{POUNDDEF}{PPSPACE}endif"),
    TOKEN_DATA(T_PP_ERROR, "{POUNDDEF}{PPSPACE}error"),
    TOKEN_DATA(T_PP_QHEADER, "{POUNDDEF}{PPSPACE}{INCLUDEDEF}{PPSPACE}" Q("\"") "[^\\n\\r\"]+" Q("\"")),
    TOKEN_DATA(T_PP_HHEADER, "{POUNDDEF}{PPSPACE}{INCLUDEDEF}{PPSPACE}" "<" "[^\\n\\r>]+" ">"),
    TOKEN_DATA(T_PP_INCLUDE, "{POUNDDEF}{PPSPACE}{INCLUDEDEF}{PPSPACE}"),
    TOKEN_DATA(T_PP_LINE, "{POUNDDEF}{PPSPACE}line"),
    TOKEN_DATA(T_PP_PRAGMA, "{POUNDDEF}{PPSPACE}pragma"),
    TOKEN_DATA(T_PP_UNDEF, "{POUNDDEF}{PPSPACE}undef"),
    TOKEN_DATA(T_PP_WARNING, "{POUNDDEF}{PPSPACE}warning"),
#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
    TOKEN_DATA(T_MSEXT_INT8, "__int8"),
    TOKEN_DATA(T_MSEXT_INT16, "__int16"),
    TOKEN_DATA(T_MSEXT_INT32, "__int32"),
    TOKEN_DATA(T_MSEXT_INT64, "__int64"),
    TOKEN_DATA(T_MSEXT_BASED, "_?" "_based"),
    TOKEN_DATA(T_MSEXT_DECLSPEC, "_?" "_declspec"),
    TOKEN_DATA(T_MSEXT_CDECL, "_?" "_cdecl"),
    TOKEN_DATA(T_MSEXT_FASTCALL, "_?" "_fastcall"),
    TOKEN_DATA(T_MSEXT_STDCALL, "_?" "_stdcall"),
    TOKEN_DATA(T_MSEXT_TRY, "__try"),
    TOKEN_DATA(T_MSEXT_EXCEPT, "__except"),
    TOKEN_DATA(T_MSEXT_FINALLY, "__finally"),
    TOKEN_DATA(T_MSEXT_LEAVE, "__leave"),
    TOKEN_DATA(T_MSEXT_INLINE, "_?" "_inline"),
    TOKEN_DATA(T_MSEXT_ASM, "_?" "_asm"),
    TOKEN_DATA(T_MSEXT_PP_REGION, "{POUNDDEF}{PPSPACE}region"),
    TOKEN_DATA(T_MSEXT_PP_ENDREGION, "{POUNDDEF}{PPSPACE}endregion"),
#endif // BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
    TOKEN_DATA(T_LONGINTLIT, "{INTEGER}{LONGINTEGER_SUFFIX}"),
    TOKEN_DATA(T_INTLIT, "{INTEGER}{INTEGER_SUFFIX}?"),
    TOKEN_DATA(T_FLOATLIT,
        "(" "{DIGIT}*" Q(".") "{DIGIT}+" OR "{DIGIT}+" Q(".") "){EXPONENT}?{FLOAT_SUFFIX}?" OR
        "{DIGIT}+{EXPONENT}{FLOAT_SUFFIX}?"),
#if BOOST_WAVE_USE_STRICT_LEXER != 0
    TOKEN_DATA(T_IDENTIFIER,
        "(" "{NONDIGIT}" OR "{UNIVERSALCHAR}" ")"
        "(" "{NONDIGIT}" OR "{DIGIT}" OR "{UNIVERSALCHAR}" ")*"),
#else
    TOKEN_DATA(T_IDENTIFIER,
        "(" "{NONDIGIT}" OR Q("$") OR "{UNIVERSALCHAR}" ")"
        "(" "{NONDIGIT}" OR Q("$") OR "{DIGIT}" OR "{UNIVERSALCHAR}" ")*"),
#endif
    TOKEN_DATA(T_CCOMMENT, "{CCOMMENT}"),
    TOKEN_DATA(T_CPPCOMMENT, Q("/") Q("/[^\\n\\r]*") "{NEWLINEDEF}"),
    TOKEN_DATA(T_CHARLIT,
        "{CHAR_SPEC}" "'" "({ESCAPESEQ}|[^\\n\\r']|{UNIVERSALCHAR})+" "'"),
    TOKEN_DATA(T_STRINGLIT,
        "{CHAR_SPEC}" Q("\"") "({ESCAPESEQ}|[^\\n\\r\"]|{UNIVERSALCHAR})*" Q("\"")),
    TOKEN_DATA(T_SPACE, "{BLANK}+"),
    TOKEN_DATA(T_CONTLINE, Q("\\") "\\n"),
    TOKEN_DATA(T_NEWLINE, "{NEWLINEDEF}"),
    TOKEN_DATA(T_POUND_POUND, "##"),
    TOKEN_DATA(T_POUND_POUND_ALT, Q("%:") Q("%:")),
    TOKEN_DATA(T_POUND_POUND_TRIGRAPH, "({TRI}=){2}"),
    TOKEN_DATA(T_POUND, "#"),
    TOKEN_DATA(T_POUND_ALT, Q("%:")),
    TOKEN_DATA(T_POUND_TRIGRAPH, "{TRI}="),
    TOKEN_DATA(T_ANY_TRIGRAPH, "{TRI}\\/"),
    TOKEN_DATA(T_ANY, "{ANY}"),
    TOKEN_DATA(T_ANYCTRL, "{ANYCTRL}"),     // this should be the last recognized token
    { token_id(0) }                         // this should be the last entry
};

// C++ only token definitions
template <typename Iterator, typename Position>
typename lexertl<Iterator, Position>::lexer_data const
lexertl<Iterator, Position>::init_data_cpp[INIT_DATA_CPP_SIZE] =
{
    TOKEN_DATA(T_AND_ALT, "bitand"),
    TOKEN_DATA(T_ANDASSIGN_ALT, "and_eq"),
    TOKEN_DATA(T_ANDAND_ALT, "and"),
    TOKEN_DATA(T_OR_ALT, "bitor"),
    TOKEN_DATA(T_ORASSIGN_ALT, "or_eq"),
    TOKEN_DATA(T_OROR_ALT, "or"),
    TOKEN_DATA(T_XORASSIGN_ALT, "xor_eq"),
    TOKEN_DATA(T_XOR_ALT, "xor"),
    TOKEN_DATA(T_NOTEQUAL_ALT, "not_eq"),
    TOKEN_DATA(T_NOT_ALT, "not"),
    TOKEN_DATA(T_COMPL_ALT, "compl"),
#if BOOST_WAVE_SUPPORT_IMPORT_KEYWORD != 0
    TOKEN_DATA(T_IMPORT, "import"),
#endif
    TOKEN_DATA(T_ARROWSTAR, Q("->") Q("*")),
    TOKEN_DATA(T_DOTSTAR, Q(".") Q("*")),
    TOKEN_DATA(T_COLON_COLON, "::"),
    { token_id(0) }     // this should be the last entry
};

// pp-number specific token definitions
template <typename Iterator, typename Position>
typename lexertl<Iterator, Position>::lexer_data const
lexertl<Iterator, Position>::init_data_pp_number[INIT_DATA_PP_NUMBER_SIZE] =
{
    TOKEN_DATA(T_PP_NUMBER, "{PP_NUMBERDEF}"),
    { token_id(0) }     // this should be the last entry
};

// C++11 specific token definitions

#define T_EXTCHARLIT      token_id(T_CHARLIT|AltTokenType)
#define T_EXTSTRINGLIT    token_id(T_STRINGLIT|AltTokenType)
#define T_EXTRAWSTRINGLIT token_id(T_RAWSTRINGLIT|AltTokenType)

template <typename Iterator, typename Position>
typename lexertl<Iterator, Position>::lexer_data const
lexertl<Iterator, Position>::init_data_cpp0x[INIT_DATA_CPP0X_SIZE] =
{
    TOKEN_DATA(T_EXTCHARLIT, "{EXTCHAR_SPEC}" "'"
        "(" "{ESCAPESEQ}" OR "{UNIVERSALCHAR}" OR "[^\\n\\r\\\\']" ")+" "'"),
    TOKEN_DATA(T_EXTSTRINGLIT, "{EXTCHAR_SPEC}" Q("\"")
        "(" "{ESCAPESEQ}" OR "{UNIVERSALCHAR}" OR "[^\\n\\r\\\\\"]" ")*" Q("\"")),
    TOKEN_DATA(T_RAWSTRINGLIT, "{CHAR_SPEC}" "R" Q("\"")
        "(" "{ESCAPESEQ}" OR "{UNIVERSALCHAR}" OR "[^\\\\\"]" ")*" Q("\"")),
    TOKEN_DATA(T_EXTRAWSTRINGLIT, "{EXTCHAR_SPEC}" "R" Q("\"")
        "(" "{ESCAPESEQ}" OR "{UNIVERSALCHAR}" OR "[^\\\\\"]" ")*" Q("\"")),
    TOKEN_DATA(T_ALIGNAS, "alignas"),
    TOKEN_DATA(T_ALIGNOF, "alignof"),
    TOKEN_DATA(T_CHAR16_T, "char16_t"),
    TOKEN_DATA(T_CHAR32_T, "char32_t"),
    TOKEN_DATA(T_CONSTEXPR, "constexpr"),
    TOKEN_DATA(T_DECLTYPE, "decltype"),
    TOKEN_DATA(T_NOEXCEPT, "noexcept"),
    TOKEN_DATA(T_NULLPTR, "nullptr"),
    TOKEN_DATA(T_STATICASSERT, "static_assert"),
    TOKEN_DATA(T_THREADLOCAL, "thread_local"),
    { token_id(0) }     // this should be the last entry
};

// C++20 specific token definitions

template <typename Iterator, typename Position>
typename lexertl<Iterator, Position>::lexer_data const
lexertl<Iterator, Position>::init_data_cpp2a[INIT_DATA_CPP2A_SIZE] =
{
    TOKEN_DATA(T_CHAR8_T, "char8_t"),
    TOKEN_DATA(T_CONCEPT, "concept"),
    TOKEN_DATA(T_CONSTEVAL, "consteval"),
    TOKEN_DATA(T_CONSTINIT, "constinit"),
    TOKEN_DATA(T_CO_AWAIT, "co_await"),
    TOKEN_DATA(T_CO_RETURN, "co_return"),
    TOKEN_DATA(T_CO_YIELD, "co_yield"),
    TOKEN_DATA(T_REQUIRES, "requires"),
    TOKEN_DATA(T_SPACESHIP, "<=>"),
    { token_id(0) }     // this should be the last entry
};

#undef MACRO_DATA
#undef TOKEN_DATA
#undef OR
#undef TRI
#undef Q

///////////////////////////////////////////////////////////////////////////////
// initialize the lexertl lexer from the C++ token regexes
template <typename Iterator, typename Position>
inline bool
lexertl<Iterator, Position>::init_dfa(wave::language_support lang,
    Position const& pos, bool force_reinit)
{
    if (has_compiled_dfa_)
        return true;

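    // Note: the generated DFA is cached in "wave_lexertl_lexer.dfa" in the
    // current working directory. If that file cannot be opened or loaded
    // (load() below is currently stubbed out and always returns false), the
    // state machine is rebuilt from the token tables and, if possible,
    // written back to the cache file.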
    std::ifstream dfa_in("wave_lexertl_lexer.dfa", std::ios::in|std::ios::binary);

    if (force_reinit || !dfa_in.is_open() || !load (dfa_in))
    {
        dfa_in.close();

        state_machine_.clear();

        // register macro definitions
        boost::lexer::rules rules;
        for (int k = 0; NULL != init_macro_data[k].name; ++k) {
            rules.add_macro(init_macro_data[k].name, init_macro_data[k].macro);
        }

        // if pp-numbers should be preferred, insert the corresponding rule first
        if (wave::need_prefer_pp_numbers(lang)) {
            for (int j = 0; 0 != init_data_pp_number[j].tokenid; ++j) {
                rules.add(init_data_pp_number[j].tokenregex,
                    init_data_pp_number[j].tokenid);
            }
        }

        // the C++-only keywords are not valid in C99 mode, so add them
        // only otherwise
        if (!wave::need_c99(lang)) {
            for (int j = 0; 0 != init_data_cpp[j].tokenid; ++j) {
                rules.add(init_data_cpp[j].tokenregex,
                    init_data_cpp[j].tokenid);
            }
        }

        // if in C++0x mode, add the appropriate keywords
#if BOOST_WAVE_SUPPORT_CPP0X != 0
        if (wave::need_cpp0x(lang) || wave::need_cpp2a(lang)) {
            for (int j = 0; 0 != init_data_cpp0x[j].tokenid; ++j) {
                rules.add(init_data_cpp0x[j].tokenregex,
                    init_data_cpp0x[j].tokenid);
            }
        }
#endif

        // if in C++2a mode, add those keywords
#if BOOST_WAVE_SUPPORT_CPP2A != 0
        if (wave::need_cpp2a(lang)) {
            for (int j = 0; 0 != init_data_cpp2a[j].tokenid; ++j) {
                rules.add(init_data_cpp2a[j].tokenregex,
                    init_data_cpp2a[j].tokenid);
            }
        }
#endif

        for (int i = 0; 0 != init_data[i].tokenid; ++i) {
            rules.add(init_data[i].tokenregex, init_data[i].tokenid);
        }

        // generate the minimized DFA
        try {
            boost::lexer::generator::build (rules, state_machine_);
            boost::lexer::generator::minimise (state_machine_);
        }
        catch (std::runtime_error const& e) {
            string_type msg("lexertl initialization error: ");
            msg += e.what();
            BOOST_WAVE_LEXER_THROW(wave::cpplexer::lexing_exception,
                unexpected_error, msg.c_str(),
                pos.get_line(), pos.get_column(), pos.get_file().c_str());
            return false;
        }

        std::ofstream dfa_out ("wave_lexertl_lexer.dfa",
            std::ios::out|std::ios::binary|std::ios::trunc);

        if (dfa_out.is_open())
            save (dfa_out);
    }

    has_compiled_dfa_ = true;
    return true;
}
#endif // BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0

///////////////////////////////////////////////////////////////////////////////
// return next token from the input stream
template <typename Iterator, typename Position>
inline wave::token_id
lexertl<Iterator, Position>::next_token(Iterator &first, Iterator const &last,
    string_type& token_value)
{
#if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
    size_t const* const lookup = &state_machine_.data()._lookup[0]->front ();
    size_t const dfa_alphabet = state_machine_.data()._dfa_alphabet[0];
    size_t const* dfa = &state_machine_.data()._dfa[0]->front();
#else
    // set up pointers from static data
    size_t const* lookup = lookup_;
    size_t const dfa_alphabet = dfa_alphabet_;
    size_t const* dfa = dfa_;
#endif // BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
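    // What the loop below assumes about the table layout: 'dfa' is organized
    // in rows of 'dfa_alphabet' entries, one row per DFA state, with the
    // start state's row beginning at offset dfa_alphabet. Within a row,
    // entry 0 is non-zero for accepting states, entry 1 holds the matched
    // token id, and 'lookup' maps an input character to the column whose
    // entry is the next state (0 meaning no transition).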
    size_t const* ptr = dfa + dfa_alphabet;

    Iterator curr = first;
    Iterator end_token = first;
    bool end_state = (*ptr != 0);
    size_t id = *(ptr + 1);

    while (curr != last) {
        size_t const state = ptr[lookup[int(*curr)]];
        if (0 == state)
            break;
        ++curr;

        ptr = &dfa[state * dfa_alphabet];

        if (0 != *ptr) {
            end_state = true;
            id = *(ptr + 1);
            end_token = curr;
        }
    }

    if (end_state) {
        if (T_ANY == id) {
            id = TOKEN_FROM_ID(*first, UnknownTokenType);
        }

        // return longest match
        string_type str(first, end_token);
        token_value.swap(str);
        first = end_token;
        return wave::token_id(id);
    }
    return T_EOF;
}

#if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
///////////////////////////////////////////////////////////////////////////////
// load the DFA tables from a stream
template <typename Iterator, typename Position>
inline bool
lexertl<Iterator, Position>::load (std::istream& instrm)
{
//  #if !defined(BOOST_WAVE_LEXERTL_GENERATE_CPP_CODE)
//  std::size_t version = 0;
//  boost::lexer::serialise::load_as_binary(instrm, state_machine_, version);
//  if (version != (std::size_t)get_compilation_time())
//      return false;      // too new for us
//  return instrm.good();
//  #else
    return false;   // always create the dfa when generating the C++ code
//  #endif
}

///////////////////////////////////////////////////////////////////////////////
// save the DFA tables to a stream
template <typename Iterator, typename Position>
inline bool
lexertl<Iterator, Position>::save (std::ostream& outstrm)
{
#if defined(BOOST_WAVE_LEXERTL_GENERATE_CPP_CODE)
    boost::lexer::generate_cpp(state_machine_, outstrm);
#else
//  boost::lexer::serialise::save_as_binary(state_machine_, outstrm,
//      (std::size_t)get_compilation_time());
#endif
    return outstrm.good();
}
#endif // #if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0

///////////////////////////////////////////////////////////////////////////////
}   // namespace lexer

///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Position = wave::util::file_position_type>
class lexertl_functor
:   public lexertl_input_interface<wave::cpplexer::lex_token<Position> >
{
public:
    typedef wave::util::position_iterator<Iterator, Position> iterator_type;
    typedef typename boost::iterators::iterator_value<Iterator>::type
        char_type;
    typedef BOOST_WAVE_STRINGTYPE string_type;
    typedef wave::cpplexer::lex_token<Position> token_type;

    lexertl_functor(Iterator const &first_, Iterator const &last_,
            Position const &pos_, wave::language_support language)
    :   first(first_, last_, pos_), language(language), at_eof(false)
    {
        lexer_.init_dfa(language, pos_);

#if BOOST_WAVE_LEXERTL_GENERATE_CPP_CODE != 0
        std::ofstream os("wave_lexertl_tables_next_token.hpp");
        // generates a next_token function with an incompatible interface
        // to lexertl::next_token(), but you can extract the necessary tables
        // and replace them manually:
        lexer_.save(os);
#endif
    }
    ~lexertl_functor() {}

    // get the next token from the input stream
    token_type& get(token_type& result) BOOST_OVERRIDE
    {
        if (lexer_.is_initialized() && !at_eof) {
            do {
                // generate and return the next token
                string_type token_val;
                Position pos = first.get_position();    // position at the start of the token
                wave::token_id id = lexer_.next_token(first, last, token_val);

                if (T_CONTLINE != id) {
                    // the cast avoids spurious warnings about missing case
                    // labels for the other token ids
                    switch (static_cast<unsigned int>(id)) {
                    case T_IDENTIFIER:
                        // test identifier characters for validity (throws if
                        // invalid chars found)
                        if (!wave::need_no_character_validation(language)) {
                            using wave::cpplexer::impl::validate_identifier_name;
                            validate_identifier_name(token_val,
                                pos.get_line(), pos.get_column(), pos.get_file());
                        }
                        break;

                    case T_STRINGLIT:
                    case T_CHARLIT:
                        // test literal characters for validity (throws if invalid
                        // chars found)
                        if (wave::need_convert_trigraphs(language)) {
                            using wave::cpplexer::impl::convert_trigraphs;
                            token_val = convert_trigraphs(token_val);
                        }
                        if (!wave::need_no_character_validation(language)) {
                            using wave::cpplexer::impl::validate_literal;
                            validate_literal(token_val,
                                pos.get_line(), pos.get_column(), pos.get_file());
                        }
                        break;

                    case T_LONGINTLIT:  // supported in C99 and long_long mode
                        if (!wave::need_long_long(language)) {
                            // syntax error: not allowed in C++ mode
                            BOOST_WAVE_LEXER_THROW(
                                wave::cpplexer::lexing_exception,
                                invalid_long_long_literal, token_val.c_str(),
                                pos.get_line(), pos.get_column(),
                                pos.get_file().c_str());
                        }
                        break;

                    case T_PP_HHEADER:
                    case T_PP_QHEADER:
                    case T_PP_INCLUDE:
                        // convert to the corresponding ..._next token, if appropriate
#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
                        {
                            // Skip '#' and whitespace and see whether we find an
                            // 'include_next' here.
                            typename string_type::size_type start = token_val.find("include");
                            if (0 == token_val.compare(start, 12, "include_next", 12))
                                id = token_id(id | AltTokenType);
                        }
#endif // BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
                        break;

                    case T_EOF:
                        // T_EOF is returned as a valid token, the next call will
                        // return T_EOI, i.e. the actual end of input
                        at_eof = true;
                        token_val.clear();
                        break;

                    case T_OR_TRIGRAPH:
                    case T_XOR_TRIGRAPH:
                    case T_LEFTBRACE_TRIGRAPH:
                    case T_RIGHTBRACE_TRIGRAPH:
                    case T_LEFTBRACKET_TRIGRAPH:
                    case T_RIGHTBRACKET_TRIGRAPH:
                    case T_COMPL_TRIGRAPH:
                    case T_POUND_TRIGRAPH:
                    case T_ANY_TRIGRAPH:
                        if (wave::need_convert_trigraphs(language))
                        {
                            using wave::cpplexer::impl::convert_trigraph;
                            token_val = convert_trigraph(token_val);
                        }
                        break;

                    case T_ANYCTRL:
                        // matched some unexpected character
                        {
                            // 21 is the max required size for a 64 bit integer
                            // represented as a string
                            char buffer[22];
                            string_type msg("invalid character in input stream: '0x");

                            // for some systems sprintf is in namespace std
                            using namespace std;
                            sprintf(buffer, "%02x'", token_val[0]);
                            msg += buffer;
                            BOOST_WAVE_LEXER_THROW(
                                wave::cpplexer::lexing_exception,
                                generic_lexing_error,
                                msg.c_str(), pos.get_line(), pos.get_column(),
                                pos.get_file().c_str());
                        }
                        break;
                    }

                    result = token_type(id, token_val, pos);
#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
                    return guards.detect_guard(result);
#else
                    return result;
#endif
                }
            } while (true);     // skip the T_CONTLINE token
        }
        return result = token_type();   // return T_EOI
    }

    void set_position(Position const &pos) BOOST_OVERRIDE
    {
        // set position has to change the file name and line number only
        first.get_position().set_file(pos.get_file());
        first.get_position().set_line(pos.get_line());
    }

#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
    bool has_include_guards(std::string& guard_name) const BOOST_OVERRIDE
        { return guards.detected(guard_name); }
#endif

private:
    iterator_type first;
    iterator_type last;

    wave::language_support language;
    bool at_eof;
#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
    include_guards<token_type> guards;
#endif

    static lexer::lexertl<iterator_type, Position> lexer_;
};

template <typename Iterator, typename Position>
lexer::lexertl<
        typename lexertl_functor<Iterator, Position>::iterator_type, Position>
    lexertl_functor<Iterator, Position>::lexer_;

#undef INIT_DATA_SIZE
#undef INIT_DATA_CPP_SIZE
#undef INIT_DATA_PP_NUMBER_SIZE
#undef INIT_MACRO_DATA_SIZE
#undef T_ANYCTRL

#undef T_EXTCHARLIT
#undef T_EXTSTRINGLIT
#undef T_EXTRAWSTRINGLIT

///////////////////////////////////////////////////////////////////////////////
//
//  The new_lexer_gen<>::new_lexer function (declared in lexertl_interface.hpp)
//  is defined inline if the lex_functor is not to be instantiated separately
//  from the lex_iterator.
//
//  Separate (explicit) instantiation helps to reduce compilation time.
//
///////////////////////////////////////////////////////////////////////////////

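///////////////////////////////////////////////////////////////////////////////
// Sketch (an assumption, not shipped with this sample): when
// BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION is enabled, a dedicated translation
// unit would provide the explicit instantiation for the concrete iterator
// type in use, along the lines of:
//
//     #include "lexertl/lexertl_lexer.hpp"   // this header
//     template struct boost::wave::cpplexer::lexertl::new_lexer_gen<
//         std::string::iterator>;
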
#if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0
#define BOOST_WAVE_FLEX_NEW_LEXER_INLINE
#else
#define BOOST_WAVE_FLEX_NEW_LEXER_INLINE inline
#endif

///////////////////////////////////////////////////////////////////////////////
//
//  The 'new_lexer' function allows the opaque generation of a new lexer object.
//  It is coupled to the iterator type, which makes it possible to decouple
//  the lexer/iterator configurations at compile time.
//
//  This function is declared in lexertl_interface.hpp, which is referenced
//  both by the source file calling the lexer and by the source file which
//  instantiates the lex_functor. It is defined here, however, so it gets
//  instantiated only while compiling the source file which instantiates the
//  lex_functor. While lexertl_interface.hpp may be included everywhere, this
//  file (lexertl_lexer.hpp) should be included only once. This decouples the
//  lexer interface from the lexer implementation and reduces compilation time.
//
///////////////////////////////////////////////////////////////////////////////

template <typename Iterator, typename Position>
BOOST_WAVE_FLEX_NEW_LEXER_INLINE
wave::cpplexer::lex_input_interface<wave::cpplexer::lex_token<Position> > *
new_lexer_gen<Iterator, Position>::new_lexer(Iterator const &first,
    Iterator const &last, Position const &pos, wave::language_support language)
{
    return new lexertl_functor<Iterator, Position>(first, last, pos, language);
}

#undef BOOST_WAVE_FLEX_NEW_LEXER_INLINE

///////////////////////////////////////////////////////////////////////////////
}}}}    // namespace boost::wave::cpplexer::lexertl

#endif // !defined(BOOST_WAVE_LEXERTL_LEXER_HPP_INCLUDED)
