/*=============================================================================
    Boost.Wave: A Standard compliant C++ preprocessor library
    http://www.boost.org/

    Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost
    Software License, Version 1.0. (See accompanying file
    LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/

#if !defined(BOOST_WAVE_LEXERTL_LEXER_HPP_INCLUDED)
#define BOOST_WAVE_LEXERTL_LEXER_HPP_INCLUDED

#include <fstream>

#include <boost/iterator/iterator_traits.hpp>

#include <boost/wave/wave_config.hpp>
#include <boost/wave/language_support.hpp>
#include <boost/wave/token_ids.hpp>
#include <boost/wave/util/time_conversion_helper.hpp>

#include <boost/wave/cpplexer/validate_universal_char.hpp>
#include <boost/wave/cpplexer/convert_trigraphs.hpp>
#include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
#include <boost/wave/cpplexer/detect_include_guards.hpp>
#endif

#include "wave_lexertl_config.hpp"
#include "../lexertl_iterator.hpp"

#if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES != 0
#include "wave_lexertl_tables.hpp"
#else
#include <boost/spirit/home/support/detail/lexer/generator.hpp>
#include <boost/spirit/home/support/detail/lexer/rules.hpp>
#include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
#include <boost/spirit/home/support/detail/lexer/consts.hpp>
//#include "lexertl/examples/serialise.hpp"
#if BOOST_WAVE_LEXERTL_GENERATE_CPP_CODE != 0
#include <boost/spirit/home/support/detail/lexer/generate_cpp.hpp>
#endif
#endif

///////////////////////////////////////////////////////////////////////////////
namespace boost { namespace wave { namespace cpplexer { namespace lexertl
{

#if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
///////////////////////////////////////////////////////////////////////////////
// The following numbers are the array sizes of the token regexes, which we
// need to specify explicitly to keep the CW compiler happy (at least up to V9.5).
#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
#define INIT_DATA_SIZE 176
#else
#define INIT_DATA_SIZE 159
#endif
#define INIT_DATA_CPP_SIZE 15
#define INIT_DATA_PP_NUMBER_SIZE 2
#define INIT_DATA_CPP0X_SIZE 15
#define INIT_DATA_CPP2A_SIZE 10
#define INIT_MACRO_DATA_SIZE 28
#endif // #if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0

// this is just a hack to have a unique token id not otherwise used by Wave
#define T_ANYCTRL T_LAST_TOKEN_ID

///////////////////////////////////////////////////////////////////////////////
namespace lexer
{

///////////////////////////////////////////////////////////////////////////////
// this is the wrapper for the lexertl lexer library
template <typename Iterator, typename Position>
class lexertl
{
private:
    typedef BOOST_WAVE_STRINGTYPE string_type;
    typedef typename boost::iterators::iterator_value<Iterator>::type
        char_type;

public:
    wave::token_id next_token(Iterator &first, Iterator const &last,
        string_type& token_value);

#if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES != 0
    lexertl() {}
    void init_dfa(wave::language_support lang, Position const& pos,
        bool force_reinit = false) {}
    bool is_initialized() const { return true; }
#else
    lexertl() : has_compiled_dfa_(false) {}
    bool init_dfa(wave::language_support lang, Position const& pos,
        bool force_reinit = false);
    bool is_initialized() const { return has_compiled_dfa_; }

    // get time of last compilation
    static std::time_t get_compilation_time()
        { return compilation_time.get_time(); }

    bool load (std::istream& instrm);
    bool save (std::ostream& outstrm);

private:
    boost::lexer::state_machine state_machine_;
    bool has_compiled_dfa_;

    // initialization data (regular expressions for the token definitions)
    struct lexer_macro_data {
        char_type const *name;      // macro name
        char_type const *macro;     // associated macro definition
    };
    static lexer_macro_data const init_macro_data[INIT_MACRO_DATA_SIZE];    // macro patterns

    struct lexer_data {
        token_id tokenid;               // token id
        char_type const *tokenregex;    // regex matching this token
    };
    static lexer_data const init_data[INIT_DATA_SIZE];                      // common patterns
    static lexer_data const init_data_cpp[INIT_DATA_CPP_SIZE];              // C++ only patterns
    static lexer_data const init_data_pp_number[INIT_DATA_PP_NUMBER_SIZE];  // pp-number only patterns
    static lexer_data const init_data_cpp0x[INIT_DATA_CPP0X_SIZE];          // C++0X only patterns
    static lexer_data const init_data_cpp2a[INIT_DATA_CPP2A_SIZE];          // C++2A only patterns

    // helper for calculation of the time of last compilation
    static boost::wave::util::time_conversion_helper compilation_time;
#endif // #if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
};
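
///////////////////////////////////////////////////////////////////////////////
// Usage sketch (illustration only, not part of the sample): the wrapper above
// is driven by handing it a pair of character iterators and collecting tokens
// until T_EOF. The concrete iterator and position types below are assumptions
// chosen just for this example; lexertl_functor further down wires this up
// for real.
//
//     lexertl<std::string::const_iterator,
//         boost::wave::util::file_position_type> lex;
//     lex.init_dfa(boost::wave::support_cpp,
//         boost::wave::util::file_position_type("<input>"));
//
//     std::string const input("int i = 0;\n");
//     std::string::const_iterator it = input.begin();
//     BOOST_WAVE_STRINGTYPE value;
//     for (;;) {
//         boost::wave::token_id id = lex.next_token(it, input.end(), value);
//         if (boost::wave::T_EOF == id)
//             break;
//         // ... use id and value
//     }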

#if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
///////////////////////////////////////////////////////////////////////////////
// get time of last compilation of this file
template <typename IteratorT, typename PositionT>
boost::wave::util::time_conversion_helper
    lexertl<IteratorT, PositionT>::compilation_time(__DATE__ " " __TIME__);

///////////////////////////////////////////////////////////////////////////////
// token regex definitions

// helper for initializing token data and macro definitions
#define Q(c) "\\" c
#define TRI(c) "{TRI}" c
#define OR "|"
#define MACRO_DATA(name, macro) { name, macro }
#define TOKEN_DATA(id, regex) { id, regex }
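
// A note on the helpers above (illustrative): Q("|=") expands to the adjacent
// string literals "\\" "|=", i.e. the regex text \|= (a quoted operator);
// TRI("=") expands to "{TRI}=", the {TRI} lexertl macro (which matches "??")
// followed by '='; and OR is simply the regex alternation bar. MACRO_DATA and
// TOKEN_DATA merely build the aggregate initializers used in the tables below.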

// lexertl macro definitions
template <typename Iterator, typename Position>
typename lexertl<Iterator, Position>::lexer_macro_data const
lexertl<Iterator, Position>::init_macro_data[INIT_MACRO_DATA_SIZE] =
{
    MACRO_DATA("ANY", "[\t\v\f\r\n\\040-\\377]"),
    MACRO_DATA("ANYCTRL", "[\\000-\\037]"),
    MACRO_DATA("TRI", "\\?\\?"),
    MACRO_DATA("BLANK", "[ \t\v\f]"),
    MACRO_DATA("CCOMMENT", "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"),
    MACRO_DATA("PPSPACE", "(" "{BLANK}" OR "{CCOMMENT}" ")*"),
    MACRO_DATA("OCTALDIGIT", "[0-7]"),
    MACRO_DATA("DIGIT", "[0-9]"),
    MACRO_DATA("HEXDIGIT", "[0-9a-fA-F]"),
    MACRO_DATA("OPTSIGN", "[-+]?"),
    MACRO_DATA("EXPSTART", "[eE][-+]"),
    MACRO_DATA("EXPONENT", "([eE]{OPTSIGN}{DIGIT}+)"),
    MACRO_DATA("NONDIGIT", "[a-zA-Z_]"),
    MACRO_DATA("INTEGER", "(" "(0x|0X){HEXDIGIT}+" OR "0{OCTALDIGIT}*" OR "[1-9]{DIGIT}*" ")"),
    MACRO_DATA("INTEGER_SUFFIX", "(" "[uU][lL]?" OR "[lL][uU]?" ")"),
#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
    MACRO_DATA("LONGINTEGER_SUFFIX", "([uU](ll|LL)|(ll|LL)[uU]?|i64)"),
#else
    MACRO_DATA("LONGINTEGER_SUFFIX", "([uU](ll|LL)|(ll|LL)[uU]?)"),
#endif
    MACRO_DATA("FLOAT_SUFFIX", "(" "[fF][lL]?" OR "[lL][fF]?" ")"),
    MACRO_DATA("CHAR_SPEC", "L?"),
    MACRO_DATA("EXTCHAR_SPEC", "(" "[uU]" OR "u8" ")"),
    MACRO_DATA("BACKSLASH", "(" Q("\\") OR TRI(Q("/")) ")"),
    MACRO_DATA("ESCAPESEQ", "{BACKSLASH}([abfnrtv?'\"]|{BACKSLASH}|x{HEXDIGIT}+|{OCTALDIGIT}{1,3})"),
    MACRO_DATA("HEXQUAD", "{HEXDIGIT}{4}"),
    MACRO_DATA("UNIVERSALCHAR", "{BACKSLASH}(u{HEXQUAD}|U{HEXQUAD}{2})"),
    MACRO_DATA("POUNDDEF", "(" "#" OR TRI("=") OR Q("%:") ")"),
    MACRO_DATA("NEWLINEDEF", "(" "\\n" OR "\\r" OR "\\r\\n" ")"),
#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
    MACRO_DATA("INCLUDEDEF", "(include|include_next)"),
#else
    MACRO_DATA("INCLUDEDEF", "include"),
#endif
    MACRO_DATA("PP_NUMBERDEF", "\\.?{DIGIT}({DIGIT}|{NONDIGIT}|{EXPSTART}|\\.)*"),
    MACRO_DATA(NULL, NULL)      // should be the last entry
};

// common C++/C99 token definitions
template <typename Iterator, typename Position>
typename lexertl<Iterator, Position>::lexer_data const
lexertl<Iterator, Position>::init_data[INIT_DATA_SIZE] =
{
    TOKEN_DATA(T_AND, "&"),
    TOKEN_DATA(T_ANDAND, "&&"),
    TOKEN_DATA(T_ASSIGN, "="),
    TOKEN_DATA(T_ANDASSIGN, "&="),
    TOKEN_DATA(T_OR, Q("|")),
    TOKEN_DATA(T_OR_TRIGRAPH, "{TRI}!"),
    TOKEN_DATA(T_ORASSIGN, Q("|=")),
    TOKEN_DATA(T_ORASSIGN_TRIGRAPH, "{TRI}!="),
    TOKEN_DATA(T_XOR, Q("^")),
    TOKEN_DATA(T_XOR_TRIGRAPH, "{TRI}'"),
    TOKEN_DATA(T_XORASSIGN, Q("^=")),
    TOKEN_DATA(T_XORASSIGN_TRIGRAPH, "{TRI}'="),
    TOKEN_DATA(T_COMMA, ","),
    TOKEN_DATA(T_COLON, ":"),
    TOKEN_DATA(T_DIVIDEASSIGN, Q("/=")),
    TOKEN_DATA(T_DIVIDE, Q("/")),
    TOKEN_DATA(T_DOT, Q(".")),
    TOKEN_DATA(T_ELLIPSIS, Q(".") "{3}"),
    TOKEN_DATA(T_EQUAL, "=="),
    TOKEN_DATA(T_GREATER, ">"),
    TOKEN_DATA(T_GREATEREQUAL, ">="),
    TOKEN_DATA(T_LEFTBRACE, Q("{")),
    TOKEN_DATA(T_LEFTBRACE_ALT, "<" Q("%")),
    TOKEN_DATA(T_LEFTBRACE_TRIGRAPH, "{TRI}<"),
    TOKEN_DATA(T_LESS, "<"),
    TOKEN_DATA(T_LESSEQUAL, "<="),
    TOKEN_DATA(T_LEFTPAREN, Q("(")),
    TOKEN_DATA(T_LEFTBRACKET, Q("[")),
    TOKEN_DATA(T_LEFTBRACKET_ALT, "<:"),
    TOKEN_DATA(T_LEFTBRACKET_TRIGRAPH, "{TRI}" Q("(")),
    TOKEN_DATA(T_MINUS, Q("-")),
    TOKEN_DATA(T_MINUSASSIGN, Q("-=")),
    TOKEN_DATA(T_MINUSMINUS, Q("-") "{2}"),
    TOKEN_DATA(T_PERCENT, Q("%")),
    TOKEN_DATA(T_PERCENTASSIGN, Q("%=")),
    TOKEN_DATA(T_NOT, "!"),
    TOKEN_DATA(T_NOTEQUAL, "!="),
    TOKEN_DATA(T_OROR, Q("|") "{2}"),
    TOKEN_DATA(T_OROR_TRIGRAPH, "{TRI}!\\||\\|{TRI}!|{TRI}!{TRI}!"),
    TOKEN_DATA(T_PLUS, Q("+")),
    TOKEN_DATA(T_PLUSASSIGN, Q("+=")),
    TOKEN_DATA(T_PLUSPLUS, Q("+") "{2}"),
    TOKEN_DATA(T_ARROW, Q("->")),
    TOKEN_DATA(T_QUESTION_MARK, Q("?")),
    TOKEN_DATA(T_RIGHTBRACE, Q("}")),
    TOKEN_DATA(T_RIGHTBRACE_ALT, Q("%>")),
    TOKEN_DATA(T_RIGHTBRACE_TRIGRAPH, "{TRI}>"),
    TOKEN_DATA(T_RIGHTPAREN, Q(")")),
    TOKEN_DATA(T_RIGHTBRACKET, Q("]")),
    TOKEN_DATA(T_RIGHTBRACKET_ALT, ":>"),
    TOKEN_DATA(T_RIGHTBRACKET_TRIGRAPH, "{TRI}" Q(")")),
    TOKEN_DATA(T_SEMICOLON, ";"),
    TOKEN_DATA(T_SHIFTLEFT, "<<"),
    TOKEN_DATA(T_SHIFTLEFTASSIGN, "<<="),
    TOKEN_DATA(T_SHIFTRIGHT, ">>"),
    TOKEN_DATA(T_SHIFTRIGHTASSIGN, ">>="),
    TOKEN_DATA(T_STAR, Q("*")),
    TOKEN_DATA(T_COMPL, Q("~")),
    TOKEN_DATA(T_COMPL_TRIGRAPH, "{TRI}-"),
    TOKEN_DATA(T_STARASSIGN, Q("*=")),
    TOKEN_DATA(T_ASM, "asm"),
    TOKEN_DATA(T_AUTO, "auto"),
    TOKEN_DATA(T_BOOL, "bool"),
    TOKEN_DATA(T_FALSE, "false"),
    TOKEN_DATA(T_TRUE, "true"),
    TOKEN_DATA(T_BREAK, "break"),
    TOKEN_DATA(T_CASE, "case"),
    TOKEN_DATA(T_CATCH, "catch"),
    TOKEN_DATA(T_CHAR, "char"),
    TOKEN_DATA(T_CLASS, "class"),
    TOKEN_DATA(T_CONST, "const"),
    TOKEN_DATA(T_CONSTCAST, "const_cast"),
    TOKEN_DATA(T_CONTINUE, "continue"),
    TOKEN_DATA(T_DEFAULT, "default"),
    TOKEN_DATA(T_DELETE, "delete"),
    TOKEN_DATA(T_DO, "do"),
    TOKEN_DATA(T_DOUBLE, "double"),
    TOKEN_DATA(T_DYNAMICCAST, "dynamic_cast"),
    TOKEN_DATA(T_ELSE, "else"),
    TOKEN_DATA(T_ENUM, "enum"),
    TOKEN_DATA(T_EXPLICIT, "explicit"),
    TOKEN_DATA(T_EXPORT, "export"),
    TOKEN_DATA(T_EXTERN, "extern"),
    TOKEN_DATA(T_FLOAT, "float"),
    TOKEN_DATA(T_FOR, "for"),
    TOKEN_DATA(T_FRIEND, "friend"),
    TOKEN_DATA(T_GOTO, "goto"),
    TOKEN_DATA(T_IF, "if"),
    TOKEN_DATA(T_INLINE, "inline"),
    TOKEN_DATA(T_INT, "int"),
    TOKEN_DATA(T_LONG, "long"),
    TOKEN_DATA(T_MUTABLE, "mutable"),
    TOKEN_DATA(T_NAMESPACE, "namespace"),
    TOKEN_DATA(T_NEW, "new"),
    TOKEN_DATA(T_OPERATOR, "operator"),
    TOKEN_DATA(T_PRIVATE, "private"),
    TOKEN_DATA(T_PROTECTED, "protected"),
    TOKEN_DATA(T_PUBLIC, "public"),
    TOKEN_DATA(T_REGISTER, "register"),
    TOKEN_DATA(T_REINTERPRETCAST, "reinterpret_cast"),
    TOKEN_DATA(T_RETURN, "return"),
    TOKEN_DATA(T_SHORT, "short"),
    TOKEN_DATA(T_SIGNED, "signed"),
    TOKEN_DATA(T_SIZEOF, "sizeof"),
    TOKEN_DATA(T_STATIC, "static"),
    TOKEN_DATA(T_STATICCAST, "static_cast"),
    TOKEN_DATA(T_STRUCT, "struct"),
    TOKEN_DATA(T_SWITCH, "switch"),
    TOKEN_DATA(T_TEMPLATE, "template"),
    TOKEN_DATA(T_THIS, "this"),
    TOKEN_DATA(T_THROW, "throw"),
    TOKEN_DATA(T_TRY, "try"),
    TOKEN_DATA(T_TYPEDEF, "typedef"),
    TOKEN_DATA(T_TYPEID, "typeid"),
    TOKEN_DATA(T_TYPENAME, "typename"),
    TOKEN_DATA(T_UNION, "union"),
    TOKEN_DATA(T_UNSIGNED, "unsigned"),
    TOKEN_DATA(T_USING, "using"),
    TOKEN_DATA(T_VIRTUAL, "virtual"),
    TOKEN_DATA(T_VOID, "void"),
    TOKEN_DATA(T_VOLATILE, "volatile"),
    TOKEN_DATA(T_WCHART, "wchar_t"),
    TOKEN_DATA(T_WHILE, "while"),
    TOKEN_DATA(T_PP_DEFINE, "{POUNDDEF}{PPSPACE}define"),
    TOKEN_DATA(T_PP_IF, "{POUNDDEF}{PPSPACE}if"),
    TOKEN_DATA(T_PP_IFDEF, "{POUNDDEF}{PPSPACE}ifdef"),
    TOKEN_DATA(T_PP_IFNDEF, "{POUNDDEF}{PPSPACE}ifndef"),
    TOKEN_DATA(T_PP_ELSE, "{POUNDDEF}{PPSPACE}else"),
    TOKEN_DATA(T_PP_ELIF, "{POUNDDEF}{PPSPACE}elif"),
    TOKEN_DATA(T_PP_ENDIF, "{POUNDDEF}{PPSPACE}endif"),
    TOKEN_DATA(T_PP_ERROR, "{POUNDDEF}{PPSPACE}error"),
    TOKEN_DATA(T_PP_QHEADER, "{POUNDDEF}{PPSPACE}{INCLUDEDEF}{PPSPACE}" Q("\"") "[^\\n\\r\"]+" Q("\"")),
    TOKEN_DATA(T_PP_HHEADER, "{POUNDDEF}{PPSPACE}{INCLUDEDEF}{PPSPACE}" "<" "[^\\n\\r>]+" ">"),
    TOKEN_DATA(T_PP_INCLUDE, "{POUNDDEF}{PPSPACE}{INCLUDEDEF}{PPSPACE}"),
    TOKEN_DATA(T_PP_LINE, "{POUNDDEF}{PPSPACE}line"),
    TOKEN_DATA(T_PP_PRAGMA, "{POUNDDEF}{PPSPACE}pragma"),
    TOKEN_DATA(T_PP_UNDEF, "{POUNDDEF}{PPSPACE}undef"),
    TOKEN_DATA(T_PP_WARNING, "{POUNDDEF}{PPSPACE}warning"),
#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
    TOKEN_DATA(T_MSEXT_INT8, "__int8"),
    TOKEN_DATA(T_MSEXT_INT16, "__int16"),
    TOKEN_DATA(T_MSEXT_INT32, "__int32"),
    TOKEN_DATA(T_MSEXT_INT64, "__int64"),
    TOKEN_DATA(T_MSEXT_BASED, "_?" "_based"),
    TOKEN_DATA(T_MSEXT_DECLSPEC, "_?" "_declspec"),
    TOKEN_DATA(T_MSEXT_CDECL, "_?" "_cdecl"),
    TOKEN_DATA(T_MSEXT_FASTCALL, "_?" "_fastcall"),
    TOKEN_DATA(T_MSEXT_STDCALL, "_?" "_stdcall"),
    TOKEN_DATA(T_MSEXT_TRY, "__try"),
    TOKEN_DATA(T_MSEXT_EXCEPT, "__except"),
    TOKEN_DATA(T_MSEXT_FINALLY, "__finally"),
    TOKEN_DATA(T_MSEXT_LEAVE, "__leave"),
    TOKEN_DATA(T_MSEXT_INLINE, "_?" "_inline"),
    TOKEN_DATA(T_MSEXT_ASM, "_?" "_asm"),
    TOKEN_DATA(T_MSEXT_PP_REGION, "{POUNDDEF}{PPSPACE}region"),
    TOKEN_DATA(T_MSEXT_PP_ENDREGION, "{POUNDDEF}{PPSPACE}endregion"),
#endif // BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
    TOKEN_DATA(T_LONGINTLIT, "{INTEGER}{LONGINTEGER_SUFFIX}"),
    TOKEN_DATA(T_INTLIT, "{INTEGER}{INTEGER_SUFFIX}?"),
    TOKEN_DATA(T_FLOATLIT,
        "(" "{DIGIT}*" Q(".") "{DIGIT}+" OR "{DIGIT}+" Q(".") "){EXPONENT}?{FLOAT_SUFFIX}?" OR
        "{DIGIT}+{EXPONENT}{FLOAT_SUFFIX}?"),
#if BOOST_WAVE_USE_STRICT_LEXER != 0
    TOKEN_DATA(T_IDENTIFIER,
        "(" "{NONDIGIT}" OR "{UNIVERSALCHAR}" ")"
        "(" "{NONDIGIT}" OR "{DIGIT}" OR "{UNIVERSALCHAR}" ")*"),
#else
    TOKEN_DATA(T_IDENTIFIER,
        "(" "{NONDIGIT}" OR Q("$") OR "{UNIVERSALCHAR}" ")"
        "(" "{NONDIGIT}" OR Q("$") OR "{DIGIT}" OR "{UNIVERSALCHAR}" ")*"),
#endif
    TOKEN_DATA(T_CCOMMENT, "{CCOMMENT}"),
    TOKEN_DATA(T_CPPCOMMENT, Q("/") Q("/[^\\n\\r]*") "{NEWLINEDEF}"),
    TOKEN_DATA(T_CHARLIT,
        "{CHAR_SPEC}" "'" "({ESCAPESEQ}|[^\\n\\r']|{UNIVERSALCHAR})+" "'"),
    TOKEN_DATA(T_STRINGLIT,
        "{CHAR_SPEC}" Q("\"") "({ESCAPESEQ}|[^\\n\\r\"]|{UNIVERSALCHAR})*" Q("\"")),
    TOKEN_DATA(T_SPACE, "{BLANK}+"),
    TOKEN_DATA(T_CONTLINE, Q("\\") "\\n"),
    TOKEN_DATA(T_NEWLINE, "{NEWLINEDEF}"),
    TOKEN_DATA(T_POUND_POUND, "##"),
    TOKEN_DATA(T_POUND_POUND_ALT, Q("%:") Q("%:")),
    TOKEN_DATA(T_POUND_POUND_TRIGRAPH, "({TRI}=){2}"),
    TOKEN_DATA(T_POUND, "#"),
    TOKEN_DATA(T_POUND_ALT, Q("%:")),
    TOKEN_DATA(T_POUND_TRIGRAPH, "{TRI}="),
    TOKEN_DATA(T_ANY_TRIGRAPH, "{TRI}\\/"),
    TOKEN_DATA(T_ANY, "{ANY}"),
    TOKEN_DATA(T_ANYCTRL, "{ANYCTRL}"),     // this should be the last recognized token
    { token_id(0) }                         // this should be the last entry
};

// C++ only token definitions
template <typename Iterator, typename Position>
typename lexertl<Iterator, Position>::lexer_data const
lexertl<Iterator, Position>::init_data_cpp[INIT_DATA_CPP_SIZE] =
{
    TOKEN_DATA(T_AND_ALT, "bitand"),
    TOKEN_DATA(T_ANDASSIGN_ALT, "and_eq"),
    TOKEN_DATA(T_ANDAND_ALT, "and"),
    TOKEN_DATA(T_OR_ALT, "bitor"),
    TOKEN_DATA(T_ORASSIGN_ALT, "or_eq"),
    TOKEN_DATA(T_OROR_ALT, "or"),
    TOKEN_DATA(T_XORASSIGN_ALT, "xor_eq"),
    TOKEN_DATA(T_XOR_ALT, "xor"),
    TOKEN_DATA(T_NOTEQUAL_ALT, "not_eq"),
    TOKEN_DATA(T_NOT_ALT, "not"),
    TOKEN_DATA(T_COMPL_ALT, "compl"),
#if BOOST_WAVE_SUPPORT_IMPORT_KEYWORD != 0
    TOKEN_DATA(T_IMPORT, "import"),
#endif
    TOKEN_DATA(T_ARROWSTAR, Q("->") Q("*")),
    TOKEN_DATA(T_DOTSTAR, Q(".") Q("*")),
    TOKEN_DATA(T_COLON_COLON, "::"),
    { token_id(0) }     // this should be the last entry
};

// pp-number specific token definitions
template <typename Iterator, typename Position>
typename lexertl<Iterator, Position>::lexer_data const
lexertl<Iterator, Position>::init_data_pp_number[INIT_DATA_PP_NUMBER_SIZE] =
{
    TOKEN_DATA(T_PP_NUMBER, "{PP_NUMBERDEF}"),
    { token_id(0) }     // this should be the last entry
};

// C++11 specific token definitions

#define T_EXTCHARLIT      token_id(T_CHARLIT|AltTokenType)
#define T_EXTSTRINGLIT    token_id(T_STRINGLIT|AltTokenType)
#define T_EXTRAWSTRINGLIT token_id(T_RAWSTRINGLIT|AltTokenType)

template <typename Iterator, typename Position>
typename lexertl<Iterator, Position>::lexer_data const
lexertl<Iterator, Position>::init_data_cpp0x[INIT_DATA_CPP0X_SIZE] =
{
    TOKEN_DATA(T_EXTCHARLIT, "{EXTCHAR_SPEC}" "'"
        "(" "{ESCAPESEQ}" OR "{UNIVERSALCHAR}" OR "[^\\n\\r\\\\']" ")+" "'"),
    TOKEN_DATA(T_EXTSTRINGLIT, "{EXTCHAR_SPEC}" Q("\"")
        "(" "{ESCAPESEQ}" OR "{UNIVERSALCHAR}" OR "[^\\n\\r\\\\\"]" ")*" Q("\"")),
    TOKEN_DATA(T_RAWSTRINGLIT, "{CHAR_SPEC}" "R" Q("\"")
        "(" "{ESCAPESEQ}" OR "{UNIVERSALCHAR}" OR "[^\\\\\"]" ")*" Q("\"")),
    TOKEN_DATA(T_EXTRAWSTRINGLIT, "{EXTCHAR_SPEC}" "R" Q("\"")
        "(" "{ESCAPESEQ}" OR "{UNIVERSALCHAR}" OR "[^\\\\\"]" ")*" Q("\"")),
    TOKEN_DATA(T_ALIGNAS, "alignas"),
    TOKEN_DATA(T_ALIGNOF, "alignof"),
    TOKEN_DATA(T_CHAR16_T, "char16_t"),
    TOKEN_DATA(T_CHAR32_T, "char32_t"),
    TOKEN_DATA(T_CONSTEXPR, "constexpr"),
    TOKEN_DATA(T_DECLTYPE, "decltype"),
    TOKEN_DATA(T_NOEXCEPT, "noexcept"),
    TOKEN_DATA(T_NULLPTR, "nullptr"),
    TOKEN_DATA(T_STATICASSERT, "static_assert"),
    TOKEN_DATA(T_THREADLOCAL, "thread_local"),
    { token_id(0) }     // this should be the last entry
};

// C++20 specific token definitions

template <typename Iterator, typename Position>
typename lexertl<Iterator, Position>::lexer_data const
lexertl<Iterator, Position>::init_data_cpp2a[INIT_DATA_CPP2A_SIZE] =
{
    TOKEN_DATA(T_CHAR8_T, "char8_t"),
    TOKEN_DATA(T_CONCEPT, "concept"),
    TOKEN_DATA(T_CONSTEVAL, "consteval"),
    TOKEN_DATA(T_CONSTINIT, "constinit"),
    TOKEN_DATA(T_CO_AWAIT, "co_await"),
    TOKEN_DATA(T_CO_RETURN, "co_return"),
    TOKEN_DATA(T_CO_YIELD, "co_yield"),
    TOKEN_DATA(T_REQUIRES, "requires"),
    TOKEN_DATA(T_SPACESHIP, "<=>"),
    { token_id(0) }     // this should be the last entry
};

#undef MACRO_DATA
#undef TOKEN_DATA
#undef OR
#undef TRI
#undef Q

///////////////////////////////////////////////////////////////////////////////
// initialize the lexertl lexer from the C++ token regexes
template <typename Iterator, typename Position>
inline bool
lexertl<Iterator, Position>::init_dfa(wave::language_support lang,
    Position const& pos, bool force_reinit)
{
    if (has_compiled_dfa_)
        return true;

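    // Note: the generated DFA is cached in "wave_lexertl_lexer.dfa" in the
    // current working directory. If that file cannot be opened or loaded
    // (load() below is currently stubbed out and always returns false), the
    // state machine is rebuilt from the token tables and, if possible,
    // written back to the cache file.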
    std::ifstream dfa_in("wave_lexertl_lexer.dfa", std::ios::in|std::ios::binary);

    if (force_reinit || !dfa_in.is_open() || !load (dfa_in))
    {
        dfa_in.close();

        state_machine_.clear();

        // register macro definitions
        boost::lexer::rules rules;
        for (int k = 0; NULL != init_macro_data[k].name; ++k) {
            rules.add_macro(init_macro_data[k].name, init_macro_data[k].macro);
        }

        // if pp-numbers should be preferred, insert the corresponding rule first
        if (wave::need_prefer_pp_numbers(lang)) {
            for (int j = 0; 0 != init_data_pp_number[j].tokenid; ++j) {
                rules.add(init_data_pp_number[j].tokenregex,
                    init_data_pp_number[j].tokenid);
            }
        }

        // the C++-only keywords are not valid in C99 mode, so add them
        // only otherwise
        if (!wave::need_c99(lang)) {
            for (int j = 0; 0 != init_data_cpp[j].tokenid; ++j) {
                rules.add(init_data_cpp[j].tokenregex,
                    init_data_cpp[j].tokenid);
            }
        }

        // if in C++0x mode, add the appropriate keywords
#if BOOST_WAVE_SUPPORT_CPP0X != 0
        if (wave::need_cpp0x(lang) || wave::need_cpp2a(lang)) {
            for (int j = 0; 0 != init_data_cpp0x[j].tokenid; ++j) {
                rules.add(init_data_cpp0x[j].tokenregex,
                    init_data_cpp0x[j].tokenid);
            }
        }
#endif

        // if in C++2a mode, add those keywords
#if BOOST_WAVE_SUPPORT_CPP2A != 0
        if (wave::need_cpp2a(lang)) {
            for (int j = 0; 0 != init_data_cpp2a[j].tokenid; ++j) {
                rules.add(init_data_cpp2a[j].tokenregex,
                    init_data_cpp2a[j].tokenid);
            }
        }
#endif

        for (int i = 0; 0 != init_data[i].tokenid; ++i) {
            rules.add(init_data[i].tokenregex, init_data[i].tokenid);
        }

        // generate the minimized DFA
        try {
            boost::lexer::generator::build (rules, state_machine_);
            boost::lexer::generator::minimise (state_machine_);
        }
        catch (std::runtime_error const& e) {
            string_type msg("lexertl initialization error: ");
            msg += e.what();
            BOOST_WAVE_LEXER_THROW(wave::cpplexer::lexing_exception,
                unexpected_error, msg.c_str(),
                pos.get_line(), pos.get_column(), pos.get_file().c_str());
            return false;
        }

        std::ofstream dfa_out ("wave_lexertl_lexer.dfa",
            std::ios::out|std::ios::binary|std::ios::trunc);

        if (dfa_out.is_open())
            save (dfa_out);
    }

    has_compiled_dfa_ = true;
    return true;
}
#endif // BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0

///////////////////////////////////////////////////////////////////////////////
// return next token from the input stream
template <typename Iterator, typename Position>
inline wave::token_id
lexertl<Iterator, Position>::next_token(Iterator &first, Iterator const &last,
    string_type& token_value)
{
#if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
    size_t const* const lookup = &state_machine_.data()._lookup[0]->front ();
    size_t const dfa_alphabet = state_machine_.data()._dfa_alphabet[0];
    size_t const* dfa = &state_machine_.data()._dfa[0]->front();
#else
    // set up pointers from static data
    size_t const* lookup = lookup_;
    size_t const dfa_alphabet = dfa_alphabet_;
    size_t const* dfa = dfa_;
#endif // BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
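    // What the loop below assumes about the table layout: 'dfa' is organized
    // in rows of 'dfa_alphabet' entries, one row per DFA state, with the
    // start state's row beginning at offset dfa_alphabet. Within a row,
    // entry 0 is non-zero for accepting states, entry 1 holds the matched
    // token id, and 'lookup' maps an input character to the column whose
    // entry is the next state (0 meaning no transition).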
    size_t const* ptr = dfa + dfa_alphabet;

    Iterator curr = first;
    Iterator end_token = first;
    bool end_state = (*ptr != 0);
    size_t id = *(ptr + 1);

    while (curr != last) {
        size_t const state = ptr[lookup[int(*curr)]];
        if (0 == state)
            break;
        ++curr;

        ptr = &dfa[state * dfa_alphabet];

        if (0 != *ptr) {
            end_state = true;
            id = *(ptr + 1);
            end_token = curr;
        }
    }

    if (end_state) {
        if (T_ANY == id) {
            id = TOKEN_FROM_ID(*first, UnknownTokenType);
        }

        // return longest match
        string_type str(first, end_token);
        token_value.swap(str);
        first = end_token;
        return wave::token_id(id);
    }
    return T_EOF;
}

#if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
///////////////////////////////////////////////////////////////////////////////
// load the DFA tables from a stream
template <typename Iterator, typename Position>
inline bool
lexertl<Iterator, Position>::load (std::istream& instrm)
{
//  #if !defined(BOOST_WAVE_LEXERTL_GENERATE_CPP_CODE)
//  std::size_t version = 0;
//  boost::lexer::serialise::load_as_binary(instrm, state_machine_, version);
//  if (version != (std::size_t)get_compilation_time())
//      return false;      // too new for us
//  return instrm.good();
//  #else
    return false;   // always create the dfa when generating the C++ code
//  #endif
}

///////////////////////////////////////////////////////////////////////////////
// save the DFA tables to a stream
template <typename Iterator, typename Position>
inline bool
lexertl<Iterator, Position>::save (std::ostream& outstrm)
{
#if defined(BOOST_WAVE_LEXERTL_GENERATE_CPP_CODE)
    boost::lexer::generate_cpp(state_machine_, outstrm);
#else
//  boost::lexer::serialise::save_as_binary(state_machine_, outstrm,
//      (std::size_t)get_compilation_time());
#endif
    return outstrm.good();
}
#endif // #if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0

///////////////////////////////////////////////////////////////////////////////
}   // namespace lexer

///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Position = wave::util::file_position_type>
class lexertl_functor
:   public lexertl_input_interface<wave::cpplexer::lex_token<Position> >
{
public:
    typedef wave::util::position_iterator<Iterator, Position> iterator_type;
    typedef typename boost::iterators::iterator_value<Iterator>::type
        char_type;
    typedef BOOST_WAVE_STRINGTYPE string_type;
    typedef wave::cpplexer::lex_token<Position> token_type;

    lexertl_functor(Iterator const &first_, Iterator const &last_,
            Position const &pos_, wave::language_support language)
    :   first(first_, last_, pos_), language(language), at_eof(false)
    {
        lexer_.init_dfa(language, pos_);

#if BOOST_WAVE_LEXERTL_GENERATE_CPP_CODE != 0
        std::ofstream os("wave_lexertl_tables_next_token.hpp");
        // generates a next_token function with an incompatible interface
        // to lexertl::next_token(), but you can extract the necessary tables
        // and replace them manually:
        lexer_.save(os);
#endif
    }
    ~lexertl_functor() {}

    // get the next token from the input stream
    token_type& get(token_type& result) BOOST_OVERRIDE
    {
        if (lexer_.is_initialized() && !at_eof) {
            do {
                // generate and return the next token
                string_type token_val;
                Position pos = first.get_position();    // position at the start of the token
                wave::token_id id = lexer_.next_token(first, last, token_val);

                if (T_CONTLINE != id) {
                    // the cast avoids spurious warnings about missing case
                    // labels for the other token ids
                    switch (static_cast<unsigned int>(id)) {
                    case T_IDENTIFIER:
                        // test identifier characters for validity (throws if
                        // invalid chars found)
                        if (!wave::need_no_character_validation(language)) {
                            using wave::cpplexer::impl::validate_identifier_name;
                            validate_identifier_name(token_val,
                                pos.get_line(), pos.get_column(), pos.get_file());
                        }
                        break;

                    case T_STRINGLIT:
                    case T_CHARLIT:
                        // test literal characters for validity (throws if invalid
                        // chars found)
                        if (wave::need_convert_trigraphs(language)) {
                            using wave::cpplexer::impl::convert_trigraphs;
                            token_val = convert_trigraphs(token_val);
                        }
                        if (!wave::need_no_character_validation(language)) {
                            using wave::cpplexer::impl::validate_literal;
                            validate_literal(token_val,
                                pos.get_line(), pos.get_column(), pos.get_file());
                        }
                        break;

                    case T_LONGINTLIT:  // supported in C99 and long_long mode
                        if (!wave::need_long_long(language)) {
                            // syntax error: not allowed in C++ mode
                            BOOST_WAVE_LEXER_THROW(
                                wave::cpplexer::lexing_exception,
                                invalid_long_long_literal, token_val.c_str(),
                                pos.get_line(), pos.get_column(),
                                pos.get_file().c_str());
                        }
                        break;

                    case T_PP_HHEADER:
                    case T_PP_QHEADER:
                    case T_PP_INCLUDE:
                        // convert to the corresponding ..._next token, if appropriate
#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
                        {
                            // Skip '#' and whitespace and see whether we find an
                            // 'include_next' here.
                            typename string_type::size_type start = token_val.find("include");
                            if (0 == token_val.compare(start, 12, "include_next", 12))
                                id = token_id(id | AltTokenType);
                        }
#endif // BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
                        break;

                    case T_EOF:
                        // T_EOF is returned as a valid token, the next call will
                        // return T_EOI, i.e. the actual end of input
                        at_eof = true;
                        token_val.clear();
                        break;

                    case T_OR_TRIGRAPH:
                    case T_XOR_TRIGRAPH:
                    case T_LEFTBRACE_TRIGRAPH:
                    case T_RIGHTBRACE_TRIGRAPH:
                    case T_LEFTBRACKET_TRIGRAPH:
                    case T_RIGHTBRACKET_TRIGRAPH:
                    case T_COMPL_TRIGRAPH:
                    case T_POUND_TRIGRAPH:
                    case T_ANY_TRIGRAPH:
                        if (wave::need_convert_trigraphs(language))
                        {
                            using wave::cpplexer::impl::convert_trigraph;
                            token_val = convert_trigraph(token_val);
                        }
                        break;

                    case T_ANYCTRL:
                        // matched some unexpected character
                        {
                            // 21 is the max required size for a 64 bit integer
                            // represented as a string
                            char buffer[22];
                            string_type msg("invalid character in input stream: '0x");

                            // for some systems sprintf is in namespace std
                            using namespace std;
                            sprintf(buffer, "%02x'", token_val[0]);
                            msg += buffer;
                            BOOST_WAVE_LEXER_THROW(
                                wave::cpplexer::lexing_exception,
                                generic_lexing_error,
                                msg.c_str(), pos.get_line(), pos.get_column(),
                                pos.get_file().c_str());
                        }
                        break;
                    }

                    result = token_type(id, token_val, pos);
#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
                    return guards.detect_guard(result);
#else
                    return result;
#endif
                }
            } while (true);     // skip the T_CONTLINE token
        }
        return result = token_type();   // return T_EOI
    }

    void set_position(Position const &pos) BOOST_OVERRIDE
    {
        // set position has to change the file name and line number only
        first.get_position().set_file(pos.get_file());
        first.get_position().set_line(pos.get_line());
    }

#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
    bool has_include_guards(std::string& guard_name) const BOOST_OVERRIDE
        { return guards.detected(guard_name); }
#endif

private:
    iterator_type first;
    iterator_type last;

    wave::language_support language;
    bool at_eof;
#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
    include_guards<token_type> guards;
#endif

    static lexer::lexertl<iterator_type, Position> lexer_;
};

template <typename Iterator, typename Position>
lexer::lexertl<
        typename lexertl_functor<Iterator, Position>::iterator_type, Position>
    lexertl_functor<Iterator, Position>::lexer_;

#undef INIT_DATA_SIZE
#undef INIT_DATA_CPP_SIZE
#undef INIT_DATA_PP_NUMBER_SIZE
#undef INIT_MACRO_DATA_SIZE
#undef T_ANYCTRL

#undef T_EXTCHARLIT
#undef T_EXTSTRINGLIT
#undef T_EXTRAWSTRINGLIT

///////////////////////////////////////////////////////////////////////////////
//
//  The new_lexer_gen<>::new_lexer function (declared in lexertl_interface.hpp)
//  is defined inline if the lex_functor is not to be instantiated separately
//  from the lex_iterator.
//
//  Separate (explicit) instantiation helps to reduce compilation time.
//
///////////////////////////////////////////////////////////////////////////////

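///////////////////////////////////////////////////////////////////////////////
// Sketch (an assumption, not shipped with this sample): when
// BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION is enabled, a dedicated translation
// unit would provide the explicit instantiation for the concrete iterator
// type in use, along the lines of:
//
//     #include "lexertl/lexertl_lexer.hpp"   // this header
//     template struct boost::wave::cpplexer::lexertl::new_lexer_gen<
//         std::string::iterator>;
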
#if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0
#define BOOST_WAVE_FLEX_NEW_LEXER_INLINE
#else
#define BOOST_WAVE_FLEX_NEW_LEXER_INLINE inline
#endif

///////////////////////////////////////////////////////////////////////////////
//
//  The 'new_lexer' function allows the opaque generation of a new lexer object.
//  It is coupled to the iterator type, which makes it possible to decouple
//  the lexer/iterator configurations at compile time.
//
//  This function is declared in lexertl_interface.hpp, which is referenced
//  both by the source file calling the lexer and by the source file which
//  instantiates the lex_functor. It is defined here, however, so it gets
//  instantiated only while compiling the source file which instantiates the
//  lex_functor. While lexertl_interface.hpp may be included everywhere, this
//  file (lexertl_lexer.hpp) should be included only once. This decouples the
//  lexer interface from the lexer implementation and reduces compilation time.
//
///////////////////////////////////////////////////////////////////////////////

template <typename Iterator, typename Position>
BOOST_WAVE_FLEX_NEW_LEXER_INLINE
wave::cpplexer::lex_input_interface<wave::cpplexer::lex_token<Position> > *
new_lexer_gen<Iterator, Position>::new_lexer(Iterator const &first,
    Iterator const &last, Position const &pos, wave::language_support language)
{
    return new lexertl_functor<Iterator, Position>(first, last, pos, language);
}

#undef BOOST_WAVE_FLEX_NEW_LEXER_INLINE

///////////////////////////////////////////////////////////////////////////////
}}}}    // namespace boost::wave::cpplexer::lexertl

#endif // !defined(BOOST_WAVE_LEXERTL_LEXER_HPP_INCLUDED)
