ceph/src/boost/libs/wave/samples/cpp_tokens/slex/cpp_slex_lexer.hpp

   1 /*=============================================================================
   2     Boost.Wave: A Standard compliant C++ preprocessor library
   3
   4     SLex (Spirit Lex) based C++ lexer
   5
   6     http://www.boost.org/
   7
   8     Copyright (c) 2001-2012 Hartmut Kaiser. Distributed under the Boost
   9     Software License, Version 1.0. (See accompanying file
  10     LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  11 =============================================================================*/
  12
  13 #if !defined(BOOST_SLEX_LEXER_HPP_5E8E1DF0_BB41_4938_B7E5_A4BB68222FF6_INCLUDED)
  14 #define BOOST_SLEX_LEXER_HPP_5E8E1DF0_BB41_4938_B7E5_A4BB68222FF6_INCLUDED
  15
  16 #include <string>
  17 #if defined(BOOST_SPIRIT_DEBUG)
  18 #include <iostream>
  19 #endif // defined(BOOST_SPIRIT_DEBUG)
  20
  21 #include <boost/assert.hpp>
  22 #include <boost/spirit/include/classic_core.hpp>
  23
  24 #include <boost/wave/wave_config.hpp>
  25 #include <boost/wave/language_support.hpp>
  26 #include <boost/wave/token_ids.hpp>
  27 #include <boost/wave/util/file_position.hpp>
  28 #include <boost/wave/util/time_conversion_helper.hpp>
  29 #include <boost/wave/cpplexer/validate_universal_char.hpp>
  30 #include <boost/wave/cpplexer/convert_trigraphs.hpp>
  31 #include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
  32 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
  33 #include <boost/wave/cpplexer/detect_include_guards.hpp>
  34 #endif
  35 #include <boost/wave/cpplexer/cpp_lex_interface.hpp>
  36
  37 #include "../slex_interface.hpp"
  38 #include "../slex_token.hpp"
  39 #include "../slex_iterator.hpp"
  40
  41 #include "lexer.hpp"   // "spirit/lexer.hpp"
  42
  43 ///////////////////////////////////////////////////////////////////////////////
  44 namespace boost {
  45 namespace wave {
  46 namespace cpplexer {
  47 namespace slex {
  48 namespace lexer {
  49
  50 ///////////////////////////////////////////////////////////////////////////////
  51 //  The following numbers are the array sizes of the token regex's which we
  52 //  need to specify to make the CW compiler happy (at least up to V9.5).
  53 #if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
  54 #define INIT_DATA_SIZE              175
  55 #else
  56 #define INIT_DATA_SIZE              158
  57 #endif
  58 #define INIT_DATA_CPP_SIZE          15
  59 #define INIT_DATA_PP_NUMBER_SIZE    2
  60 #define INIT_DATA_CPP0X_SIZE        15
  61 #define INIT_DATA_CPP2A_SIZE        10
  62
  63 ///////////////////////////////////////////////////////////////////////////////
  64 //
  65 //  encapsulation of the boost::spirit::classic::slex based cpp lexer
  66 //
  67 ///////////////////////////////////////////////////////////////////////////////
  68
  69 ///////////////////////////////////////////////////////////////////////////////
  70 //  The following lexer_base class was necessary to workaround a CodeWarrior
  71 //  bug (at least up to CW V9.5).
  72 template <typename IteratorT, typename PositionT>
  73 class lexer_base
  74 :   public boost::spirit::classic::lexer<
  75         boost::wave::util::position_iterator<IteratorT, PositionT> >
  76 {
  77 protected:
  78     typedef boost::wave::util::position_iterator<IteratorT, PositionT>
  79         iterator_type;
  80     typedef typename std::iterator_traits<IteratorT>::value_type  char_type;
  81     typedef boost::spirit::classic::lexer<iterator_type> base_type;
  82
  83     lexer_base();
  84
  85 // initialization data (regular expressions for the token definitions)
  86     struct lexer_data {
  87         token_id tokenid;                       // token data
  88         char_type const *tokenregex;            // associated token to match
  89         typename base_type::callback_t tokencb; // associated callback function
  90         unsigned int lexerstate;                // valid for lexer state
  91     };
  92 };
  93
  94 ///////////////////////////////////////////////////////////////////////////////
  95 template <typename IteratorT, typename PositionT>
  96 class lexer
  97 :   public lexer_base<IteratorT, PositionT>
  98 {
  99 public:
 100     typedef boost::wave::cpplexer::slex_token<PositionT>  token_type;
 101
 102     void init_dfa(boost::wave::language_support language);
 103
 104 // get time of last compilation
 105     static std::time_t get_compilation_time()
 106         { return compilation_time.get_time(); }
 107
 108 // helper for calculation of the time of last compilation
 109     static boost::wave::util::time_conversion_helper compilation_time;
 110
 111 private:
 112     typedef lexer_base<IteratorT, PositionT> base_type;
 113
 114     static typename base_type::lexer_data const init_data[INIT_DATA_SIZE];          // common patterns
 115     static typename base_type::lexer_data const init_data_cpp[INIT_DATA_CPP_SIZE];  // C++ only patterns
 116     static typename base_type::lexer_data const init_data_pp_number[INIT_DATA_PP_NUMBER_SIZE];  // pp-number only patterns
 117     static typename base_type::lexer_data const init_data_cpp0x[INIT_DATA_CPP0X_SIZE];  // C++0X only patterns
 118     static typename base_type::lexer_data const init_data_cpp2a[INIT_DATA_CPP2A_SIZE];  // C++2A only patterns
 119 };
 120
 121 ///////////////////////////////////////////////////////////////////////////////
 122 //  data required for initialization of the lexer (token definitions)
     //
     //  All regexes below are assembled by the preprocessor from adjacent string
     //  literals. The basic building blocks are:
     //    OR      the regex alternation operator
     //    Q(c)    c prefixed with a backslash (quotes the first character of c)
     //    TRI(c)  c preceded by two quoted question marks (trigraph prefix)
 123 #define OR                  "|"
 124 #define Q(c)                "\\" c
 125 #define TRI(c)              Q("?") Q("?") c
 126
 127 // definition of some sub-token regexps to simplify the regex definitions
 128 #define BLANK               "[ \\t]"
     // a complete C style comment, from the opening "/*" to the closing "*/"
 129 #define CCOMMENT            \
 130     Q("/") Q("*") "[^*]*" Q("*") "+" "(" "[^/*][^*]*" Q("*") "+" ")*" Q("/")
 131
     // a possibly empty sequence of blanks and C style comments
 132 #define PPSPACE             "(" BLANK OR CCOMMENT ")*"
 133
 134 #define OCTALDIGIT          "[0-7]"
 135 #define DIGIT               "[0-9]"
 136 #define HEXDIGIT            "[0-9a-fA-F]"
 137 #define OPTSIGN             "[-+]?"
     // EXPSTART requires the sign, EXPONENT makes it optional but needs digits
 138 #define EXPSTART            "[eE]" "[-+]"
 139 #define EXPONENT            "(" "[eE]" OPTSIGN "[0-9]+" ")"
 140 #define NONDIGIT            "[a-zA-Z_]"
 141
     // hexadecimal, octal or decimal integer without any suffix
 142 #define INTEGER             \
 143     "(" "(0x|0X)" HEXDIGIT "+" OR "0" OCTALDIGIT "*" OR "[1-9]" DIGIT "*" ")"
 144
 145 #define INTEGER_SUFFIX      "(" "[uU][lL]?|[lL][uU]?" ")"
     // long long suffixes; the i64 suffix is accepted with MS extensions only
 146 #if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
 147 #define LONGINTEGER_SUFFIX  "(" "[uU]" "(" "ll" OR "LL" ")" OR \
 148                                 "(" "ll" OR "LL" ")" "[uU]" "?" OR \
 149                                 "i64" \
 150                             ")"
 151 #else
 152 #define LONGINTEGER_SUFFIX  "(" "[uU]" "(" "ll" OR "LL" ")" OR \
 153                             "(" "ll" OR "LL" ")" "[uU]" "?" ")"
 154 #endif
 155 #define FLOAT_SUFFIX        "(" "[fF][lL]?" OR "[lL][fF]?" ")"
     // optional L prefix of character/string literals
 156 #define CHAR_SPEC           "L?"
     // u, U or u8 prefix (used by the C++0x literal patterns)
 157 #define EXTCHAR_SPEC        "(" "[uU]" OR "u8" ")"
 158
     // a backslash, written literally or as the trigraph ??/
 159 #define BACKSLASH           "(" Q("\\") OR TRI(Q("/")) ")"
     // simple, hexadecimal and octal (one to three digits) escape sequences
 160 #define ESCAPESEQ           "(" BACKSLASH "(" \
 161                                 "[abfnrtv?'\"]" OR \
 162                                 BACKSLASH OR \
 163                                 "x" HEXDIGIT "+" OR \
 164                                 OCTALDIGIT OCTALDIGIT "?" OCTALDIGIT "?" \
 165                             "))"
 166 #define HEXQUAD             "(" HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT ")"
     // universal character name: backslash, then u + 4 or U + 8 hex digits
 167 #define UNIVERSALCHAR       "(" BACKSLASH "(" \
 168                                 "u" HEXQUAD OR \
 169                                 "U" HEXQUAD HEXQUAD \
 170                             "))"
 171
     // '#', spelled directly, as the trigraph ??= or as the digraph %:
 172 #define POUNDDEF            "(" "#" OR TRI("=") OR Q("%:") ")"
     // a single line ending: LF, CR or CR LF
 173 #define NEWLINEDEF          "(" "\n" OR "\r" OR "\r\n" ")"
 174
     // the include directive name; include_next is accepted only if supported
 175 #if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
 176 #define INCLUDEDEF          "(include|include_next)"
 177 #else
 178 #define INCLUDEDEF          "include"
 179 #endif
 180
     // pp-number: optional dot, a digit, then any mix of digits, non-digits,
     // signed exponent starts and dots
 181 #define PP_NUMBERDEF        Q(".") "?" DIGIT "(" DIGIT OR NONDIGIT OR EXPSTART OR Q(".") ")*"
 182
 183 ///////////////////////////////////////////////////////////////////////////////
 184 //  lexer state constants
 185 #define LEXER_STATE_NORMAL  0
 186 #define LEXER_STATE_PP      1
 187
 188 #define NUM_LEXER_STATES    1
 189
 190 //  helper for initializing token data
 191 #define TOKEN_DATA(id, regex)                                                 \
 192         { T_##id, regex, 0, LEXER_STATE_NORMAL }                              \
 193     /**/
 194
 195 #define TOKEN_DATA_EX(id, regex, callback)                                    \
 196         { T_##id, regex, callback, LEXER_STATE_NORMAL }                       \
 197     /**/
 198
 199 ///////////////////////////////////////////////////////////////////////////////
 200 // common C++/C99 token definitions
     //
     // These rows are registered by init_dfa() for every language mode, after
     // the mode specific tables. Every row is built with TOKEN_DATA, i.e. it has
     // no callback and belongs to the normal lexer state. The number of rows
     // (including the terminating entry) must not exceed INIT_DATA_SIZE.
 201 template <typename IteratorT, typename PositionT>
 202 typename lexer_base<IteratorT, PositionT>::lexer_data const
 203 lexer<IteratorT, PositionT>::init_data[INIT_DATA_SIZE] =
 204 {
     // operators and punctuators, including digraph and trigraph spellings
 205     TOKEN_DATA(AND, "&"),
 206     TOKEN_DATA(ANDAND, "&&"),
 207     TOKEN_DATA(ASSIGN, "="),
 208     TOKEN_DATA(ANDASSIGN, "&="),
 209     TOKEN_DATA(OR, Q("|")),
 210     TOKEN_DATA(OR_TRIGRAPH, TRI("!")),
 211     TOKEN_DATA(ORASSIGN, Q("|=")),
 212     TOKEN_DATA(ORASSIGN_TRIGRAPH, TRI("!=")),
 213     TOKEN_DATA(XOR, Q("^")),
 214     TOKEN_DATA(XOR_TRIGRAPH, TRI("'")),
 215     TOKEN_DATA(XORASSIGN, Q("^=")),
 216     TOKEN_DATA(XORASSIGN_TRIGRAPH, TRI("'=")),
 217     TOKEN_DATA(COMMA, ","),
 218     TOKEN_DATA(COLON, ":"),
 219     TOKEN_DATA(DIVIDEASSIGN, Q("/=")),
 220     TOKEN_DATA(DIVIDE, Q("/")),
 221     TOKEN_DATA(DOT, Q(".")),
 222     TOKEN_DATA(ELLIPSIS, Q(".") Q(".") Q(".")),
 223     TOKEN_DATA(EQUAL, "=="),
 224     TOKEN_DATA(GREATER, ">"),
 225     TOKEN_DATA(GREATEREQUAL, ">="),
 226     TOKEN_DATA(LEFTBRACE, Q("{")),
 227     TOKEN_DATA(LEFTBRACE_ALT, "<" Q("%")),
 228     TOKEN_DATA(LEFTBRACE_TRIGRAPH, TRI("<")),
 229     TOKEN_DATA(LESS, "<"),
 230     TOKEN_DATA(LESSEQUAL, "<="),
 231     TOKEN_DATA(LEFTPAREN, Q("(")),
 232     TOKEN_DATA(LEFTBRACKET, Q("[")),
 233     TOKEN_DATA(LEFTBRACKET_ALT, "<:"),
 234     TOKEN_DATA(LEFTBRACKET_TRIGRAPH, TRI(Q("("))),
 235     TOKEN_DATA(MINUS, Q("-")),
 236     TOKEN_DATA(MINUSASSIGN, Q("-=")),
 237     TOKEN_DATA(MINUSMINUS, Q("-") Q("-")),
 238     TOKEN_DATA(PERCENT, Q("%")),
 239     TOKEN_DATA(PERCENTASSIGN, Q("%=")),
 240     TOKEN_DATA(NOT, "!"),
 241     TOKEN_DATA(NOTEQUAL, "!="),
 242     TOKEN_DATA(OROR, Q("|") Q("|")),
 243     TOKEN_DATA(OROR_TRIGRAPH, TRI("!") Q("|") OR Q("|") TRI("!") OR TRI("!") TRI("!")),
 244     TOKEN_DATA(PLUS, Q("+")),
 245     TOKEN_DATA(PLUSASSIGN, Q("+=")),
 246     TOKEN_DATA(PLUSPLUS, Q("+") Q("+")),
 247     TOKEN_DATA(ARROW, Q("->")),
 248     TOKEN_DATA(QUESTION_MARK, Q("?")),
 249     TOKEN_DATA(RIGHTBRACE, Q("}")),
 250     TOKEN_DATA(RIGHTBRACE_ALT, Q("%>")),
 251     TOKEN_DATA(RIGHTBRACE_TRIGRAPH, TRI(">")),
 252     TOKEN_DATA(RIGHTPAREN, Q(")")),
 253     TOKEN_DATA(RIGHTBRACKET, Q("]")),
 254     TOKEN_DATA(RIGHTBRACKET_ALT, ":>"),
 255     TOKEN_DATA(RIGHTBRACKET_TRIGRAPH, TRI(Q(")"))),
 256     TOKEN_DATA(SEMICOLON, ";"),
 257     TOKEN_DATA(SHIFTLEFT, "<<"),
 258     TOKEN_DATA(SHIFTLEFTASSIGN, "<<="),
 259     TOKEN_DATA(SHIFTRIGHT, ">>"),
 260     TOKEN_DATA(SHIFTRIGHTASSIGN, ">>="),
 261     TOKEN_DATA(STAR, Q("*")),
 262     TOKEN_DATA(COMPL, Q("~")),
 263     TOKEN_DATA(COMPL_TRIGRAPH, TRI("-")),
 264     TOKEN_DATA(STARASSIGN, Q("*=")),
     // keywords
 265     TOKEN_DATA(ASM, "asm"),
 266     TOKEN_DATA(AUTO, "auto"),
 267     TOKEN_DATA(BOOL, "bool"),
 268     TOKEN_DATA(FALSE, "false"),
 269     TOKEN_DATA(TRUE, "true"),
 270     TOKEN_DATA(BREAK, "break"),
 271     TOKEN_DATA(CASE, "case"),
 272     TOKEN_DATA(CATCH, "catch"),
 273     TOKEN_DATA(CHAR, "char"),
 274     TOKEN_DATA(CLASS, "class"),
 275     TOKEN_DATA(CONST, "const"),
 276     TOKEN_DATA(CONSTCAST, "const_cast"),
 277     TOKEN_DATA(CONTINUE, "continue"),
 278     TOKEN_DATA(DEFAULT, "default"),
 279     TOKEN_DATA(DELETE, "delete"),
 280     TOKEN_DATA(DO, "do"),
 281     TOKEN_DATA(DOUBLE, "double"),
 282     TOKEN_DATA(DYNAMICCAST, "dynamic_cast"),
 283     TOKEN_DATA(ELSE, "else"),
 284     TOKEN_DATA(ENUM, "enum"),
 285     TOKEN_DATA(EXPLICIT, "explicit"),
 286     TOKEN_DATA(EXPORT, "export"),
 287     TOKEN_DATA(EXTERN, "extern"),
 288     TOKEN_DATA(FLOAT, "float"),
 289     TOKEN_DATA(FOR, "for"),
 290     TOKEN_DATA(FRIEND, "friend"),
 291     TOKEN_DATA(GOTO, "goto"),
 292     TOKEN_DATA(IF, "if"),
 293     TOKEN_DATA(INLINE, "inline"),
 294     TOKEN_DATA(INT, "int"),
 295     TOKEN_DATA(LONG, "long"),
 296     TOKEN_DATA(MUTABLE, "mutable"),
 297     TOKEN_DATA(NAMESPACE, "namespace"),
 298     TOKEN_DATA(NEW, "new"),
 299     TOKEN_DATA(OPERATOR, "operator"),
 300     TOKEN_DATA(PRIVATE, "private"),
 301     TOKEN_DATA(PROTECTED, "protected"),
 302     TOKEN_DATA(PUBLIC, "public"),
 303     TOKEN_DATA(REGISTER, "register"),
 304     TOKEN_DATA(REINTERPRETCAST, "reinterpret_cast"),
 305     TOKEN_DATA(RETURN, "return"),
 306     TOKEN_DATA(SHORT, "short"),
 307     TOKEN_DATA(SIGNED, "signed"),
 308     TOKEN_DATA(SIZEOF, "sizeof"),
 309     TOKEN_DATA(STATIC, "static"),
 310     TOKEN_DATA(STATICCAST, "static_cast"),
 311     TOKEN_DATA(STRUCT, "struct"),
 312     TOKEN_DATA(SWITCH, "switch"),
 313     TOKEN_DATA(TEMPLATE, "template"),
 314     TOKEN_DATA(THIS, "this"),
 315     TOKEN_DATA(THROW, "throw"),
 316     TOKEN_DATA(TRY, "try"),
 317     TOKEN_DATA(TYPEDEF, "typedef"),
 318     TOKEN_DATA(TYPEID, "typeid"),
 319     TOKEN_DATA(TYPENAME, "typename"),
 320     TOKEN_DATA(UNION, "union"),
 321     TOKEN_DATA(UNSIGNED, "unsigned"),
 322     TOKEN_DATA(USING, "using"),
 323     TOKEN_DATA(VIRTUAL, "virtual"),
 324     TOKEN_DATA(VOID, "void"),
 325     TOKEN_DATA(VOLATILE, "volatile"),
 326     TOKEN_DATA(WCHART, "wchar_t"),
 327     TOKEN_DATA(WHILE, "while"),
     // preprocessor directives: the pound sign (any spelling), optional
     // blanks/comments and the directive name form one token
 328     TOKEN_DATA(PP_DEFINE, POUNDDEF PPSPACE "define"),
 329     TOKEN_DATA(PP_IF, POUNDDEF PPSPACE "if"),
 330     TOKEN_DATA(PP_IFDEF, POUNDDEF PPSPACE "ifdef"),
 331     TOKEN_DATA(PP_IFNDEF, POUNDDEF PPSPACE "ifndef"),
 332     TOKEN_DATA(PP_ELSE, POUNDDEF PPSPACE "else"),
 333     TOKEN_DATA(PP_ELIF, POUNDDEF PPSPACE "elif"),
 334     TOKEN_DATA(PP_ENDIF, POUNDDEF PPSPACE "endif"),
 335     TOKEN_DATA(PP_ERROR, POUNDDEF PPSPACE "error"),
     // #include with a "file" argument, with a <file> argument, or with
     // neither of the two following directly
 336     TOKEN_DATA(PP_QHEADER, POUNDDEF PPSPACE \
 337         INCLUDEDEF PPSPACE Q("\"") "[^\\n\\r\"]+" Q("\"")),
 338     TOKEN_DATA(PP_HHEADER, POUNDDEF PPSPACE \
 339         INCLUDEDEF PPSPACE "<" "[^\\n\\r>]+" ">"),
 340     TOKEN_DATA(PP_INCLUDE, POUNDDEF PPSPACE \
 341         INCLUDEDEF PPSPACE),
 342     TOKEN_DATA(PP_LINE, POUNDDEF PPSPACE "line"),
 343     TOKEN_DATA(PP_PRAGMA, POUNDDEF PPSPACE "pragma"),
 344     TOKEN_DATA(PP_UNDEF, POUNDDEF PPSPACE "undef"),
 345     TOKEN_DATA(PP_WARNING, POUNDDEF PPSPACE "warning"),
     // MS specific keywords and directives (17 rows, accounted for by the
     // larger INIT_DATA_SIZE used with MS extensions)
 346 #if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
 347     TOKEN_DATA(MSEXT_INT8, "__int8"),
 348     TOKEN_DATA(MSEXT_INT16, "__int16"),
 349     TOKEN_DATA(MSEXT_INT32, "__int32"),
 350     TOKEN_DATA(MSEXT_INT64, "__int64"),
 351     TOKEN_DATA(MSEXT_BASED, "_?" "_based"),
 352     TOKEN_DATA(MSEXT_DECLSPEC, "_?" "_declspec"),
 353     TOKEN_DATA(MSEXT_CDECL, "_?" "_cdecl"),
 354     TOKEN_DATA(MSEXT_FASTCALL, "_?" "_fastcall"),
 355     TOKEN_DATA(MSEXT_STDCALL, "_?" "_stdcall"),
 356     TOKEN_DATA(MSEXT_TRY , "__try"),
 357     TOKEN_DATA(MSEXT_EXCEPT, "__except"),
 358     TOKEN_DATA(MSEXT_FINALLY, "__finally"),
 359     TOKEN_DATA(MSEXT_LEAVE, "__leave"),
 360     TOKEN_DATA(MSEXT_INLINE, "_?" "_inline"),
 361     TOKEN_DATA(MSEXT_ASM, "_?" "_asm"),
 362     TOKEN_DATA(MSEXT_PP_REGION, POUNDDEF PPSPACE "region"),
 363     TOKEN_DATA(MSEXT_PP_ENDREGION, POUNDDEF PPSPACE "endregion"),
 364 #endif // BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
     // literals and comments
 365 //  TOKEN_DATA(OCTALINT, "0" OCTALDIGIT "*" INTEGER_SUFFIX "?"),
 366 //  TOKEN_DATA(DECIMALINT, "[1-9]" DIGIT "*" INTEGER_SUFFIX "?"),
 367 //  TOKEN_DATA(HEXAINT, "(0x|0X)" HEXDIGIT "+" INTEGER_SUFFIX "?"),
 368     TOKEN_DATA(LONGINTLIT, INTEGER LONGINTEGER_SUFFIX),
 369     TOKEN_DATA(INTLIT, INTEGER INTEGER_SUFFIX "?"),
 370     TOKEN_DATA(FLOATLIT,
 371         "(" DIGIT "*" Q(".") DIGIT "+" OR DIGIT "+" Q(".") ")"
 372         EXPONENT "?" FLOAT_SUFFIX "?" OR
 373         DIGIT "+" EXPONENT FLOAT_SUFFIX "?"),
 374     TOKEN_DATA(CCOMMENT, CCOMMENT),
 375     TOKEN_DATA(CPPCOMMENT, Q("/") Q("/[^\\n\\r]*") NEWLINEDEF ),
 376     TOKEN_DATA(CHARLIT, CHAR_SPEC "'"
 377                 "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\n\\r\\\\']" ")+" "'"),
 378     TOKEN_DATA(STRINGLIT, CHAR_SPEC Q("\"")
 379                 "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\n\\r\\\\\"]" ")*" Q("\"")),
     // identifiers; the non-strict lexer additionally accepts '$'
 380 #if BOOST_WAVE_USE_STRICT_LEXER != 0
 381     TOKEN_DATA(IDENTIFIER, "([a-zA-Z_]" OR UNIVERSALCHAR ")([a-zA-Z0-9_]" OR UNIVERSALCHAR ")*"),
 382 #else
 383     TOKEN_DATA(IDENTIFIER, "([a-zA-Z_$]" OR UNIVERSALCHAR ")([a-zA-Z0-9_$]" OR UNIVERSALCHAR ")*"),
 384 #endif
     // whitespace, line endings and the remaining pound tokens
 385     TOKEN_DATA(SPACE, "[ \t\v\f]+"),
 386 //    TOKEN_DATA(SPACE2, "[\\v\\f]+"),
 387     TOKEN_DATA(CONTLINE, Q("\\") "\n"),
 388     TOKEN_DATA(NEWLINE, NEWLINEDEF),
 389     TOKEN_DATA(POUND_POUND, "##"),
 390     TOKEN_DATA(POUND_POUND_ALT, Q("%:") Q("%:")),
 391     TOKEN_DATA(POUND_POUND_TRIGRAPH, TRI("=") TRI("=")),
 392     TOKEN_DATA(POUND, "#"),
 393     TOKEN_DATA(POUND_ALT, Q("%:")),
 394     TOKEN_DATA(POUND_TRIGRAPH, TRI("=")),
 395     TOKEN_DATA(ANY_TRIGRAPH, TRI(Q("/"))),
 396     TOKEN_DATA(ANY, "."),     // this should be the last recognized token
 397     { token_id(0) }           // this should be the last entry
 398 };
 399
 400 ///////////////////////////////////////////////////////////////////////////////
 401 // C++ only token definitions
     //
     // init_dfa() registers these rows only if the language mode is not C99.
     // The array bound INIT_DATA_CPP_SIZE has to cover all rows, including the
     // optional IMPORT row and the terminating entry.
 402 template <typename IteratorT, typename PositionT>
 403 typename lexer_base<IteratorT, PositionT>::lexer_data const
 404 lexer<IteratorT, PositionT>::init_data_cpp[INIT_DATA_CPP_SIZE] =
 405 {
     // alternative operator spellings
 406     TOKEN_DATA(AND_ALT, "bitand"),
 407     TOKEN_DATA(ANDASSIGN_ALT, "and_eq"),
 408     TOKEN_DATA(ANDAND_ALT, "and"),
 409     TOKEN_DATA(OR_ALT, "bitor"),
 410     TOKEN_DATA(ORASSIGN_ALT, "or_eq"),
 411     TOKEN_DATA(OROR_ALT, "or"),
 412     TOKEN_DATA(XORASSIGN_ALT, "xor_eq"),
 413     TOKEN_DATA(XOR_ALT, "xor"),
 414     TOKEN_DATA(NOTEQUAL_ALT, "not_eq"),
 415     TOKEN_DATA(NOT_ALT, "not"),
 416     TOKEN_DATA(COMPL_ALT, "compl"),
 417 #if BOOST_WAVE_SUPPORT_IMPORT_KEYWORD != 0
 418     TOKEN_DATA(IMPORT, "import"),
 419 #endif
     // operators which do not exist in C
 420     TOKEN_DATA(ARROWSTAR, Q("->") Q("*")),
 421     TOKEN_DATA(DOTSTAR, Q(".") Q("*")),
 422     TOKEN_DATA(COLON_COLON, "::"),
 423     { token_id(0) }       // this should be the last entry
 424 };
 425
 426 ///////////////////////////////////////////////////////////////////////////////
 427 // pp-number only token definitions
     //
     // init_dfa() registers this row before all other tables, and only if the
     // language mode prefers pp-numbers.
 428 template <typename IteratorT, typename PositionT>
 429 typename lexer_base<IteratorT, PositionT>::lexer_data const
 430 lexer<IteratorT, PositionT>::init_data_pp_number[INIT_DATA_PP_NUMBER_SIZE] =
 431 {
 432     TOKEN_DATA(PP_NUMBER, PP_NUMBERDEF),
 433     { token_id(0) }       // this should be the last entry
 434 };
 435
 436 ///////////////////////////////////////////////////////////////////////////////
 437 // C++11 only token definitions
 438
     // Token ids for the literals with a u/U/u8 prefix: the id of the plain
     // literal with the AltTokenType bit set. slex_functor::get() clears that
     // bit again before the token is returned. These macros are not undefined
     // below because get() uses them as case labels.
 439 #define T_EXTCHARLIT      token_id(T_CHARLIT|AltTokenType)
 440 #define T_EXTSTRINGLIT    token_id(T_STRINGLIT|AltTokenType)
 441 #define T_EXTRAWSTRINGLIT token_id(T_RAWSTRINGLIT|AltTokenType)
 442
     // init_dfa() registers these rows for the C++0x and C++2a language modes
     // (if BOOST_WAVE_SUPPORT_CPP0X is enabled).
 443 template <typename IteratorT, typename PositionT>
 444 typename lexer_base<IteratorT, PositionT>::lexer_data const
 445 lexer<IteratorT, PositionT>::init_data_cpp0x[INIT_DATA_CPP0X_SIZE] =
 446 {
 447     TOKEN_DATA(EXTCHARLIT, EXTCHAR_SPEC "'"
 448                 "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\n\\r\\\\']" ")+" "'"),
 449     TOKEN_DATA(EXTSTRINGLIT, EXTCHAR_SPEC Q("\"")
 450                 "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\n\\r\\\\\"]" ")*" Q("\"")),
     // NOTE(review): the raw string patterns differ from the ordinary string
     // patterns only by the R prefix and by allowing line breaks; they end at
     // the first unescaped double quote and do not model the delimiter and
     // parentheses of R"delim(...)delim" - confirm this is intended.
 451     TOKEN_DATA(RAWSTRINGLIT, CHAR_SPEC "R" Q("\"")
 452                 "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\\\\"]" ")*" Q("\"")),
 453     TOKEN_DATA(EXTRAWSTRINGLIT, EXTCHAR_SPEC "R" Q("\"")
 454                 "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\\\\"]" ")*" Q("\"")),
     // keywords
 455     TOKEN_DATA(ALIGNAS, "alignas"),
 456     TOKEN_DATA(ALIGNOF, "alignof"),
 457     TOKEN_DATA(CHAR16_T, "char16_t"),
 458     TOKEN_DATA(CHAR32_T, "char32_t"),
 459     TOKEN_DATA(CONSTEXPR, "constexpr"),
 460     TOKEN_DATA(DECLTYPE, "decltype"),
 461     TOKEN_DATA(NOEXCEPT, "noexcept"),
 462     TOKEN_DATA(NULLPTR, "nullptr"),
 463     TOKEN_DATA(STATICASSERT, "static_assert"),
 464     TOKEN_DATA(THREADLOCAL, "thread_local"),
 465     { token_id(0) }       // this should be the last entry
 466 };
 467
 468 ///////////////////////////////////////////////////////////////////////////////
 469 // C++20 only token definitions
 470
     // init_dfa() registers these rows for the C++2a language mode only (if
     // BOOST_WAVE_SUPPORT_CPP2A is enabled): the new keywords and the
     // three-way comparison operator.
 471 template <typename IteratorT, typename PositionT>
 472 typename lexer_base<IteratorT, PositionT>::lexer_data const
 473 lexer<IteratorT, PositionT>::init_data_cpp2a[INIT_DATA_CPP2A_SIZE] =
 474 {
 475     TOKEN_DATA(CHAR8_T, "char8_t"),
 476     TOKEN_DATA(CONCEPT, "concept"),
 477     TOKEN_DATA(CONSTEVAL, "consteval"),
 478     TOKEN_DATA(CONSTINIT, "constinit"),
 479     TOKEN_DATA(CO_AWAIT, "co_await"),
 480     TOKEN_DATA(CO_RETURN, "co_return"),
 481     TOKEN_DATA(CO_YIELD, "co_yield"),
 482     TOKEN_DATA(REQUIRES, "requires"),
 483     TOKEN_DATA(SPACESHIP, "<=>"),
 484
 485     { token_id(0) }       // this should be the last entry
 486 };
 487
 488
 489 ///////////////////////////////////////////////////////////////////////////////
 490 //  undefine macros, required for regular expression definitions
 491 #undef INCLUDEDEF
 492 #undef POUNDDEF
 493 #undef CCOMMENT
 494 #undef PPSPACE
 495 #undef DIGIT
 496 #undef OCTALDIGIT
 497 #undef HEXDIGIT
 498 #undef NONDIGIT
 499 #undef OPTSIGN
 500 #undef EXPSTART
 501 #undef EXPONENT
 502 #undef LONGINTEGER_SUFFIX
 503 #undef INTEGER_SUFFIX
 504 #undef INTEGER
 505 #undef FLOAT_SUFFIX
 506 #undef CHAR_SPEC
 507 #undef BACKSLASH
 508 #undef ESCAPESEQ
 509 #undef HEXQUAD
 510 #undef UNIVERSALCHAR
 511 #undef PP_NUMBERDEF
 512
 513 #undef Q
 514 #undef TRI
 515 #undef OR
 516
 517 #undef TOKEN_DATA
 518 #undef TOKEN_DATA_EX
 519
 520 ///////////////////////////////////////////////////////////////////////////////
 521 // initialize cpp lexer with token data
 522 template <typename IteratorT, typename PositionT>
 523 inline
 524 lexer_base<IteratorT, PositionT>::lexer_base()
 525 :   base_type(NUM_LEXER_STATES)
 526 {
 527 }
 528
 529 template <typename IteratorT, typename PositionT>
 530 inline void
 531 lexer<IteratorT, PositionT>::init_dfa(boost::wave::language_support lang)
 532 {
 533     if (this->has_compiled_dfa())
 534         return;
 535
 536 // if pp-numbers should be preferred, insert the corresponding rule first
 537     if (boost::wave::need_prefer_pp_numbers(lang)) {
 538         for (int j = 0; 0 != init_data_pp_number[j].tokenid; ++j) {
 539             this->register_regex(init_data_pp_number[j].tokenregex,
 540                 init_data_pp_number[j].tokenid, init_data_pp_number[j].tokencb,
 541                 init_data_pp_number[j].lexerstate);
 542         }
 543     }
 544
 545 // if in C99 mode, some of the keywords are not valid
 546     if (!boost::wave::need_c99(lang)) {
 547         for (int j = 0; 0 != init_data_cpp[j].tokenid; ++j) {
 548             this->register_regex(init_data_cpp[j].tokenregex,
 549                 init_data_cpp[j].tokenid, init_data_cpp[j].tokencb,
 550                 init_data_cpp[j].lexerstate);
 551         }
 552     }
 553
 554 // if in C++0x mode, add appropriate keywords
 555 #if BOOST_WAVE_SUPPORT_CPP0X != 0
 556     if (boost::wave::need_cpp0x(lang) || boost::wave::need_cpp2a(lang)) {
 557         for (int j = 0; 0 != init_data_cpp0x[j].tokenid; ++j) {
 558             this->register_regex(init_data_cpp0x[j].tokenregex,
 559                 init_data_cpp0x[j].tokenid, init_data_cpp0x[j].tokencb,
 560                 init_data_cpp0x[j].lexerstate);
 561         }
 562     }
 563 #endif
 564
 565     // if in C++2a mode, add those keywords
 566 #if BOOST_WAVE_SUPPORT_CPP2A != 0
 567         if (wave::need_cpp2a(lang)) {
 568             for (int j = 0; 0 != init_data_cpp2a[j].tokenid; ++j) {
 569                 this->register_regex(init_data_cpp2a[j].tokenregex,
 570                                      init_data_cpp2a[j].tokenid,
 571                                      init_data_cpp2a[j].tokencb,
 572                                      init_data_cpp2a[j].lexerstate);
 573         }
 574     }
 575 #endif
 576
 577     for (int i = 0; 0 != init_data[i].tokenid; ++i) {
 578         this->register_regex(init_data[i].tokenregex, init_data[i].tokenid,
 579             init_data[i].tokencb, init_data[i].lexerstate);
 580     }
 581 }
 582
 583 ///////////////////////////////////////////////////////////////////////////////
 584 // get time of last compilation of this file
 585 template <typename IteratorT, typename PositionT>
 586 boost::wave::util::time_conversion_helper
 587     lexer<IteratorT, PositionT>::compilation_time(__DATE__ " " __TIME__);
 588
 589 ///////////////////////////////////////////////////////////////////////////////
 590 }   // namespace lexer
 591
 592 ///////////////////////////////////////////////////////////////////////////////
 593 //
 594 template <typename IteratorT, typename PositionT>
 595 inline void
 596 init_lexer (lexer::lexer<IteratorT, PositionT> &lexer,
 597     boost::wave::language_support language, bool force_reinit = false)
 598 {
 599     if (lexer.has_compiled_dfa())
 600         return;     // nothing to do
 601
 602     using std::ifstream;
 603     using std::ofstream;
 604     using std::ios;
 605     using std::cerr;
 606     using std::endl;
 607
 608 ifstream dfa_in("wave_slex_lexer.dfa", ios::in|ios::binary);
 609
 610     lexer.init_dfa(language);
 611     if (force_reinit || !dfa_in.is_open() ||
 612         !lexer.load (dfa_in, (long)lexer.get_compilation_time()))
 613     {
 614 #if defined(BOOST_SPIRIT_DEBUG)
 615         cerr << "Compiling regular expressions for slex ...";
 616 #endif // defined(BOOST_SPIRIT_DEBUG)
 617
 618         dfa_in.close();
 619         lexer.create_dfa();
 620
 621     ofstream dfa_out ("wave_slex_lexer.dfa", ios::out|ios::binary|ios::trunc);
 622
 623         if (dfa_out.is_open())
 624             lexer.save (dfa_out, (long)lexer.get_compilation_time());
 625
 626 #if defined(BOOST_SPIRIT_DEBUG)
 627         cerr << " Done." << endl;
 628 #endif // defined(BOOST_SPIRIT_DEBUG)
 629     }
 630 }
 631
 632 ///////////////////////////////////////////////////////////////////////////////
 633 //
 634 //  slex_functor
 635 //
 636 ///////////////////////////////////////////////////////////////////////////////
 637
 638 template <typename IteratorT, typename PositionT = wave::util::file_position_type>
 639 class slex_functor
 640 :   public slex_input_interface<
 641         typename lexer::lexer<IteratorT, PositionT>::token_type
 642     >
 643 {
 644 public:
 645
 646     typedef boost::wave::util::position_iterator<IteratorT, PositionT>
 647           iterator_type;
 648     typedef typename std::iterator_traits<IteratorT>::value_type    char_type;
 649     typedef BOOST_WAVE_STRINGTYPE                                   string_type;
 650     typedef typename lexer::lexer<IteratorT, PositionT>::token_type token_type;
 651
 652     slex_functor(IteratorT const &first_, IteratorT const &last_,
 653             PositionT const &pos_, boost::wave::language_support language_)
 654     :   first(first_, last_, pos_), language(language_), at_eof(false)
 655     {
 656         // initialize lexer dfa tables
 657         init_lexer(lexer, language_);
 658     }
         // virtual destructor with an empty body
 659     virtual ~slex_functor() {}
 660
 661 // get the next token from the input stream
 662     token_type& get(token_type& result) BOOST_OVERRIDE
 663     {
 664         if (!at_eof) {
 665             do {
 666             // generate and return the next token
 667             std::string value;
 668             PositionT pos = first.get_position();   // begin of token position
 669             token_id id = token_id(lexer.next_token(first, last, &value));
 670
 671                 if ((token_id)(-1) == id)
 672                     id = T_EOF;     // end of input reached
 673
 674             string_type token_val(value.c_str());
 675
 676                 if (boost::wave::need_emit_contnewlines(language) ||
 677                     T_CONTLINE != id)
 678                 {
 679                 //  The cast should avoid spurious warnings about missing case labels
 680                 //  for the other token ids's.
 681                     switch (id) {
 682                     case T_IDENTIFIER:
 683                     // test identifier characters for validity (throws if
 684                     // invalid chars found)
 685                         if (!boost::wave::need_no_character_validation(language)) {
 686                             using boost::wave::cpplexer::impl::validate_identifier_name;
 687                             validate_identifier_name(token_val,
 688                                 pos.get_line(), pos.get_column(), pos.get_file());
 689                         }
 690                         break;
 691
 692                     case T_EXTCHARLIT:
 693                     case T_EXTSTRINGLIT:
 694                     case T_EXTRAWSTRINGLIT:
 695                         id = token_id(id & ~AltTokenType);
 696                         BOOST_FALLTHROUGH;
 697
 698                     case T_CHARLIT:
 699                     case T_STRINGLIT:
 700                     case T_RAWSTRINGLIT:
 701                     // test literal characters for validity (throws if invalid
 702                     // chars found)
 703                         if (boost::wave::need_convert_trigraphs(language)) {
 704                             using boost::wave::cpplexer::impl::convert_trigraphs;
 705                             token_val = convert_trigraphs(token_val);
 706                         }
 707                         if (!boost::wave::need_no_character_validation(language)) {
 708                             using boost::wave::cpplexer::impl::validate_literal;
 709                             validate_literal(token_val,
 710                                 pos.get_line(), pos.get_column(), pos.get_file());
 711                         }
 712                         break;
 713
 714                     case T_LONGINTLIT:  // supported in C99 and long_long mode
 715                         if (!boost::wave::need_long_long(language)) {
 716                         // syntax error: not allowed in C++ mode
 717                             BOOST_WAVE_LEXER_THROW(
 718                                 boost::wave::cpplexer::lexing_exception,
 719                                 invalid_long_long_literal, value.c_str(),
 720                                 pos.get_line(), pos.get_column(),
 721                                 pos.get_file().c_str());
 722                         }
 723                         break;
 724
 725                     case T_PP_HHEADER:
 726                     case T_PP_QHEADER:
 727                     case T_PP_INCLUDE:
 728                     // convert to the corresponding ..._next token, if appropriate
 729                         {
 730 #if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
 731                         // Skip '#' and whitespace and see whether we find an
 732                         // 'include_next' here.
 733                             typename string_type::size_type start = value.find("include");
 734                             if (0 == value.compare(start, 12, "include_next", 12))
 735                                 id = token_id(id | AltTokenType);
 736 #endif // BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
 737                             break;
 738                         }
 739
 740                     case T_EOF:
 741                     // T_EOF is returned as a valid token, the next call will
 742                     // return T_EOI, i.e. the actual end of input
 743                         at_eof = true;
 744                         token_val.clear();
 745                         break;
 746
 747                     case T_OR_TRIGRAPH:
 748                     case T_XOR_TRIGRAPH:
 749                     case T_LEFTBRACE_TRIGRAPH:
 750                     case T_RIGHTBRACE_TRIGRAPH:
 751                     case T_LEFTBRACKET_TRIGRAPH:
 752                     case T_RIGHTBRACKET_TRIGRAPH:
 753                     case T_COMPL_TRIGRAPH:
 754                     case T_POUND_TRIGRAPH:
 755                     case T_ANY_TRIGRAPH:
 756                         if (boost::wave::need_convert_trigraphs(language))
 757                         {
 758                             using boost::wave::cpplexer::impl::convert_trigraph;
 759                             token_val = convert_trigraph(token_val);
 760                         }
 761                         break;
 762                     }
 763
 764                     result = token_type(id, token_val, pos);
 765 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
 766                     return guards.detect_guard(result);
 767 #else
 768                     return result;
 769 #endif
 770                 }
 771
 772             // skip the T_CONTLINE token
 773             } while (true);
 774         }
 775         return result = token_type();   // return T_EOI
 776     }
 777
 778     void set_position(PositionT const &pos) BOOST_OVERRIDE
 779     {
 780         // set position has to change the file name and line number only
 781         first.get_position().set_file(pos.get_file());
 782         first.get_position().set_line(pos.get_line());
 783     }
 784
 785 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
 786     bool has_include_guards(std::string& guard_name) const BOOST_OVERRIDE
 787         { return guards.detected(guard_name); }
 788 #endif
 789
private:
    // 'first' carries the file position that set_position() updates.
    // NOTE(review): first/last presumably delimit the input still to be
    // lexed -- confirm against the constructor and the lexing function.
    iterator_type first;
    iterator_type last;
    // Language options consulted while post-processing tokens (trigraph
    // conversion, literal validation, long long literal support).
    boost::wave::language_support language;
    static lexer::lexer<IteratorT, PositionT> lexer;   // needed only once

    // Set to true once T_EOF has been handed out as a token.
    bool at_eof;

#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
    // Include guard detector: tokens built by the lexing function are routed
    // through detect_guard(), and has_include_guards() queries detected().
    include_guards<token_type> guards;
#endif
 801 };
 802
// Out-of-class definition of the static 'lexer' data member declared in
// slex_functor. Being static, one lexer object exists per
// slex_functor<IteratorT, PositionT> instantiation.
template <typename IteratorT, typename PositionT>
lexer::lexer<IteratorT, PositionT> slex_functor<IteratorT, PositionT>::lexer;
 805
 806 #undef T_EXTCHARLIT
 807 #undef T_EXTSTRINGLIT
 808 #undef T_EXTRAWSTRINGLIT
 809
 810 ///////////////////////////////////////////////////////////////////////////////
 811 //
//  The 'new_lexer' function allows the opaque generation of a new lexer object.
//  It is coupled to the iterator type, which allows decoupling the
//  lexer/iterator configurations at compile time.
 815 //
 816 //  This function is declared inside the cpp_slex_token.hpp file, which is
 817 //  referenced by the source file calling the lexer and the source file, which
 818 //  instantiates the lex_functor. But it is defined here, so it will be
 819 //  instantiated only while compiling the source file, which instantiates the
 820 //  lex_functor. While the cpp_slex_token.hpp file may be included everywhere,
//  this file (cpp_slex_lexer.hpp) should be included only once. This
//  decouples the lexer interface from the lexer implementation and reduces
//  compilation time.
 824 //
 825 ///////////////////////////////////////////////////////////////////////////////
 826
 827 ///////////////////////////////////////////////////////////////////////////////
 828 //
 829 //  The new_lexer_gen<>::new_lexer function (declared in cpp_slex_token.hpp)
 830 //  should be defined inline, if the lex_functor shouldn't be instantiated
 831 //  separately from the lex_iterator.
 832 //
 833 //  Separate (explicit) instantiation helps to reduce compilation time.
 834 //
 835 ///////////////////////////////////////////////////////////////////////////////
 836
// BOOST_WAVE_SLEX_NEW_LEXER_INLINE expands to nothing when
// BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION is non-zero, and to 'inline'
// otherwise. It is used as a specifier on the new_lexer definition below.
#if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0
#define BOOST_WAVE_SLEX_NEW_LEXER_INLINE
#else
#define BOOST_WAVE_SLEX_NEW_LEXER_INLINE inline
#endif
 842
 843 template <typename IteratorT, typename PositionT>
 844 BOOST_WAVE_SLEX_NEW_LEXER_INLINE
 845 lex_input_interface<slex_token<PositionT> > *
 846 new_lexer_gen<IteratorT, PositionT>::new_lexer(IteratorT const &first,
 847     IteratorT const &last, PositionT const &pos,
 848     boost::wave::language_support language)
 849 {
 850     return new slex_functor<IteratorT, PositionT>(first, last, pos,
 851         language);
 852 }
 853
 854 #undef BOOST_WAVE_SLEX_NEW_LEXER_INLINE
 855
 856 ///////////////////////////////////////////////////////////////////////////////
 857 }   // namespace slex
 858 }   // namespace cpplexer
 859 }   // namespace wave
 860 }   // namespace boost
 861
 862 #endif // !defined(BOOST_SLEX_LEXER_HPP_5E8E1DF0_BB41_4938_B7E5_A4BB68222FF6_INCLUDED)