/*============================================================================= Boost.Wave: A Standard compliant C++ preprocessor library Sample: IDL lexer http://www.boost.org/ Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) =============================================================================*/ #include #include #include #include #include #include #include #include #if defined(BOOST_HAS_UNISTD_H) #include #else #include #endif #include #include // reuse the token ids and re2c helper functions from the default C++ lexer #include #include #include #include #include "idl_re.hpp" #if defined(_MSC_VER) && !defined(__COMO__) #pragma warning (disable: 4101) // 'foo' : unreferenced local variable #pragma warning (disable: 4102) // 'foo' : unreferenced label #endif #define BOOST_WAVE_BSIZE 196608 #define YYCTYPE uchar #define YYCURSOR cursor #define YYLIMIT s->lim #define YYMARKER s->ptr #define YYFILL(n) {cursor = fill(s, cursor);} //#define BOOST_WAVE_RET(i) {s->cur = cursor; return (i);} #define BOOST_WAVE_RET(i) \ { \ s->line += count_backslash_newlines(s, cursor); \ s->cur = cursor; \ return (i); \ } \ /**/ /////////////////////////////////////////////////////////////////////////////// namespace boost { namespace wave { namespace idllexer { namespace re2clex { #define RE2C_ASSERT BOOST_ASSERT int get_one_char(boost::wave::cpplexer::re2clex::Scanner *s) { using namespace boost::wave::cpplexer::re2clex; if (0 != s->act) { RE2C_ASSERT(s->first != 0 && s->last != 0); RE2C_ASSERT(s->first <= s->act && s->act <= s->last); if (s->act < s->last) return *(s->act)++; } return -1; } std::ptrdiff_t rewind_stream (boost::wave::cpplexer::re2clex::Scanner *s, int cnt) { if (0 != s->act) { RE2C_ASSERT(s->first != 0 && s->last != 0); s->act += cnt; RE2C_ASSERT(s->first <= s->act && s->act <= s->last); return s->act - s->first; } return 0; } std::size_t get_first_eol_offset(boost::wave::cpplexer::re2clex::Scanner* s) { if (!AQ_EMPTY(s->eol_offsets)) { return s->eol_offsets->queue[s->eol_offsets->head]; } else { return (unsigned int)-1; } } void adjust_eol_offsets(boost::wave::cpplexer::re2clex::Scanner* s, std::size_t adjustment) { boost::wave::cpplexer::re2clex::aq_queue q; std::size_t i; if (!s->eol_offsets) s->eol_offsets = boost::wave::cpplexer::re2clex::aq_create(); q = s->eol_offsets; if (AQ_EMPTY(q)) return; i = q->head; while (i != q->tail) { if (adjustment > q->queue[i]) q->queue[i] = 0; else q->queue[i] -= adjustment; ++i; if (i == q->max_size) i = 0; } if (adjustment > q->queue[i]) q->queue[i] = 0; else q->queue[i] -= adjustment; } int count_backslash_newlines(boost::wave::cpplexer::re2clex::Scanner *s, boost::wave::cpplexer::re2clex::uchar *cursor) { using namespace boost::wave::cpplexer::re2clex; std::size_t diff, offset; int skipped = 0; /* figure out how many backslash-newlines skipped over unknowingly. */ diff = cursor - s->bot; offset = get_first_eol_offset(s); while (offset <= diff && offset != (unsigned int)-1) { skipped++; boost::wave::cpplexer::re2clex::aq_pop(s->eol_offsets); offset = get_first_eol_offset(s); } return skipped; } bool is_backslash( boost::wave::cpplexer::re2clex::uchar *p, boost::wave::cpplexer::re2clex::uchar *end, int &len) { if (*p == '\\') { len = 1; return true; } else if (*p == '?' && *(p+1) == '?' && (p+2 < end && *(p+2) == '/')) { len = 3; return true; } return false; } boost::wave::cpplexer::re2clex::uchar * fill(boost::wave::cpplexer::re2clex::Scanner *s, boost::wave::cpplexer::re2clex::uchar *cursor) { using namespace std; // some systems have memcpy etc. in namespace std using namespace boost::wave::cpplexer::re2clex; if(!s->eof) { uchar* p; std::ptrdiff_t cnt = s->tok - s->bot; if(cnt) { memcpy(s->bot, s->tok, s->lim - s->tok); s->tok = s->bot; s->ptr -= cnt; cursor -= cnt; s->lim -= cnt; adjust_eol_offsets(s, cnt); } if((s->top - s->lim) < BOOST_WAVE_BSIZE) { uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BOOST_WAVE_BSIZE)*sizeof(uchar)); if (buf == 0) { using namespace std; // some systems have printf in std if (0 != s->error_proc) { (*s->error_proc)(s, cpplexer::lexing_exception::unexpected_error, "Out of memory!"); } else printf("Out of memory!\n"); /* get the scanner to stop */ *cursor = 0; return cursor; } memcpy(buf, s->tok, s->lim - s->tok); s->tok = buf; s->ptr = &buf[s->ptr - s->bot]; cursor = &buf[cursor - s->bot]; s->lim = &buf[s->lim - s->bot]; s->top = &s->lim[BOOST_WAVE_BSIZE]; free(s->bot); s->bot = buf; } if (s->act != 0) { cnt = s->last - s->act; if (cnt > BOOST_WAVE_BSIZE) cnt = BOOST_WAVE_BSIZE; memcpy(s->lim, s->act, cnt); s->act += cnt; if (cnt != BOOST_WAVE_BSIZE) { s->eof = &s->lim[cnt]; *(s->eof)++ = '\0'; } } /* backslash-newline erasing time */ /* first scan for backslash-newline and erase them */ for (p = s->lim; p < s->lim + cnt - 2; ++p) { int len = 0; if (is_backslash(p, s->lim + cnt, len)) { if (*(p+len) == '\n') { int offset = len + 1; memmove(p, p + offset, s->lim + cnt - p - offset); cnt -= offset; --p; aq_enqueue(s->eol_offsets, p - s->bot + 1); } else if (*(p+len) == '\r') { if (*(p+len+1) == '\n') { int offset = len + 2; memmove(p, p + offset, s->lim + cnt - p - offset); cnt -= offset; --p; } else { int offset = len + 1; memmove(p, p + offset, s->lim + cnt - p - offset); cnt -= offset; --p; } aq_enqueue(s->eol_offsets, p - s->bot + 1); } } } /* FIXME: the following code should be fixed to recognize correctly the trigraph backslash token */ /* check to see if what we just read ends in a backslash */ if (cnt >= 2) { uchar last = s->lim[cnt-1]; uchar last2 = s->lim[cnt-2]; /* check \ EOB */ if (last == '\\') { int next = get_one_char(s); /* check for \ \n or \ \r or \ \r \n straddling the border */ if (next == '\n') { --cnt; /* chop the final \, we've already read the \n. */ boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets, cnt + (s->lim - s->bot)); } else if (next == '\r') { int next2 = get_one_char(s); if (next2 == '\n') { --cnt; /* skip the backslash */ } else { /* rewind one, and skip one char */ rewind_stream(s, -1); --cnt; } boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets, cnt + (s->lim - s->bot)); } else if (next != -1) /* -1 means end of file */ { /* next was something else, so rewind the stream */ rewind_stream(s, -1); } } /* check \ \r EOB */ else if (last == '\r' && last2 == '\\') { int next = get_one_char(s); if (next == '\n') { cnt -= 2; /* skip the \ \r */ } else { /* rewind one, and skip two chars */ rewind_stream(s, -1); cnt -= 2; } boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets, cnt + (s->lim - s->bot)); } /* check \ \n EOB */ else if (last == '\n' && last2 == '\\') { cnt -= 2; boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets, cnt + (s->lim - s->bot)); } } s->lim += cnt; if (s->eof) /* eof needs adjusting if we erased backslash-newlines */ { s->eof = s->lim; *(s->eof)++ = '\0'; } } return cursor; } boost::wave::token_id scan(boost::wave::cpplexer::re2clex::Scanner *s) { using namespace boost::wave::cpplexer::re2clex; uchar *cursor = s->tok = s->cur; /*!re2c re2c:indent:string = " "; any = [\t\v\f\r\n\040-\377]; anyctrl = [\000-\377]; OctalDigit = [0-7]; Digit = [0-9]; HexDigit = [a-fA-F0-9]; ExponentPart = [Ee] [+-]? Digit+; FractionalConstant = (Digit* "." Digit+) | (Digit+ "."); FloatingSuffix = [fF][lL]?|[lL][fF]?; IntegerSuffix = [uU][lL]?|[lL][uU]?; FixedPointSuffix = [dD]; Backslash = [\\]|"??/"; EscapeSequence = Backslash ([abfnrtv?'"] | Backslash | "x" HexDigit+ | OctalDigit OctalDigit? OctalDigit?); HexQuad = HexDigit HexDigit HexDigit HexDigit; UniversalChar = Backslash ("u" HexQuad | "U" HexQuad HexQuad); Newline = "\r\n" | "\n" | "\r"; PPSpace = ([ \t]|("/*"(any\[*]|Newline|("*"+(any\[*/]|Newline)))*"*"+"/"))*; Pound = "#" | "??=" | "%:"; */ /*!re2c "/*" { goto ccomment; } "//" { goto cppcomment; } "TRUE" { BOOST_WAVE_RET(T_TRUE); } "FALSE" { BOOST_WAVE_RET(T_FALSE); } "{" { BOOST_WAVE_RET(T_LEFTBRACE); } "}" { BOOST_WAVE_RET(T_RIGHTBRACE); } "[" { BOOST_WAVE_RET(T_LEFTBRACKET); } "]" { BOOST_WAVE_RET(T_RIGHTBRACKET); } "#" { BOOST_WAVE_RET(T_POUND); } "##" { BOOST_WAVE_RET(T_POUND_POUND); } "(" { BOOST_WAVE_RET(T_LEFTPAREN); } ")" { BOOST_WAVE_RET(T_RIGHTPAREN); } ";" { BOOST_WAVE_RET(T_SEMICOLON); } ":" { BOOST_WAVE_RET(T_COLON); } "?" { BOOST_WAVE_RET(T_QUESTION_MARK); } "." { BOOST_WAVE_RET(T_DOT); } "+" { BOOST_WAVE_RET(T_PLUS); } "-" { BOOST_WAVE_RET(T_MINUS); } "*" { BOOST_WAVE_RET(T_STAR); } "/" { BOOST_WAVE_RET(T_DIVIDE); } "%" { BOOST_WAVE_RET(T_PERCENT); } "^" { BOOST_WAVE_RET(T_XOR); } "&" { BOOST_WAVE_RET(T_AND); } "|" { BOOST_WAVE_RET(T_OR); } "~" { BOOST_WAVE_RET(T_COMPL); } "!" { BOOST_WAVE_RET(T_NOT); } "=" { BOOST_WAVE_RET(T_ASSIGN); } "<" { BOOST_WAVE_RET(T_LESS); } ">" { BOOST_WAVE_RET(T_GREATER); } "<<" { BOOST_WAVE_RET(T_SHIFTLEFT); } ">>" { BOOST_WAVE_RET(T_SHIFTRIGHT); } "==" { BOOST_WAVE_RET(T_EQUAL); } "!=" { BOOST_WAVE_RET(T_NOTEQUAL); } "<=" { BOOST_WAVE_RET(T_LESSEQUAL); } ">=" { BOOST_WAVE_RET(T_GREATEREQUAL); } "&&" { BOOST_WAVE_RET(T_ANDAND); } "||" { BOOST_WAVE_RET(T_OROR); } "++" { BOOST_WAVE_RET(T_PLUSPLUS); } "--" { BOOST_WAVE_RET(T_MINUSMINUS); } "," { BOOST_WAVE_RET(T_COMMA); } ([a-zA-Z_] | UniversalChar) ([a-zA-Z_0-9] | UniversalChar)* { BOOST_WAVE_RET(T_IDENTIFIER); } (("0" [xX] HexDigit+) | ("0" OctalDigit*) | ([1-9] Digit*)) IntegerSuffix? { BOOST_WAVE_RET(T_INTLIT); } ((FractionalConstant ExponentPart?) | (Digit+ ExponentPart)) FloatingSuffix? { BOOST_WAVE_RET(T_FLOATLIT); } (FractionalConstant | Digit+) FixedPointSuffix { BOOST_WAVE_RET(T_FIXEDPOINTLIT); } "L"? (['] (EscapeSequence|any\[\n\r\\']|UniversalChar)+ [']) { BOOST_WAVE_RET(T_CHARLIT); } "L"? (["] (EscapeSequence|any\[\n\r\\"]|UniversalChar)* ["]) { BOOST_WAVE_RET(T_STRINGLIT); } Pound PPSpace "include" PPSpace "<" (any\[\n\r>])+ ">" { BOOST_WAVE_RET(T_PP_HHEADER); } Pound PPSpace "include" PPSpace "\"" (any\[\n\r"])+ "\"" { BOOST_WAVE_RET(T_PP_QHEADER); } Pound PPSpace "include" PPSpace { BOOST_WAVE_RET(T_PP_INCLUDE); } Pound PPSpace "if" { BOOST_WAVE_RET(T_PP_IF); } Pound PPSpace "ifdef" { BOOST_WAVE_RET(T_PP_IFDEF); } Pound PPSpace "ifndef" { BOOST_WAVE_RET(T_PP_IFNDEF); } Pound PPSpace "else" { BOOST_WAVE_RET(T_PP_ELSE); } Pound PPSpace "elif" { BOOST_WAVE_RET(T_PP_ELIF); } Pound PPSpace "endif" { BOOST_WAVE_RET(T_PP_ENDIF); } Pound PPSpace "define" { BOOST_WAVE_RET(T_PP_DEFINE); } Pound PPSpace "undef" { BOOST_WAVE_RET(T_PP_UNDEF); } Pound PPSpace "line" { BOOST_WAVE_RET(T_PP_LINE); } Pound PPSpace "error" { BOOST_WAVE_RET(T_PP_ERROR); } Pound PPSpace "pragma" { BOOST_WAVE_RET(T_PP_PRAGMA); } Pound PPSpace "warning" { BOOST_WAVE_RET(T_PP_WARNING); } [ \t\v\f]+ { BOOST_WAVE_RET(T_SPACE); } Newline { s->line++; BOOST_WAVE_RET(T_NEWLINE); } "\000" { if(cursor != s->eof) { using namespace std; // some systems have printf in std if (0 != s->error_proc) { (*s->error_proc)(s, cpplexer::lexing_exception::generic_lexing_error, "'\\000' in input stream"); } else printf("Error: 0 in file\n"); } BOOST_WAVE_RET(T_EOF); } anyctrl { BOOST_WAVE_RET(TOKEN_FROM_ID(*s->tok, UnknownTokenType)); } */ ccomment: /*!re2c "*/" { BOOST_WAVE_RET(T_CCOMMENT); } Newline { /*if(cursor == s->eof) BOOST_WAVE_RET(T_EOF);*/ /*s->tok = cursor; */ s->line += count_backslash_newlines(s, cursor) +1; goto ccomment; } any { goto ccomment; } "\000" { using namespace std; // some systems have printf in std if(cursor == s->eof) { if (s->error_proc) (*s->error_proc)(s, cpplexer::lexing_exception::generic_lexing_warning, "Unterminated comment"); else printf("Error: Unterminated comment\n"); } else { if (s->error_proc) (*s->error_proc)(s, cpplexer::lexing_exception::generic_lexing_error, "'\\000' in input stream"); else printf("Error: 0 in file"); } /* adjust cursor such next call returns T_EOF */ --YYCURSOR; /* the comment is unterminated, but nevertheless its a comment */ BOOST_WAVE_RET(T_CCOMMENT); } anyctrl { if (s->error_proc) (*s->error_proc)(s, cpplexer::lexing_exception::generic_lexing_error, "invalid character in input stream"); else printf("Error: 0 in file"); } */ cppcomment: /*!re2c Newline { /*if(cursor == s->eof) BOOST_WAVE_RET(T_EOF); */ /*s->tok = cursor; */ s->line++; BOOST_WAVE_RET(T_CPPCOMMENT); } any { goto cppcomment; } "\000" { using namespace std; // some systems have printf in std if(cursor != s->eof) { if (s->error_proc) (*s->error_proc)(s, cpplexer::lexing_exception::generic_lexing_error, "'\\000' in input stream"); else printf("Error: 0 in file"); } /* adjust cursor such next call returns T_EOF */ --YYCURSOR; /* the comment is unterminated, but nevertheless its a comment */ BOOST_WAVE_RET(T_CPPCOMMENT); } */ } /* end of scan */ #undef RE2C_ASSERT /////////////////////////////////////////////////////////////////////////////// } // namespace re2clex } // namespace idllexer } // namespace wave } // namespace boost