Boost.Wave: A Standard compliant C++ preprocessor library
SLex (Spirit Lex) based C++ lexer
-
+
http://www.boost.org/
- Copyright (c) 2001-2012 Hartmut Kaiser. Distributed under the Boost
- Software License, Version 1.0. (See accompanying file
+ Copyright (c) 2001-2012 Hartmut Kaiser. Distributed under the Boost
+ Software License, Version 1.0. (See accompanying file
LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
#define INIT_DATA_CPP0X_SIZE 15
///////////////////////////////////////////////////////////////////////////////
-//
+//
// encapsulation of the boost::spirit::classic::slex based cpp lexer
//
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
-// The following lexer_base class was necessary to workaround a CodeWarrior
+// The following lexer_base class was necessary to workaround a CodeWarrior
// bug (at least up to CW V9.5).
template <typename IteratorT, typename PositionT>
-class lexer_base
+class lexer_base
: public boost::spirit::classic::lexer<
boost::wave::util::position_iterator<IteratorT, PositionT> >
{
protected:
- typedef boost::wave::util::position_iterator<IteratorT, PositionT>
+ typedef boost::wave::util::position_iterator<IteratorT, PositionT>
iterator_type;
typedef typename std::iterator_traits<IteratorT>::value_type char_type;
typedef boost::spirit::classic::lexer<iterator_type> base_type;
///////////////////////////////////////////////////////////////////////////////
template <typename IteratorT, typename PositionT>
-class lexer
+class lexer
: public lexer_base<IteratorT, PositionT>
{
public:
typedef boost::wave::cpplexer::slex_token<PositionT> token_type;
-
+
void init_dfa(boost::wave::language_support language);
// get time of last compilation
- static std::time_t get_compilation_time()
+ static std::time_t get_compilation_time()
{ return compilation_time.get_time(); }
// helper for calculation of the time of last compilation
#define BLANK "[ \\t]"
#define CCOMMENT \
Q("/") Q("*") "[^*]*" Q("*") "+" "(" "[^/*][^*]*" Q("*") "+" ")*" Q("/")
-
+
#define PPSPACE "(" BLANK OR CCOMMENT ")*"
#define OCTALDIGIT "[0-7]"
#define INTEGER \
"(" "(0x|0X)" HEXDIGIT "+" OR "0" OCTALDIGIT "*" OR "[1-9]" DIGIT "*" ")"
-
+
#define INTEGER_SUFFIX "(" "[uU][lL]?|[lL][uU]?" ")"
#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
#define LONGINTEGER_SUFFIX "(" "[uU]" "(" "[lL][lL]" ")" OR \
"(" "[lL][lL]" ")" "[uU]" "?" OR \
"i64" \
- ")"
+ ")"
#else
#define LONGINTEGER_SUFFIX "(" "[uU]" "(" "[lL][lL]" ")" OR \
"(" "[lL][lL]" ")" "[uU]" "?" ")"
#define UNIVERSALCHAR "(" BACKSLASH "(" \
"u" HEXQUAD OR \
"U" HEXQUAD HEXQUAD \
- "))"
+ "))"
#define POUNDDEF "(" "#" OR TRI("=") OR Q("%:") ")"
#define NEWLINEDEF "(" "\n" OR "\r" OR "\r\n" ")"
///////////////////////////////////////////////////////////////////////////////
// common C++/C99 token definitions
template <typename IteratorT, typename PositionT>
-typename lexer_base<IteratorT, PositionT>::lexer_data const
-lexer<IteratorT, PositionT>::init_data[INIT_DATA_SIZE] =
+typename lexer_base<IteratorT, PositionT>::lexer_data const
+lexer<IteratorT, PositionT>::init_data[INIT_DATA_SIZE] =
{
TOKEN_DATA(AND, "&"),
TOKEN_DATA(ANDAND, "&&"),
// TOKEN_DATA(HEXAINT, "(0x|0X)" HEXDIGIT "+" INTEGER_SUFFIX "?"),
TOKEN_DATA(LONGINTLIT, INTEGER LONGINTEGER_SUFFIX),
TOKEN_DATA(INTLIT, INTEGER INTEGER_SUFFIX "?"),
- TOKEN_DATA(FLOATLIT,
- "(" DIGIT "*" Q(".") DIGIT "+" OR DIGIT "+" Q(".") ")"
+ TOKEN_DATA(FLOATLIT,
+ "(" DIGIT "*" Q(".") DIGIT "+" OR DIGIT "+" Q(".") ")"
EXPONENT "?" FLOAT_SUFFIX "?" OR
DIGIT "+" EXPONENT FLOAT_SUFFIX "?"),
TOKEN_DATA(CCOMMENT, CCOMMENT),
TOKEN_DATA(CPPCOMMENT, Q("/") Q("/[^\\n\\r]*") NEWLINEDEF ),
- TOKEN_DATA(CHARLIT, CHAR_SPEC "'"
+ TOKEN_DATA(CHARLIT, CHAR_SPEC "'"
"(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\n\\r\\\\']" ")+" "'"),
- TOKEN_DATA(STRINGLIT, CHAR_SPEC Q("\"")
+ TOKEN_DATA(STRINGLIT, CHAR_SPEC Q("\"")
"(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\n\\r\\\\\"]" ")*" Q("\"")),
#if BOOST_WAVE_USE_STRICT_LEXER != 0
TOKEN_DATA(IDENTIFIER, "([a-zA-Z_]" OR UNIVERSALCHAR ")([a-zA-Z0-9_]" OR UNIVERSALCHAR ")*"),
#endif
TOKEN_DATA(SPACE, "[ \t\v\f]+"),
// TOKEN_DATA(SPACE2, "[\\v\\f]+"),
- TOKEN_DATA(CONTLINE, Q("\\") "\n"),
+ TOKEN_DATA(CONTLINE, Q("\\") "\n"),
TOKEN_DATA(NEWLINE, NEWLINEDEF),
TOKEN_DATA(POUND_POUND, "##"),
TOKEN_DATA(POUND_POUND_ALT, Q("%:") Q("%:")),
///////////////////////////////////////////////////////////////////////////////
// C++ only token definitions
template <typename IteratorT, typename PositionT>
-typename lexer_base<IteratorT, PositionT>::lexer_data const
-lexer<IteratorT, PositionT>::init_data_cpp[INIT_DATA_CPP_SIZE] =
+typename lexer_base<IteratorT, PositionT>::lexer_data const
+lexer<IteratorT, PositionT>::init_data_cpp[INIT_DATA_CPP_SIZE] =
{
TOKEN_DATA(AND_ALT, "bitand"),
TOKEN_DATA(ANDASSIGN_ALT, "and_eq"),
///////////////////////////////////////////////////////////////////////////////
// pp-number token definition (registered first when pp-numbers are preferred)
template <typename IteratorT, typename PositionT>
-typename lexer_base<IteratorT, PositionT>::lexer_data const
-lexer<IteratorT, PositionT>::init_data_pp_number[INIT_DATA_PP_NUMBER_SIZE] =
+typename lexer_base<IteratorT, PositionT>::lexer_data const
+lexer<IteratorT, PositionT>::init_data_pp_number[INIT_DATA_PP_NUMBER_SIZE] =
{
TOKEN_DATA(PP_NUMBER, PP_NUMBERDEF),
{ token_id(0) } // this should be the last entry
#define T_EXTRAWSTRINGLIT token_id(T_RAWSTRINGLIT|AltTokenType)
template <typename IteratorT, typename PositionT>
-typename lexer_base<IteratorT, PositionT>::lexer_data const
-lexer<IteratorT, PositionT>::init_data_cpp0x[INIT_DATA_CPP0X_SIZE] =
+typename lexer_base<IteratorT, PositionT>::lexer_data const
+lexer<IteratorT, PositionT>::init_data_cpp0x[INIT_DATA_CPP0X_SIZE] =
{
- TOKEN_DATA(EXTCHARLIT, EXTCHAR_SPEC "'"
+ TOKEN_DATA(EXTCHARLIT, EXTCHAR_SPEC "'"
"(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\n\\r\\\\']" ")+" "'"),
- TOKEN_DATA(EXTSTRINGLIT, EXTCHAR_SPEC Q("\"")
+ TOKEN_DATA(EXTSTRINGLIT, EXTCHAR_SPEC Q("\"")
"(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\n\\r\\\\\"]" ")*" Q("\"")),
- TOKEN_DATA(RAWSTRINGLIT, CHAR_SPEC "R" Q("\"")
+ TOKEN_DATA(RAWSTRINGLIT, CHAR_SPEC "R" Q("\"")
"(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\\\\"]" ")*" Q("\"")),
- TOKEN_DATA(EXTRAWSTRINGLIT, EXTCHAR_SPEC "R" Q("\"")
+ TOKEN_DATA(EXTRAWSTRINGLIT, EXTCHAR_SPEC "R" Q("\"")
"(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\\\\"]" ")*" Q("\"")),
TOKEN_DATA(ALIGNAS, "alignas"),
TOKEN_DATA(ALIGNOF, "alignof"),
#undef INTEGER
#undef FLOAT_SUFFIX
#undef CHAR_SPEC
-#undef BACKSLASH
-#undef ESCAPESEQ
-#undef HEXQUAD
+#undef BACKSLASH
+#undef ESCAPESEQ
+#undef HEXQUAD
#undef UNIVERSALCHAR
#undef PP_NUMBERDEF
// initialize cpp lexer with token data
template <typename IteratorT, typename PositionT>
inline
-lexer_base<IteratorT, PositionT>::lexer_base()
+lexer_base<IteratorT, PositionT>::lexer_base()
: base_type(NUM_LEXER_STATES)
{
}
// if pp-numbers should be preferred, insert the corresponding rule first
if (boost::wave::need_prefer_pp_numbers(lang)) {
for (int j = 0; 0 != init_data_pp_number[j].tokenid; ++j) {
- this->register_regex(init_data_pp_number[j].tokenregex,
- init_data_pp_number[j].tokenid, init_data_pp_number[j].tokencb,
+ this->register_regex(init_data_pp_number[j].tokenregex,
+ init_data_pp_number[j].tokenid, init_data_pp_number[j].tokencb,
init_data_pp_number[j].lexerstate);
}
}
-
-// if in C99 mode, some of the keywords are not valid
+
+// if in C99 mode, some of the keywords are not valid
if (!boost::wave::need_c99(lang)) {
for (int j = 0; 0 != init_data_cpp[j].tokenid; ++j) {
- this->register_regex(init_data_cpp[j].tokenregex,
- init_data_cpp[j].tokenid, init_data_cpp[j].tokencb,
+ this->register_regex(init_data_cpp[j].tokenregex,
+ init_data_cpp[j].tokenid, init_data_cpp[j].tokencb,
init_data_cpp[j].lexerstate);
}
}
-
+
// if in C++0x mode, add all new keywords
#if BOOST_WAVE_SUPPORT_CPP0X != 0
if (boost::wave::need_cpp0x(lang)) {
for (int j = 0; 0 != init_data_cpp0x[j].tokenid; ++j) {
- this->register_regex(init_data_cpp0x[j].tokenregex,
- init_data_cpp0x[j].tokenid, init_data_cpp0x[j].tokencb,
+ this->register_regex(init_data_cpp0x[j].tokenregex,
+ init_data_cpp0x[j].tokenid, init_data_cpp0x[j].tokencb,
init_data_cpp0x[j].lexerstate);
}
}
#endif
for (int i = 0; 0 != init_data[i].tokenid; ++i) {
- this->register_regex(init_data[i].tokenregex, init_data[i].tokenid,
+ this->register_regex(init_data[i].tokenregex, init_data[i].tokenid,
init_data[i].tokencb, init_data[i].lexerstate);
}
}
///////////////////////////////////////////////////////////////////////////////
// get time of last compilation of this file
template <typename IteratorT, typename PositionT>
-boost::wave::util::time_conversion_helper
+boost::wave::util::time_conversion_helper
lexer<IteratorT, PositionT>::compilation_time(__DATE__ " " __TIME__);
///////////////////////////////////////////////////////////////////////////////
} // namespace lexer
///////////////////////////////////////////////////////////////////////////////
-//
+//
template <typename IteratorT, typename PositionT>
-inline void
-init_lexer (lexer::lexer<IteratorT, PositionT> &lexer,
+inline void
+init_lexer (lexer::lexer<IteratorT, PositionT> &lexer,
boost::wave::language_support language, bool force_reinit = false)
{
if (lexer.has_compiled_dfa())
return; // nothing to do
-
+
using std::ifstream;
using std::ofstream;
using std::ios;
using std::cerr;
using std::endl;
-
+
ifstream dfa_in("wave_slex_lexer.dfa", ios::in|ios::binary);
lexer.init_dfa(language);
}
///////////////////////////////////////////////////////////////////////////////
-//
+//
// slex_functor
//
///////////////////////////////////////////////////////////////////////////////
template <typename IteratorT, typename PositionT = wave::util::file_position_type>
-class slex_functor
+class slex_functor
: public slex_input_interface<
typename lexer::lexer<IteratorT, PositionT>::token_type
>
typedef BOOST_WAVE_STRINGTYPE string_type;
typedef typename lexer::lexer<IteratorT, PositionT>::token_type token_type;
- slex_functor(IteratorT const &first_, IteratorT const &last_,
+ slex_functor(IteratorT const &first_, IteratorT const &last_,
PositionT const &pos_, boost::wave::language_support language_)
: first(first_, last_, pos_), language(language_), at_eof(false)
{
// initialize lexer dfa tables
- init_lexer(lexer, language_);
+ init_lexer(lexer, language_);
}
virtual ~slex_functor() {}
string_type token_val(value.c_str());
if (boost::wave::need_emit_contnewlines(language) ||
- T_CONTLINE != id)
+ T_CONTLINE != id)
{
- // The cast should avoid spurious warnings about missing case labels
+ // The cast should avoid spurious warnings about missing case labels
// for the other token ids.
- switch (static_cast<unsigned int>(id)) {
+ switch (static_cast<unsigned int>(id)) {
case T_IDENTIFIER:
- // test identifier characters for validity (throws if
+ // test identifier characters for validity (throws if
// invalid chars found)
if (!boost::wave::need_no_character_validation(language)) {
using boost::wave::cpplexer::impl::validate_identifier_name;
- validate_identifier_name(token_val,
- pos.get_line(), pos.get_column(), pos.get_file());
+ validate_identifier_name(token_val,
+ pos.get_line(), pos.get_column(), pos.get_file());
}
break;
case T_CHARLIT:
case T_STRINGLIT:
case T_RAWSTRINGLIT:
- // test literal characters for validity (throws if invalid
+ // test literal characters for validity (throws if invalid
// chars found)
if (boost::wave::need_convert_trigraphs(language)) {
using boost::wave::cpplexer::impl::convert_trigraphs;
- token_val = convert_trigraphs(token_val);
+ token_val = convert_trigraphs(token_val);
}
if (!boost::wave::need_no_character_validation(language)) {
using boost::wave::cpplexer::impl::validate_literal;
- validate_literal(token_val,
- pos.get_line(), pos.get_column(), pos.get_file());
+ validate_literal(token_val,
+ pos.get_line(), pos.get_column(), pos.get_file());
}
break;
if (!boost::wave::need_long_long(language)) {
// syntax error: not allowed in C++ mode
BOOST_WAVE_LEXER_THROW(
- boost::wave::cpplexer::lexing_exception,
- invalid_long_long_literal, value.c_str(),
- pos.get_line(), pos.get_column(),
+ boost::wave::cpplexer::lexing_exception,
+ invalid_long_long_literal, value.c_str(),
+ pos.get_line(), pos.get_column(),
pos.get_file().c_str());
}
break;
case T_PP_INCLUDE:
// convert to the corresponding ..._next token, if appropriate
{
- // Skip '#' and whitespace and see whether we find an
+ // Skip '#' and whitespace and see whether we find an
// 'include_next' here.
typename string_type::size_type start = value.find("include");
if (0 == value.compare(start, 12, "include_next", 12))
#endif // BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
case T_EOF:
- // T_EOF is returned as a valid token, the next call will
+ // T_EOF is returned as a valid token, the next call will
// return T_EOI, i.e. the actual end of input
at_eof = true;
token_val.clear();
return result = token_type(); // return T_EOI
}
// Copies the file name and the line number from `pos` into the position
// object returned by first.get_position(). No other field of that position
// is written here.
- void set_position(PositionT const &pos)
- {
+ void set_position(PositionT const &pos)
+ {
// set position has to change the file name and line number only
- first.get_position().set_file(pos.get_file());
- first.get_position().set_line(pos.get_line());
+ first.get_position().set_file(pos.get_file());
+ first.get_position().set_line(pos.get_line());
}
#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
- bool has_include_guards(std::string& guard_name) const
+ bool has_include_guards(std::string& guard_name) const
{ return guards.detected(guard_name); }
#endif
iterator_type last;
boost::wave::language_support language;
static lexer::lexer<IteratorT, PositionT> lexer; // needed only once
-
+
bool at_eof;
#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
template <typename IteratorT, typename PositionT>
lexer::lexer<IteratorT, PositionT> slex_functor<IteratorT, PositionT>::lexer;
-#undef T_EXTCHARLIT
-#undef T_EXTSTRINGLIT
+#undef T_EXTCHARLIT
+#undef T_EXTSTRINGLIT
#undef T_EXTRAWSTRINGLIT
///////////////////////////////////////////////////////////////////////////////
//
// The 'new_lexer' function allows the opaque generation of a new lexer object.
-// It is coupled to the iterator type to allow to decouple the lexer/iterator
+// It is coupled to the iterator type to allow to decouple the lexer/iterator
// configurations at compile time.
//
-// This function is declared inside the cpp_slex_token.hpp file, which is
+// This function is declared inside the cpp_slex_token.hpp file, which is
// referenced by the source file calling the lexer and the source file, which
-// instantiates the lex_functor. But it is defined here, so it will be
-// instantiated only while compiling the source file, which instantiates the
+// instantiates the lex_functor. But it is defined here, so it will be
+// instantiated only while compiling the source file, which instantiates the
// lex_functor. While the cpp_slex_token.hpp file may be included everywhere,
// this file (cpp_slex_lexer.hpp) should be included only once. This allows
-// to decouple the lexer interface from the lexer implementation and reduces
+// to decouple the lexer interface from the lexer implementation and reduces
// compilation time.
//
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
-//
+//
// The new_lexer_gen<>::new_lexer function (declared in cpp_slex_token.hpp)
-// should be defined inline, if the lex_functor shouldn't be instantiated
+// should be defined inline, if the lex_functor shouldn't be instantiated
// separately from the lex_iterator.
//
// Separate (explicit) instantiation helps to reduce compilation time.
#define BOOST_WAVE_SLEX_NEW_LEXER_INLINE
#else
#define BOOST_WAVE_SLEX_NEW_LEXER_INLINE inline
-#endif
+#endif
// Factory for the SLex based lexer: allocates a slex_functor with `new`,
// forwarding the input range [first, last), the start position and the
// language flags to its constructor, and returns it through the
// lex_input_interface pointer type.
// BOOST_WAVE_SLEX_NEW_LEXER_INLINE expands to `inline` or to nothing.
// NOTE(review): the returned object is heap allocated and not released here;
// presumably the caller takes ownership -- confirm against the call site.
template <typename IteratorT, typename PositionT>
BOOST_WAVE_SLEX_NEW_LEXER_INLINE
lex_input_interface<slex_token<PositionT> > *
new_lexer_gen<IteratorT, PositionT>::new_lexer(IteratorT const &first,
- IteratorT const &last, PositionT const &pos,
+ IteratorT const &last, PositionT const &pos,
boost::wave::language_support language)
{
- return new slex_functor<IteratorT, PositionT>(first, last, pos,
+ return new slex_functor<IteratorT, PositionT>(first, last, pos,
language);
}
} // namespace cpplexer
} // namespace wave
} // namespace boost
-
+
#endif // !defined(SLEX_LEXER_HPP_5E8E1DF0_BB41_4938_B7E5_A4BB68222FF6_INCLUDED)