]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/libs/wave/samples/token_statistics/xlex/xlex_lexer.hpp
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / boost / libs / wave / samples / token_statistics / xlex / xlex_lexer.hpp
1 /*=============================================================================
2 Boost.Wave: A Standard compliant C++ preprocessor library
3
4 Xpressive based C++ lexer
5
6 http://www.boost.org/
7
8 Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost
9 Software License, Version 1.0. (See accompanying file
10 LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
11 =============================================================================*/
12
13 #if !defined(XLEX_LEXER_HPP)
14 #define XLEX_LEXER_HPP
15
16 #include <string>
17 #include <cstdio>
18 #include <cstdarg>
19 #if defined(BOOST_SPIRIT_DEBUG)
20 #include <iostream>
21 #endif // defined(BOOST_SPIRIT_DEBUG)
22
23 #include <boost/concept_check.hpp>
24 #include <boost/assert.hpp>
25 #include <boost/spirit/include/classic_core.hpp>
26
27 #include <boost/wave/token_ids.hpp>
28 #include <boost/wave/language_support.hpp>
29 #include <boost/wave/util/file_position.hpp>
30 #include <boost/wave/cpplexer/validate_universal_char.hpp>
31 #include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
32 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
33 #include <boost/wave/cpplexer/detect_include_guards.hpp>
34 #endif
35 #include <boost/wave/cpplexer/cpp_lex_interface.hpp>
36
37 // reuse the default token type
38 #include "../xlex_iterator.hpp"
39
40 // include the xpressive headers
41 #include "xpressive_lexer.hpp"
42
43 ///////////////////////////////////////////////////////////////////////////////
44 namespace boost {
45 namespace wave {
46 namespace cpplexer {
47 namespace xlex {
48 namespace lexer {
49
50 ///////////////////////////////////////////////////////////////////////////////
51 //
52 // encapsulation of the xpressive based C++ lexer
53 //
54 ///////////////////////////////////////////////////////////////////////////////
55
56 template <
57 typename Iterator,
58 typename Position = boost::wave::util::file_position_type
59 >
60 class lexer
61 {
62 public:
63 typedef char char_type;
64 typedef boost::wave::cpplexer::lex_token<Position> token_type;
65 typedef typename token_type::string_type string_type;
66
67 lexer(Iterator const &first, Iterator const &last,
68 Position const &pos, boost::wave::language_support language);
69 ~lexer() {}
70
71 token_type& get(token_type& t);
72 void set_position(Position const &pos)
73 {
74 // set position has to change the file name and line number only
75 filename = pos.get_file();
76 line = pos.get_line();
77 }
78
79 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
80 bool has_include_guards(std::string& guard_name) const
81 { return guards.detected(guard_name); }
82 #endif
83
84 private:
85 typedef xpressive_lexer<Iterator, token_id> lexer_type;
86 typedef typename lexer_type::callback_type callback_type;
87
88 lexer_type xlexer;
89 Iterator first;
90 Iterator last;
91
92 string_type filename;
93 int line;
94 bool at_eof;
95 boost::wave::language_support language;
96
97 // initialization data (regular expressions for the token definitions)
98 struct lexer_data {
99 token_id tokenid; // token data
100 char_type const *tokenregex; // associated token to match
101 callback_type tokencb; // associated callback function
102 };
103
104 static lexer_data const init_data[]; // common patterns
105 static lexer_data const init_data_cpp[]; // C++ only patterns
106
107 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
108 boost::wave::cpplexer::include_guards<token_type> guards;
109 #endif
110 };
111
112 ///////////////////////////////////////////////////////////////////////////////
113 // helper for initializing token data
// Each macro expands to one lexer_data aggregate initializer:
// { token id, regular expression, callback }. TOKEN_DATA leaves the
// callback slot zero-initialized, TOKEN_DATA_EX takes it from the caller.
#define TOKEN_DATA(id, regex)                   { id, regex, 0 }
#define TOKEN_DATA_EX(id, regex, callback)      { id, regex, callback }
119
120 ///////////////////////////////////////////////////////////////////////////////
121 // data required for initialization of the lexer (token definitions)
// Building blocks: alternation, escaping of the first character of 'c', and
// a trigraph prefix (two escaped question marks) followed by 'c'.
#define OR                  "|"
#define Q(c)                "\\" c
#define TRI(c)              Q("?") Q("?") c

// Sub-expressions shared by several token definitions.
#define BLANK               "[ \t]"
#define CCOMMENT            Q("/") Q("*") ".*?" Q("*") Q("/")

// Any run of blanks and C comments, as allowed inside a directive.
#define PPSPACE             "(" BLANK OR CCOMMENT ")*"

// Pieces of numeric literals.
#define OCTALDIGIT          "[0-7]"
#define DIGIT               "[0-9]"
#define HEXDIGIT            "[0-9a-fA-F]"
#define SIGN                "[-+]?"
#define EXPONENT            "([eE]" SIGN "[0-9]+)"

// Hexadecimal, octal (including a plain 0) or decimal integer.
#define INTEGER             "(" \
                                "(0x|0X)" HEXDIGIT "+" OR \
                                "0" OCTALDIGIT "*" OR \
                                "[1-9]" DIGIT "*" \
                            ")"

#define INTEGER_SUFFIX      "([uU][lL]?|[lL][uU]?)"
#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
// With MS extensions enabled "i64" is accepted as a suffix as well.
#define LONGINTEGER_SUFFIX  "([uU]([lL][lL])" OR \
                            "([lL][lL])[uU]?" OR \
                            "i64" \
                            ")"
#else
#define LONGINTEGER_SUFFIX  "([uU]([lL][lL])" OR \
                            "([lL][lL])[uU]?)"
#endif
#define FLOAT_SUFFIX        "([fF][lL]?|[lL][fF]?)"
#define CHAR_SPEC           "L?"

// A backslash, written either directly or as the trigraph ??/.
#define BACKSLASH           "(" Q("\\") OR TRI(Q("/")) ")"
#define ESCAPESEQ           BACKSLASH "(" \
                                "[abfnrtv?'\"]" OR \
                                BACKSLASH OR \
                                "x" HEXDIGIT "+" OR \
                                OCTALDIGIT OCTALDIGIT "?" OCTALDIGIT "?" \
                            ")"
#define HEXQUAD             HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
#define UNIVERSALCHAR       BACKSLASH "(" \
                                "u" HEXQUAD OR \
                                "U" HEXQUAD HEXQUAD \
                            ")"

// Start of a directive (#, ??= or %:) and the accepted line endings.
#define POUNDDEF            "(" "#" OR TRI("=") OR Q("%:") ")"
#define NEWLINEDEF          "(" "\n" OR "\r\n" OR "\r" ")"

#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
#define INCLUDEDEF          "(include_next|include)"
#else
#define INCLUDEDEF          "include"
#endif
178
179 ///////////////////////////////////////////////////////////////////////////////
180 // common C++/C99 token definitions
181 template <typename Iterator, typename Position>
182 typename lexer<Iterator, Position>::lexer_data const
183 lexer<Iterator, Position>::init_data[] =
184 {
185 TOKEN_DATA(T_CCOMMENT, CCOMMENT),
186 TOKEN_DATA(T_CPPCOMMENT, Q("/") Q("/.*?") NEWLINEDEF ),
187 TOKEN_DATA(T_CHARLIT, CHAR_SPEC "'"
188 "(" ESCAPESEQ OR "[^\n\r']" OR UNIVERSALCHAR ")+" "'"),
189 TOKEN_DATA(T_STRINGLIT, CHAR_SPEC Q("\"")
190 "(" ESCAPESEQ OR "[^\n\r\"]" OR UNIVERSALCHAR ")*" Q("\"")),
191 TOKEN_DATA(T_ANDAND, "&&"),
192 TOKEN_DATA(T_ANDASSIGN, "&="),
193 TOKEN_DATA(T_AND, "&"),
194 TOKEN_DATA(T_EQUAL, "=="),
195 TOKEN_DATA(T_ASSIGN, "="),
196 TOKEN_DATA(T_ORASSIGN, Q("|=")),
197 TOKEN_DATA(T_ORASSIGN_TRIGRAPH, TRI("!=")),
198 TOKEN_DATA(T_OROR, Q("|") Q("|")),
199 TOKEN_DATA(T_OROR_TRIGRAPH, TRI("!") Q("|") OR Q("|") TRI("!") OR TRI("!") TRI("!")),
200 TOKEN_DATA(T_OR, Q("|")),
201 TOKEN_DATA(T_OR_TRIGRAPH, TRI("!")),
202 TOKEN_DATA(T_XORASSIGN, Q("^=")),
203 TOKEN_DATA(T_XORASSIGN_TRIGRAPH, TRI("'=")),
204 TOKEN_DATA(T_XOR, Q("^")),
205 TOKEN_DATA(T_XOR_TRIGRAPH, TRI("'")),
206 TOKEN_DATA(T_COMMA, ","),
207 TOKEN_DATA(T_RIGHTBRACKET_ALT, ":>"),
208 TOKEN_DATA(T_COLON, ":"),
209 TOKEN_DATA(T_DIVIDEASSIGN, Q("/=")),
210 TOKEN_DATA(T_DIVIDE, Q("/")),
211 TOKEN_DATA(T_ELLIPSIS, Q(".") Q(".") Q(".")),
212 TOKEN_DATA(T_SHIFTRIGHTASSIGN, ">>="),
213 TOKEN_DATA(T_SHIFTRIGHT, ">>"),
214 TOKEN_DATA(T_GREATEREQUAL, ">="),
215 TOKEN_DATA(T_GREATER, ">"),
216 TOKEN_DATA(T_LEFTBRACE, Q("{")),
217 TOKEN_DATA(T_SHIFTLEFTASSIGN, "<<="),
218 TOKEN_DATA(T_SHIFTLEFT, "<<"),
219 TOKEN_DATA(T_LEFTBRACE_ALT, "<" Q("%")),
220 TOKEN_DATA(T_LESSEQUAL, "<="),
221 TOKEN_DATA(T_LEFTBRACKET_ALT, "<:"),
222 TOKEN_DATA(T_LESS, "<"),
223 TOKEN_DATA(T_LEFTBRACE_TRIGRAPH, TRI("<")),
224 TOKEN_DATA(T_LEFTPAREN, Q("(")),
225 TOKEN_DATA(T_LEFTBRACKET, Q("[")),
226 TOKEN_DATA(T_LEFTBRACKET_TRIGRAPH, TRI(Q("("))),
227 TOKEN_DATA(T_MINUSMINUS, Q("-") Q("-")),
228 TOKEN_DATA(T_MINUSASSIGN, Q("-=")),
229 TOKEN_DATA(T_ARROW, Q("->")),
230 TOKEN_DATA(T_MINUS, Q("-")),
231 TOKEN_DATA(T_POUND_POUND_ALT, Q("%:") Q("%:")),
232 TOKEN_DATA(T_PERCENTASSIGN, Q("%=")),
233 TOKEN_DATA(T_RIGHTBRACE_ALT, Q("%>")),
234 TOKEN_DATA(T_POUND_ALT, Q("%:")),
235 TOKEN_DATA(T_PERCENT, Q("%")),
236 TOKEN_DATA(T_NOTEQUAL, "!="),
237 TOKEN_DATA(T_NOT, "!"),
238 TOKEN_DATA(T_PLUSASSIGN, Q("+=")),
239 TOKEN_DATA(T_PLUSPLUS, Q("+") Q("+")),
240 TOKEN_DATA(T_PLUS, Q("+")),
241 TOKEN_DATA(T_RIGHTBRACE, Q("}")),
242 TOKEN_DATA(T_RIGHTBRACE_TRIGRAPH, TRI(">")),
243 TOKEN_DATA(T_RIGHTPAREN, Q(")")),
244 TOKEN_DATA(T_RIGHTBRACKET, Q("]")),
245 TOKEN_DATA(T_RIGHTBRACKET_TRIGRAPH, TRI(Q(")"))),
246 TOKEN_DATA(T_SEMICOLON, ";"),
247 TOKEN_DATA(T_STARASSIGN, Q("*=")),
248 TOKEN_DATA(T_STAR, Q("*")),
249 TOKEN_DATA(T_COMPL, Q("~")),
250 TOKEN_DATA(T_COMPL_TRIGRAPH, TRI("-")),
251 TOKEN_DATA(T_ASM, "asm"),
252 TOKEN_DATA(T_AUTO, "auto"),
253 TOKEN_DATA(T_BOOL, "bool"),
254 TOKEN_DATA(T_FALSE, "false"),
255 TOKEN_DATA(T_TRUE, "true"),
256 TOKEN_DATA(T_BREAK, "break"),
257 TOKEN_DATA(T_CASE, "case"),
258 TOKEN_DATA(T_CATCH, "catch"),
259 TOKEN_DATA(T_CHAR, "char"),
260 TOKEN_DATA(T_CLASS, "class"),
261 TOKEN_DATA(T_CONSTCAST, "const_cast"),
262 TOKEN_DATA(T_CONST, "const"),
263 TOKEN_DATA(T_CONTINUE, "continue"),
264 TOKEN_DATA(T_DEFAULT, "default"),
265 TOKEN_DATA(T_DELETE, "delete"),
266 TOKEN_DATA(T_DOUBLE, "double"),
267 TOKEN_DATA(T_DO, "do"),
268 TOKEN_DATA(T_DYNAMICCAST, "dynamic_cast"),
269 TOKEN_DATA(T_ELSE, "else"),
270 TOKEN_DATA(T_ENUM, "enum"),
271 TOKEN_DATA(T_EXPLICIT, "explicit"),
272 TOKEN_DATA(T_EXPORT, "export"),
273 TOKEN_DATA(T_EXTERN, "extern"),
274 TOKEN_DATA(T_FLOAT, "float"),
275 TOKEN_DATA(T_FOR, "for"),
276 TOKEN_DATA(T_FRIEND, "friend"),
277 TOKEN_DATA(T_GOTO, "goto"),
278 TOKEN_DATA(T_IF, "if"),
279 TOKEN_DATA(T_INLINE, "inline"),
280 TOKEN_DATA(T_INT, "int"),
281 TOKEN_DATA(T_LONG, "long"),
282 TOKEN_DATA(T_MUTABLE, "mutable"),
283 TOKEN_DATA(T_NAMESPACE, "namespace"),
284 TOKEN_DATA(T_NEW, "new"),
285 TOKEN_DATA(T_OPERATOR, "operator"),
286 TOKEN_DATA(T_PRIVATE, "private"),
287 TOKEN_DATA(T_PROTECTED, "protected"),
288 TOKEN_DATA(T_PUBLIC, "public"),
289 TOKEN_DATA(T_REGISTER, "register"),
290 TOKEN_DATA(T_REINTERPRETCAST, "reinterpret_cast"),
291 TOKEN_DATA(T_RETURN, "return"),
292 TOKEN_DATA(T_SHORT, "short"),
293 TOKEN_DATA(T_SIGNED, "signed"),
294 TOKEN_DATA(T_SIZEOF, "sizeof"),
295 TOKEN_DATA(T_STATICCAST, "static_cast"),
296 TOKEN_DATA(T_STATIC, "static"),
297 TOKEN_DATA(T_STRUCT, "struct"),
298 TOKEN_DATA(T_SWITCH, "switch"),
299 TOKEN_DATA(T_TEMPLATE, "template"),
300 TOKEN_DATA(T_THIS, "this"),
301 TOKEN_DATA(T_THROW, "throw"),
302 TOKEN_DATA(T_TRY, "try"),
303 TOKEN_DATA(T_TYPEDEF, "typedef"),
304 TOKEN_DATA(T_TYPEID, "typeid"),
305 TOKEN_DATA(T_TYPENAME, "typename"),
306 TOKEN_DATA(T_UNION, "union"),
307 TOKEN_DATA(T_UNSIGNED, "unsigned"),
308 TOKEN_DATA(T_USING, "using"),
309 TOKEN_DATA(T_VIRTUAL, "virtual"),
310 TOKEN_DATA(T_VOID, "void"),
311 TOKEN_DATA(T_VOLATILE, "volatile"),
312 TOKEN_DATA(T_WCHART, "wchar_t"),
313 TOKEN_DATA(T_WHILE, "while"),
314 #if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
315 TOKEN_DATA(T_MSEXT_INT8, "__int8"),
316 TOKEN_DATA(T_MSEXT_INT16, "__int16"),
317 TOKEN_DATA(T_MSEXT_INT32, "__int32"),
318 TOKEN_DATA(T_MSEXT_INT64, "__int64"),
319 TOKEN_DATA(T_MSEXT_BASED, "_?" "_based"),
320 TOKEN_DATA(T_MSEXT_DECLSPEC, "_?" "_declspec"),
321 TOKEN_DATA(T_MSEXT_CDECL, "_?" "_cdecl"),
322 TOKEN_DATA(T_MSEXT_FASTCALL, "_?" "_fastcall"),
323 TOKEN_DATA(T_MSEXT_STDCALL, "_?" "_stdcall"),
324 TOKEN_DATA(T_MSEXT_TRY , "__try"),
325 TOKEN_DATA(T_MSEXT_EXCEPT, "__except"),
326 TOKEN_DATA(T_MSEXT_FINALLY, "__finally"),
327 TOKEN_DATA(T_MSEXT_LEAVE, "__leave"),
328 TOKEN_DATA(T_MSEXT_INLINE, "_?" "_inline"),
329 TOKEN_DATA(T_MSEXT_ASM, "_?" "_asm"),
330 TOKEN_DATA(T_MSEXT_PP_REGION, POUNDDEF PPSPACE "region"),
331 TOKEN_DATA(T_MSEXT_PP_ENDREGION, POUNDDEF PPSPACE "endregion"),
332 #endif // BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
333 TOKEN_DATA(T_PP_DEFINE, POUNDDEF PPSPACE "define"),
334 TOKEN_DATA(T_PP_IFDEF, POUNDDEF PPSPACE "ifdef"),
335 TOKEN_DATA(T_PP_IFNDEF, POUNDDEF PPSPACE "ifndef"),
336 TOKEN_DATA(T_PP_IF, POUNDDEF PPSPACE "if"),
337 TOKEN_DATA(T_PP_ELSE, POUNDDEF PPSPACE "else"),
338 TOKEN_DATA(T_PP_ELIF, POUNDDEF PPSPACE "elif"),
339 TOKEN_DATA(T_PP_ENDIF, POUNDDEF PPSPACE "endif"),
340 TOKEN_DATA(T_PP_ERROR, POUNDDEF PPSPACE "error"),
341 TOKEN_DATA(T_PP_QHEADER, POUNDDEF PPSPACE \
342 INCLUDEDEF PPSPACE Q("\"") "[^\n\r\"]+" Q("\"")),
343 TOKEN_DATA(T_PP_HHEADER, POUNDDEF PPSPACE \
344 INCLUDEDEF PPSPACE "<" "[^\n\r>]+" ">"),
345 TOKEN_DATA(T_PP_INCLUDE, POUNDDEF PPSPACE \
346 INCLUDEDEF PPSPACE),
347 TOKEN_DATA(T_PP_LINE, POUNDDEF PPSPACE "line"),
348 TOKEN_DATA(T_PP_PRAGMA, POUNDDEF PPSPACE "pragma"),
349 TOKEN_DATA(T_PP_UNDEF, POUNDDEF PPSPACE "undef"),
350 TOKEN_DATA(T_PP_WARNING, POUNDDEF PPSPACE "warning"),
351 TOKEN_DATA(T_FLOATLIT,
352 "(" DIGIT "*" Q(".") DIGIT "+" OR DIGIT "+" Q(".") ")"
353 EXPONENT "?" FLOAT_SUFFIX "?" OR
354 DIGIT "+" EXPONENT FLOAT_SUFFIX "?"),
355 TOKEN_DATA(T_LONGINTLIT, INTEGER LONGINTEGER_SUFFIX),
356 TOKEN_DATA(T_INTLIT, INTEGER INTEGER_SUFFIX "?"),
357 #if BOOST_WAVE_USE_STRICT_LEXER != 0
358 TOKEN_DATA(T_IDENTIFIER, "([a-zA-Z_]" OR UNIVERSALCHAR ")([a-zA-Z0-9_]" OR UNIVERSALCHAR ")*"),
359 #else
360 TOKEN_DATA(T_IDENTIFIER, "([a-zA-Z_$]" OR UNIVERSALCHAR ")([a-zA-Z0-9_$]" OR UNIVERSALCHAR ")*"),
361 #endif
362 TOKEN_DATA(T_SPACE, BLANK "+"),
363 TOKEN_DATA(T_SPACE2, "[\v\f]+"),
364 TOKEN_DATA(T_CONTLINE, Q("\\") "\n"),
365 TOKEN_DATA(T_NEWLINE, NEWLINEDEF),
366 TOKEN_DATA(T_POUND_POUND, "##"),
367 TOKEN_DATA(T_POUND_POUND_TRIGRAPH, TRI("=") TRI("=")),
368 TOKEN_DATA(T_POUND, "#"),
369 TOKEN_DATA(T_POUND_TRIGRAPH, TRI("=")),
370 TOKEN_DATA(T_ANY_TRIGRAPH, TRI(Q("/"))),
371 TOKEN_DATA(T_QUESTION_MARK, Q("?")),
372 TOKEN_DATA(T_DOT, Q(".")),
373 TOKEN_DATA(T_ANY, "."),
374 { token_id(0) } // this should be the last entry
375 };
376
377 ///////////////////////////////////////////////////////////////////////////////
378 // C++ only token definitions
379 template <typename Iterator, typename Position>
380 typename lexer<Iterator, Position>::lexer_data const
381 lexer<Iterator, Position>::init_data_cpp[] =
382 {
383 TOKEN_DATA(T_AND_ALT, "bitand"),
384 TOKEN_DATA(T_ANDASSIGN_ALT, "and_eq"),
385 TOKEN_DATA(T_ANDAND_ALT, "and"),
386 TOKEN_DATA(T_OR_ALT, "bitor"),
387 TOKEN_DATA(T_ORASSIGN_ALT, "or_eq"),
388 TOKEN_DATA(T_OROR_ALT, "or"),
389 TOKEN_DATA(T_XORASSIGN_ALT, "xor_eq"),
390 TOKEN_DATA(T_XOR_ALT, "xor"),
391 TOKEN_DATA(T_NOTEQUAL_ALT, "not_eq"),
392 TOKEN_DATA(T_NOT_ALT, "not"),
393 TOKEN_DATA(T_COMPL_ALT, "compl"),
394 TOKEN_DATA(T_ARROWSTAR, Q("->") Q("*")),
395 TOKEN_DATA(T_DOTSTAR, Q(".") Q("*")),
396 TOKEN_DATA(T_COLON_COLON, "::"),
397 { token_id(0) } // this should be the last entry
398 };
399
400 ///////////////////////////////////////////////////////////////////////////////
401 // undefine macros, required for regular expression definitions
// Remove every helper macro introduced above for the token tables so that
// none of these short, generic names leaks into code including this header.
// BLANK and NEWLINEDEF are part of that set and have to be removed as well.
#undef INCLUDEDEF
#undef POUNDDEF
#undef NEWLINEDEF
#undef BLANK
#undef CCOMMENT
#undef PPSPACE
#undef DIGIT
#undef OCTALDIGIT
#undef HEXDIGIT
#undef SIGN
#undef EXPONENT
#undef LONGINTEGER_SUFFIX
#undef INTEGER_SUFFIX
#undef INTEGER
#undef FLOAT_SUFFIX
#undef CHAR_SPEC
#undef BACKSLASH
#undef ESCAPESEQ
#undef HEXQUAD
#undef UNIVERSALCHAR

#undef Q
#undef TRI
#undef OR

#undef TOKEN_DATA
#undef TOKEN_DATA_EX
427
428 ///////////////////////////////////////////////////////////////////////////////
429 // initialize cpp lexer
430 template <typename Iterator, typename Position>
431 inline
432 lexer<Iterator, Position>::lexer(Iterator const &first,
433 Iterator const &last, Position const &pos,
434 boost::wave::language_support language)
435 : first(first), last(last),
436 filename(pos.get_file()), line(0), at_eof(false), language(language)
437 {
438 // if in C99 mode, some of the keywords/operators are not valid
439 if (!boost::wave::need_c99(language)) {
440 for (int j = 0; 0 != init_data_cpp[j].tokenid; ++j) {
441 xlexer.register_regex(init_data_cpp[j].tokenregex,
442 init_data_cpp[j].tokenid, init_data_cpp[j].tokencb);
443 }
444 }
445
446 // tokens valid for C++ and C99
447 for (int i = 0; 0 != init_data[i].tokenid; ++i) {
448 xlexer.register_regex(init_data[i].tokenregex, init_data[i].tokenid,
449 init_data[i].tokencb);
450 }
451 }
452
453 ///////////////////////////////////////////////////////////////////////////////
454 // get the next token from the input stream
455 template <typename Iterator, typename Position>
456 inline boost::wave::cpplexer::lex_token<Position>&
457 lexer<Iterator, Position>::get(boost::wave::cpplexer::lex_token<Position>& t)
458 {
459 using namespace boost::wave; // to import token ids to this scope
460
461 if (at_eof)
462 return t = cpplexer::lex_token<Position>(); // return T_EOI
463
464 std::string tokval;
465 token_id id = xlexer.next_token(first, last, tokval);
466 string_type value = tokval.c_str();
467
468 if ((token_id)(-1) == id)
469 id = T_EOF; // end of input reached
470
471 if (T_IDENTIFIER == id) {
472 // test identifier characters for validity (throws if invalid chars found)
473 if (!boost::wave::need_no_character_validation(language)) {
474 cpplexer::impl::validate_identifier_name(value, line, -1, filename);
475 }
476 }
477 else if (T_STRINGLIT == id || T_CHARLIT == id) {
478 // test literal characters for validity (throws if invalid chars found)
479 if (!boost::wave::need_no_character_validation(language)) {
480 cpplexer::impl::validate_literal(value, line, -1, filename);
481 }
482 }
483 else if (T_EOF == id) {
484 // T_EOF is returned as a valid token, the next call will return T_EOI,
485 // i.e. the actual end of input
486 at_eof = true;
487 value.clear();
488 }
489
490 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
491 cpplexer::lex_token<Position> tok(id, value, Position(filename, line, -1));
492 return t = guards.detect_guard(tok);
493 #else
494 return t = cpplexer::lex_token<Position>(id, value,
495 Position(filename, line, -1));
496 #endif
497 }
498
499 ///////////////////////////////////////////////////////////////////////////////
500 //
501 // lex_functor
502 //
503 ///////////////////////////////////////////////////////////////////////////////
504 template <
505 typename Iterator,
506 typename Position = boost::wave::util::file_position_type
507 >
508 class xlex_functor
509 : public xlex_input_interface<typename lexer<Iterator, Position>::token_type>
510 {
511 public:
512
513 typedef typename lexer<Iterator, Position>::token_type token_type;
514
515 xlex_functor(Iterator const &first, Iterator const &last,
516 Position const &pos, boost::wave::language_support language)
517 : lexer_(first, last, pos, language)
518 {}
519 virtual ~xlex_functor() {}
520
521 // get the next token from the input stream
522 token_type& get(token_type& t) { return lexer_.get(t); }
523 void set_position(Position const &pos) { lexer_.set_position(pos); }
524
525 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
526 bool has_include_guards(std::string& guard_name) const
527 { return lexer_.has_include_guards(guard_name); }
528 #endif
529
530 private:
531 lexer<Iterator, Position> lexer_;
532 };
533
534 } // namespace lexer
535
536 ///////////////////////////////////////////////////////////////////////////////
537 //
538 // The new_lexer_gen<>::new_lexer function (declared in xlex_interface.hpp)
539 // should be defined inline, if the xlex_functor shouldn't be instantiated
540 // separately from the lex_iterator.
541 //
542 // Separate (explicit) instantiation helps to reduce compilation time.
543 //
544 ///////////////////////////////////////////////////////////////////////////////
545
// Expands to 'inline' unless separate lexer instantiation is requested, in
// which case it expands to nothing.
#if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION == 0
#define BOOST_WAVE_XLEX_NEW_LEXER_INLINE inline
#else
#define BOOST_WAVE_XLEX_NEW_LEXER_INLINE
#endif
551
552 ///////////////////////////////////////////////////////////////////////////////
553 //
554 // The 'new_lexer' function allows the opaque generation of a new lexer object.
555 // It is coupled to the iterator type to allow to decouple the lexer/iterator
556 // configurations at compile time.
557 //
558 // This function is declared inside the xlex_interface.hpp file, which is
559 // referenced by the source file calling the lexer and the source file, which
560 // instantiates the lex_functor. But it is defined here, so it will be
561 // instantiated only while compiling the source file, which instantiates the
562 // lex_functor. While the xlex_interface.hpp file may be included everywhere,
563 // this file (xlex_lexer.hpp) should be included only once. This allows
564 // to decouple the lexer interface from the lexer implementation and reduces
565 // compilation time.
566 //
567 ///////////////////////////////////////////////////////////////////////////////
568
569 template <typename Iterator, typename Position>
570 BOOST_WAVE_XLEX_NEW_LEXER_INLINE
571 lex_input_interface<boost::wave::cpplexer::lex_token<Position> > *
572 new_lexer_gen<Iterator, Position>::new_lexer(Iterator const &first,
573 Iterator const &last, Position const &pos,
574 wave::language_support language)
575 {
576 return new lexer::xlex_functor<Iterator, Position>(
577 first, last, pos, language);
578 }
579
580 #undef BOOST_WAVE_XLEX_NEW_LEXER_INLINE
581
582 ///////////////////////////////////////////////////////////////////////////////
583 } // namespace xlex
584 } // namespace cpplexer
585 } // namespace wave
586 } // namespace boost
587
588 #endif // !defined(XLEX_LEXER_HPP)