]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/boost/wave/cpplexer/re2clex/cpp_re2c_lexer.hpp
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / boost / boost / wave / cpplexer / re2clex / cpp_re2c_lexer.hpp
CommitLineData
7c673cae
FG
1/*=============================================================================
2 Boost.Wave: A Standard compliant C++ preprocessor library
3
4 Re2C based C++ lexer
5
6 http://www.boost.org/
7
8 Copyright (c) 2001-2012 Hartmut Kaiser. Distributed under the Boost
9 Software License, Version 1.0. (See accompanying file
10 LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
11=============================================================================*/
12
13#if !defined(CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED)
14#define CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED
15
16#include <string>
17#include <cstdio>
18#include <cstdarg>
19#if defined(BOOST_SPIRIT_DEBUG)
20#include <iostream>
21#endif // defined(BOOST_SPIRIT_DEBUG)
22
23#include <boost/concept_check.hpp>
24#include <boost/assert.hpp>
25#include <boost/spirit/include/classic_core.hpp>
26
27#include <boost/wave/wave_config.hpp>
28#include <boost/wave/language_support.hpp>
29#include <boost/wave/token_ids.hpp>
30#include <boost/wave/util/file_position.hpp>
31#include <boost/wave/cpplexer/validate_universal_char.hpp>
32#include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
33#include <boost/wave/cpplexer/token_cache.hpp>
34#include <boost/wave/cpplexer/convert_trigraphs.hpp>
35
36#include <boost/wave/cpplexer/cpp_lex_interface.hpp>
37#include <boost/wave/cpplexer/re2clex/scanner.hpp>
38#include <boost/wave/cpplexer/re2clex/cpp_re.hpp>
39#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
40#include <boost/wave/cpplexer/detect_include_guards.hpp>
41#endif
42
43#include <boost/wave/cpplexer/cpp_lex_interface_generator.hpp>
44
45// this must occur after all of the includes and before any code appears
46#ifdef BOOST_HAS_ABI_HEADERS
47#include BOOST_ABI_PREFIX
48#endif
49
50///////////////////////////////////////////////////////////////////////////////
51namespace boost {
52namespace wave {
53namespace cpplexer {
54namespace re2clex {
55
56///////////////////////////////////////////////////////////////////////////////
57//
58// encapsulation of the re2c based cpp lexer
59//
60///////////////////////////////////////////////////////////////////////////////
61
62template <typename IteratorT,
63 typename PositionT = boost::wave::util::file_position_type,
64 typename TokenT = lex_token<PositionT> >
65class lexer
66{
67public:
68 typedef TokenT token_type;
69 typedef typename token_type::string_type string_type;
70
71 lexer(IteratorT const &first, IteratorT const &last,
72 PositionT const &pos, boost::wave::language_support language_);
73 ~lexer();
74
75 token_type& get(token_type&);
76 void set_position(PositionT const &pos)
77 {
78 // set position has to change the file name and line number only
79 filename = pos.get_file();
80 scanner.line = pos.get_line();
81// scanner.column = scanner.curr_column = pos.get_column();
82 scanner.file_name = filename.c_str();
83 }
84#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
85 bool has_include_guards(std::string& guard_name) const
86 {
87 return guards.detected(guard_name);
88 }
89#endif
90
91// error reporting from the re2c generated lexer
11fdf7f2 92 static int report_error(Scanner<IteratorT> const* s, int code, char const *, ...);
7c673cae
FG
93
94private:
95 static char const *tok_names[];
96
11fdf7f2 97 Scanner<IteratorT> scanner;
7c673cae
FG
98 string_type filename;
99 string_type value;
100 bool at_eof;
101 boost::wave::language_support language;
102#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
103 include_guards<token_type> guards;
104#endif
105
106#if BOOST_WAVE_SUPPORT_THREADING == 0
107 static token_cache<string_type> const cache;
108#else
109 token_cache<string_type> const cache;
110#endif
111};
112
113///////////////////////////////////////////////////////////////////////////////
114// initialize cpp lexer
115template <typename IteratorT, typename PositionT, typename TokenT>
116inline
117lexer<IteratorT, PositionT, TokenT>::lexer(IteratorT const &first,
118 IteratorT const &last, PositionT const &pos,
119 boost::wave::language_support language_)
11fdf7f2
TL
120 : scanner(first, last),
121 filename(pos.get_file()), at_eof(false), language(language_)
7c673cae
FG
122#if BOOST_WAVE_SUPPORT_THREADING != 0
123 , cache()
124#endif
125{
126 using namespace std; // some systems have memset in std
7c673cae
FG
127 scanner.line = pos.get_line();
128 scanner.column = scanner.curr_column = pos.get_column();
129 scanner.error_proc = report_error;
130 scanner.file_name = filename.c_str();
131
132#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
133 scanner.enable_ms_extensions = true;
134#else
135 scanner.enable_ms_extensions = false;
136#endif
137
138#if BOOST_WAVE_SUPPORT_VARIADICS_PLACEMARKERS != 0
139 scanner.act_in_c99_mode = boost::wave::need_c99(language_);
140#endif
141
142#if BOOST_WAVE_SUPPORT_IMPORT_KEYWORD != 0
143 scanner.enable_import_keyword = !boost::wave::need_c99(language_);
144#else
145 scanner.enable_import_keyword = false;
146#endif
147
148 scanner.detect_pp_numbers = boost::wave::need_prefer_pp_numbers(language_);
149 scanner.single_line_only = boost::wave::need_single_line(language_);
150
151#if BOOST_WAVE_SUPPORT_CPP0X != 0
152 scanner.act_in_cpp0x_mode = boost::wave::need_cpp0x(language_);
153#else
154 scanner.act_in_cpp0x_mode = false;
155#endif
156}
157
158template <typename IteratorT, typename PositionT, typename TokenT>
159inline
160lexer<IteratorT, PositionT, TokenT>::~lexer()
161{
162 using namespace std; // some systems have free in std
7c673cae
FG
163 free(scanner.bot);
164}
165
166///////////////////////////////////////////////////////////////////////////////
167// get the next token from the input stream
168template <typename IteratorT, typename PositionT, typename TokenT>
169inline TokenT&
170lexer<IteratorT, PositionT, TokenT>::get(TokenT& result)
171{
172 if (at_eof)
173 return result = token_type(); // return T_EOI
174
175 std::size_t actline = scanner.line;
176 token_id id = token_id(scan(&scanner));
177
b32b8144 178 switch (id) {
7c673cae
FG
179 case T_IDENTIFIER:
180 // test identifier characters for validity (throws if invalid chars found)
181 value = string_type((char const *)scanner.tok,
182 scanner.cur-scanner.tok);
183 if (!boost::wave::need_no_character_validation(language))
184 impl::validate_identifier_name(value, actline, scanner.column, filename);
185 break;
186
187 case T_STRINGLIT:
188 case T_CHARLIT:
189 case T_RAWSTRINGLIT:
190 // test literal characters for validity (throws if invalid chars found)
191 value = string_type((char const *)scanner.tok,
192 scanner.cur-scanner.tok);
193 if (boost::wave::need_convert_trigraphs(language))
194 value = impl::convert_trigraphs(value);
195 if (!boost::wave::need_no_character_validation(language))
196 impl::validate_literal(value, actline, scanner.column, filename);
197 break;
198
7c673cae
FG
199 case T_PP_HHEADER:
200 case T_PP_QHEADER:
201 case T_PP_INCLUDE:
202 // convert to the corresponding ..._next token, if appropriate
203 {
204 value = string_type((char const *)scanner.tok,
205 scanner.cur-scanner.tok);
206
f67539c2 207#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
7c673cae
FG
208 // Skip '#' and whitespace and see whether we find an 'include_next' here.
209 typename string_type::size_type start = value.find("include");
210 if (value.compare(start, 12, "include_next", 12) == 0)
211 id = token_id(id | AltTokenType);
f67539c2 212#endif
7c673cae
FG
213 break;
214 }
7c673cae
FG
215
216 case T_LONGINTLIT: // supported in C++11, C99 and long_long mode
217 value = string_type((char const *)scanner.tok,
218 scanner.cur-scanner.tok);
219 if (!boost::wave::need_long_long(language)) {
220 // syntax error: not allowed in C++ mode
221 BOOST_WAVE_LEXER_THROW(lexing_exception, invalid_long_long_literal,
222 value.c_str(), actline, scanner.column, filename.c_str());
223 }
224 break;
225
226 case T_OCTALINT:
227 case T_DECIMALINT:
228 case T_HEXAINT:
229 case T_INTLIT:
230 case T_FLOATLIT:
231 case T_FIXEDPOINTLIT:
232 case T_CCOMMENT:
233 case T_CPPCOMMENT:
234 case T_SPACE:
235 case T_SPACE2:
236 case T_ANY:
237 case T_PP_NUMBER:
238 value = string_type((char const *)scanner.tok,
239 scanner.cur-scanner.tok);
240 break;
241
242 case T_EOF:
243 // T_EOF is returned as a valid token, the next call will return T_EOI,
244 // i.e. the actual end of input
245 at_eof = true;
246 value.clear();
247 break;
248
249 case T_OR_TRIGRAPH:
250 case T_XOR_TRIGRAPH:
251 case T_LEFTBRACE_TRIGRAPH:
252 case T_RIGHTBRACE_TRIGRAPH:
253 case T_LEFTBRACKET_TRIGRAPH:
254 case T_RIGHTBRACKET_TRIGRAPH:
255 case T_COMPL_TRIGRAPH:
256 case T_POUND_TRIGRAPH:
257 if (boost::wave::need_convert_trigraphs(language)) {
258 value = cache.get_token_value(BASEID_FROM_TOKEN(id));
259 }
260 else {
261 value = string_type((char const *)scanner.tok,
262 scanner.cur-scanner.tok);
263 }
264 break;
265
266 case T_ANY_TRIGRAPH:
267 if (boost::wave::need_convert_trigraphs(language)) {
268 value = impl::convert_trigraph(
269 string_type((char const *)scanner.tok));
270 }
271 else {
272 value = string_type((char const *)scanner.tok,
273 scanner.cur-scanner.tok);
274 }
275 break;
276
277 default:
278 if (CATEGORY_FROM_TOKEN(id) != EXTCATEGORY_FROM_TOKEN(id) ||
279 IS_CATEGORY(id, UnknownTokenType))
280 {
281 value = string_type((char const *)scanner.tok,
282 scanner.cur-scanner.tok);
283 }
284 else {
285 value = cache.get_token_value(id);
286 }
287 break;
288 }
289
290// std::cerr << boost::wave::get_token_name(id) << ": " << value << std::endl;
291
292 // the re2c lexer reports the new line number for newline tokens
293 result = token_type(id, value, PositionT(filename, actline, scanner.column));
294
295#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
296 return guards.detect_guard(result);
297#else
298 return result;
299#endif
300}
301
302template <typename IteratorT, typename PositionT, typename TokenT>
303inline int
11fdf7f2 304lexer<IteratorT, PositionT, TokenT>::report_error(Scanner<IteratorT> const *s, int errcode,
7c673cae
FG
305 char const *msg, ...)
306{
307 BOOST_ASSERT(0 != s);
308 BOOST_ASSERT(0 != msg);
309
310 using namespace std; // some system have vsprintf in namespace std
311
312 char buffer[200]; // should be large enough
313 va_list params;
314 va_start(params, msg);
315 vsprintf(buffer, msg, params);
316 va_end(params);
317
318 BOOST_WAVE_LEXER_THROW_VAR(lexing_exception, errcode, buffer, s->line,
319 s->column, s->file_name);
320// BOOST_UNREACHABLE_RETURN(0);
321 return 0;
322}
323
324///////////////////////////////////////////////////////////////////////////////
325//
326// lex_functor
327//
328///////////////////////////////////////////////////////////////////////////////
329
330template <typename IteratorT,
331 typename PositionT = boost::wave::util::file_position_type,
332 typename TokenT = typename lexer<IteratorT, PositionT>::token_type>
333class lex_functor
334: public lex_input_interface_generator<TokenT>
335{
336public:
337 typedef TokenT token_type;
338
339 lex_functor(IteratorT const &first, IteratorT const &last,
340 PositionT const &pos, boost::wave::language_support language)
341 : re2c_lexer(first, last, pos, language)
342 {}
343 virtual ~lex_functor() {}
344
345// get the next token from the input stream
346 token_type& get(token_type& result) { return re2c_lexer.get(result); }
347 void set_position(PositionT const &pos) { re2c_lexer.set_position(pos); }
348#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
349 bool has_include_guards(std::string& guard_name) const
350 { return re2c_lexer.has_include_guards(guard_name); }
351#endif
352
353private:
354 lexer<IteratorT, PositionT, TokenT> re2c_lexer;
355};
356
357#if BOOST_WAVE_SUPPORT_THREADING == 0
358///////////////////////////////////////////////////////////////////////////////
359template <typename IteratorT, typename PositionT, typename TokenT>
360token_cache<typename lexer<IteratorT, PositionT, TokenT>::string_type> const
361 lexer<IteratorT, PositionT, TokenT>::cache =
362 token_cache<typename lexer<IteratorT, PositionT, TokenT>::string_type>();
363#endif
364
365} // namespace re2clex
366
367///////////////////////////////////////////////////////////////////////////////
368//
369// The new_lexer_gen<>::new_lexer function (declared in cpp_lex_interface.hpp)
370// should be defined inline, if the lex_functor shouldn't be instantiated
371// separately from the lex_iterator.
372//
373// Separate (explicit) instantiation helps to reduce compilation time.
374//
375///////////////////////////////////////////////////////////////////////////////
376
377#if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0
378#define BOOST_WAVE_RE2C_NEW_LEXER_INLINE
379#else
380#define BOOST_WAVE_RE2C_NEW_LEXER_INLINE inline
381#endif
382
383///////////////////////////////////////////////////////////////////////////////
384//
385// The 'new_lexer' function allows the opaque generation of a new lexer object.
386// It is coupled to the iterator type to allow to decouple the lexer/iterator
387// configurations at compile time.
388//
389// This function is declared inside the cpp_lex_token.hpp file, which is
390// referenced by the source file calling the lexer and the source file, which
391// instantiates the lex_functor. But it is defined here, so it will be
392// instantiated only while compiling the source file, which instantiates the
393// lex_functor. While the cpp_re2c_token.hpp file may be included everywhere,
394// this file (cpp_re2c_lexer.hpp) should be included only once. This allows
395// to decouple the lexer interface from the lexer implementation and reduces
396// compilation time.
397//
398///////////////////////////////////////////////////////////////////////////////
399
400template <typename IteratorT, typename PositionT, typename TokenT>
401BOOST_WAVE_RE2C_NEW_LEXER_INLINE
402lex_input_interface<TokenT> *
403new_lexer_gen<IteratorT, PositionT, TokenT>::new_lexer(IteratorT const &first,
404 IteratorT const &last, PositionT const &pos,
405 boost::wave::language_support language)
406{
407 using re2clex::lex_functor;
408 return new lex_functor<IteratorT, PositionT, TokenT>(first, last, pos, language);
409}
410
411#undef BOOST_WAVE_RE2C_NEW_LEXER_INLINE
412
413///////////////////////////////////////////////////////////////////////////////
414} // namespace cpplexer
415} // namespace wave
416} // namespace boost
417
418// the suffix header occurs after all of the code
419#ifdef BOOST_HAS_ABI_HEADERS
420#include BOOST_ABI_SUFFIX
421#endif
422
423#endif // !defined(CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED)