]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/libs/spirit/include/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp
bump version to 12.2.2-pve1
[ceph.git] / ceph / src / boost / libs / spirit / include / boost / spirit / home / support / detail / lexer / parser / tokeniser / re_tokeniser_helper.hpp
CommitLineData
7c673cae
FG
1// tokeniser_helper.hpp
2// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
3//
4// Distributed under the Boost Software License, Version 1.0. (See accompanying
5// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6#ifndef BOOST_LEXER_RE_TOKENISER_HELPER_H
7#define BOOST_LEXER_RE_TOKENISER_HELPER_H
8
9#include "../../char_traits.hpp"
10// strlen()
11#include <cstring>
12#include "../../size_t.hpp"
13#include "re_tokeniser_state.hpp"
14
15namespace boost
16{
17namespace lexer
18{
19namespace detail
20{
21template<typename CharT, typename Traits = char_traits<CharT> >
22class basic_re_tokeniser_helper
23{
24public:
25 typedef basic_re_tokeniser_state<CharT> state;
26 typedef std::basic_string<CharT> string;
27
28 static const CharT *escape_sequence (state &state_, CharT &ch_,
29 std::size_t &str_len_)
30 {
31 bool eos_ = state_.eos ();
32
33 if (eos_)
34 {
35 throw runtime_error ("Unexpected end of regex "
36 "following '\\'.");
37 }
38
39 const CharT *str_ = charset_shortcut (*state_._curr, str_len_);
40
41 if (str_)
42 {
43 state_.increment ();
44 }
45 else
46 {
47 ch_ = chr (state_);
48 }
49
50 return str_;
51 }
52
53 // This function can call itself.
54 static void charset (state &state_, string &chars_, bool &negated_)
55 {
56 CharT ch_ = 0;
57 bool eos_ = state_.next (ch_);
58
59 if (eos_)
60 {
61 // Pointless returning index if at end of string
62 throw runtime_error ("Unexpected end of regex "
63 "following '['.");
64 }
65
66 negated_ = ch_ == '^';
67
68 if (negated_)
69 {
70 eos_ = state_.next (ch_);
71
72 if (eos_)
73 {
74 // Pointless returning index if at end of string
75 throw runtime_error ("Unexpected end of regex "
76 "following '^'.");
77 }
78 }
79
80 bool chset_ = false;
81 CharT prev_ = 0;
82
83 while (ch_ != ']')
84 {
85 if (ch_ == '\\')
86 {
87 std::size_t str_len_ = 0;
88 const CharT *str_ = escape_sequence (state_, prev_, str_len_);
89
90 chset_ = str_ != 0;
91
92 if (chset_)
93 {
94 state temp_state_ (str_ + 1, str_ + str_len_,
95 state_._flags, state_._locale);
96 string temp_chars_;
97 bool temp_negated_ = false;
98
99 charset (temp_state_, temp_chars_, temp_negated_);
100
101 if (negated_ != temp_negated_)
102 {
103 std::ostringstream ss_;
104
105 ss_ << "Mismatch in charset negation preceding "
106 "index " << state_.index () << '.';
107 throw runtime_error (ss_.str ().c_str ());
108 }
109
110 chars_ += temp_chars_;
111 }
112 }
113/*
114 else if (ch_ == '[' && !state_.eos () && *state_._curr == ':')
115 {
116 // TODO: POSIX charsets
117 }
118*/
119 else
120 {
121 chset_ = false;
122 prev_ = ch_;
123 }
124
125 eos_ = state_.next (ch_);
126
127 // Covers preceding if, else if and else
128 if (eos_)
129 {
130 // Pointless returning index if at end of string
131 throw runtime_error ("Unexpected end of regex "
132 "(missing ']').");
133 }
134
135 if (ch_ == '-')
136 {
137 charset_range (chset_, state_, eos_, ch_, prev_, chars_);
138 }
139 else if (!chset_)
140 {
141 if ((state_._flags & icase) &&
142 (std::isupper (prev_, state_._locale) ||
143 std::islower (prev_, state_._locale)))
144 {
145 CharT upper_ = std::toupper (prev_, state_._locale);
146 CharT lower_ = std::tolower (prev_, state_._locale);
147
148 chars_ += upper_;
149 chars_ += lower_;
150 }
151 else
152 {
153 chars_ += prev_;
154 }
155 }
156 }
157
158 if (!negated_ && chars_.empty ())
159 {
160 throw runtime_error ("Empty charsets not allowed.");
161 }
162 }
163
164 static CharT chr (state &state_)
165 {
166 CharT ch_ = 0;
167
168 // eos_ has already been checked for.
169 switch (*state_._curr)
170 {
171 case '0':
172 case '1':
173 case '2':
174 case '3':
175 case '4':
176 case '5':
177 case '6':
178 case '7':
179 ch_ = decode_octal (state_);
180 break;
181 case 'a':
182 ch_ = '\a';
183 state_.increment ();
184 break;
185 case 'b':
186 ch_ = '\b';
187 state_.increment ();
188 break;
189 case 'c':
190 ch_ = decode_control_char (state_);
191 break;
192 case 'e':
193 ch_ = 27; // '\e' not recognised by compiler
194 state_.increment ();
195 break;
196 case 'f':
197 ch_ = '\f';
198 state_.increment ();
199 break;
200 case 'n':
201 ch_ = '\n';
202 state_.increment ();
203 break;
204 case 'r':
205 ch_ = '\r';
206 state_.increment ();
207 break;
208 case 't':
209 ch_ = '\t';
210 state_.increment ();
211 break;
212 case 'v':
213 ch_ = '\v';
214 state_.increment ();
215 break;
216 case 'x':
217 ch_ = decode_hex (state_);
218 break;
219 default:
220 ch_ = *state_._curr;
221 state_.increment ();
222 break;
223 }
224
225 return ch_;
226 }
227
228private:
229 static const char *charset_shortcut (const char ch_,
230 std::size_t &str_len_)
231 {
232 const char *str_ = 0;
233
234 switch (ch_)
235 {
236 case 'd':
237 str_ = "[0-9]";
238 break;
239 case 'D':
240 str_ = "[^0-9]";
241 break;
242 case 's':
243 str_ = "[ \t\n\r\f\v]";
244 break;
245 case 'S':
246 str_ = "[^ \t\n\r\f\v]";
247 break;
248 case 'w':
249 str_ = "[_0-9A-Za-z]";
250 break;
251 case 'W':
252 str_ = "[^_0-9A-Za-z]";
253 break;
254 }
255
256 if (str_)
257 {
258 // Some systems have strlen in namespace std.
259 using namespace std;
260
261 str_len_ = strlen (str_);
262 }
263 else
264 {
265 str_len_ = 0;
266 }
267
268 return str_;
269 }
270
271 static const wchar_t *charset_shortcut (const wchar_t ch_,
272 std::size_t &str_len_)
273 {
274 const wchar_t *str_ = 0;
275
276 switch (ch_)
277 {
278 case 'd':
279 str_ = L"[0-9]";
280 break;
281 case 'D':
282 str_ = L"[^0-9]";
283 break;
284 case 's':
285 str_ = L"[ \t\n\r\f\v]";
286 break;
287 case 'S':
288 str_ = L"[^ \t\n\r\f\v]";
289 break;
290 case 'w':
291 str_ = L"[_0-9A-Za-z]";
292 break;
293 case 'W':
294 str_ = L"[^_0-9A-Za-z]";
295 break;
296 }
297
298 if (str_)
299 {
300 // Some systems have wcslen in namespace std.
301 using namespace std;
302
303 str_len_ = wcslen (str_);
304 }
305 else
306 {
307 str_len_ = 0;
308 }
309
310 return str_;
311 }
312
313 static CharT decode_octal (state &state_)
314 {
315 std::size_t accumulator_ = 0;
316 CharT ch_ = *state_._curr;
317 unsigned short count_ = 3;
318 bool eos_ = false;
319
320 for (;;)
321 {
322 accumulator_ *= 8;
323 accumulator_ += ch_ - '0';
324 --count_;
325 state_.increment ();
326 eos_ = state_.eos ();
327
328 if (!count_ || eos_) break;
329
330 ch_ = *state_._curr;
331
332 // Don't consume invalid chars!
333 if (ch_ < '0' || ch_ > '7')
334 {
335 break;
336 }
337 }
338
339 return static_cast<CharT> (accumulator_);
340 }
341
342 static CharT decode_control_char (state &state_)
343 {
344 // Skip over 'c'
345 state_.increment ();
346
347 CharT ch_ = 0;
348 bool eos_ = state_.next (ch_);
349
350 if (eos_)
351 {
352 // Pointless returning index if at end of string
353 throw runtime_error ("Unexpected end of regex following \\c.");
354 }
355 else
356 {
357 if (ch_ >= 'a' && ch_ <= 'z')
358 {
359 ch_ -= 'a' - 1;
360 }
361 else if (ch_ >= 'A' && ch_ <= 'Z')
362 {
363 ch_ -= 'A' - 1;
364 }
365 else if (ch_ == '@')
366 {
367 // Apparently...
368 ch_ = 0;
369 }
370 else
371 {
372 std::ostringstream ss_;
373
374 ss_ << "Invalid control char at index " <<
375 state_.index () - 1 << '.';
376 throw runtime_error (ss_.str ().c_str ());
377 }
378 }
379
380 return ch_;
381 }
382
383 static CharT decode_hex (state &state_)
384 {
385 // Skip over 'x'
386 state_.increment ();
387
388 CharT ch_ = 0;
389 bool eos_ = state_.next (ch_);
390
391 if (eos_)
392 {
393 // Pointless returning index if at end of string
394 throw runtime_error ("Unexpected end of regex following \\x.");
395 }
396
397 if (!((ch_ >= '0' && ch_ <= '9') || (ch_ >= 'a' && ch_ <= 'f') ||
398 (ch_ >= 'A' && ch_ <= 'F')))
399 {
400 std::ostringstream ss_;
401
402 ss_ << "Illegal char following \\x at index " <<
403 state_.index () - 1 << '.';
404 throw runtime_error (ss_.str ().c_str ());
405 }
406
407 std::size_t hex_ = 0;
408
409 do
410 {
411 hex_ *= 16;
412
413 if (ch_ >= '0' && ch_ <= '9')
414 {
415 hex_ += ch_ - '0';
416 }
417 else if (ch_ >= 'a' && ch_ <= 'f')
418 {
419 hex_ += 10 + (ch_ - 'a');
420 }
421 else
422 {
423 hex_ += 10 + (ch_ - 'A');
424 }
425
426 eos_ = state_.eos ();
427
428 if (!eos_)
429 {
430 ch_ = *state_._curr;
431
432 // Don't consume invalid chars!
433 if (((ch_ >= '0' && ch_ <= '9') ||
434 (ch_ >= 'a' && ch_ <= 'f') || (ch_ >= 'A' && ch_ <= 'F')))
435 {
436 state_.increment ();
437 }
438 else
439 {
440 eos_ = true;
441 }
442 }
443 } while (!eos_);
444
445 return static_cast<CharT> (hex_);
446 }
447
448 static void charset_range (const bool chset_, state &state_, bool &eos_,
449 CharT &ch_, const CharT prev_, string &chars_)
450 {
451 if (chset_)
452 {
453 std::ostringstream ss_;
454
455 ss_ << "Charset cannot form start of range preceding "
456 "index " << state_.index () - 1 << '.';
457 throw runtime_error (ss_.str ().c_str ());
458 }
459
460 eos_ = state_.next (ch_);
461
462 if (eos_)
463 {
464 // Pointless returning index if at end of string
465 throw runtime_error ("Unexpected end of regex "
466 "following '-'.");
467 }
468
469 CharT curr_ = 0;
470
471 if (ch_ == '\\')
472 {
473 std::size_t str_len_ = 0;
474
475 if (escape_sequence (state_, curr_, str_len_))
476 {
477 std::ostringstream ss_;
478
479 ss_ << "Charset cannot form end of range preceding index "
480 << state_.index () << '.';
481 throw runtime_error (ss_.str ().c_str ());
482 }
483 }
484/*
485 else if (ch_ == '[' && !state_.eos () && *state_._curr == ':')
486 {
487 std::ostringstream ss_;
488
489 ss_ << "POSIX char class cannot form end of range at "
490 "index " << state_.index () - 1 << '.';
491 throw runtime_error (ss_.str ().c_str ());
492 }
493*/
494 else
495 {
496 curr_ = ch_;
497 }
498
499 eos_ = state_.next (ch_);
500
501 // Covers preceding if and else
502 if (eos_)
503 {
504 // Pointless returning index if at end of string
505 throw runtime_error ("Unexpected end of regex "
506 "(missing ']').");
507 }
508
509 std::size_t start_ = static_cast<typename Traits::index_type> (prev_);
510 std::size_t end_ = static_cast<typename Traits::index_type> (curr_);
511
512 // Semanic check
513 if (end_ < start_)
514 {
515 std::ostringstream ss_;
516
517 ss_ << "Invalid range in charset preceding index " <<
518 state_.index () - 1 << '.';
519 throw runtime_error (ss_.str ().c_str ());
520 }
521
522 chars_.reserve (chars_.size () + (end_ + 1 - start_));
523
524 for (; start_ <= end_; ++start_)
525 {
526 CharT ch_ = static_cast<CharT> (start_);
527
528 if ((state_._flags & icase) &&
529 (std::isupper (ch_, state_._locale) ||
530 std::islower (ch_, state_._locale)))
531 {
532 CharT upper_ = std::toupper (ch_, state_._locale);
533 CharT lower_ = std::tolower (ch_, state_._locale);
534
535 chars_ += (upper_);
536 chars_ += (lower_);
537 }
538 else
539 {
540 chars_ += (ch_);
541 }
542 }
543 }
544};
545}
546}
547}
548
549#endif