]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/libs/xpressive/include/boost/xpressive/detail/dynamic/parse_charset.hpp
bump version to 12.2.2-pve1
[ceph.git] / ceph / src / boost / libs / xpressive / include / boost / xpressive / detail / dynamic / parse_charset.hpp
1 ///////////////////////////////////////////////////////////////////////////////
2 // parse_charset.hpp
3 //
4 // Copyright 2008 Eric Niebler. Distributed under the Boost
5 // Software License, Version 1.0. (See accompanying file
6 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7
8 #ifndef BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSE_CHARSET_HPP_EAN_10_04_2005
9 #define BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSE_CHARSET_HPP_EAN_10_04_2005
10
11 // MS compatible compilers support #pragma once
12 #if defined(_MSC_VER)
13 # pragma once
14 #endif
15
16 #include <boost/config.hpp>
17 #include <boost/integer.hpp>
18 #include <boost/mpl/bool.hpp>
19 #include <boost/throw_exception.hpp>
20 #include <boost/numeric/conversion/converter.hpp>
21 #include <boost/xpressive/detail/detail_fwd.hpp>
22 #include <boost/xpressive/detail/dynamic/parser_enum.hpp>
23 #include <boost/xpressive/detail/utility/literals.hpp>
24 #include <boost/xpressive/detail/utility/chset/chset.hpp>
25 #include <boost/xpressive/regex_constants.hpp>
26
27 namespace boost { namespace xpressive { namespace detail
28 {
29
// Discriminator stored in escape_value::type_. It records what kind of
// result a parsed escape produced.
enum escape_type
{
    escape_char  = 0,   // result is a single character (escape_value::ch_)
    escape_mark  = 1,   // presumably a mark number (escape_value::mark_nbr_); not produced in this file
    escape_class = 2    // result is a character class (escape_value::class_)
};
36
37 ///////////////////////////////////////////////////////////////////////////////
38 // escape_value
39 //
40 template<typename Char, typename Class>
41 struct escape_value
42 {
43 Char ch_;
44 int mark_nbr_;
45 Class class_;
46 escape_type type_;
47 };
48
49 ///////////////////////////////////////////////////////////////////////////////
50 // char_overflow_handler
51 //
52 struct char_overflow_handler
53 {
54 void operator ()(numeric::range_check_result result) const // throw(regex_error)
55 {
56 if(numeric::cInRange != result)
57 {
58 BOOST_THROW_EXCEPTION(
59 regex_error(
60 regex_constants::error_escape
61 , "character escape too large to fit in target character type"
62 )
63 );
64 }
65 }
66 };
67
68 ///////////////////////////////////////////////////////////////////////////////
69 // parse_escape
70 //
71 template<typename FwdIter, typename CompilerTraits>
72 escape_value<typename iterator_value<FwdIter>::type, typename CompilerTraits::regex_traits::char_class_type>
73 parse_escape(FwdIter &begin, FwdIter end, CompilerTraits &tr)
74 {
75 using namespace regex_constants;
76 typedef typename iterator_value<FwdIter>::type char_type;
77 typedef typename CompilerTraits::regex_traits regex_traits;
78 typedef typename regex_traits::char_class_type char_class_type;
79
80 // define an unsigned type the same size as char_type
81 typedef typename boost::uint_t<CHAR_BIT * sizeof(char_type)>::least uchar_t;
82 BOOST_MPL_ASSERT_RELATION(sizeof(uchar_t), ==, sizeof(char_type));
83 typedef numeric::conversion_traits<uchar_t, int> converstion_traits;
84
85 BOOST_XPR_ENSURE_(begin != end, error_escape, "unexpected end of pattern found");
86 numeric::converter<int, uchar_t, converstion_traits, char_overflow_handler> converter;
87 escape_value<char_type,char_class_type> esc = { 0, 0, 0, escape_char };
88 bool const icase = (0 != (regex_constants::icase_ & tr.flags()));
89 regex_traits const &rxtraits = tr.traits();
90 FwdIter tmp;
91
92 esc.class_ = rxtraits.lookup_classname(begin, begin + 1, icase);
93 if(0 != esc.class_)
94 {
95 esc.type_ = escape_class;
96 return esc;
97 }
98
99 if(-1 != rxtraits.value(*begin, 8))
100 {
101 esc.ch_ = converter(toi(begin, end, rxtraits, 8, 0777));
102 return esc;
103 }
104
105 switch(*begin)
106 {
107 // bell character
108 case BOOST_XPR_CHAR_(char_type, 'a'):
109 esc.ch_ = BOOST_XPR_CHAR_(char_type, '\a');
110 ++begin;
111 break;
112 // escape character
113 case BOOST_XPR_CHAR_(char_type, 'e'):
114 esc.ch_ = converter(27);
115 ++begin;
116 break;
117 // control character
118 case BOOST_XPR_CHAR_(char_type, 'c'):
119 BOOST_XPR_ENSURE_(++begin != end, error_escape, "unexpected end of pattern found");
120 BOOST_XPR_ENSURE_
121 (
122 rxtraits.in_range(BOOST_XPR_CHAR_(char_type, 'a'), BOOST_XPR_CHAR_(char_type, 'z'), *begin)
123 || rxtraits.in_range(BOOST_XPR_CHAR_(char_type, 'A'), BOOST_XPR_CHAR_(char_type, 'Z'), *begin)
124 , error_escape
125 , "invalid escape control letter; must be one of a-z or A-Z"
126 );
127 // Convert to character according to ECMA-262, section 15.10.2.10:
128 esc.ch_ = converter(*begin % 32);
129 ++begin;
130 break;
131 // formfeed character
132 case BOOST_XPR_CHAR_(char_type, 'f'):
133 esc.ch_ = BOOST_XPR_CHAR_(char_type, '\f');
134 ++begin;
135 break;
136 // newline
137 case BOOST_XPR_CHAR_(char_type, 'n'):
138 esc.ch_ = BOOST_XPR_CHAR_(char_type, '\n');
139 ++begin;
140 break;
141 // return
142 case BOOST_XPR_CHAR_(char_type, 'r'):
143 esc.ch_ = BOOST_XPR_CHAR_(char_type, '\r');
144 ++begin;
145 break;
146 // horizontal tab
147 case BOOST_XPR_CHAR_(char_type, 't'):
148 esc.ch_ = BOOST_XPR_CHAR_(char_type, '\t');
149 ++begin;
150 break;
151 // vertical tab
152 case BOOST_XPR_CHAR_(char_type, 'v'):
153 esc.ch_ = BOOST_XPR_CHAR_(char_type, '\v');
154 ++begin;
155 break;
156 // hex escape sequence
157 case BOOST_XPR_CHAR_(char_type, 'x'):
158 BOOST_XPR_ENSURE_(++begin != end, error_escape, "unexpected end of pattern found");
159 tmp = begin;
160 esc.ch_ = converter(toi(begin, end, rxtraits, 16, 0xff));
161 BOOST_XPR_ENSURE_(2 == std::distance(tmp, begin), error_escape, "invalid hex escape : "
162 "must be \\x HexDigit HexDigit");
163 break;
164 // Unicode escape sequence
165 case BOOST_XPR_CHAR_(char_type, 'u'):
166 BOOST_XPR_ENSURE_(++begin != end, error_escape, "unexpected end of pattern found");
167 tmp = begin;
168 esc.ch_ = converter(toi(begin, end, rxtraits, 16, 0xffff));
169 BOOST_XPR_ENSURE_(4 == std::distance(tmp, begin), error_escape, "invalid Unicode escape : "
170 "must be \\u HexDigit HexDigit HexDigit HexDigit");
171 break;
172 // backslash
173 case BOOST_XPR_CHAR_(char_type, '\\'):
174 //esc.ch_ = BOOST_XPR_CHAR_(char_type, '\\');
175 //++begin;
176 //break;
177 // all other escaped characters represent themselves
178 default:
179 esc.ch_ = *begin;
180 ++begin;
181 break;
182 }
183
184 return esc;
185 }
186
187 //////////////////////////////////////////////////////////////////////////
188 // parse_charset
189 //
190 template<typename FwdIter, typename RegexTraits, typename CompilerTraits>
191 inline void parse_charset
192 (
193 FwdIter &begin
194 , FwdIter end
195 , compound_charset<RegexTraits> &chset
196 , CompilerTraits &tr
197 )
198 {
199 using namespace regex_constants;
200 typedef typename RegexTraits::char_type char_type;
201 typedef typename RegexTraits::char_class_type char_class_type;
202 BOOST_XPR_ENSURE_(begin != end, error_brack, "unexpected end of pattern found");
203 RegexTraits const &rxtraits = tr.traits();
204 bool const icase = (0 != (regex_constants::icase_ & tr.flags()));
205 FwdIter iprev = FwdIter();
206 escape_value<char_type, char_class_type> esc = {0, 0, 0, escape_char};
207 bool invert = false;
208
209 // check to see if we have an inverse charset
210 if(begin != end && token_charset_invert == tr.get_charset_token(iprev = begin, end))
211 {
212 begin = iprev;
213 invert = true;
214 }
215
216 // skip the end token if-and-only-if it is the first token in the charset
217 if(begin != end && token_charset_end == tr.get_charset_token(iprev = begin, end))
218 {
219 for(; begin != iprev; ++begin)
220 {
221 chset.set_char(*begin, rxtraits, icase);
222 }
223 }
224
225 compiler_token_type tok;
226 char_type ch_prev = char_type(), ch_next = char_type();
227 bool have_prev = false;
228
229 BOOST_XPR_ENSURE_(begin != end, error_brack, "unexpected end of pattern found");
230
231 // remember the current position and grab the next token
232 iprev = begin;
233 tok = tr.get_charset_token(begin, end);
234 do
235 {
236 BOOST_XPR_ENSURE_(begin != end, error_brack, "unexpected end of pattern found");
237
238 if(token_charset_hyphen == tok && have_prev)
239 {
240 // remember the current position
241 FwdIter iprev2 = begin;
242 have_prev = false;
243
244 // ch_prev is lower bound of a range
245 switch(tr.get_charset_token(begin, end))
246 {
247 case token_charset_hyphen:
248 case token_charset_invert:
249 begin = iprev2; // un-get these tokens and fall through
250 BOOST_FALLTHROUGH;
251 case token_literal:
252 ch_next = *begin++;
253 BOOST_XPR_ENSURE_(ch_prev <= ch_next, error_range, "invalid charset range");
254 chset.set_range(ch_prev, ch_next, rxtraits, icase);
255 continue;
256 case token_charset_backspace:
257 ch_next = char_type(8); // backspace
258 BOOST_XPR_ENSURE_(ch_prev <= ch_next, error_range, "invalid charset range");
259 chset.set_range(ch_prev, ch_next, rxtraits, icase);
260 continue;
261 case token_escape:
262 esc = parse_escape(begin, end, tr);
263 if(escape_char == esc.type_)
264 {
265 BOOST_XPR_ENSURE_(ch_prev <= esc.ch_, error_range, "invalid charset range");
266 chset.set_range(ch_prev, esc.ch_, rxtraits, icase);
267 continue;
268 }
269 BOOST_FALLTHROUGH;
270 case token_charset_end:
271 default: // not a range.
272 begin = iprev; // backup to hyphen token
273 chset.set_char(ch_prev, rxtraits, icase);
274 chset.set_char(*begin++, rxtraits, icase);
275 continue;
276 }
277 }
278
279 if(have_prev)
280 {
281 chset.set_char(ch_prev, rxtraits, icase);
282 have_prev = false;
283 }
284
285 switch(tok)
286 {
287 case token_charset_hyphen:
288 case token_charset_invert:
289 case token_charset_end:
290 case token_posix_charset_end:
291 begin = iprev; // un-get these tokens
292 ch_prev = *begin++;
293 have_prev = true;
294 continue;
295
296 case token_charset_backspace:
297 ch_prev = char_type(8); // backspace
298 have_prev = true;
299 continue;
300
301 case token_posix_charset_begin:
302 {
303 FwdIter tmp = begin, start = begin;
304 bool invert = (token_charset_invert == tr.get_charset_token(tmp, end));
305 if(invert)
306 {
307 begin = start = tmp;
308 }
309 while(token_literal == (tok = tr.get_charset_token(begin, end)))
310 {
311 tmp = ++begin;
312 BOOST_XPR_ENSURE_(begin != end, error_brack, "unexpected end of pattern found");
313 }
314 if(token_posix_charset_end == tok)
315 {
316 char_class_type chclass = rxtraits.lookup_classname(start, tmp, icase);
317 BOOST_XPR_ENSURE_(0 != chclass, error_ctype, "unknown class name");
318 chset.set_class(chclass, invert);
319 continue;
320 }
321 begin = iprev; // un-get this token
322 ch_prev = *begin++;
323 have_prev = true;
324 }
325 continue;
326
327 case token_escape:
328 esc = parse_escape(begin, end, tr);
329 if(escape_char == esc.type_)
330 {
331 ch_prev = esc.ch_;
332 have_prev = true;
333 }
334 else if(escape_class == esc.type_)
335 {
336 char_class_type upper_ = lookup_classname(rxtraits, "upper");
337 BOOST_ASSERT(0 != upper_);
338 chset.set_class(esc.class_, rxtraits.isctype(*begin++, upper_));
339 }
340 else
341 {
342 BOOST_ASSERT(false);
343 }
344 continue;
345
346 default:
347 ch_prev = *begin++;
348 have_prev = true;
349 continue;
350 }
351 }
352 while(BOOST_XPR_ENSURE_((iprev = begin) != end, error_brack, "unexpected end of pattern found"),
353 token_charset_end != (tok = tr.get_charset_token(begin, end)));
354
355 if(have_prev)
356 {
357 chset.set_char(ch_prev, rxtraits, icase);
358 }
359
360 if(invert)
361 {
362 chset.inverse();
363 }
364 }
365
366 }}} // namespace boost::xpressive::detail
367
368 #endif