]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /* |
2 | * | |
3 | * Copyright (c) 2004 | |
4 | * John Maddock | |
5 | * | |
6 | * Use, modification and distribution are subject to the | |
7 | * Boost Software License, Version 1.0. (See accompanying file | |
8 | * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
9 | * | |
10 | */ | |
11 | ||
12 | /* | |
13 | * LOCATION: see http://www.boost.org for most recent version. | |
14 | * FILE regex_traits_defaults.hpp | |
15 | * VERSION see <boost/version.hpp> | |
16 | * DESCRIPTION: Declares APIs for access to regex_traits default properties. | |
17 | */ | |
18 | ||
19 | #ifndef BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED | |
20 | #define BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED | |
21 | ||
22 | #ifdef BOOST_MSVC | |
23 | #pragma warning(push) | |
24 | #pragma warning(disable: 4103) | |
25 | #endif | |
26 | #ifdef BOOST_HAS_ABI_HEADERS | |
27 | # include BOOST_ABI_PREFIX | |
28 | #endif | |
29 | #ifdef BOOST_MSVC | |
30 | #pragma warning(pop) | |
31 | #endif | |
32 | ||
33 | #include <boost/regex/config.hpp> | |
34 | ||
35 | #ifndef BOOST_REGEX_SYNTAX_TYPE_HPP | |
36 | #include <boost/regex/v4/syntax_type.hpp> | |
37 | #endif | |
38 | #ifndef BOOST_REGEX_ERROR_TYPE_HPP | |
39 | #include <boost/regex/v4/error_type.hpp> | |
40 | #endif | |
41 | #include <boost/type_traits/make_unsigned.hpp> | |
92f5a8d4 | 42 | #include <boost/utility/enable_if.hpp> |
7c673cae FG |
43 | |
44 | #ifdef BOOST_NO_STDC_NAMESPACE | |
45 | namespace std{ | |
46 | using ::strlen; | |
47 | } | |
48 | #endif | |
49 | ||
50 | namespace boost{ namespace BOOST_REGEX_DETAIL_NS{ | |
51 | ||
52 | ||
53 | // | |
54 | // helpers to suppress warnings: | |
55 | // | |
56 | template <class charT> | |
57 | inline bool is_extended(charT c) | |
58 | { | |
59 | typedef typename make_unsigned<charT>::type unsigned_type; | |
60 | return (sizeof(charT) > 1) && (static_cast<unsigned_type>(c) >= 256u); | |
61 | } | |
62 | inline bool is_extended(char) | |
63 | { return false; } | |
64 | ||
65 | ||
66 | BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_syntax(regex_constants::syntax_type n); | |
67 | BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_error_string(regex_constants::error_type n); | |
68 | BOOST_REGEX_DECL regex_constants::syntax_type BOOST_REGEX_CALL get_default_syntax_type(char c); | |
69 | BOOST_REGEX_DECL regex_constants::escape_syntax_type BOOST_REGEX_CALL get_default_escape_syntax_type(char c); | |
70 | ||
71 | // is charT c a combining character? | |
72 | BOOST_REGEX_DECL bool BOOST_REGEX_CALL is_combining_implementation(uint_least16_t s); | |
73 | ||
74 | template <class charT> | |
75 | inline bool is_combining(charT c) | |
76 | { | |
77 | return (c <= static_cast<charT>(0)) ? false : ((c >= static_cast<charT>((std::numeric_limits<uint_least16_t>::max)())) ? false : is_combining_implementation(static_cast<unsigned short>(c))); | |
78 | } | |
79 | template <> | |
80 | inline bool is_combining<char>(char) | |
81 | { | |
82 | return false; | |
83 | } | |
84 | template <> | |
85 | inline bool is_combining<signed char>(signed char) | |
86 | { | |
87 | return false; | |
88 | } | |
89 | template <> | |
90 | inline bool is_combining<unsigned char>(unsigned char) | |
91 | { | |
92 | return false; | |
93 | } | |
94 | #if !defined(__hpux) && !defined(__WINSCW__) // can't use WCHAR_MAX/MIN in pp-directives | |
95 | #ifdef _MSC_VER | |
96 | template<> | |
97 | inline bool is_combining<wchar_t>(wchar_t c) | |
98 | { | |
99 | return is_combining_implementation(static_cast<unsigned short>(c)); | |
100 | } | |
101 | #elif !defined(__DECCXX) && !defined(__osf__) && !defined(__OSF__) && defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T) | |
102 | #if defined(WCHAR_MAX) && (WCHAR_MAX <= USHRT_MAX) | |
103 | template<> | |
104 | inline bool is_combining<wchar_t>(wchar_t c) | |
105 | { | |
106 | return is_combining_implementation(static_cast<unsigned short>(c)); | |
107 | } | |
108 | #else | |
109 | template<> | |
110 | inline bool is_combining<wchar_t>(wchar_t c) | |
111 | { | |
112 | return (c >= (std::numeric_limits<uint_least16_t>::max)()) ? false : is_combining_implementation(static_cast<unsigned short>(c)); | |
113 | } | |
114 | #endif | |
115 | #endif | |
116 | #endif | |
117 | ||
118 | // | |
119 | // is a charT c a line separator? | |
120 | // | |
121 | template <class charT> | |
122 | inline bool is_separator(charT c) | |
123 | { | |
124 | return BOOST_REGEX_MAKE_BOOL( | |
125 | (c == static_cast<charT>('\n')) | |
126 | || (c == static_cast<charT>('\r')) | |
127 | || (c == static_cast<charT>('\f')) | |
128 | || (static_cast<boost::uint16_t>(c) == 0x2028u) | |
129 | || (static_cast<boost::uint16_t>(c) == 0x2029u) | |
130 | || (static_cast<boost::uint16_t>(c) == 0x85u)); | |
131 | } | |
132 | template <> | |
133 | inline bool is_separator<char>(char c) | |
134 | { | |
135 | return BOOST_REGEX_MAKE_BOOL((c == '\n') || (c == '\r') || (c == '\f')); | |
136 | } | |
137 | ||
138 | // | |
139 | // get a default collating element: | |
140 | // | |
141 | BOOST_REGEX_DECL std::string BOOST_REGEX_CALL lookup_default_collate_name(const std::string& name); | |
142 | ||
143 | // | |
20effc67 | 144 | // get the state_id of a character classification, the individual |
7c673cae FG |
145 | // traits classes then transform that state_id into a bitmask: |
146 | // | |
147 | template <class charT> | |
148 | struct character_pointer_range | |
149 | { | |
150 | const charT* p1; | |
151 | const charT* p2; | |
152 | ||
153 | bool operator < (const character_pointer_range& r)const | |
154 | { | |
155 | return std::lexicographical_compare(p1, p2, r.p1, r.p2); | |
156 | } | |
157 | bool operator == (const character_pointer_range& r)const | |
158 | { | |
159 | // Not only do we check that the ranges are of equal size before | |
160 | // calling std::equal, but there is no other algorithm available: | |
161 | // not even a non-standard MS one. So forward to unchecked_equal | |
162 | // in the MS case. | |
163 | return ((p2 - p1) == (r.p2 - r.p1)) && BOOST_REGEX_DETAIL_NS::equal(p1, p2, r.p1); | |
164 | } | |
165 | }; | |
166 | template <class charT> | |
167 | int get_default_class_id(const charT* p1, const charT* p2) | |
168 | { | |
169 | static const charT data[73] = { | |
170 | 'a', 'l', 'n', 'u', 'm', | |
171 | 'a', 'l', 'p', 'h', 'a', | |
172 | 'b', 'l', 'a', 'n', 'k', | |
173 | 'c', 'n', 't', 'r', 'l', | |
174 | 'd', 'i', 'g', 'i', 't', | |
175 | 'g', 'r', 'a', 'p', 'h', | |
176 | 'l', 'o', 'w', 'e', 'r', | |
177 | 'p', 'r', 'i', 'n', 't', | |
178 | 'p', 'u', 'n', 'c', 't', | |
179 | 's', 'p', 'a', 'c', 'e', | |
180 | 'u', 'n', 'i', 'c', 'o', 'd', 'e', | |
181 | 'u', 'p', 'p', 'e', 'r', | |
182 | 'v', | |
183 | 'w', 'o', 'r', 'd', | |
184 | 'x', 'd', 'i', 'g', 'i', 't', | |
185 | }; | |
186 | ||
187 | static const character_pointer_range<charT> ranges[21] = | |
188 | { | |
189 | {data+0, data+5,}, // alnum | |
190 | {data+5, data+10,}, // alpha | |
191 | {data+10, data+15,}, // blank | |
192 | {data+15, data+20,}, // cntrl | |
193 | {data+20, data+21,}, // d | |
194 | {data+20, data+25,}, // digit | |
195 | {data+25, data+30,}, // graph | |
196 | {data+29, data+30,}, // h | |
197 | {data+30, data+31,}, // l | |
198 | {data+30, data+35,}, // lower | |
199 | {data+35, data+40,}, // print | |
200 | {data+40, data+45,}, // punct | |
201 | {data+45, data+46,}, // s | |
202 | {data+45, data+50,}, // space | |
203 | {data+57, data+58,}, // u | |
204 | {data+50, data+57,}, // unicode | |
205 | {data+57, data+62,}, // upper | |
206 | {data+62, data+63,}, // v | |
207 | {data+63, data+64,}, // w | |
208 | {data+63, data+67,}, // word | |
209 | {data+67, data+73,}, // xdigit | |
210 | }; | |
20effc67 TL |
211 | const character_pointer_range<charT>* ranges_begin = ranges; |
212 | const character_pointer_range<charT>* ranges_end = ranges + (sizeof(ranges)/sizeof(ranges[0])); | |
7c673cae FG |
213 | |
214 | character_pointer_range<charT> t = { p1, p2, }; | |
215 | const character_pointer_range<charT>* p = std::lower_bound(ranges_begin, ranges_end, t); | |
216 | if((p != ranges_end) && (t == *p)) | |
217 | return static_cast<int>(p - ranges); | |
218 | return -1; | |
219 | } | |
220 | ||
221 | // | |
222 | // helper functions: | |
223 | // | |
// Returns the number of characters that precede the terminating zero of
// the string starting at p.
template <class charT>
std::ptrdiff_t global_length(const charT* p)
{
   const charT* last = p;
   while(*last)
      ++last;
   return last - p;
}
// char strings use the C library routine.
template<>
inline std::ptrdiff_t global_length<char>(const char* p)
{
   return static_cast<std::ptrdiff_t>((std::strlen)(p));
}
#ifndef BOOST_NO_WREGEX
// wchar_t strings use the C library routine.
template<>
inline std::ptrdiff_t global_length<wchar_t>(const wchar_t* p)
{
   return static_cast<std::ptrdiff_t>((std::wcslen)(p));
}
#endif
247 | template <class charT> | |
248 | inline charT BOOST_REGEX_CALL global_lower(charT c) | |
249 | { | |
250 | return c; | |
251 | } | |
252 | template <class charT> | |
253 | inline charT BOOST_REGEX_CALL global_upper(charT c) | |
254 | { | |
255 | return c; | |
256 | } | |
257 | ||
258 | BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_lower(char c); | |
259 | BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_upper(char c); | |
260 | #ifndef BOOST_NO_WREGEX | |
261 | BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_lower(wchar_t c); | |
262 | BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_upper(wchar_t c); | |
263 | #endif | |
264 | #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T | |
265 | BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_lower(unsigned short c); | |
266 | BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_upper(unsigned short c); | |
267 | #endif | |
268 | // | |
269 | // This sucks: declare template specialisations of global_lower/global_upper | |
270 | // that just forward to the non-template implementation functions. We do | |
271 | // this because there is one compiler (Compaq Tru64 C++) that doesn't seem | |
272 | // to differentiate between templates and non-template overloads.... | |
273 | // what's more, the primary template, plus all overloads have to be | |
274 | // defined in the same translation unit (if one is inline they all must be) | |
275 | // otherwise the "local template instantiation" compiler option can pick | |
276 | // the wrong instantiation when linking: | |
277 | // | |
278 | template<> inline char BOOST_REGEX_CALL global_lower<char>(char c){ return do_global_lower(c); } | |
279 | template<> inline char BOOST_REGEX_CALL global_upper<char>(char c){ return do_global_upper(c); } | |
280 | #ifndef BOOST_NO_WREGEX | |
281 | template<> inline wchar_t BOOST_REGEX_CALL global_lower<wchar_t>(wchar_t c){ return do_global_lower(c); } | |
282 | template<> inline wchar_t BOOST_REGEX_CALL global_upper<wchar_t>(wchar_t c){ return do_global_upper(c); } | |
283 | #endif | |
284 | #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T | |
285 | template<> inline unsigned short BOOST_REGEX_CALL global_lower<unsigned short>(unsigned short c){ return do_global_lower(c); } | |
286 | template<> inline unsigned short BOOST_REGEX_CALL global_upper<unsigned short>(unsigned short c){ return do_global_upper(c); } | |
287 | #endif | |
288 | ||
// Converts a single digit character to its numeric value:
// '0'-'9' give 0-9, 'a'-'f' and 'A'-'F' give 10-15, anything else gives -1.
template <class charT>
int global_value(charT c)
{
   const charT digit_first = '0';
   const charT digit_last = '9';
   const charT lower_first = 'a';
   const charT lower_last = 'f';
   const charT upper_first = 'A';
   const charT upper_last = 'F';

   // The ranges are tested from the highest character values downwards;
   // each branch settles the result for everything at or above its start.
   if(c >= lower_first)
      return (c <= lower_last) ? 10 + (c - lower_first) : -1;
   if(c >= upper_first)
      return (c <= upper_last) ? 10 + (c - upper_first) : -1;
   if(c >= digit_first)
      return (c <= digit_last) ? c - digit_first : -1;
   return -1;
}
307 | template <class charT, class traits> | |
308 | boost::intmax_t global_toi(const charT*& p1, const charT* p2, int radix, const traits& t) | |
309 | { | |
310 | (void)t; // warning suppression | |
b32b8144 | 311 | boost::intmax_t limit = (std::numeric_limits<boost::intmax_t>::max)() / radix; |
7c673cae FG |
312 | boost::intmax_t next_value = t.value(*p1, radix); |
313 | if((p1 == p2) || (next_value < 0) || (next_value >= radix)) | |
314 | return -1; | |
315 | boost::intmax_t result = 0; | |
316 | while(p1 != p2) | |
317 | { | |
318 | next_value = t.value(*p1, radix); | |
319 | if((next_value < 0) || (next_value >= radix)) | |
320 | break; | |
321 | result *= radix; | |
322 | result += next_value; | |
323 | ++p1; | |
b32b8144 FG |
324 | if (result > limit) |
325 | return -1; | |
7c673cae FG |
326 | } |
327 | return result; | |
328 | } | |
329 | ||
330 | template <class charT> | |
92f5a8d4 | 331 | inline typename boost::enable_if_c<(sizeof(charT) > 1), const charT*>::type get_escape_R_string() |
7c673cae FG |
332 | { |
333 | #ifdef BOOST_MSVC | |
334 | # pragma warning(push) | |
335 | # pragma warning(disable:4309 4245) | |
336 | #endif | |
92f5a8d4 TL |
337 | static const charT e1[] = { '(', '?', '-', 'x', ':', '(', '?', '>', '\x0D', '\x0A', '?', |
338 | '|', '[', '\x0A', '\x0B', '\x0C', static_cast<charT>(0x85), static_cast<charT>(0x2028), | |
339 | static_cast<charT>(0x2029), ']', ')', ')', '\0' }; | |
340 | static const charT e2[] = { '(', '?', '-', 'x', ':', '(', '?', '>', '\x0D', '\x0A', '?', | |
341 | '|', '[', '\x0A', '\x0B', '\x0C', static_cast<charT>(0x85), ']', ')', ')', '\0' }; | |
7c673cae FG |
342 | |
343 | charT c = static_cast<charT>(0x2029u); | |
344 | bool b = (static_cast<unsigned>(c) == 0x2029u); | |
345 | ||
346 | return (b ? e1 : e2); | |
347 | #ifdef BOOST_MSVC | |
348 | # pragma warning(pop) | |
349 | #endif | |
350 | } | |
351 | ||
92f5a8d4 TL |
352 | template <class charT> |
353 | inline typename boost::disable_if_c<(sizeof(charT) > 1), const charT*>::type get_escape_R_string() | |
7c673cae FG |
354 | { |
355 | #ifdef BOOST_MSVC | |
356 | # pragma warning(push) | |
357 | # pragma warning(disable:4309) | |
358 | #endif | |
92f5a8d4 TL |
359 | static const charT e2[] = { '(', '?', '-', 'x', ':', '(', '?', '>', '\x0D', '\x0A', '?', |
360 | '|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')', ')', '\0' }; | |
7c673cae FG |
361 | return e2; |
362 | #ifdef BOOST_MSVC | |
363 | # pragma warning(pop) | |
364 | #endif | |
365 | } | |
366 | ||
367 | } // BOOST_REGEX_DETAIL_NS | |
368 | } // boost | |
369 | ||
370 | #ifdef BOOST_MSVC | |
371 | #pragma warning(push) | |
372 | #pragma warning(disable: 4103) | |
373 | #endif | |
374 | #ifdef BOOST_HAS_ABI_HEADERS | |
375 | # include BOOST_ABI_SUFFIX | |
376 | #endif | |
377 | #ifdef BOOST_MSVC | |
378 | #pragma warning(pop) | |
379 | #endif | |
380 | ||
381 | #endif |