6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE c_regex_traits.hpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Declares regular expression traits class that wraps the global C locale.
19 #ifndef BOOST_C_REGEX_TRAITS_HPP_INCLUDED
20 #define BOOST_C_REGEX_TRAITS_HPP_INCLUDED
22 #ifndef BOOST_REGEX_CONFIG_HPP
23 #include <boost/regex/config.hpp>
25 #ifndef BOOST_REGEX_WORKAROUND_HPP
26 #include <boost/regex/v4/regex_workaround.hpp>
31 #ifdef BOOST_NO_STDC_NAMESPACE
33 using ::strlen; using ::tolower;
39 #pragma warning(disable: 4103 4244)
41 #ifdef BOOST_HAS_ABI_HEADERS
42 # include BOOST_ABI_PREFIX
50 namespace BOOST_REGEX_DETAIL_NS {
// Character-class bit flags. They are combined into a char_class_type mask
// and tested bit-by-bit by the isctype() functions later in this file.
54 char_class_space = 1 << 0,
55 char_class_print = 1 << 1,
56 char_class_cntrl = 1 << 2,
57 char_class_upper = 1 << 3,
58 char_class_lower = 1 << 4,
59 char_class_alpha = 1 << 5,
60 char_class_digit = 1 << 6,
61 char_class_punct = 1 << 7,
62 char_class_xdigit = 1 << 8,
// Composite classes: unions of the single-bit flags above.
63 char_class_alnum = char_class_alpha | char_class_digit,
64 char_class_graph = char_class_alnum | char_class_punct,
65 char_class_blank = 1 << 9,
// Tested against '_' by isctype(); class tables pair it with char_class_alnum.
66 char_class_word = 1 << 10,
// Only the wchar_t isctype() in this file tests this bit (any bit above 0xff set in the character).
67 char_class_unicode = 1 << 11,
// Horizontal and vertical whitespace classes.
68 char_class_horizontal = 1 << 12,
69 char_class_vertical = 1 << 13
// Primary template: declared only. The char and wchar_t specialisations
// that follow in this file provide the actual traits.
74 template <class charT>
75 struct c_regex_traits;
// Traits specialisation for narrow (char) patterns. Per the file description
// this class wraps the global C locale; the conversions visible below go
// through C library functions (strlen, tolower).
78 struct c_regex_traits<char>
81 typedef char char_type;
82 typedef std::size_t size_type;
83 typedef std::string string_type;
// Bitmask of char_class_* flags, as consumed by isctype().
85 typedef boost::uint32_t char_class_type;
// Length of the NUL-terminated string p as reported by strlen. The function
// name is parenthesised so a function-like macro of the same name cannot expand.
87 static size_type length(const char_type* p)
89 return (std::strlen)(p);
// Case-sensitive translation of one character (body not visible in this excerpt).
92 char translate(char c) const
// Case-insensitive translation: lower-cases c with tolower. The argument is
// converted to unsigned char first because tolower is only defined for values
// representable as unsigned char (or EOF).
96 char translate_nocase(char c) const
98 return static_cast<char>((std::tolower)(static_cast<unsigned char>(c)));
// Sort-key helpers; defined inline further down this file.
101 static string_type BOOST_REGEX_CALL transform(const char* p1, const char* p2);
102 static string_type BOOST_REGEX_CALL transform_primary(const char* p1, const char* p2);
// Name lookups for character classes and collating elements over [p1, p2).
104 static char_class_type BOOST_REGEX_CALL lookup_classname(const char* p1, const char* p2);
105 static string_type BOOST_REGEX_CALL lookup_collatename(const char* p1, const char* p2);
// isctype tests a character against a char_class_type mask; value converts a
// single character to a number in the given radix.
107 static bool BOOST_REGEX_CALL isctype(char, char_class_type);
108 static int BOOST_REGEX_CALL value(char, int);
// Locale hooks: getloc returns a default-constructed locale_type
// (imbue's body is not visible in this excerpt).
110 locale_type imbue(locale_type l)
112 locale_type getloc()const
113 { return locale_type(); }
116 // this type is not copyable:
// Copy construction and copy assignment are declared here; no definitions
// for them appear in this excerpt.
117 c_regex_traits(const c_regex_traits&);
118 c_regex_traits& operator=(const c_regex_traits&);
121 #ifndef BOOST_NO_WREGEX
// Traits specialisation for wide (wchar_t) patterns; mirrors the narrow
// specialisation but uses the wide C library functions (wcslen, towlower).
123 struct c_regex_traits<wchar_t>
126 typedef wchar_t char_type;
127 typedef std::size_t size_type;
128 typedef std::wstring string_type;
// Empty tag type: there is no per-object locale state to carry.
129 struct locale_type{};
// Bitmask of char_class_* flags, as consumed by isctype().
130 typedef boost::uint32_t char_class_type;
// Length of the NUL-terminated wide string p as reported by wcslen. The name
// is parenthesised so a function-like macro of the same name cannot expand.
132 static size_type length(const char_type* p)
134 return (std::wcslen)(p);
// Case-sensitive translation of one character (body not visible in this excerpt).
137 wchar_t translate(wchar_t c) const
// Case-insensitive translation: lower-cases c with towlower.
141 wchar_t translate_nocase(wchar_t c) const
143 return (std::towlower)(c);
// Sort-key helpers; defined inline further down this file.
146 static string_type BOOST_REGEX_CALL transform(const wchar_t* p1, const wchar_t* p2);
147 static string_type BOOST_REGEX_CALL transform_primary(const wchar_t* p1, const wchar_t* p2);
// Name lookups for character classes and collating elements over [p1, p2).
149 static char_class_type BOOST_REGEX_CALL lookup_classname(const wchar_t* p1, const wchar_t* p2);
150 static string_type BOOST_REGEX_CALL lookup_collatename(const wchar_t* p1, const wchar_t* p2);
// isctype tests a character against a char_class_type mask; value converts a
// single character to a number in the given radix.
152 static bool BOOST_REGEX_CALL isctype(wchar_t, char_class_type);
153 static int BOOST_REGEX_CALL value(wchar_t, int);
// Locale hooks: getloc returns a default-constructed locale_type
// (imbue's body is not visible in this excerpt).
155 locale_type imbue(locale_type l)
157 locale_type getloc()const
158 { return locale_type(); }
161 // this type is not copyable:
// Copy construction and copy assignment are declared here; no definitions
// for them appear in this excerpt.
162 c_regex_traits(const c_regex_traits&);
163 c_regex_traits& operator=(const c_regex_traits&);
166 #endif // BOOST_NO_WREGEX
// Builds the sort key for the range [p1, p2) with std::strxfrm.
// The range is copied into a std::string so strxfrm receives a NUL-terminated
// source. The output buffer starts at 10 characters and is enlarged whenever
// strxfrm reports that the key needs more room than the buffer offers.
168 inline c_regex_traits<char>::string_type BOOST_REGEX_CALL c_regex_traits<char>::transform(const char* p1, const char* p2)
170 std::string result(10, ' ');
171 std::size_t s = result.size();
173 std::string src(p1, p2);
// strxfrm returns the length the key requires; keep looping while that
// exceeds the current buffer size s.
174 while (s < (r = std::strxfrm(&*result.begin(), src.c_str(), s)))
176 #if defined(_CPPLIB_VER)
178 // A bug in VC11 and 12 causes the program to hang if we pass a null-string
179 // to std::strxfrm, but only for certain locales :-(
180 // Probably affects Intel and Clang or any compiler using the VC std library (Dinkumware).
// Workaround path: the result is given a leading NUL character (the
// surrounding condition is not visible in this excerpt).
185 result.insert(result.begin(), static_cast<char>(0));
// Grow the buffer by the shortfall plus a little slack before retrying.
189 result.append(r - s + 3, ' ');
// Builds a "primary" sort key for [p1, p2), derived from transform().
// How the full key is reduced depends on the sort-key syntax reported by
// find_sort_syntax; that value is computed once and kept in a function-local
// static, and find_sort_syntax is also handed the address of s_delim.
196 inline c_regex_traits<char>::string_type BOOST_REGEX_CALL c_regex_traits<char>::transform_primary(const char* p1, const char* p2)
199 static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast<c_regex_traits<char>*>(0), &s_delim);
202 // What we do here depends upon the format of the
203 // sort key returned by this->transform:
205 switch (s_collate_type)
207 case ::boost::BOOST_REGEX_DETAIL_NS::sort_C:
208 case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown:
209 // the best we can do is translate to lower case, then get a regular sort key:
211 result.assign(p1, p2);
212 for (std::string::size_type i = 0; i < result.size(); ++i)
213 result[i] = static_cast<char>((std::tolower)(static_cast<unsigned char>(result[i])));
214 result = transform(&*result.begin(), &*result.begin() + result.size());
217 case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed:
219 // get a regular sort key, and then truncate it:
220 result = transform(p1, p2);
// s_delim is used as a position here: everything from index s_delim on is erased.
221 result.erase(s_delim);
224 case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim:
225 // get a regular sort key, and then truncate everything after the delim:
226 result = transform(p1, p2);
// A key that already starts with the delimiter is tested for separately
// (the action taken is not visible in this excerpt).
227 if ((!result.empty()) && (result[0] == s_delim))
// Scan for the first occurrence of the delimiter character.
230 for (i = 0; i < result.size(); ++i)
232 if (result[i] == s_delim)
// Replacement value: a string holding a single NUL character (the condition
// guarding this assignment is not visible in this excerpt).
239 result = std::string(1, char(0));
// Maps the character-class name in [p1, p2) to a char_class_type mask taken
// from the local masks table.
243 inline c_regex_traits<char>::char_class_type BOOST_REGEX_CALL c_regex_traits<char>::lookup_classname(const char* p1, const char* p2)
245 using namespace BOOST_REGEX_DETAIL_NS;
// Table of masks, indexed by class id + 1 (see the return statement). Only
// part of the table is visible in this excerpt.
246 static const char_class_type masks[] =
256 char_class_horizontal,
267 char_class_alnum | char_class_word,
268 char_class_alnum | char_class_word,
// First attempt: look the name up exactly as given.
272 int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2);
// Second lookup using a lower-cased copy of the name (the condition that
// triggers it is not visible in this excerpt).
275 std::string s(p1, p2);
276 for (std::string::size_type i = 0; i < s.size(); ++i)
277 s[i] = static_cast<char>((std::tolower)(static_cast<unsigned char>(s[i])));
278 idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size());
// The table is indexed with idx + 1, so the assertion bounds-checks that value.
280 BOOST_REGEX_ASSERT(std::size_t(idx) + 1u < sizeof(masks) / sizeof(masks[0]));
281 return masks[idx + 1];
// Tests whether c belongs to at least one of the character classes whose
// bits are set in mask. Each clause pairs one mask bit with the matching
// C library classification call; c is converted to unsigned char before
// every is*() call.
284 inline bool BOOST_REGEX_CALL c_regex_traits<char>::isctype(char c, char_class_type mask)
286 using namespace BOOST_REGEX_DETAIL_NS;
288 ((mask & char_class_space) && (std::isspace)(static_cast<unsigned char>(c)))
289 || ((mask & char_class_print) && (std::isprint)(static_cast<unsigned char>(c)))
290 || ((mask & char_class_cntrl) && (std::iscntrl)(static_cast<unsigned char>(c)))
291 || ((mask & char_class_upper) && (std::isupper)(static_cast<unsigned char>(c)))
292 || ((mask & char_class_lower) && (std::islower)(static_cast<unsigned char>(c)))
293 || ((mask & char_class_alpha) && (std::isalpha)(static_cast<unsigned char>(c)))
294 || ((mask & char_class_digit) && (std::isdigit)(static_cast<unsigned char>(c)))
295 || ((mask & char_class_punct) && (std::ispunct)(static_cast<unsigned char>(c)))
296 || ((mask & char_class_xdigit) && (std::isxdigit)(static_cast<unsigned char>(c)))
// blank: whitespace for which is_separator() is false.
297 || ((mask & char_class_blank) && (std::isspace)(static_cast<unsigned char>(c)) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c))
// word: this bit on its own only matches '_'.
298 || ((mask & char_class_word) && (c == '_'))
// vertical: is_separator() characters plus '\v'; horizontal: the remaining whitespace.
// NOTE: char_class_unicode is not tested in this narrow-character version.
299 || ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == '\v')))
300 || ((mask & char_class_horizontal) && (std::isspace)(static_cast<unsigned char>(c)) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != '\v'));
// Looks up the collating element named by [p1, p2) through
// lookup_default_collate_name.
303 inline c_regex_traits<char>::string_type BOOST_REGEX_CALL c_regex_traits<char>::lookup_collatename(const char* p1, const char* p2)
305 std::string s(p1, p2);
306 s = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(s);
// Special case: the lookup produced nothing and the name is exactly one
// character long (the action taken is not visible in this excerpt).
307 if (s.empty() && (p2 - p1 == 1))
// Converts the single character c to a number in the given radix by running
// std::strtol over a one-character, NUL-terminated buffer.
312 inline int BOOST_REGEX_CALL c_regex_traits<char>::value(char c, int radix)
314 char b[2] = { c, '\0', };
// ep receives strtol's end-of-parse pointer (its declaration and the handling
// of the parsed result are not visible in this excerpt).
316 int result = std::strtol(b, &ep, radix);
322 #ifndef BOOST_NO_WREGEX
// Wide-character sort-key transform: builds the key for [p1, p2) with
// std::wcsxfrm, enlarging the output buffer whenever wcsxfrm reports that
// the key needs more room than the buffer offers.
324 inline c_regex_traits<wchar_t>::string_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::transform(const wchar_t* p1, const wchar_t* p2)
// Copy the range so wcsxfrm receives a NUL-terminated source.
328 std::wstring src(p1, p2);
329 std::wstring result(s, L' ');
// wcsxfrm returns the length the key requires; keep looping while that
// exceeds the current buffer size s.
330 while (s < (r = std::wcsxfrm(&*result.begin(), src.c_str(), s)))
332 #if defined(_CPPLIB_VER)
334 // A bug in VC11 and 12 causes the program to hang if we pass a null-string
335 // to std::strxfrm, but only for certain locales :-(
336 // Probably affects Intel and Clang or any compiler using the VC std library (Dinkumware).
// NOTE(review): the comment above names std::strxfrm, but the call guarded
// in this function is std::wcsxfrm.
341 result.insert(result.begin(), static_cast<wchar_t>(0));
// Grow the buffer by the shortfall plus a little slack before retrying.
345 result.append(r - s + 3, L' ');
352 inline c_regex_traits<wchar_t>::string_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::transform_primary(const wchar_t* p1, const wchar_t* p2)
354 static wchar_t s_delim;
355 static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast<const c_regex_traits<wchar_t>*>(0), &s_delim);
358 // What we do here depends upon the format of the sort key returned by
359 // sort key returned by this->transform:
361 switch (s_collate_type)
363 case ::boost::BOOST_REGEX_DETAIL_NS::sort_C:
364 case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown:
365 // the best we can do is translate to lower case, then get a regular sort key:
367 result.assign(p1, p2);
368 for (std::wstring::size_type i = 0; i < result.size(); ++i)
369 result[i] = (std::towlower)(result[i]);
370 result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
373 case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed:
375 // get a regular sort key, and then truncate it:
376 result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
377 result.erase(s_delim);
380 case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim:
381 // get a regular sort key, and then truncate everything after the delim:
382 result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
383 if ((!result.empty()) && (result[0] == s_delim))
386 for (i = 0; i < result.size(); ++i)
388 if (result[i] == s_delim)
395 result = std::wstring(1, char(0));
// Maps the wide character-class name in [p1, p2) to a char_class_type mask
// taken from the local masks table.
399 inline c_regex_traits<wchar_t>::char_class_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::lookup_classname(const wchar_t* p1, const wchar_t* p2)
401 using namespace BOOST_REGEX_DETAIL_NS;
// Table of masks, indexed by class id + 1 (see the return statement). Only
// part of the table is visible in this excerpt.
402 static const char_class_type masks[] =
412 char_class_horizontal,
423 char_class_alnum | char_class_word,
424 char_class_alnum | char_class_word,
// First attempt: look the name up exactly as given.
428 int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2);
// Second lookup using a lower-cased copy of the name (the condition that
// triggers it is not visible in this excerpt).
431 std::wstring s(p1, p2);
432 for (std::wstring::size_type i = 0; i < s.size(); ++i)
433 s[i] = (std::towlower)(s[i]);
434 idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size());
// The table is indexed with idx + 1, so the assertion bounds-checks that value.
436 BOOST_REGEX_ASSERT(idx + 1 < static_cast<int>(sizeof(masks) / sizeof(masks[0])));
437 return masks[idx + 1];
// Tests whether the wide character c belongs to at least one of the character
// classes whose bits are set in mask. Each clause pairs one mask bit with the
// matching wide-character classification call.
440 inline bool BOOST_REGEX_CALL c_regex_traits<wchar_t>::isctype(wchar_t c, char_class_type mask)
442 using namespace BOOST_REGEX_DETAIL_NS;
444 ((mask & char_class_space) && (std::iswspace)(c))
445 || ((mask & char_class_print) && (std::iswprint)(c))
446 || ((mask & char_class_cntrl) && (std::iswcntrl)(c))
447 || ((mask & char_class_upper) && (std::iswupper)(c))
448 || ((mask & char_class_lower) && (std::iswlower)(c))
449 || ((mask & char_class_alpha) && (std::iswalpha)(c))
450 || ((mask & char_class_digit) && (std::iswdigit)(c))
451 || ((mask & char_class_punct) && (std::iswpunct)(c))
452 || ((mask & char_class_xdigit) && (std::iswxdigit)(c))
// blank: whitespace for which is_separator() is false.
453 || ((mask & char_class_blank) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c))
// word: this bit on its own only matches '_'.
454 || ((mask & char_class_word) && (c == '_'))
// unicode: true when c has any bit set outside the low eight bits.
455 || ((mask & char_class_unicode) && (c & ~static_cast<wchar_t>(0xff)))
// vertical: is_separator() characters plus L'\v'; horizontal: the remaining whitespace.
456 || ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == L'\v')))
457 || ((mask & char_class_horizontal) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != L'\v'));
// Looks up the collating element named by the wide range [p1, p2). The name
// is copied into a narrow std::string for lookup_default_collate_name, and
// the lookup result is converted back into string_type.
460 inline c_regex_traits<wchar_t>::string_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::lookup_collatename(const wchar_t* p1, const wchar_t* p2)
// Compiler warning 4244 is disabled before the wide-to-narrow copy below.
463 #pragma warning(push)
464 #pragma warning(disable: 4244)
466 std::string name(p1, p2);
470 name = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(name);
// Three possible results follow (the conditions selecting between them are
// not visible in this excerpt): the widened lookup result, the single
// character at p1, or an empty string.
472 return string_type(name.begin(), name.end());
474 return string_type(1, *p1);
475 return string_type();
// Converts the single wide character c to a number in the given radix by
// running std::wcstol over a one-character, NUL-terminated buffer.
478 inline int BOOST_REGEX_CALL c_regex_traits<wchar_t>::value(wchar_t c, int radix)
480 #ifdef BOOST_BORLANDC
481 // workaround for broken wcstol:
// Characters that are not hexadecimal digits are screened out first (the
// value returned for them is not visible in this excerpt).
482 if ((std::iswxdigit)(c) == 0)
485 wchar_t b[2] = { c, '\0', };
// ep receives wcstol's end-of-parse pointer (its declaration and the handling
// of the parsed result are not visible in this excerpt).
487 int result = std::wcstol(b, &ep, radix);
498 #pragma warning(push)
499 #pragma warning(disable: 4103)
501 #ifdef BOOST_HAS_ABI_HEADERS
502 # include BOOST_ABI_SUFFIX