// Extracted from the git.proxmox.com web view of ceph.git:
// ceph/src/boost/libs/regex/test/regress/test_unicode.cpp
/*
 * Use, modification and distribution are subject to the
 * Boost Software License, Version 1.0. (See accompanying file
 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 */

/*
 * LOCATION:    see http://www.boost.org for most recent version.
 * FILE         test_unicode.cpp
 * VERSION      see <boost/version.hpp>
 * DESCRIPTION: Unicode specific tests (requires ICU).
 */
19 #include <boost/regex/config.hpp>
24 #pragma warning(disable:4127)
27 #ifndef BOOST_NO_STD_WSTRING
//
// TEST_REGEX_SEARCH_U(s, f, t, m, a)
//
// Statement-like macro that runs one wide-character search test:
//   s, t : wide string literals; each is copied into a std::wstring whose
//          length comes from sizeof (minus the terminating null), so the
//          literal's full extent is kept.
//   f, m, a : forwarded unchanged to test_info<wchar_t>::set_info(); in this
//          file they are `perl`, `match_default` (optionally or'ed with other
//          match flags) and a make_array(...) expression.
// After recording the test data it calls
// test_icu(wchar_t(0), test_regex_search_tag()).
//
// The body is wrapped in do{...}while(0) so the locals e/se/st/sst get their
// own scope on every use and the macro can be followed by a semicolon.
// NOTE(review): the do/while wrapper lines are missing from this extract and
// were restored; confirm against upstream.
//
#define TEST_REGEX_SEARCH_U(s, f, t, m, a)\
   do{\
      const wchar_t e[] = { s };\
      std::wstring se(e, (sizeof(e) / sizeof(wchar_t)) - 1);\
      const wchar_t st[] = { t };\
      std::wstring sst(st, (sizeof(st) / sizeof(wchar_t)) - 1);\
      test_info<wchar_t>::set_info(__FILE__, __LINE__, se, f, sst, m, a);\
      test_icu(wchar_t(0), test_regex_search_tag());\
   }while(0)
//
// TEST_REGEX_CLASS_U(classname, character)
//
// Checks one character class against one sample character:
//   pattern : L"[[:classname:]]"  (classname is stringized, so it may contain
//             spaces or punctuation, e.g. `Uppercase Letter`, `L*`)
//   text    : L"\x<character>"    (character is a run of hex digits, e.g. 1FFC)
// and delegates to TEST_REGEX_SEARCH_U with `perl`, `match_default` and the
// expectation make_array(0, 1, -2, -2).
// NOTE(review): several continuation lines are missing from this extract; the
// argument list was rebuilt from the visible parentheses and from the explicit
// TEST_REGEX_SEARCH_U calls in this file -- confirm against upstream.
//
#define TEST_REGEX_CLASS_U(classname, character)\
   TEST_REGEX_SEARCH_U(\
      L"[[:" BOOST_JOIN(L, BOOST_STRINGIZE(classname)) L":]]",\
      perl, \
      BOOST_JOIN(L, \
         BOOST_STRINGIZE(\
            BOOST_JOIN(\x, character))), \
      match_default, \
      make_array(0, 1, -2, -2))
// No-op fallbacks: when the real test macros are not defined, every
// TEST_REGEX_SEARCH_U / TEST_REGEX_CLASS_U use expands to nothing.
// NOTE(review): the preprocessor branch that originally selected these
// definitions is not visible in this extract, so each fallback is guarded on
// the macro being undefined instead of on a guessed condition.
#ifndef TEST_REGEX_SEARCH_U
#define TEST_REGEX_SEARCH_U(s, f, t, m, a)
#endif
#ifndef TEST_REGEX_CLASS_U
#define TEST_REGEX_CLASS_U(classname, character)
#endif
58 using namespace boost::regex_constants
;
60 TEST_REGEX_CLASS_U(L
*, 3108);
61 TEST_REGEX_CLASS_U(Letter
, 3108);
62 TEST_REGEX_CLASS_U(Lu
, 2145);
63 TEST_REGEX_CLASS_U(Uppercase Letter
, 2145);
64 TEST_REGEX_CLASS_U(Ll
, 2146);
65 TEST_REGEX_CLASS_U(Lowercase Letter
, 2146);
66 TEST_REGEX_CLASS_U(Lt
, 1FFC
);
67 TEST_REGEX_CLASS_U(Titlecase Letter
, 1FFC
);
68 TEST_REGEX_CLASS_U(Lm
, 1D61
);
69 TEST_REGEX_CLASS_U(Modifier Letter
, 1D61
);
70 TEST_REGEX_CLASS_U(Lo
, 1974);
71 TEST_REGEX_CLASS_U(Other Letter
, 1974);
72 TEST_REGEX_CLASS_U(M
*, 20EA
);
73 TEST_REGEX_CLASS_U(Mark
, 20EA
);
74 TEST_REGEX_CLASS_U(Mn
, 20EA
);
75 TEST_REGEX_CLASS_U(Non
-Spacing Mark
, 20EA
);
76 TEST_REGEX_CLASS_U(Mc
, 1938);
77 TEST_REGEX_CLASS_U(Spacing Combining Mark
, 1938);
78 TEST_REGEX_CLASS_U(Me
, 0488);
79 TEST_REGEX_CLASS_U(Enclosing Mark
, 0488);
80 TEST_REGEX_CLASS_U(N
*, 0669);
81 TEST_REGEX_CLASS_U(Number
, 0669);
82 TEST_REGEX_CLASS_U(Nd
, 0669);
83 TEST_REGEX_CLASS_U(Decimal Digit Number
, 0669);
84 TEST_REGEX_CLASS_U(Nl
, 303A
);
85 TEST_REGEX_CLASS_U(Letter Number
, 303A
);
86 TEST_REGEX_CLASS_U(No
, 2793);
87 TEST_REGEX_CLASS_U(Other Number
, 2793);
89 TEST_REGEX_CLASS_U(S
*, 2144);
90 TEST_REGEX_CLASS_U(Symbol
, 2144);
91 TEST_REGEX_CLASS_U(Sm
, 2144);
92 TEST_REGEX_CLASS_U(Math Symbol
, 2144);
93 TEST_REGEX_CLASS_U(Sc
, 20B1
);
94 TEST_REGEX_CLASS_U(Currency Symbol
, 20B1
);
95 TEST_REGEX_CLASS_U(Sk
, 1FFE
);
96 TEST_REGEX_CLASS_U(Modifier Symbol
, 1FFE
);
97 TEST_REGEX_CLASS_U(So
, 19FF
);
98 TEST_REGEX_CLASS_U(Other Symbol
, 19FF
);
100 TEST_REGEX_CLASS_U(P
*, 005F
);
101 TEST_REGEX_CLASS_U(Punctuation
, 005F
);
102 TEST_REGEX_CLASS_U(Pc
, 005F
);
103 TEST_REGEX_CLASS_U(Connector Punctuation
, 005F
);
104 TEST_REGEX_CLASS_U(Pd
, 002D
);
105 TEST_REGEX_CLASS_U(Dash Punctuation
, 002D
);
106 TEST_REGEX_CLASS_U(Ps
, 0028);
107 TEST_REGEX_CLASS_U(Open Punctuation
, 0028);
108 TEST_REGEX_CLASS_U(Pe
, FF63
);
109 TEST_REGEX_CLASS_U(Close Punctuation
, FF63
);
110 TEST_REGEX_CLASS_U(Pi
, 2039);
111 TEST_REGEX_CLASS_U(Initial Punctuation
, 2039);
112 TEST_REGEX_CLASS_U(Pf
, 203A
);
113 TEST_REGEX_CLASS_U(Final Punctuation
, 203A
);
114 TEST_REGEX_CLASS_U(Po
, 2038);
115 TEST_REGEX_CLASS_U(Other Punctuation
, 2038);
117 TEST_REGEX_CLASS_U(Z
*, 202F
);
118 TEST_REGEX_CLASS_U(Separator
, 202F
);
119 TEST_REGEX_CLASS_U(Zs
, 202F
);
120 TEST_REGEX_CLASS_U(Space Separator
, 202F
);
121 TEST_REGEX_CLASS_U(Zl
, 2028);
122 TEST_REGEX_CLASS_U(Line Separator
, 2028);
123 TEST_REGEX_CLASS_U(Zp
, 2029);
124 TEST_REGEX_CLASS_U(Paragraph Separator
, 2029);
125 #if !BOOST_WORKAROUND(BOOST_MSVC, < 1300)
126 // Some tests have to be disabled for VC6 because the compiler
127 // mangles the string literals...
128 TEST_REGEX_CLASS_U(C
*, 009F
);
129 TEST_REGEX_CLASS_U(Other
, 009F
);
130 TEST_REGEX_CLASS_U(Cc
, 009F
);
131 TEST_REGEX_CLASS_U(Control
, 009F
);
133 TEST_REGEX_CLASS_U(Cf
, FFFB
);
134 TEST_REGEX_CLASS_U(Format
, FFFB
);
135 //TEST_REGEX_CLASS_U(Cs, DC00);
136 //TEST_REGEX_CLASS_U(Surrogate, DC00);
137 TEST_REGEX_CLASS_U(Co
, F8FF
);
138 TEST_REGEX_CLASS_U(Private Use
, F8FF
);
139 TEST_REGEX_CLASS_U(Cn
, FFFF
);
140 TEST_REGEX_CLASS_U(Not Assigned
, FFFF
);
141 TEST_REGEX_CLASS_U(Any
, 2038);
142 TEST_REGEX_CLASS_U(Assigned
, 2038);
143 TEST_REGEX_CLASS_U(ASCII
, 7f
);
144 TEST_REGEX_SEARCH_U(L
"[[:Assigned:]]", perl
, L
"\xffff", match_default
, make_array(-2, -2));
145 TEST_REGEX_SEARCH_U(L
"[[:ASCII:]]", perl
, L
"\x80", match_default
, make_array(-2, -2));
147 TEST_REGEX_SEARCH_U(L
"\\N{KHMER DIGIT SIX}", perl
, L
"\x17E6", match_default
, make_array(0, 1, -2, -2));
148 TEST_REGEX_SEARCH_U(L
"\\N{MODIFIER LETTER LOW ACUTE ACCENT}", perl
, L
"\x02CF", match_default
, make_array(0, 1, -2, -2));
149 TEST_REGEX_SEARCH_U(L
"\\N{SUPERSCRIPT ONE}", perl
, L
"\x00B9", match_default
, make_array(0, 1, -2, -2));
150 TEST_REGEX_SEARCH_U(L
"[\\N{KHMER DIGIT SIX}]", perl
, L
"\x17E6", match_default
, make_array(0, 1, -2, -2));
151 TEST_REGEX_SEARCH_U(L
"[\\N{MODIFIER LETTER LOW ACUTE ACCENT}]", perl
, L
"\x02CF", match_default
, make_array(0, 1, -2, -2));
152 TEST_REGEX_SEARCH_U(L
"[\\N{SUPERSCRIPT ONE}]", perl
, L
"\x00B9", match_default
, make_array(0, 1, -2, -2));
153 TEST_REGEX_SEARCH_U(L
"\\N{CJK UNIFIED IDEOGRAPH-7FED}", perl
, L
"\x7FED", match_default
, make_array(0, 1, -2, -2));
154 #if !BOOST_WORKAROUND(BOOST_MSVC, < 1300)
155 // Some tests have to be disabled for VC6 because the compiler
156 // mangles the string literals...
157 TEST_REGEX_SEARCH_U(L
"\\w+", perl
, L
" e\x301" L
"coute ", match_default
, make_array(1, 8, -2, -2));
159 TEST_REGEX_SEARCH_U(L
"^", perl
, L
" \x2028 \x2029 \x000D\x000A \x000A \x000C \x000D \x0085 ",
160 match_default
| match_not_bol
, make_array(2, 2, -2, 4, 4, -2, 7, 7, -2, 9, 9, -2, 11, 11, -2, 13, 13, -2, 15, 15, -2, -2));
161 TEST_REGEX_SEARCH_U(L
"$", perl
, L
" \x2028 \x2029 \x000D\x000A \x000A \x000C \x000D \x0085 ",
162 match_default
| match_not_eol
, make_array(1, 1, -2, 3, 3, -2, 5, 5, -2, 8, 8, -2, 10, 10, -2, 12, 12, -2, 14, 14, -2, -2));
163 TEST_REGEX_SEARCH_U(L
".", perl
, L
" \x2028\x2029\x000D\x000A\x000A\x000C\x000D\x0085 ",
164 match_default
| match_not_dot_newline
, make_array(0, 1, -2, 9, 10, -2, -2));
// Empty stand-in for test_unicode(): does nothing.
// NOTE(review): presumably the alternative branch of a preprocessor
// conditional whose #else/#endif lines are not visible in this extract.
void test_unicode()
{
}