]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /* |
2 | * | |
3 | * Copyright (c) 2004 | |
4 | * John Maddock | |
5 | * | |
6 | * Use, modification and distribution are subject to the | |
7 | * Boost Software License, Version 1.0. (See accompanying file | |
8 | * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
9 | * | |
10 | */ | |
11 | ||
12 | /* | |
13 | * LOCATION: see http://www.boost.org for most recent version. | |
14 | * FILE test_icu.cpp | |
15 | * VERSION see <boost/version.hpp> | |
16 | * DESCRIPTION: Test code for Unicode regexes with ICU support. | |
17 | */ | |
18 | ||
19 | // | |
20 | // We can only build this if we have ICU support: | |
21 | // | |
22 | #include <boost/regex/config.hpp> | |
23 | #if defined(BOOST_HAS_ICU) && !defined(BOOST_NO_STD_WSTRING) | |
24 | ||
25 | #include <boost/regex/icu.hpp> | |
26 | #include "test.hpp" | |
27 | ||
namespace unnecessary_fix{
//
// Some outrageously broken std lib's don't have a conforming
// back_insert_iterator, which means we can't use the std version
// as an argument to regex_replace, sigh... use our own.
//
// The iterator typedefs are declared directly on the class rather than
// inherited from std::iterator: that base class is deprecated since C++17,
// and whenever BOOST_NO_STD_ITERATOR removed it the class was left without
// difference_type / pointer / reference, which made it unusable with
// std::iterator_traits.
//
template <class Seq>
class back_insert_iterator
{
private:
   // Non-owning pointer to the sequence being appended to; the sequence
   // must outlive every copy of this iterator.
   Seq* container;
public:
   typedef const typename Seq::value_type value_type;
   typedef Seq                            container_type;
   typedef void                           difference_type;
   typedef void                           pointer;
   typedef void                           reference;
   typedef std::output_iterator_tag       iterator_category;

   // Binds the iterator to the sequence x.
   explicit back_insert_iterator(Seq& x) : container(&x) {}

   // Assigning a value appends it to the bound sequence.
   back_insert_iterator& operator=(const value_type& val)
   {
      container->push_back(val);
      return *this;
   }

   // Dereference and increment do nothing: all the work happens in
   // operator= above, so "*it++ = v" simply appends v.
   back_insert_iterator& operator*() { return *this; }
   back_insert_iterator& operator++() { return *this; }
   back_insert_iterator  operator++(int) { return *this; }
};

// Convenience factory: returns a back_insert_iterator bound to x.
template <class Seq>
inline back_insert_iterator<Seq> back_inserter(Seq& x)
{
   return back_insert_iterator<Seq>(x);
}

}
65 | ||
66 | // | |
67 | // compare two match_results struct's for equality, | |
68 | // converting the iterator as needed: | |
69 | // | |
70 | template <class MR1, class MR2> | |
71 | void compare_result(const MR1& w1, const MR2& w2, boost::mpl::int_<2> const*) | |
72 | { | |
73 | typedef typename MR2::value_type MR2_value_type; | |
74 | typedef typename MR2_value_type::const_iterator MR2_iterator_type; | |
75 | typedef boost::u16_to_u32_iterator<MR2_iterator_type> iterator_type; | |
76 | //typedef typename MR1::size_type size_type; | |
77 | if(w1.size() != w2.size()) | |
78 | { | |
79 | BOOST_REGEX_TEST_ERROR("Size mismatch in match_results class", UChar32); | |
80 | } | |
81 | for(int i = 0; i < (int)w1.size(); ++i) | |
82 | { | |
83 | if(w1[i].matched) | |
84 | { | |
85 | if(w2[i].matched == 0) | |
86 | { | |
87 | BOOST_REGEX_TEST_ERROR("Matched mismatch in match_results class", UChar32); | |
88 | } | |
89 | if((w1.position(i) != boost::BOOST_REGEX_DETAIL_NS::distance(iterator_type(w2.prefix().first), iterator_type(w2[i].first))) || (w1.length(i) != boost::BOOST_REGEX_DETAIL_NS::distance(iterator_type(w2[i].first), iterator_type(w2[i].second)))) | |
90 | { | |
91 | BOOST_REGEX_TEST_ERROR("Iterator mismatch in match_results class", UChar32); | |
92 | } | |
93 | } | |
94 | else if(w2[i].matched) | |
95 | { | |
96 | BOOST_REGEX_TEST_ERROR("Matched mismatch in match_results class", UChar32); | |
97 | } | |
98 | } | |
b32b8144 FG |
99 | // |
100 | // We don't have a way to access a list of named sub-expressions since we only store | |
101 | // hashes, but "abc" and "N" are common names used in our tests, so check those: | |
102 | // | |
103 | if (w1["abc"].matched) | |
104 | { | |
105 | if (w2["abc"].matched == 0) | |
106 | { | |
107 | BOOST_REGEX_TEST_ERROR("Matched mismatch in match_results class", UChar32); | |
108 | } | |
109 | if ((w1.position("abc") != boost::BOOST_REGEX_DETAIL_NS::distance(iterator_type(w2.prefix().first), iterator_type(w2["abc"].first))) || (w1.length("abc") != boost::BOOST_REGEX_DETAIL_NS::distance(iterator_type(w2["abc"].first), iterator_type(w2["abc"].second)))) | |
110 | { | |
111 | BOOST_REGEX_TEST_ERROR("Iterator mismatch in match_results class", UChar32); | |
112 | } | |
113 | } | |
114 | else if (w2["abc"].matched) | |
115 | { | |
116 | BOOST_REGEX_TEST_ERROR("Matched mismatch in match_results class", UChar32); | |
117 | } | |
118 | if (w1["N"].matched) | |
119 | { | |
120 | if (w2["N"].matched == 0) | |
121 | { | |
122 | BOOST_REGEX_TEST_ERROR("Matched mismatch in match_results class", UChar32); | |
123 | } | |
124 | if ((w1.position("N") != boost::BOOST_REGEX_DETAIL_NS::distance(iterator_type(w2.prefix().first), iterator_type(w2["N"].first))) || (w1.length("N") != boost::BOOST_REGEX_DETAIL_NS::distance(iterator_type(w2["N"].first), iterator_type(w2["N"].second)))) | |
125 | { | |
126 | BOOST_REGEX_TEST_ERROR("Iterator mismatch in match_results class", UChar32); | |
127 | } | |
128 | } | |
129 | else if (w2["N"].matched) | |
130 | { | |
131 | BOOST_REGEX_TEST_ERROR("Matched mismatch in match_results class", UChar32); | |
132 | } | |
7c673cae FG |
133 | } |
134 | template <class MR1, class MR2> | |
135 | void compare_result(const MR1& w1, const MR2& w2, boost::mpl::int_<1> const*) | |
136 | { | |
137 | typedef typename MR2::value_type MR2_value_type; | |
138 | typedef typename MR2_value_type::const_iterator MR2_iterator_type; | |
139 | typedef boost::u8_to_u32_iterator<MR2_iterator_type> iterator_type; | |
140 | //typedef typename MR1::size_type size_type; | |
141 | if(w1.size() != w2.size()) | |
142 | { | |
143 | BOOST_REGEX_TEST_ERROR("Size mismatch in match_results class", UChar32); | |
144 | } | |
145 | for(int i = 0; i < (int)w1.size(); ++i) | |
146 | { | |
147 | if(w1[i].matched) | |
148 | { | |
149 | if(w2[i].matched == 0) | |
150 | { | |
151 | BOOST_REGEX_TEST_ERROR("Matched mismatch in match_results class", UChar32); | |
152 | } | |
153 | if((w1.position(i) != boost::BOOST_REGEX_DETAIL_NS::distance(iterator_type(w2.prefix().first), iterator_type(w2[i].first))) || (w1.length(i) != boost::BOOST_REGEX_DETAIL_NS::distance(iterator_type(w2[i].first), iterator_type(w2[i].second)))) | |
154 | { | |
155 | BOOST_REGEX_TEST_ERROR("Iterator mismatch in match_results class", UChar32); | |
156 | } | |
157 | } | |
158 | else if(w2[i].matched) | |
159 | { | |
160 | BOOST_REGEX_TEST_ERROR("Matched mismatch in match_results class", UChar32); | |
161 | } | |
162 | } | |
163 | } | |
164 | ||
165 | void test_icu_grep(const boost::u32regex& r, const std::vector< ::UChar32>& search_text) | |
166 | { | |
167 | typedef std::vector< ::UChar32>::const_iterator const_iterator; | |
168 | typedef boost::u32regex_iterator<const_iterator> test_iterator; | |
169 | boost::regex_constants::match_flag_type opts = test_info<wchar_t>::match_options(); | |
170 | const int* answer_table = test_info<wchar_t>::answer_table(); | |
171 | test_iterator start(search_text.begin(), search_text.end(), r, opts), end; | |
172 | test_iterator copy(start); | |
173 | const_iterator last_end = search_text.begin(); | |
174 | while(start != end) | |
175 | { | |
176 | if(start != copy) | |
177 | { | |
178 | BOOST_REGEX_TEST_ERROR("Failed iterator != comparison.", wchar_t); | |
179 | } | |
180 | if(!(start == copy)) | |
181 | { | |
182 | BOOST_REGEX_TEST_ERROR("Failed iterator == comparison.", wchar_t); | |
183 | } | |
184 | test_result(*start, search_text.begin(), answer_table); | |
185 | // test $` and $' : | |
186 | if(start->prefix().first != last_end) | |
187 | { | |
188 | BOOST_REGEX_TEST_ERROR("Incorrect position for start of $`", wchar_t); | |
189 | } | |
190 | if(start->prefix().second != (*start)[0].first) | |
191 | { | |
192 | BOOST_REGEX_TEST_ERROR("Incorrect position for end of $`", wchar_t); | |
193 | } | |
194 | if(start->prefix().matched != (start->prefix().first != start->prefix().second)) | |
195 | { | |
196 | BOOST_REGEX_TEST_ERROR("Incorrect position for matched member of $`", wchar_t); | |
197 | } | |
198 | if(start->suffix().first != (*start)[0].second) | |
199 | { | |
200 | BOOST_REGEX_TEST_ERROR("Incorrect position for start of $'", wchar_t); | |
201 | } | |
202 | if(start->suffix().second != search_text.end()) | |
203 | { | |
204 | BOOST_REGEX_TEST_ERROR("Incorrect position for end of $'", wchar_t); | |
205 | } | |
206 | if(start->suffix().matched != (start->suffix().first != start->suffix().second)) | |
207 | { | |
208 | BOOST_REGEX_TEST_ERROR("Incorrect position for matched member of $'", wchar_t); | |
209 | } | |
210 | last_end = (*start)[0].second; | |
211 | ++start; | |
212 | ++copy; | |
213 | // move on the answer table to next set of answers; | |
214 | if(*answer_table != -2) | |
215 | while(*answer_table++ != -2){} | |
216 | } | |
217 | if(answer_table[0] >= 0) | |
218 | { | |
219 | // we should have had a match but didn't: | |
220 | BOOST_REGEX_TEST_ERROR("Expected match was not found.", wchar_t); | |
221 | } | |
222 | } | |
223 | ||
224 | void test_icu(const wchar_t&, const test_regex_search_tag& ) | |
225 | { | |
226 | boost::u32regex r; | |
227 | if(*test_locale::c_str()) | |
228 | { | |
229 | U_NAMESPACE_QUALIFIER Locale l(test_locale::c_str()); | |
230 | if(l.isBogus()) | |
231 | return; | |
232 | r.imbue(l); | |
233 | } | |
234 | ||
235 | std::vector< ::UChar32> expression; | |
236 | #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS | |
237 | expression.assign(test_info<wchar_t>::expression().begin(), test_info<wchar_t>::expression().end()); | |
238 | #else | |
239 | std::copy(test_info<wchar_t>::expression().begin(), test_info<wchar_t>::expression().end(), std::back_inserter(expression)); | |
240 | #endif | |
241 | boost::regex_constants::syntax_option_type syntax_options = test_info<UChar32>::syntax_options(); | |
242 | #ifndef BOOST_NO_EXCEPTIONS | |
243 | try | |
244 | #endif | |
245 | { | |
246 | #if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__) | |
247 | r.assign(expression.begin(), expression.end(), syntax_options); | |
248 | #else | |
249 | if(expression.size()) | |
250 | r.assign(&*expression.begin(), expression.size(), syntax_options); | |
251 | else | |
252 | r.assign(static_cast<UChar32 const*>(0), expression.size(), syntax_options); | |
253 | #endif | |
254 | if(r.status()) | |
255 | { | |
256 | BOOST_REGEX_TEST_ERROR("Expression did not compile when it should have done, error code = " << r.status(), UChar32); | |
257 | } | |
258 | std::vector< ::UChar32> search_text; | |
259 | #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS | |
260 | search_text.assign(test_info<wchar_t>::search_text().begin(), test_info<wchar_t>::search_text().end()); | |
261 | #else | |
262 | std::copy(test_info<wchar_t>::search_text().begin(), test_info<wchar_t>::search_text().end(), std::back_inserter(search_text)); | |
263 | #endif | |
264 | boost::regex_constants::match_flag_type opts = test_info<wchar_t>::match_options(); | |
265 | const int* answer_table = test_info<wchar_t>::answer_table(); | |
266 | boost::match_results<std::vector< ::UChar32>::const_iterator> what; | |
267 | if(boost::u32regex_search( | |
268 | const_cast<std::vector< ::UChar32>const&>(search_text).begin(), | |
269 | const_cast<std::vector< ::UChar32>const&>(search_text).end(), | |
270 | what, | |
271 | r, | |
272 | opts)) | |
273 | { | |
274 | test_result(what, const_cast<std::vector< ::UChar32>const&>(search_text).begin(), answer_table); | |
275 | } | |
276 | else if(answer_table[0] >= 0) | |
277 | { | |
278 | // we should have had a match but didn't: | |
279 | BOOST_REGEX_TEST_ERROR("Expected match was not found.", UChar32); | |
280 | } | |
281 | ||
282 | if(0 == *test_locale::c_str()) | |
283 | { | |
284 | // | |
285 | // Now try UTF-16 construction: | |
286 | // | |
287 | typedef boost::u32_to_u16_iterator<std::vector<UChar32>::const_iterator> u16_conv; | |
288 | std::vector<UChar> expression16, text16; | |
289 | boost::match_results<std::vector<UChar>::const_iterator> what16; | |
290 | boost::match_results<const UChar*> what16c; | |
291 | #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS | |
292 | expression16.assign(u16_conv(expression.begin()), u16_conv(expression.end())); | |
293 | text16.assign(u16_conv(search_text.begin()), u16_conv(search_text.end())); | |
294 | #else | |
295 | expression16.clear(); | |
296 | std::copy(u16_conv(expression.begin()), u16_conv(expression.end()), std::back_inserter(expression16)); | |
297 | text16.clear(); | |
298 | std::copy(u16_conv(search_text.begin()), u16_conv(search_text.end()), std::back_inserter(text16)); | |
299 | #endif | |
300 | r = boost::make_u32regex(expression16.begin(), expression16.end(), syntax_options); | |
301 | if(boost::u32regex_search(const_cast<const std::vector<UChar>&>(text16).begin(), const_cast<const std::vector<UChar>&>(text16).end(), what16, r, opts)) | |
302 | { | |
303 | compare_result(what, what16, static_cast<boost::mpl::int_<2> const*>(0)); | |
304 | } | |
305 | else if(answer_table[0] >= 0) | |
306 | { | |
307 | // we should have had a match but didn't: | |
308 | BOOST_REGEX_TEST_ERROR("Expected match was not found.", UChar32); | |
309 | } | |
310 | if(std::find(expression16.begin(), expression16.end(), 0) == expression16.end()) | |
311 | { | |
312 | expression16.push_back(0); | |
313 | r = boost::make_u32regex(&*expression16.begin(), syntax_options); | |
314 | if(std::find(text16.begin(), text16.end(), 0) == text16.end()) | |
315 | { | |
316 | text16.push_back(0); | |
317 | if(boost::u32regex_search((const UChar*)&*text16.begin(), what16c, r, opts)) | |
318 | { | |
319 | compare_result(what, what16c, static_cast<boost::mpl::int_<2> const*>(0)); | |
320 | } | |
321 | else if(answer_table[0] >= 0) | |
322 | { | |
323 | // we should have had a match but didn't: | |
324 | BOOST_REGEX_TEST_ERROR("Expected match was not found.", UChar32); | |
325 | } | |
326 | } | |
327 | } | |
328 | // | |
329 | // Now try UTF-8 construction: | |
330 | // | |
331 | typedef boost::u32_to_u8_iterator<std::vector<UChar32>::const_iterator, unsigned char> u8_conv; | |
332 | std::vector<unsigned char> expression8, text8; | |
333 | boost::match_results<std::vector<unsigned char>::const_iterator> what8; | |
334 | boost::match_results<const unsigned char*> what8c; | |
335 | #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS | |
336 | expression8.assign(u8_conv(expression.begin()), u8_conv(expression.end())); | |
337 | text8.assign(u8_conv(search_text.begin()), u8_conv(search_text.end())); | |
338 | #else | |
339 | expression8.clear(); | |
340 | std::copy(u8_conv(expression.begin()), u8_conv(expression.end()), std::back_inserter(expression8)); | |
341 | text8.clear(); | |
342 | std::copy(u8_conv(search_text.begin()), u8_conv(search_text.end()), std::back_inserter(text8)); | |
343 | #endif | |
344 | r = boost::make_u32regex(expression8.begin(), expression8.end(), syntax_options); | |
345 | if(boost::u32regex_search(const_cast<const std::vector<unsigned char>&>(text8).begin(), const_cast<const std::vector<unsigned char>&>(text8).end(), what8, r, opts)) | |
346 | { | |
347 | compare_result(what, what8, static_cast<boost::mpl::int_<1> const*>(0)); | |
348 | } | |
349 | else if(answer_table[0] >= 0) | |
350 | { | |
351 | // we should have had a match but didn't: | |
352 | BOOST_REGEX_TEST_ERROR("Expected match was not found.", UChar32); | |
353 | } | |
354 | if(std::find(expression8.begin(), expression8.end(), 0) == expression8.end()) | |
355 | { | |
356 | expression8.push_back(0); | |
357 | r = boost::make_u32regex(&*expression8.begin(), syntax_options); | |
358 | if(std::find(text8.begin(), text8.end(), 0) == text8.end()) | |
359 | { | |
360 | text8.push_back(0); | |
361 | if(boost::u32regex_search((const unsigned char*)&*text8.begin(), what8c, r, opts)) | |
362 | { | |
363 | compare_result(what, what8c, static_cast<boost::mpl::int_<1> const*>(0)); | |
364 | } | |
365 | else if(answer_table[0] >= 0) | |
366 | { | |
367 | // we should have had a match but didn't: | |
368 | BOOST_REGEX_TEST_ERROR("Expected match was not found.", UChar32); | |
369 | } | |
370 | } | |
371 | } | |
372 | } | |
373 | // | |
374 | // finally try a grep: | |
375 | // | |
376 | test_icu_grep(r, search_text); | |
377 | } | |
378 | #ifndef BOOST_NO_EXCEPTIONS | |
379 | catch(const boost::bad_expression& e) | |
380 | { | |
381 | BOOST_REGEX_TEST_ERROR("Expression did not compile when it should have done: " << e.what(), UChar32); | |
382 | } | |
383 | catch(const std::runtime_error& e) | |
384 | { | |
385 | BOOST_REGEX_TEST_ERROR("Received an unexpected std::runtime_error: " << e.what(), UChar32); | |
386 | } | |
387 | catch(const std::exception& e) | |
388 | { | |
389 | BOOST_REGEX_TEST_ERROR("Received an unexpected std::exception: " << e.what(), UChar32); | |
390 | } | |
391 | catch(...) | |
392 | { | |
393 | BOOST_REGEX_TEST_ERROR("Received an unexpected exception of unknown type", UChar32); | |
394 | } | |
395 | #endif | |
396 | } | |
397 | ||
398 | void test_icu(const wchar_t&, const test_invalid_regex_tag&) | |
399 | { | |
400 | //typedef boost::u16_to_u32_iterator<std::wstring::const_iterator, ::UChar32> conv_iterator; | |
401 | std::vector< ::UChar32> expression; | |
402 | #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS | |
403 | expression.assign(test_info<wchar_t>::expression().begin(), test_info<wchar_t>::expression().end()); | |
404 | #else | |
405 | std::copy(test_info<wchar_t>::expression().begin(), test_info<wchar_t>::expression().end(), std::back_inserter(expression)); | |
406 | #endif | |
407 | boost::regex_constants::syntax_option_type syntax_options = test_info<wchar_t>::syntax_options(); | |
408 | boost::u32regex r; | |
409 | if(*test_locale::c_str()) | |
410 | { | |
411 | U_NAMESPACE_QUALIFIER Locale l(test_locale::c_str()); | |
412 | if(l.isBogus()) | |
413 | return; | |
414 | r.imbue(l); | |
415 | } | |
416 | // | |
417 | // try it with exceptions disabled first: | |
418 | // | |
419 | #ifndef BOOST_NO_EXCEPTIONS | |
420 | try | |
421 | #endif | |
422 | { | |
423 | #if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__) | |
424 | if(0 == r.assign(expression.begin(), expression.end(), syntax_options | boost::regex_constants::no_except).status()) | |
425 | #else | |
426 | if(expression.size()) | |
427 | r.assign(&*expression.begin(), expression.size(), syntax_options | boost::regex_constants::no_except); | |
428 | else | |
429 | r.assign(static_cast<UChar32 const*>(0), static_cast<boost::u32regex::size_type>(0), syntax_options | boost::regex_constants::no_except); | |
430 | if(0 == r.status()) | |
431 | #endif | |
432 | { | |
433 | BOOST_REGEX_TEST_ERROR("Expression compiled when it should not have done so.", wchar_t); | |
434 | } | |
435 | } | |
436 | #ifndef BOOST_NO_EXCEPTIONS | |
437 | catch(...) | |
438 | { | |
439 | BOOST_REGEX_TEST_ERROR("Unexpected exception thrown.", wchar_t); | |
440 | } | |
441 | #endif | |
442 | // | |
443 | // now try again with exceptions: | |
444 | // | |
445 | bool have_catch = false; | |
446 | #ifndef BOOST_NO_EXCEPTIONS | |
447 | try | |
448 | #endif | |
449 | { | |
450 | #if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__) | |
451 | r.assign(expression.begin(), expression.end(), syntax_options); | |
452 | #else | |
453 | if(expression.size()) | |
454 | r.assign(&*expression.begin(), expression.size(), syntax_options); | |
455 | else | |
456 | r.assign(static_cast<UChar32 const*>(0), static_cast<boost::u32regex::size_type>(0), syntax_options); | |
457 | #endif | |
458 | #ifdef BOOST_NO_EXCEPTIONS | |
459 | if(r.status()) | |
460 | have_catch = true; | |
461 | #endif | |
462 | } | |
463 | #ifndef BOOST_NO_EXCEPTIONS | |
464 | catch(const boost::bad_expression&) | |
465 | { | |
466 | have_catch = true; | |
467 | } | |
468 | catch(const std::runtime_error& e) | |
469 | { | |
470 | have_catch = true; | |
471 | BOOST_REGEX_TEST_ERROR("Expected a bad_expression exception, but a std::runtime_error instead: " << e.what(), wchar_t); | |
472 | } | |
473 | catch(const std::exception& e) | |
474 | { | |
475 | have_catch = true; | |
476 | BOOST_REGEX_TEST_ERROR("Expected a bad_expression exception, but a std::exception instead: " << e.what(), wchar_t); | |
477 | } | |
478 | catch(...) | |
479 | { | |
480 | have_catch = true; | |
481 | BOOST_REGEX_TEST_ERROR("Expected a bad_expression exception, but got an exception of unknown type instead", wchar_t); | |
482 | } | |
483 | #endif | |
484 | if(!have_catch) | |
485 | { | |
486 | // oops expected exception was not thrown: | |
487 | BOOST_REGEX_TEST_ERROR("Expected an exception, but didn't find one.", wchar_t); | |
488 | } | |
489 | ||
490 | if(0 == *test_locale::c_str()) | |
491 | { | |
492 | // | |
493 | // Now try UTF-16 construction: | |
494 | // | |
495 | typedef boost::u32_to_u16_iterator<std::vector<UChar32>::const_iterator> u16_conv; | |
496 | std::vector<UChar> expression16; | |
497 | #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS | |
498 | expression16.assign(u16_conv(expression.begin()), u16_conv(expression.end())); | |
499 | #else | |
500 | std::copy(u16_conv(expression.begin()), u16_conv(expression.end()), std::back_inserter(expression16)); | |
501 | #endif | |
502 | if(0 == boost::make_u32regex(expression16.begin(), expression16.end(), syntax_options | boost::regex_constants::no_except).status()) | |
503 | { | |
504 | BOOST_REGEX_TEST_ERROR("Expression compiled when it should not have done so.", wchar_t); | |
505 | } | |
506 | if(std::find(expression16.begin(), expression16.end(), 0) == expression16.end()) | |
507 | { | |
508 | expression16.push_back(0); | |
509 | if(0 == boost::make_u32regex(&*expression16.begin(), syntax_options | boost::regex_constants::no_except).status()) | |
510 | { | |
511 | BOOST_REGEX_TEST_ERROR("Expression compiled when it should not have done so.", wchar_t); | |
512 | } | |
513 | } | |
514 | // | |
515 | // Now try UTF-8 construction: | |
516 | // | |
517 | typedef boost::u32_to_u8_iterator<std::vector<UChar32>::const_iterator> u8_conv; | |
518 | std::vector<unsigned char> expression8; | |
519 | #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS | |
520 | expression8.assign(u8_conv(expression.begin()), u8_conv(expression.end())); | |
521 | #else | |
522 | std::copy(u8_conv(expression.begin()), u8_conv(expression.end()), std::back_inserter(expression8)); | |
523 | #endif | |
524 | if(0 == boost::make_u32regex(expression8.begin(), expression8.end(), syntax_options | boost::regex_constants::no_except).status()) | |
525 | { | |
526 | BOOST_REGEX_TEST_ERROR("Expression compiled when it should not have done so.", wchar_t); | |
527 | } | |
528 | if(std::find(expression8.begin(), expression8.end(), 0) == expression8.end()) | |
529 | { | |
530 | expression8.push_back(0); | |
531 | if(0 == boost::make_u32regex(&*expression8.begin(), syntax_options | boost::regex_constants::no_except).status()) | |
532 | { | |
533 | BOOST_REGEX_TEST_ERROR("Expression compiled when it should not have done so.", wchar_t); | |
534 | } | |
535 | } | |
536 | } | |
537 | } | |
538 | ||
539 | void test_icu(const wchar_t&, const test_regex_replace_tag&) | |
540 | { | |
541 | std::vector< ::UChar32> expression; | |
542 | #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS | |
543 | expression.assign(test_info<wchar_t>::expression().begin(), test_info<wchar_t>::expression().end()); | |
544 | #else | |
545 | std::copy(test_info<wchar_t>::expression().begin(), test_info<wchar_t>::expression().end(), std::back_inserter(expression)); | |
546 | #endif | |
547 | boost::regex_constants::syntax_option_type syntax_options = test_info<UChar32>::syntax_options(); | |
548 | boost::u32regex r; | |
549 | #ifndef BOOST_NO_EXCEPTIONS | |
550 | try | |
551 | #endif | |
552 | { | |
553 | #if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__) | |
554 | r.assign(expression.begin(), expression.end(), syntax_options); | |
555 | #else | |
556 | if(expression.size()) | |
557 | r.assign(&*expression.begin(), expression.size(), syntax_options); | |
558 | else | |
559 | r.assign(static_cast<UChar32 const*>(0), static_cast<boost::u32regex::size_type>(0), syntax_options); | |
560 | #endif | |
561 | if(r.status()) | |
562 | { | |
563 | BOOST_REGEX_TEST_ERROR("Expression did not compile when it should have done, error code = " << r.status(), UChar32); | |
564 | } | |
565 | typedef std::vector<UChar32> string_type; | |
566 | string_type search_text; | |
567 | boost::regex_constants::match_flag_type opts = test_info<UChar32>::match_options(); | |
568 | string_type format_string; | |
569 | string_type result_string; | |
570 | #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS | |
571 | search_text.assign(test_info<UChar32>::search_text().begin(), test_info<UChar32>::search_text().end()); | |
572 | format_string.assign(test_info<UChar32>::format_string().begin(), test_info<UChar32>::format_string().end()); | |
573 | format_string.push_back(0); | |
574 | result_string.assign(test_info<UChar32>::result_string().begin(), test_info<UChar32>::result_string().end()); | |
575 | #else | |
576 | std::copy(test_info<UChar32>::search_text().begin(), test_info<UChar32>::search_text().end(), std::back_inserter(search_text)); | |
577 | std::copy(test_info<UChar32>::format_string().begin(), test_info<UChar32>::format_string().end(), std::back_inserter(format_string)); | |
578 | format_string.push_back(0); | |
579 | std::copy(test_info<UChar32>::result_string().begin(), test_info<UChar32>::result_string().end(), std::back_inserter(result_string)); | |
580 | #endif | |
581 | string_type result; | |
582 | ||
583 | boost::u32regex_replace(unnecessary_fix::back_inserter(result), search_text.begin(), search_text.end(), r, &*format_string.begin(), opts); | |
584 | if(result != result_string) | |
585 | { | |
586 | BOOST_REGEX_TEST_ERROR("regex_replace generated an incorrect string result", UChar32); | |
587 | } | |
588 | // | |
589 | // Mixed mode character encoding: | |
590 | // | |
591 | if(0 == *test_locale::c_str()) | |
592 | { | |
593 | // | |
594 | // Now try UTF-16 construction: | |
595 | // | |
596 | typedef boost::u32_to_u16_iterator<std::vector<UChar32>::const_iterator> u16_conv; | |
597 | std::vector<UChar> expression16, text16, format16, result16, found16; | |
598 | #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS | |
599 | expression16.assign(u16_conv(expression.begin()), u16_conv(expression.end())); | |
600 | text16.assign(u16_conv(search_text.begin()), u16_conv(search_text.end())); | |
601 | format16.assign(u16_conv(format_string.begin()), u16_conv(format_string.end())); | |
602 | result16.assign(u16_conv(result_string.begin()), u16_conv(result_string.end())); | |
603 | #else | |
604 | std::copy(u16_conv(expression.begin()), u16_conv(expression.end()), std::back_inserter(expression16)); | |
605 | std::copy(u16_conv(search_text.begin()), u16_conv(search_text.end()), std::back_inserter(text16)); | |
606 | std::copy(u16_conv(format_string.begin()), u16_conv(format_string.end()), std::back_inserter(format16)); | |
607 | std::copy(u16_conv(result_string.begin()), u16_conv(result_string.end()), std::back_inserter(result16)); | |
608 | #endif | |
609 | r = boost::make_u32regex(expression16.begin(), expression16.end(), syntax_options); | |
610 | boost::u32regex_replace(unnecessary_fix::back_inserter(found16), text16.begin(), text16.end(), r, &*format16.begin(), opts); | |
611 | if(result16 != found16) | |
612 | { | |
613 | BOOST_REGEX_TEST_ERROR("u32regex_replace with UTF-16 string returned incorrect result", UChar32); | |
614 | } | |
615 | // | |
616 | // Now with UnicodeString: | |
617 | // | |
618 | U_NAMESPACE_QUALIFIER UnicodeString expression16u, text16u, format16u, result16u, found16u; | |
619 | if(expression16.size()) | |
620 | expression16u.setTo(&*expression16.begin(), expression16.size()); | |
621 | if(text16.size()) | |
622 | text16u.setTo(&*text16.begin(), text16.size()); | |
623 | format16u.setTo(&*format16.begin(), format16.size()-1); | |
624 | if(result16.size()) | |
625 | result16u.setTo(&*result16.begin(), result16.size()); | |
626 | r = boost::make_u32regex(expression16.begin(), expression16.end(), syntax_options); | |
627 | found16u = boost::u32regex_replace(text16u, r, format16u, opts); | |
628 | if(result16u != found16u) | |
629 | { | |
630 | BOOST_REGEX_TEST_ERROR("u32regex_replace with UTF-16 string returned incorrect result", UChar32); | |
631 | } | |
632 | ||
633 | // | |
634 | // Now try UTF-8 construction: | |
635 | // | |
636 | typedef boost::u32_to_u8_iterator<std::vector<UChar32>::const_iterator, unsigned char> u8_conv; | |
637 | std::vector<char> expression8, text8, format8, result8, found8; | |
638 | #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS | |
639 | expression8.assign(u8_conv(expression.begin()), u8_conv(expression.end())); | |
640 | text8.assign(u8_conv(search_text.begin()), u8_conv(search_text.end())); | |
641 | format8.assign(u8_conv(format_string.begin()), u8_conv(format_string.end())); | |
642 | result8.assign(u8_conv(result_string.begin()), u8_conv(result_string.end())); | |
643 | #else | |
644 | std::copy(u8_conv(expression.begin()), u8_conv(expression.end()), std::back_inserter(expression8)); | |
645 | std::copy(u8_conv(search_text.begin()), u8_conv(search_text.end()), std::back_inserter(text8)); | |
646 | std::copy(u8_conv(format_string.begin()), u8_conv(format_string.end()), std::back_inserter(format8)); | |
647 | std::copy(u8_conv(result_string.begin()), u8_conv(result_string.end()), std::back_inserter(result8)); | |
648 | #endif | |
649 | r = boost::make_u32regex(expression8.begin(), expression8.end(), syntax_options); | |
650 | boost::u32regex_replace(unnecessary_fix::back_inserter(found8), text8.begin(), text8.end(), r, &*format8.begin(), opts); | |
651 | if(result8 != found8) | |
652 | { | |
653 | BOOST_REGEX_TEST_ERROR("u32regex_replace with UTF-8 string returned incorrect result", UChar32); | |
654 | } | |
655 | // | |
656 | // Now with std::string and UTF-8: | |
657 | // | |
658 | std::string expression8s, text8s, format8s, result8s, found8s; | |
659 | if(expression8.size()) | |
660 | expression8s.assign(&*expression8.begin(), expression8.size()); | |
661 | if(text8.size()) | |
662 | text8s.assign(&*text8.begin(), text8.size()); | |
663 | format8s.assign(&*format8.begin(), format8.size()-1); | |
664 | if(result8.size()) | |
665 | result8s.assign(&*result8.begin(), result8.size()); | |
666 | r = boost::make_u32regex(expression8.begin(), expression8.end(), syntax_options); | |
667 | found8s = boost::u32regex_replace(text8s, r, format8s, opts); | |
668 | if(result8s != found8s) | |
669 | { | |
670 | BOOST_REGEX_TEST_ERROR("u32regex_replace with UTF-8 string returned incorrect result", UChar32); | |
671 | } | |
672 | } | |
673 | } | |
674 | #ifndef BOOST_NO_EXCEPTIONS | |
675 | catch(const boost::bad_expression& e) | |
676 | { | |
677 | BOOST_REGEX_TEST_ERROR("Expression did not compile when it should have done: " << e.what(), UChar32); | |
678 | } | |
679 | catch(const std::runtime_error& e) | |
680 | { | |
681 | BOOST_REGEX_TEST_ERROR("Received an unexpected std::runtime_error: " << e.what(), UChar32); | |
682 | } | |
683 | catch(const std::exception& e) | |
684 | { | |
685 | BOOST_REGEX_TEST_ERROR("Received an unexpected std::exception: " << e.what(), UChar32); | |
686 | } | |
687 | catch(...) | |
688 | { | |
689 | BOOST_REGEX_TEST_ERROR("Received an unexpected exception of unknown type", UChar32); | |
690 | } | |
691 | #endif | |
692 | } | |
693 | ||
694 | #else | |
695 | ||
696 | #include "test.hpp" | |
697 | ||
698 | void test_icu(const wchar_t&, const test_regex_search_tag&){} | |
699 | void test_icu(const wchar_t&, const test_invalid_regex_tag&){} | |
700 | void test_icu(const wchar_t&, const test_regex_replace_tag&){} | |
701 | ||
702 | #endif | |
703 |