]>
git.proxmox.com Git - ceph.git/blob - ceph/src/boost/libs/locale/test/test_boundary.cpp
2 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
4 // Distributed under the Boost Software License, Version 1.0. (See
5 // accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
// Fallback path taken when BOOST_LOCALE_WITH_ICU is not defined: the only
// visible statement prints a notice that the test is being skipped.
// NOTE(review): the enclosing function header and the matching #else/#endif
// are not visible in this excerpt.
9 #ifndef BOOST_LOCALE_WITH_ICU
13 std::cout
<< "ICU is not build... Skipping" << std::endl
;
17 #define BOOST_LOCALE_ERROR_LIMIT 100000
19 #include <boost/locale/boundary.hpp>
20 #include <boost/locale/generator.hpp>
21 #include "test_locale.hpp"
22 #include "test_locale_tools.hpp"
24 #include <unicode/uversion.h>
// Generic debugging helper taking a basic_string<Char> by const reference.
// The parameter name is commented out, so the argument is not referenced by
// name here. NOTE(review): the function body is not visible in this excerpt.
28 template<typename Char
>
29 void print_str(std::basic_string
<Char
> const &/*s*/)
// print_str for plain `char` strings: writes the string to std::cout wrapped
// in square brackets, followed by std::endl.
34 void print_str
<char>(std::basic_string
<char> const &s
)
36 std::cout
<< "[" << s
<<"]" << std::endl
;
// Short alias for boost::locale::boundary, used by the tests below.
40 namespace lb
= boost::locale::boundary
;
// Exercises lb::segment_index and lb::boundary_point_index over [begin,end).
//
// Parameters visible in this excerpt:
//   begin, end - iterator range holding the text under test
//   ipos       - per-chunk positions (pushed into `pos` and used with
//                std::advance below)
//   imasks     - per-chunk rule masks
//   ichunks    - the expected text of each chunk
//   bt         - boundary type, defaults to lb::word
// NOTE(review): a locale `l` is passed to the index constructors below, but
// its declaration is not visible in this excerpt; neither are the braces and
// several other statements of this function.
42 template<typename Char
,typename Iterator
>
43 void test_word_container(Iterator begin
,Iterator end
,
44 std::vector
<int> const &ipos
,
45 std::vector
<int> const &imasks
,
46 std::vector
<std::basic_string
<Char
> > const &ichunks
,
48 lb::boundary_type bt
=lb::word
// sm counts down from 31 (five selector bits) for word boundaries, or from 3
// (two selector bits) for any other boundary type, to 0.
51 for(int sm
=(bt
== lb::word
? 31 : 3 ) ;sm
>=0;sm
--) {
// Each set bit k of sm contributes a full 4-bit field (0xF << 4*k).
// NOTE(review): presumably this expression initialises `mask`, which is used
// below; the declaration line is not visible here.
53 ((sm
& 1 ) != 0) * 0xF
54 + ((sm
& 2 ) != 0) * 0xF0
55 + ((sm
& 4 ) != 0) * 0xF00
56 + ((sm
& 8 ) != 0) * 0xF000
57 + ((sm
& 16) != 0) * 0xF0000;
59 std::vector
<int> masks
,pos
;
60 std::vector
<unsigned> bmasks
;
61 std::basic_string
<Char
> empty_chunk
;
63 std::vector
<std::basic_string
<Char
> > chunks
;
64 std::vector
<std::basic_string
<Char
> > fchunks
;
65 std::vector
<Iterator
> iters
;
66 iters
.push_back(begin
);
// Build the expected results for the current mask: entries whose imasks[i]
// intersects `mask` are recorded in masks/chunks/fchunks/pos. fchunks holds
// the chunk prefixed with the text accumulated in empty_chunk.
69 for(unsigned i
=0;i
<imasks
.size();i
++) {
70 if(imasks
[i
] & mask
) {
71 masks
.push_back(imasks
[i
]);
72 chunks
.push_back(ichunks
[i
]);
73 fchunks
.push_back(empty_chunk
+ ichunks
[i
]);
75 pos
.push_back(ipos
[i
]);
78 empty_chunk
+=ichunks
[i
];
// Selected entries and the final entry also record a boundary rule in bmasks.
81 if((imasks
[i
] & mask
) || i
==imasks
.size()-1){
83 std::advance(ptr
,ipos
[i
]);
85 bmasks
.push_back(imasks
[i
]);
90 // segment iterator tests
93 lb::segment_index
<Iterator
> map(bt
,begin
,end
,l
);
94 typedef typename
lb::segment_index
<Iterator
>::iterator iter_type
;
// With full_select(false) the segments are compared against `chunks`.
101 map
.full_select(false);
102 for(p
=map
.begin();p
!=map
.end();++p
,i
++) {
103 TEST(p
->str()==chunks
[i
]);
104 TEST(p
->rule() == unsigned(masks
[i
]));
107 TEST(chunks
.size() == i
);
115 TEST(p
->str()==chunks
[--i
]);
116 TEST(p
->rule() == unsigned(masks
[i
]));
119 for(i
=0,p
=map
.end();i
<chunks
.size();i
++){
121 unsigned index
= chunks
.size() - i
- 1;
122 TEST(p
->str()==chunks
[index
]);
123 TEST(p
->rule() == unsigned(masks
[index
]));
125 TEST(p
==map
.begin());
// With full_select(true) the segments are compared against `fchunks`.
131 map
.full_select(true);
132 for(p
=map
.begin();p
!=map
.end();++p
,i
++) {
133 TEST(p
->str()==fchunks
[i
]);
134 TEST(p
->rule() == unsigned(masks
[i
]));
137 TEST(chunks
.size() == i
);
// On a mismatch the expected string is printed before the TEST fires.
146 if(p
->str()!=fchunks
[i
-1]) {
148 print_str(fchunks
[i
-1]);
150 TEST(p
->str()==fchunks
[--i
]);
151 TEST(p
->rule() == unsigned(masks
[i
]));
155 for(i
=0,p
=map
.end();i
<chunks
.size();i
++){
157 unsigned index
= chunks
.size() - i
- 1;
158 TEST(p
->str()==fchunks
[index
]);
159 TEST(p
->rule() == unsigned(masks
[index
]));
161 TEST(p
==map
.begin());
// Walk every input position while chunk_ptr tracks the expected chunk index
// (full_select(false) variant, checked against `chunks`).
166 unsigned chunk_ptr
=0;
168 map
.full_select(false);
169 for(Iterator optr
=begin
;optr
!=end
;optr
++,i
++) {
171 if(chunk_ptr
< pos
.size() && i
>=unsigned(pos
[chunk_ptr
])){
174 if(chunk_ptr
>=pos
.size()) {
178 TEST(p
->str()==chunks
[chunk_ptr
]);
179 TEST(p
->rule()==unsigned(masks
[chunk_ptr
]));
// Same walk with full_select(true), checked against `fchunks`.
185 unsigned chunk_ptr
=0;
187 map
.full_select(true);
188 for(Iterator optr
=begin
;optr
!=end
;optr
++,i
++) {
190 if(chunk_ptr
< pos
.size() && i
>=unsigned(pos
[chunk_ptr
])){
193 if(chunk_ptr
>=pos
.size()) {
197 TEST(p
->str()==fchunks
[chunk_ptr
]);
198 TEST(p
->rule()==unsigned(masks
[chunk_ptr
]));
203 } // segment iterator tests
205 { // break iterator tests
206 lb::boundary_point_index
<Iterator
> map(bt
,begin
,end
,l
);
207 typedef typename
lb::boundary_point_index
<Iterator
>::iterator iter_type
;
// Boundary points are compared against the recorded iterators and bmasks.
213 for(p
=map
.begin();p
!=map
.end();++p
,i
++) {
214 TEST(p
->iterator()==iters
[i
]);
215 TEST(p
->rule()==bmasks
[i
]);
218 TEST(iters
.size() == i
);
223 TEST(p
->iterator()==iters
.at(i
));
224 } while(p
!=map
.begin());
227 unsigned iters_ptr
=0;
228 for(Iterator optr
=begin
;optr
!=end
;optr
++) {
230 TEST(p
->iterator()==iters
[iters_ptr
]);
231 if(iters
.at(iters_ptr
)==optr
)
235 } // break iterator tests
// Shorthand index types for the tests that follow.
238 typedef lb::segment_index
<Iterator
> ti_type
;
239 typedef lb::boundary_point_index
<Iterator
> bi_type
;
240 { // segment to bound
241 ti_type
ti(bt
,begin
,end
,l
);
// NOTE(review): the statements that create `bi` are not visible in this
// excerpt; each loop below re-checks the boundary points against iters/bmasks.
247 typename
bi_type::iterator p
;
248 for(p
=bi
.begin();p
!=bi
.end();++p
,i
++) {
249 TEST(p
->iterator()==iters
[i
]);
250 TEST(p
->rule()==bmasks
[i
]);
258 typename
bi_type::iterator p
;
259 for(p
=bi
.begin();p
!=bi
.end();++p
,i
++) {
260 TEST(p
->iterator()==iters
[i
]);
261 TEST(p
->rule()==bmasks
[i
]);
264 // boundary_point to bound
265 bi_type
bi_2(bt
,begin
,end
,l
);
270 typename
bi_type::iterator p
;
271 for(p
=bi
.begin();p
!=bi
.end();++p
,i
++) {
272 TEST(p
->iterator()==iters
[i
]);
273 TEST(p
->rule()==bmasks
[i
]);
280 typename
bi_type::iterator p
;
281 for(p
=bi
.begin();p
!=bi
.end();++p
,i
++) {
282 TEST(p
->iterator()==iters
[i
]);
283 TEST(p
->rule()==bmasks
[i
]);
287 { // boundary_point to segment
288 bi_type
bi(bt
,begin
,end
,l
);
// NOTE(review): the statements that create `ti` are not visible in this
// excerpt; each loop below re-checks the segments against chunks/masks.
293 typename
ti_type::iterator p
;
294 for(p
=ti
.begin();p
!=ti
.end();++p
,i
++) {
295 TEST(p
->str()==chunks
[i
]);
296 TEST(p
->rule()==unsigned(masks
[i
]));
304 typename
ti_type::iterator p
;
305 for(p
=ti
.begin();p
!=ti
.end();++p
,i
++) {
306 TEST(p
->str()==chunks
[i
]);
307 TEST(p
->rule()==unsigned(masks
[i
]));
310 ti_type
ti_2(bt
,begin
,end
,l
);
315 typename
ti_type::iterator p
;
316 for(p
=ti
.begin();p
!=ti
.end();++p
,i
++) {
317 TEST(p
->str()==chunks
[i
]);
318 TEST(p
->rule()==unsigned(masks
[i
]));
325 typename
ti_type::iterator p
;
326 for(p
=ti
.begin();p
!=ti
.end();++p
,i
++) {
327 TEST(p
->str()==chunks
[i
]);
328 TEST(p
->rule()==unsigned(masks
[i
]));
// Builds the inputs for test_word_container from parallel arrays and runs it
// over two containers holding the same text.
//
//   original - array of std::string; iteration stops at the first empty string
//   none, num, word, kana, ideo - per-chunk int flag arrays; a null pointer
//              contributes 0 for that category
//   l        - locale used for string conversion and forwarded to the test
//   b        - boundary type, defaults to lb::word
337 template<typename Char
>
338 void run_word(std::string
*original
,int *none
,int *num
,int *word
,int *kana
,int *ideo
,std::locale l
,lb::boundary_type b
=lb::word
)
340 std::vector
<int> pos
;
341 std::vector
<std::basic_string
<Char
> > chunks
;
342 std::vector
<int> masks
;
343 std::basic_string
<Char
> test_string
;
// Each chunk is converted with to_correct_string<Char>, appended to
// test_string, and the running length of test_string is recorded in pos.
344 for(int i
=0;!original
[i
].empty();i
++) {
345 chunks
.push_back(to_correct_string
<Char
>(original
[i
],l
));
346 test_string
+=chunks
.back();
347 pos
.push_back(test_string
.size());
// Every flag is multiplied by 15 (0xF) and shifted into its own 4-bit field:
// none at bit 0, num at 4, word at 8, kana at 12, ideo at 16.
// NOTE(review): presumably the combined value is appended to `masks`; that
// line is not visible in this excerpt.
349 ( none
? none
[i
]*15 : 0)
350 | ( num
? ((num
[i
]*15) << 4) : 0)
351 | ( word
? ((word
[i
]*15) << 8) : 0)
352 | ( kana
? ((kana
[i
]*15) << 12) : 0)
353 | ( ideo
? ((ideo
[i
]*15) << 16) : 0)
// Run once over a std::list copy of the text and once over the string's own
// iterators.
357 std::list
<Char
> lst(test_string
.begin(),test_string
.end());
358 test_word_container
<Char
>(lst
.begin(),lst
.end(),pos
,masks
,chunks
,l
,b
);
359 test_word_container
<Char
>(test_string
.begin(),test_string
.end(),pos
,masks
,chunks
,l
,b
);
// Test data for the character, sentence and line boundary tests. Every string
// array ends with an empty string, which run_word uses as its stop condition.
// The int arrays are per-chunk flags passed alongside the strings.

// Character boundaries: Hebrew text with combining marks, all flags set.
362 std::string character
[]={"שָ","ל","וֹ","ם","!",""};
363 int nones
[]={1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
// Sentence boundaries with two flag arrays (passed as `first` and `second`).
365 std::string sentence1
[]={"To be\n","or not\n","to be?\n"," That is the question. ","Or maybe not",""};
366 int sentence1a
[]={ 0, 0, 1, 1, 0, 0};
367 int sentence1b
[]={ 1, 1, 0, 0, 1, 0};
// Line boundaries with two flag arrays (passed as `first` and `second`).
369 std::string line1
[]={"To ","be\n","or ","not\n","to ","be",""};
370 int line1a
[]={ 1, 0, 1 , 0, 1, 1 , 0 };
371 int line1b
[]={ 0, 1, 0 , 1, 0, 0 , 0 };
// Runs run_word for boundary type `t` over several character types, all with
// he_IL locales. `first` and `second` are passed as run_word's `none` and
// `num` arrays; the remaining three flag arrays are passed as 0 (null).
374 void test_boundaries(std::string
*all
,int *first
,int *second
,lb::boundary_type t
)
376 boost::locale::generator g
;
377 std::cout
<< " char UTF-8" << std::endl
;
378 run_word
<char>(all
,first
,second
,0,0,0,g("he_IL.UTF-8"),t
);
379 std::cout
<< " char CP1255" << std::endl
;
380 run_word
<char>(all
,first
,second
,0,0,0,g("he_IL.cp1255"),t
);
381 std::cout
<< " wchar_t"<<std::endl
;
382 run_word
<wchar_t>(all
,first
,second
,0,0,0,g("he_IL.UTF-8"),t
);
// char16_t / char32_t runs are compiled only when the matching
// BOOST_LOCALE_ENABLE_* macro is defined.
383 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
384 std::cout
<< " char16_t"<<std::endl
;
385 run_word
<char16_t
>(all
,first
,second
,0,0,0,g("he_IL.UTF-8"),t
);
387 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
388 std::cout
<< " char32_t"<<std::endl
;
389 run_word
<char32_t
>(all
,first
,second
,0,0,0,g("he_IL.UTF-8"),t
);
// Word-boundary test body: defines three data sets (all1 mixed
// digits/Latin/Japanese, all2 empty, all3 punctuation and spaces) and runs
// run_word on them for each supported character type and encoding.
// NOTE(review): the enclosing function header and the declaration of `zero`
// are not visible in this excerpt.
396 boost::locale::generator g
;
397 //std::string all1[]={"10"," ","Hello"," ","Windows7"," ","平仮名","ひらがな","ヒラガナ",""};
398 //std::string all1[]={"10"," ","Hello"," ","Windows7"," ","平仮名","ひん","アヒル",""};
399 std::string all1
[]={"10"," ","Hello"," ","Windows7"," ","平仮名","アヒル",""};
400 int none1
[]={ 0, 1, 0, 1, 0, 1, 0, 0, 0};
401 int num1
[]={ 1, 0, 0, 0, 1, 0, 0 , 0 , 0};
402 int word1
[]={ 0, 0, 1, 0, 1, 0, 0 , 0 , 0};
// Two kana1/ideo1 variants follow. The first is guarded by ICU major version
// >= 50 and flags the trailing entries as ideo only; the second flags them as
// kana. NOTE(review): the #else/#endif lines separating them are not visible.
403 #if U_ICU_VERSION_MAJOR_NUM >= 50
404 int kana1
[]={ 0, 0, 0, 0, 0, 0, 0, 0 , 0};
405 int ideo1
[]={ 0, 0, 0, 0, 0, 0, 1, 1 , 1};
407 int kana1
[]={ 0, 0, 0, 0, 0, 0, 0, 1 , 1};
408 int ideo1
[]={ 0, 0, 0, 0, 0, 0, 1, 0 , 0};
413 std::string all2
[]={""};
415 std::string all3
[]={" "," ","Hello",",","World","!"," ",""};
416 int none3
[]={ 1, 1, 0, 1, 0, 1, 1, 0};
417 int word3
[]={ 0, 0, 1, 0, 1, 0, 0, 0};
419 std::cout
<< " char UTF-8" << std::endl
;
420 run_word
<char>(all1
,none1
,num1
,word1
,kana1
,ideo1
,g("ja_JP.UTF-8"));
421 run_word
<char>(all2
,zero
,zero
,zero
,zero
,zero
,g("en_US.UTF-8"));
422 run_word
<char>(all3
,none3
,zero
,word3
,zero
,zero
,g("en_US.UTF-8"));
424 std::cout
<< " char Shift-JIS" << std::endl
;
425 run_word
<char>(all1
,none1
,num1
,word1
,kana1
,ideo1
,g("ja_JP.Shift-JIS"));
426 run_word
<char>(all2
,zero
,zero
,zero
,zero
,zero
,g("ja_JP.Shift-JIS"));
427 run_word
<char>(all3
,none3
,zero
,word3
,zero
,zero
,g("ja_JP.Shift-JIS"));
429 std::cout
<< " wchar_t"<<std::endl
;
430 run_word
<wchar_t>(all1
,none1
,num1
,word1
,kana1
,ideo1
,g("ja_JP.UTF-8"));
431 run_word
<wchar_t>(all2
,zero
,zero
,zero
,zero
,zero
,g("en_US.UTF-8"));
432 run_word
<wchar_t>(all3
,none3
,zero
,word3
,zero
,zero
,g("en_US.UTF-8"));
// char16_t / char32_t runs are compiled only when the matching
// BOOST_LOCALE_ENABLE_* macro is defined.
434 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
435 std::cout
<< " char16_t"<<std::endl
;
436 run_word
<char16_t
>(all1
,none1
,num1
,word1
,kana1
,ideo1
,g("ja_JP.UTF-8"));
437 run_word
<char16_t
>(all2
,zero
,zero
,zero
,zero
,zero
,g("en_US.UTF-8"));
438 run_word
<char16_t
>(all3
,none3
,zero
,word3
,zero
,zero
,g("en_US.UTF-8"));
441 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
442 std::cout
<< " char32_t"<<std::endl
;
443 run_word
<char32_t
>(all1
,none1
,num1
,word1
,kana1
,ideo1
,g("ja_JP.UTF-8"));
444 run_word
<char32_t
>(all2
,zero
,zero
,zero
,zero
,zero
,g("en_US.UTF-8"));
445 run_word
<char32_t
>(all3
,none3
,zero
,word3
,zero
,zero
,g("en_US.UTF-8"));
// Checks that all six comparison operators give results consistent with the
// sign of `val` (negative: sl < sr, zero: equal, positive: sl > sr) for every
// visible operand combination of ssegment, C string and std::string.
//
//   sl, sr - left and right operand text
//   val    - expected ordering of sl relative to sr
448 void test_op_one_side(std::string
const &sl
,std::string
const &sr
,int val
)
// Segments spanning the whole of sl and sr, constructed with 0 as the third
// argument.
450 boost::locale::boundary::ssegment
l(sl
.begin(),sl
.end(),0),r(sr
.begin(),sr
.end(),0);
// segment vs segment
453 TEST( (l
==r
) == (val
==0));
454 TEST( (l
!=r
) == (val
!=0));
455 TEST( (l
<=r
) == (val
<=0));
456 TEST( (l
< r
) == (val
<0));
457 TEST( (l
>=r
) == (val
>=0));
458 TEST( (l
> r
) == (val
>0));
// segment vs C string
461 TEST( (l
==sr
.c_str()) == (val
==0));
462 TEST( (l
!=sr
.c_str()) == (val
!=0));
463 TEST( (l
<=sr
.c_str()) == (val
<=0));
464 TEST( (l
< sr
.c_str()) == (val
<0));
465 TEST( (l
>=sr
.c_str()) == (val
>=0));
466 TEST( (l
> sr
.c_str()) == (val
>0));
// C string vs segment
468 TEST( (sl
.c_str()==r
) == (val
==0));
469 TEST( (sl
.c_str()!=r
) == (val
!=0));
470 TEST( (sl
.c_str()<=r
) == (val
<=0));
471 TEST( (sl
.c_str()< r
) == (val
<0));
472 TEST( (sl
.c_str()>=r
) == (val
>=0));
473 TEST( (sl
.c_str()> r
) == (val
>0));
// segment vs std::string
477 TEST( (l
==sr
) == (val
==0));
478 TEST( (l
!=sr
) == (val
!=0));
479 TEST( (l
<=sr
) == (val
<=0));
480 TEST( (l
< sr
) == (val
<0));
481 TEST( (l
>=sr
) == (val
>=0));
482 TEST( (l
> sr
) == (val
>0));
// std::string vs segment
484 TEST( (sl
==r
) == (val
==0));
485 TEST( (sl
!=r
) == (val
!=0));
486 TEST( (sl
<=r
) == (val
<=0));
487 TEST( (sl
< r
) == (val
<0));
488 TEST( (sl
>=r
) == (val
>=0));
489 TEST( (sl
> r
) == (val
>0));
// std::string vs std::string: checks that `val` itself matches the plain
// string ordering.
491 TEST( (sl
==sr
) == (val
==0));
492 TEST( (sl
!=sr
) == (val
!=0));
493 TEST( (sl
<=sr
) == (val
<=0));
494 TEST( (sl
< sr
) == (val
<0));
495 TEST( (sl
>=sr
) == (val
>=0));
496 TEST( (sl
> sr
) == (val
>0));
// Runs test_op_one_side in both directions: (sl, sr) with `val`, then
// (sr, sl) with the negated `val`.
500 void test_op(std::string
const &sl
,std::string
const &sr
,int val
)
502 test_op_one_side(sl
,sr
,val
);
503 test_op_one_side(sr
,sl
,-val
);
// Drives test_op with fixed string pairs and their expected ordering.
// NOTE(review): only two of the calls are visible in this excerpt.
505 void segment_operator()
509 test_op("aa","aaa",-1);
510 test_op("aa","ab",-1);
// Test driver body: prints a banner before each test group and runs the
// character, sentence and line boundary tests through test_boundaries.
// NOTE(review): the enclosing function header, the `try` that pairs with the
// catch below, and the calls that follow the first two banners are not
// visible in this excerpt.
516 std::cout
<< "Testing segment operators" << std::endl
;
518 std::cout
<< "Testing word boundary" << std::endl
;
520 std::cout
<< "Testing character boundary" << std::endl
;
521 test_boundaries(character
,nones
,0,lb::character
);
522 std::cout
<< "Testing sentence boundary" << std::endl
;
523 test_boundaries(sentence1
,sentence1a
,sentence1b
,lb::sentence
);
524 std::cout
<< "Testing line boundary" << std::endl
;
525 test_boundaries(line1
,line1a
,line1b
,lb::line
);
// Any std::exception is reported on std::cerr with its what() message.
527 catch(std::exception
const &e
) {
528 std::cerr
<< "Failed " << e
.what() << std::endl
;
535 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
537 // boostinspect:noascii