]>
git.proxmox.com Git - ceph.git/blob - ceph/src/boost/libs/detail/test/test_utf8_codecvt.cpp
1 /////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
2 // test_utf8_codecvt.cpp
4 // (C) Copyright 2002-4 Robert Ramey - http://www.rrsd.com .
5 // Use, modification and distribution is subject to the Boost Software
6 // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
7 // http://www.boost.org/LICENSE_1_0.txt)
9 #include <algorithm> // std::copy
17 #include <cstddef> // size_t
19 #include <boost/config.hpp>
21 #define BOOST_UTF8_BEGIN_NAMESPACE namespace boost { namespace detail {
22 #define BOOST_UTF8_END_NAMESPACE } }
23 #include <boost/detail/utf8_codecvt_facet.hpp>
24 #include <boost/detail/utf8_codecvt_facet.ipp>
26 #if defined(BOOST_NO_STDC_NAMESPACE)
30 #if !defined(UNDER_CE) && !defined(__PGIC__)
36 // Note: copied from boost/iostreams/char_traits.hpp
38 // Dinkumware that comes with QNX Momentics 6.3.0, 4.0.2, incorrectly defines
39 // the EOF and WEOF macros to not std:: qualify the wint_t type (and so does
40 // Sun C++ 5.8 + STLport 4). Fix by placing the def in this scope.
41 // NOTE: Use BOOST_WORKAROUND?
42 #if (defined(__QNX__) && defined(BOOST_DINKUMWARE_STDLIB)) \
43 || defined(__SUNPRO_CC)
47 #include <boost/core/lightweight_test.hpp>
49 template<std::size_t s
>
52 static unsigned char utf8_encoding
[];
53 static wchar_t wchar_encoding
[];
57 unsigned char test_data
<2>::utf8_encoding
[] = {
67 wchar_t test_data
<2>::wchar_encoding
[] = {
77 unsigned char test_data
<4>::utf8_encoding
[] = {
84 0xf0, 0x90, 0x80, 0x80,
85 0xf4, 0x8f, 0xbf, 0xbf,
86 /* codecvt implementations for clang and gcc don't handle more than 21 bits and
87 * return eof accordingly. So don't test the whole 32-bit range
90 0xf7, 0xbf, 0xbf, 0xbf,
91 0xf8, 0x88, 0x80, 0x80, 0x80,
92 0xfb, 0xbf, 0xbf, 0xbf, 0xbf,
93 0xfc, 0x84, 0x80, 0x80, 0x80, 0x80,
94 0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf
99 wchar_t test_data
<4>::wchar_encoding
[] = {
108 /* codecvt implementations for clang and gcc don't handle more than 21 bits and
109 * return eof accordingly. So don't test the whole 32-bit range
121 test_main(int /* argc */, char * /* argv */[]) {
122 std::locale utf8_locale
124 std::locale::classic(),
125 new boost::detail::utf8_codecvt_facet
129 // define test data compatible with the wchar_t implementation
130 // as either ucs-2 or ucs-4 depending on the compiler/library.
131 typedef test_data
<sizeof(wchar_t)> td
;
133 // Send our test UTF-8 data to file
136 ofs
.open("test.dat");
139 td::utf8_encoding
+ sizeof(td::utf8_encoding
) / sizeof(unsigned char),
140 std::ostream_iterator
<utf8_t
>(ofs
)
144 // Read the test data back in, converting to wchar_t (UCS-2 or UCS-4) on the way in
145 std::vector
<wchar_t> from_file
;
148 ifs
.imbue(utf8_locale
);
149 ifs
.open("test.dat");
151 std::wint_t item
= 0;
152 // note can't use normal vector from iterator constructor because
153 // dinkumware doesn't have it.
161 from_file
.push_back(item
);
165 BOOST_TEST(std::equal(from_file
.begin(), from_file
.end(), td::wchar_encoding
));
167 // Send the UCS4_data back out, converting to UTF-8
170 ofs
.imbue(utf8_locale
);
171 ofs
.open("test2.dat");
175 std::ostream_iterator
<wchar_t, wchar_t>(ofs
)
179 // Make sure that both files are the same
181 typedef std::istream_iterator
<utf8_t
> is_iter
;
184 std::ifstream
ifs1("test.dat");
186 std::vector
<utf8_t
> data1
;
187 std::copy(it1
, end_iter
, std::back_inserter(data1
));
189 std::ifstream
ifs2("test2.dat");
191 std::vector
<utf8_t
> data2
;
192 std::copy(it2
, end_iter
, std::back_inserter(data2
));
194 BOOST_TEST(data1
== data2
);
197 // some libraries have trouble that only shows up with longer strings
199 const wchar_t * test3_data
= L
"\
200 <?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\" ?>\
201 <!DOCTYPE boost_serialization>\
202 <boost_serialization signature=\"serialization::archive\" version=\"3\">\
203 <a class_id=\"0\" tracking_level=\"0\">\
220 <x>1.0170664757130923</x>\
222 <z>cuwjentqpkejp</z>\
224 </boost_serialization>\
227 // Send the UCS4_data back out, converting to UTF-8
228 std::size_t l
= std::wcslen(test3_data
);
231 ofs
.imbue(utf8_locale
);
232 ofs
.open("test3.dat");
236 std::ostream_iterator
<wchar_t, wchar_t>(ofs
)
240 // Make sure that both files are the same
243 ifs
.imbue(utf8_locale
);
244 ifs
.open("test3.dat");
245 ifs
>> std::noskipws
;
250 std::istream_iterator
<wchar_t, wchar_t>(ifs
)
255 // Test length calculation
257 std::codecvt
<wchar_t, char, std::mbstate_t> const& fac
= std::use_facet
< std::codecvt
<wchar_t, char, std::mbstate_t> >(utf8_locale
);
258 std::mbstate_t mbs
= std::mbstate_t();
259 const int utf8_len
= sizeof(td::utf8_encoding
) / sizeof(*td::utf8_encoding
);
260 int res
= fac
.length(mbs
, reinterpret_cast< const char* >(td::utf8_encoding
), reinterpret_cast< const char* >(td::utf8_encoding
+ utf8_len
), ~static_cast< std::size_t >(0u));
261 BOOST_TEST_EQ(utf8_len
, res
);
264 // Test that length calculation detects character boundaries
266 std::codecvt
<wchar_t, char, std::mbstate_t> const& fac
= std::use_facet
< std::codecvt
<wchar_t, char, std::mbstate_t> >(utf8_locale
);
267 std::mbstate_t mbs
= std::mbstate_t();
268 // The first 5 bytes of utf8_encoding contain 3 complete UTF-8 characters (taking 4 bytes in total) and 1 byte of an incomplete character.
269 // This last byte should not be accounted by length().
270 const int input_len
= 5;
271 const int utf8_len
= 4;
272 int res
= fac
.length(mbs
, reinterpret_cast< const char* >(td::utf8_encoding
), reinterpret_cast< const char* >(td::utf8_encoding
+ input_len
), ~static_cast< std::size_t >(0u));
273 BOOST_TEST_EQ(utf8_len
, res
);
280 main(int argc
, char * argv
[]){
284 retval
= test_main(argc
, argv
);
286 #ifndef BOOST_NO_EXCEPTION_STD_NAMESPACE
287 BOOST_CATCH(const std::exception
& e
){
288 BOOST_ERROR(e
.what());
292 BOOST_ERROR("failed with uncaught exception:");
296 int error_count
= boost::report_errors();
298 retval
= error_count
;