]>
git.proxmox.com Git - ceph.git/blob - ceph/src/boost/libs/detail/test/test_utf8_codecvt.cpp
1 /////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
2 // test_utf8_codecvt.cpp
4 // (C) Copyright 2002-4 Robert Ramey - http://www.rrsd.com .
5 // Use, modification and distribution is subject to the Boost Software
6 // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
7 // http://www.boost.org/LICENSE_1_0.txt)
9 #include <algorithm> // std::copy
17 #include <cstddef> // size_t
19 #include <boost/config.hpp>
20 #include <boost/core/no_exceptions_support.hpp>
22 #define BOOST_UTF8_BEGIN_NAMESPACE namespace boost { namespace detail {
23 #define BOOST_UTF8_END_NAMESPACE } }
24 #include <boost/detail/utf8_codecvt_facet.hpp>
25 #include <boost/detail/utf8_codecvt_facet.ipp>
27 #if defined(BOOST_NO_STDC_NAMESPACE)
31 #if !defined(UNDER_CE) && !defined(__PGIC__)
37 // Note: copied from boost/iostreams/char_traits.hpp
39 // Dinkumware that comes with QNX Momentics 6.3.0, 4.0.2, incorrectly defines
40 // the EOF and WEOF macros to not std:: qualify the wint_t type (and so does
41 // Sun C++ 5.8 + STLport 4). Fix by placing the def in this scope.
42 // NOTE: Use BOOST_WORKAROUND?
43 #if (defined(__QNX__) && defined(BOOST_DINKUMWARE_STDLIB)) \
44 || defined(__SUNPRO_CC)
48 #include <boost/core/lightweight_test.hpp>
50 template<std::size_t s
>
53 static unsigned char utf8_encoding
[];
54 static wchar_t wchar_encoding
[];
58 unsigned char test_data
<2>::utf8_encoding
[] = {
68 wchar_t test_data
<2>::wchar_encoding
[] = {
78 unsigned char test_data
<4>::utf8_encoding
[] = {
85 0xf0, 0x90, 0x80, 0x80,
86 0xf4, 0x8f, 0xbf, 0xbf,
87 /* codecvt implementations for clang and gcc don't handle more than 21 bits and
88 * return eof accordingly. So don't test the whole 32-bit range
91 0xf7, 0xbf, 0xbf, 0xbf,
92 0xf8, 0x88, 0x80, 0x80, 0x80,
93 0xfb, 0xbf, 0xbf, 0xbf, 0xbf,
94 0xfc, 0x84, 0x80, 0x80, 0x80, 0x80,
95 0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf
100 wchar_t test_data
<4>::wchar_encoding
[] = {
109 /* codecvt implementations for clang and gcc don't handle more than 21 bits and
110 * return eof accordingly. So don't test the whole 32-bit range
122 test_main(int /* argc */, char * /* argv */[]) {
123 std::locale utf8_locale
125 std::locale::classic(),
126 new boost::detail::utf8_codecvt_facet
130 // define test data compatible with the wchar_t implementation
131 // as either ucs-2 or ucs-4 depending on the compiler/library.
132 typedef test_data
<sizeof(wchar_t)> td
;
134 // Send our test UTF-8 data to file
137 ofs
.open("test.dat");
140 td::utf8_encoding
+ sizeof(td::utf8_encoding
) / sizeof(unsigned char),
141 std::ostream_iterator
<utf8_t
>(ofs
)
145 // Read the test data back in, converting to UCS-4 on the way in
146 std::vector
<wchar_t> from_file
;
149 ifs
.imbue(utf8_locale
);
150 ifs
.open("test.dat");
152 std::wint_t item
= 0;
153 // note can't use normal vector from iterator constructor because
154 // dinkumware doesn't have it.
162 from_file
.push_back(item
);
166 BOOST_TEST(std::equal(from_file
.begin(), from_file
.end(), td::wchar_encoding
));
168 // Send the UCS4_data back out, converting to UTF-8
171 ofs
.imbue(utf8_locale
);
172 ofs
.open("test2.dat");
176 std::ostream_iterator
<wchar_t, wchar_t>(ofs
)
180 // Make sure that both files are the same
182 typedef std::istream_iterator
<utf8_t
> is_iter
;
185 std::ifstream
ifs1("test.dat");
187 std::vector
<utf8_t
> data1
;
188 std::copy(it1
, end_iter
, std::back_inserter(data1
));
190 std::ifstream
ifs2("test2.dat");
192 std::vector
<utf8_t
> data2
;
193 std::copy(it2
, end_iter
, std::back_inserter(data2
));
195 BOOST_TEST(data1
== data2
);
198 // some libraries have trouble that only shows up with longer strings
200 const wchar_t * test3_data
= L
"\
201 <?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\" ?>\
202 <!DOCTYPE boost_serialization>\
203 <boost_serialization signature=\"serialization::archive\" version=\"3\">\
204 <a class_id=\"0\" tracking_level=\"0\">\
221 <x>1.0170664757130923</x>\
223 <z>cuwjentqpkejp</z>\
225 </boost_serialization>\
228 // Send the UCS4_data back out, converting to UTF-8
229 std::size_t l
= std::wcslen(test3_data
);
232 ofs
.imbue(utf8_locale
);
233 ofs
.open("test3.dat");
237 std::ostream_iterator
<wchar_t, wchar_t>(ofs
)
241 // Make sure that both files are the same
244 ifs
.imbue(utf8_locale
);
245 ifs
.open("test3.dat");
246 ifs
>> std::noskipws
;
251 std::istream_iterator
<wchar_t, wchar_t>(ifs
)
256 // Test length calculation
258 std::codecvt
<wchar_t, char, std::mbstate_t> const& fac
= std::use_facet
< std::codecvt
<wchar_t, char, std::mbstate_t> >(utf8_locale
);
259 std::mbstate_t mbs
= std::mbstate_t();
260 const int utf8_len
= sizeof(td::utf8_encoding
) / sizeof(*td::utf8_encoding
);
261 int res
= fac
.length(mbs
, reinterpret_cast< const char* >(td::utf8_encoding
), reinterpret_cast< const char* >(td::utf8_encoding
+ utf8_len
), ~static_cast< std::size_t >(0u));
262 BOOST_TEST_EQ(utf8_len
, res
);
265 // Test that length calculation detects character boundaries
267 std::codecvt
<wchar_t, char, std::mbstate_t> const& fac
= std::use_facet
< std::codecvt
<wchar_t, char, std::mbstate_t> >(utf8_locale
);
268 std::mbstate_t mbs
= std::mbstate_t();
269 // The first 5 bytes of utf8_encoding contain 3 complete UTF-8 characters (taking 4 bytes in total) and 1 byte of an incomplete character.
270 // This last byte should not be counted by length().
271 const int input_len
= 5;
272 const int utf8_len
= 4;
273 int res
= fac
.length(mbs
, reinterpret_cast< const char* >(td::utf8_encoding
), reinterpret_cast< const char* >(td::utf8_encoding
+ input_len
), ~static_cast< std::size_t >(0u));
274 BOOST_TEST_EQ(utf8_len
, res
);
281 main(int argc
, char * argv
[]){
285 retval
= test_main(argc
, argv
);
287 #ifndef BOOST_NO_EXCEPTION_STD_NAMESPACE
288 BOOST_CATCH(const std::exception
& e
){
289 BOOST_ERROR(e
.what());
293 BOOST_ERROR("failed with uncaught exception:");
297 int error_count
= boost::report_errors();
299 retval
= error_count
;