]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/libs/detail/test/test_utf8_codecvt.cpp
import new upstream nautilus stable release 14.2.8
[ceph.git] / ceph / src / boost / libs / detail / test / test_utf8_codecvt.cpp
1 /////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
2 // test_utf8_codecvt.cpp
3
4 // (C) Copyright 2002-4 Robert Ramey - http://www.rrsd.com .
5 // Use, modification and distribution is subject to the Boost Software
6 // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
7 // http://www.boost.org/LICENSE_1_0.txt)
8
9 #include <algorithm> // std::copy
10 #include <fstream>
11 #include <iostream>
12 #include <iterator>
13 #include <locale>
14 #include <vector>
15 #include <string>
16
17 #include <cstddef> // size_t
18 #include <cwchar>
19 #include <boost/config.hpp>
20 #include <boost/core/no_exceptions_support.hpp>
21
22 #define BOOST_UTF8_BEGIN_NAMESPACE namespace boost { namespace detail {
23 #define BOOST_UTF8_END_NAMESPACE } }
24 #include <boost/detail/utf8_codecvt_facet.hpp>
25 #include <boost/detail/utf8_codecvt_facet.ipp>
26
// On platforms where the <c...> headers leave the C library names in the
// global namespace only (signalled by BOOST_NO_STDC_NAMESPACE), pull the
// names this test spells as std::size_t, std::wcslen and std::wint_t into
// namespace std.
#if defined(BOOST_NO_STDC_NAMESPACE)
namespace std{
    using ::size_t;
    using ::wcslen;
#if !defined(UNDER_CE) && !defined(__PGIC__)
    // Previously spelled "w_int", which is not a C library name; the type
    // the test actually needs is wint_t.
    using ::wint_t;
#endif
} // namespace std
#endif
36
// Workaround borrowed from boost/iostreams/char_traits.hpp.
//
// The Dinkumware library shipped with QNX Momentics 6.3.0, 4.0.2 and
// Sun C++ 5.8 + STLport 4 define the EOF and WEOF macros without
// std::-qualifying the wint_t type. Making wint_t visible in this scope
// lets those macros be used here.
// NOTE: Use BOOST_WORKAROUND?
#if defined(__SUNPRO_CC) \
    || (defined(__QNX__) && defined(BOOST_DINKUMWARE_STDLIB))
using std::wint_t;
#endif
47
48 #include <boost/core/lightweight_test.hpp>
49
// Test fixture keyed on the size of wchar_t in bytes.
//
// utf8_encoding holds a sequence of UTF-8 encoded characters and
// wchar_encoding holds the same characters, in the same order, as wchar_t
// values. Specialisations are provided for 2-byte and 4-byte wchar_t.
template<std::size_t WideCharSize>
struct test_data
{
    static unsigned char utf8_encoding[];
    static wchar_t wchar_encoding[];
};

// 2-byte wchar_t: boundary values of the 1-, 2- and 3-byte UTF-8 forms,
// stopping at 0x7fff.
template<>
unsigned char test_data<2>::utf8_encoding[] = {
    0x01,               // 0x0001
    0x7f,               // 0x007f
    0xc2, 0x80,         // 0x0080
    0xdf, 0xbf,         // 0x07ff
    0xe0, 0xa0, 0x80,   // 0x0800
    0xe7, 0xbf, 0xbf    // 0x7fff
};

template<>
wchar_t test_data<2>::wchar_encoding[] = {
    0x0001, 0x007f,     // one-byte forms
    0x0080, 0x07ff,     // two-byte forms
    0x0800, 0x7fff      // three-byte forms
};

// 4-byte wchar_t: boundary values of the 1- to 4-byte UTF-8 forms.
//
// codecvt implementations for clang and gcc don't handle more than 21 bits
// and return eof accordingly, so the whole 32-bit range is not tested.
// The entries that remain disabled for that reason are:
//     0xf7, 0xbf, 0xbf, 0xbf,
//     0xf8, 0x88, 0x80, 0x80, 0x80,
//     0xfb, 0xbf, 0xbf, 0xbf, 0xbf,
//     0xfc, 0x84, 0x80, 0x80, 0x80, 0x80,
//     0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf
template<>
unsigned char test_data<4>::utf8_encoding[] = {
    0x01,                    // 0x00000001
    0x7f,                    // 0x0000007f
    0xc2, 0x80,              // 0x00000080
    0xdf, 0xbf,              // 0x000007ff
    0xe0, 0xa0, 0x80,        // 0x00000800
    0xef, 0xbf, 0xbf,        // 0x0000ffff
    0xf0, 0x90, 0x80, 0x80,  // 0x00010000
    0xf4, 0x8f, 0xbf, 0xbf   // 0x0010ffff
};

// Wide-character counterparts of the bytes above. The matching disabled
// entries (see the note on the UTF-8 array) are:
//     0x001fffff, 0x00200000, 0x03ffffff, 0x04000000, 0x7fffffff
template<>
wchar_t test_data<4>::wchar_encoding[] = {
    static_cast<wchar_t>(0x00000001),
    static_cast<wchar_t>(0x0000007f),
    static_cast<wchar_t>(0x00000080),
    static_cast<wchar_t>(0x000007ff),
    static_cast<wchar_t>(0x00000800),
    static_cast<wchar_t>(0x0000ffff),
    static_cast<wchar_t>(0x00010000),
    static_cast<wchar_t>(0x0010ffff)
};
120
121 int
122 test_main(int /* argc */, char * /* argv */[]) {
123 std::locale utf8_locale
124 = std::locale(
125 std::locale::classic(),
126 new boost::detail::utf8_codecvt_facet
127 );
128
129 typedef char utf8_t;
130 // define test data compatible with the wchar_t implementation
131 // as either ucs-2 or ucs-4 depending on the compiler/library.
132 typedef test_data<sizeof(wchar_t)> td;
133
134 // Send our test UTF-8 data to file
135 {
136 std::ofstream ofs;
137 ofs.open("test.dat");
138 std::copy(
139 td::utf8_encoding,
140 td::utf8_encoding + sizeof(td::utf8_encoding) / sizeof(unsigned char),
141 std::ostream_iterator<utf8_t>(ofs)
142 );
143 }
144
145 // Read the test data back in, converting to UCS-4 on the way in
146 std::vector<wchar_t> from_file;
147 {
148 std::wifstream ifs;
149 ifs.imbue(utf8_locale);
150 ifs.open("test.dat");
151
152 std::wint_t item = 0;
153 // note can't use normal vector from iterator constructor because
154 // dinkumware doesn't have it.
155 for(;;){
156 item = ifs.get();
157 if(item == WEOF)
158 break;
159 //ifs >> item;
160 //if(ifs.eof())
161 // break;
162 from_file.push_back(item);
163 }
164 }
165
166 BOOST_TEST(std::equal(from_file.begin(), from_file.end(), td::wchar_encoding));
167
168 // Send the UCS4_data back out, converting to UTF-8
169 {
170 std::wofstream ofs;
171 ofs.imbue(utf8_locale);
172 ofs.open("test2.dat");
173 std::copy(
174 from_file.begin(),
175 from_file.end(),
176 std::ostream_iterator<wchar_t, wchar_t>(ofs)
177 );
178 }
179
180 // Make sure that both files are the same
181 {
182 typedef std::istream_iterator<utf8_t> is_iter;
183 is_iter end_iter;
184
185 std::ifstream ifs1("test.dat");
186 is_iter it1(ifs1);
187 std::vector<utf8_t> data1;
188 std::copy(it1, end_iter, std::back_inserter(data1));
189
190 std::ifstream ifs2("test2.dat");
191 is_iter it2(ifs2);
192 std::vector<utf8_t> data2;
193 std::copy(it2, end_iter, std::back_inserter(data2));
194
195 BOOST_TEST(data1 == data2);
196 }
197
198 // some libraries have trouble that only shows up with longer strings
199
200 const wchar_t * test3_data = L"\
201 <?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\" ?>\
202 <!DOCTYPE boost_serialization>\
203 <boost_serialization signature=\"serialization::archive\" version=\"3\">\
204 <a class_id=\"0\" tracking_level=\"0\">\
205 <b>1</b>\
206 <f>96953204</f>\
207 <g>177129195</g>\
208 <l>1</l>\
209 <m>5627</m>\
210 <n>23010</n>\
211 <o>7419</o>\
212 <p>16212</p>\
213 <q>4086</q>\
214 <r>2749</r>\
215 <c>-33</c>\
216 <s>124</s>\
217 <t>28</t>\
218 <u>32225</u>\
219 <v>17543</v>\
220 <w>0.84431422</w>\
221 <x>1.0170664757130923</x>\
222 <y>tjbx</y>\
223 <z>cuwjentqpkejp</z>\
224 </a>\
225 </boost_serialization>\
226 ";
227
228 // Send the UCS4_data back out, converting to UTF-8
229 std::size_t l = std::wcslen(test3_data);
230 {
231 std::wofstream ofs;
232 ofs.imbue(utf8_locale);
233 ofs.open("test3.dat");
234 std::copy(
235 test3_data,
236 test3_data + l,
237 std::ostream_iterator<wchar_t, wchar_t>(ofs)
238 );
239 }
240
241 // Make sure that both files are the same
242 {
243 std::wifstream ifs;
244 ifs.imbue(utf8_locale);
245 ifs.open("test3.dat");
246 ifs >> std::noskipws;
247 BOOST_TEST(
248 std::equal(
249 test3_data,
250 test3_data + l,
251 std::istream_iterator<wchar_t, wchar_t>(ifs)
252 )
253 );
254 }
255
256 // Test length calculation
257 {
258 std::codecvt<wchar_t, char, std::mbstate_t> const& fac = std::use_facet< std::codecvt<wchar_t, char, std::mbstate_t> >(utf8_locale);
259 std::mbstate_t mbs = std::mbstate_t();
260 const int utf8_len = sizeof(td::utf8_encoding) / sizeof(*td::utf8_encoding);
261 int res = fac.length(mbs, reinterpret_cast< const char* >(td::utf8_encoding), reinterpret_cast< const char* >(td::utf8_encoding + utf8_len), ~static_cast< std::size_t >(0u));
262 BOOST_TEST_EQ(utf8_len, res);
263 }
264
265 // Test that length calculation detects character boundaries
266 {
267 std::codecvt<wchar_t, char, std::mbstate_t> const& fac = std::use_facet< std::codecvt<wchar_t, char, std::mbstate_t> >(utf8_locale);
268 std::mbstate_t mbs = std::mbstate_t();
269 // The first 5 bytes of utf8_encoding contain 3 complete UTF-8 characters (taking 4 bytes in total) and 1 byte of an incomplete character.
270 // This last byte should not be accounted by length().
271 const int input_len = 5;
272 const int utf8_len = 4;
273 int res = fac.length(mbs, reinterpret_cast< const char* >(td::utf8_encoding), reinterpret_cast< const char* >(td::utf8_encoding + input_len), ~static_cast< std::size_t >(0u));
274 BOOST_TEST_EQ(utf8_len, res);
275 }
276
277 return EXIT_SUCCESS;
278 }
279
280 int
281 main(int argc, char * argv[]){
282
283 int retval = 1;
284 BOOST_TRY{
285 retval = test_main(argc, argv);
286 }
287 #ifndef BOOST_NO_EXCEPTION_STD_NAMESPACE
288 BOOST_CATCH(const std::exception & e){
289 BOOST_ERROR(e.what());
290 }
291 #endif
292 BOOST_CATCH(...){
293 BOOST_ERROR("failed with uncaught exception:");
294 }
295 BOOST_CATCH_END
296
297 int error_count = boost::report_errors();
298 if(error_count > 0)
299 retval = error_count;
300 return retval;
301 }
302