]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/libs/serialization/test/test_utf8_codecvt.cpp
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / boost / libs / serialization / test / test_utf8_codecvt.cpp
1 /////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
2 // test_utf8_codecvt.cpp
3
4 // (C) Copyright 2002-4 Robert Ramey - http://www.rrsd.com .
5 // Use, modification and distribution is subject to the Boost Software
6 // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
7 // http://www.boost.org/LICENSE_1_0.txt)
8
9 #include <algorithm> // std::copy
10 #include <fstream>
11 #include <iostream>
12 #include <iterator>
13 #include <locale>
14 #include <vector>
15 #include <string>
16
17 #include <cstddef> // size_t
18 #include <cwchar>
19 #include <boost/config.hpp>
20
21 #include <boost/archive/detail/utf8_codecvt_facet.hpp>
22
// When BOOST_NO_STDC_NAMESPACE is defined, make the C library names that this
// file spells with a std:: prefix (std::size_t, std::wcslen, std::wint_t)
// visible inside namespace std by importing them from the global namespace.
#if defined(BOOST_NO_STDC_NAMESPACE)
namespace std{
    using ::size_t;
    using ::wcslen;
// wint_t is not imported when UNDER_CE or __PGIC__ is defined.
#if !defined(UNDER_CE) && !defined(__PGIC__)
    using ::wint_t;
#endif
} // namespace std
#endif
32
33 // Note: copied from boost/iostreams/char_traits.hpp
34 //
35 // Dinkumware that comes with QNX Momentics 6.3.0, 4.0.2, incorrectly defines
36 // the EOF and WEOF macros to not std:: qualify the wint_t type (and so does
37 // Sun C++ 5.8 + STLport 4). Fix by placing the def in this scope.
38 // NOTE: Use BOOST_WORKAROUND?
// Expose wint_t at global scope on the toolchains described above, so that
// their unqualified use of wint_t inside the EOF/WEOF macros resolves.
#if defined(__SUNPRO_CC) \
    || (defined(__QNX__) && defined(BOOST_DINKUMWARE_STDLIB))
using std::wint_t;
#endif
43
44 #include "test_tools.hpp"
45
// Holder for one matched pair of test vectors, selected by a size in bytes
// (test_main instantiates it with sizeof(wchar_t)).
// Both arrays are only declared here; explicit specializations supply the
// contents for each supported size.
template<std::size_t WideCharBytes>
struct test_data
{
    // raw UTF-8 byte stream
    static unsigned char utf8_encoding[];
    // the wide characters that byte stream is expected to decode to
    static wchar_t wchar_encoding[];
};
52
53 template<>
54 unsigned char test_data<2>::utf8_encoding[] = {
55 0x01,
56 0x7f,
57 0xc2, 0x80,
58 0xdf, 0xbf,
59 0xe0, 0xa0, 0x80,
60 0xe7, 0xbf, 0xbf
61 };
62
63 template<>
64 wchar_t test_data<2>::wchar_encoding[] = {
65 0x0001,
66 0x007f,
67 0x0080,
68 0x07ff,
69 0x0800,
70 0x7fff
71 };
72
73 template<>
74 unsigned char test_data<4>::utf8_encoding[] = {
75 0x01,
76 0x7f,
77 0xc2, 0x80,
78 0xdf, 0xbf,
79 0xe0, 0xa0, 0x80,
80 0xef, 0xbf, 0xbf,
81 0xf0, 0x90, 0x80, 0x80,
82 0xf4, 0x8f, 0xbf, 0xbf,
83 /* codecvt implementations for clang and gcc don't handle more than 21 bits and
84 * return eof accordlingly. So don't test the whole 32 range
85 */
86 /*
87 0xf7, 0xbf, 0xbf, 0xbf,
88 0xf8, 0x88, 0x80, 0x80, 0x80,
89 0xfb, 0xbf, 0xbf, 0xbf, 0xbf,
90 0xfc, 0x84, 0x80, 0x80, 0x80, 0x80,
91 0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf
92 */
93 };
94
95 template<>
96 wchar_t test_data<4>::wchar_encoding[] = {
97 (wchar_t)0x00000001,
98 (wchar_t)0x0000007f,
99 (wchar_t)0x00000080,
100 (wchar_t)0x000007ff,
101 (wchar_t)0x00000800,
102 (wchar_t)0x0000ffff,
103 (wchar_t)0x00010000,
104 (wchar_t)0x0010ffff,
105 /* codecvt implementations for clang and gcc don't handle more than 21 bits and
106 * return eof accordlingly. So don't test the whole 32 range
107 */
108 /*
109 (wchar_t)0x001fffff,
110 (wchar_t)0x00200000,
111 (wchar_t)0x03ffffff,
112 (wchar_t)0x04000000,
113 (wchar_t)0x7fffffff
114 */
115 };
116
117 int
118 test_main(int /* argc */, char * /* argv */[]) {
119 std::locale utf8_locale
120 = std::locale(
121 std::locale::classic(),
122 new boost::archive::detail::utf8_codecvt_facet
123 );
124
125 typedef char utf8_t;
126 // define test data compatible with the wchar_t implementation
127 // as either ucs-2 or ucs-4 depending on the compiler/library.
128 typedef test_data<sizeof(wchar_t)> td;
129
130 // Send our test UTF-8 data to file
131 {
132 std::ofstream ofs;
133 ofs.open("test.dat");
134 std::copy(
135 td::utf8_encoding,
136 td::utf8_encoding + sizeof(td::utf8_encoding) / sizeof(unsigned char),
137 std::ostream_iterator<utf8_t>(ofs)
138 );
139 }
140
141 // Read the test data back in, converting to UCS-4 on the way in
142 std::vector<wchar_t> from_file;
143 {
144 std::wifstream ifs;
145 ifs.imbue(utf8_locale);
146 ifs.open("test.dat");
147
148 std::wint_t item = 0;
149 // note can't use normal vector from iterator constructor because
150 // dinkumware doesn't have it.
151 for(;;){
152 item = ifs.get();
153 if(item == WEOF)
154 break;
155 //ifs >> item;
156 //if(ifs.eof())
157 // break;
158 from_file.push_back(item);
159 }
160 }
161
162 BOOST_CHECK(std::equal(from_file.begin(), from_file.end(), td::wchar_encoding));
163
164 // Send the UCS4_data back out, converting to UTF-8
165 {
166 std::wofstream ofs;
167 ofs.imbue(utf8_locale);
168 ofs.open("test2.dat");
169 std::copy(
170 from_file.begin(),
171 from_file.end(),
172 std::ostream_iterator<wchar_t, wchar_t>(ofs)
173 );
174 }
175
176 // Make sure that both files are the same
177 {
178 typedef std::istream_iterator<utf8_t> is_iter;
179 is_iter end_iter;
180
181 std::ifstream ifs1("test.dat");
182 is_iter it1(ifs1);
183 std::vector<utf8_t> data1;
184 std::copy(it1, end_iter, std::back_inserter(data1));
185
186 std::ifstream ifs2("test2.dat");
187 is_iter it2(ifs2);
188 std::vector<utf8_t> data2;
189 std::copy(it2, end_iter, std::back_inserter(data2));
190
191 BOOST_CHECK(data1 == data2);
192 }
193
194 // some libraries have trouble that only shows up with longer strings
195
196 const wchar_t * test3_data = L"\
197 <?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\" ?>\
198 <!DOCTYPE boost_serialization>\
199 <boost_serialization signature=\"serialization::archive\" version=\"3\">\
200 <a class_id=\"0\" tracking_level=\"0\">\
201 <b>1</b>\
202 <f>96953204</f>\
203 <g>177129195</g>\
204 <l>1</l>\
205 <m>5627</m>\
206 <n>23010</n>\
207 <o>7419</o>\
208 <p>16212</p>\
209 <q>4086</q>\
210 <r>2749</r>\
211 <c>-33</c>\
212 <s>124</s>\
213 <t>28</t>\
214 <u>32225</u>\
215 <v>17543</v>\
216 <w>0.84431422</w>\
217 <x>1.0170664757130923</x>\
218 <y>tjbx</y>\
219 <z>cuwjentqpkejp</z>\
220 </a>\
221 </boost_serialization>\
222 ";
223
224 // Send the UCS4_data back out, converting to UTF-8
225 std::size_t l = std::wcslen(test3_data);
226 {
227 std::wofstream ofs;
228 ofs.imbue(utf8_locale);
229 ofs.open("test3.dat");
230 std::copy(
231 test3_data,
232 test3_data + l,
233 std::ostream_iterator<wchar_t, wchar_t>(ofs)
234 );
235 }
236
237 // Make sure that both files are the same
238 {
239 std::wifstream ifs;
240 ifs.imbue(utf8_locale);
241 ifs.open("test3.dat");
242 ifs >> std::noskipws;
243 BOOST_CHECK(
244 std::equal(
245 test3_data,
246 test3_data + l,
247 std::istream_iterator<wchar_t, wchar_t>(ifs)
248 )
249 );
250 }
251 return EXIT_SUCCESS;
252 }