]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/libs/nowide/test/test_codecvt.cpp
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / boost / libs / nowide / test / test_codecvt.cpp
CommitLineData
f67539c2
TL
1//
2// Copyright (c) 2015 Artyom Beilis (Tonkikh)
3//
4// Distributed under the Boost Software License, Version 1.0. (See
20effc67 5// accompanying file LICENSE or copy at
f67539c2
TL
6// http://www.boost.org/LICENSE_1_0.txt)
7//
8
#include <boost/nowide/utf8_codecvt.hpp>

#include <boost/nowide/convert.hpp>
#include "test.hpp"
#include "test_sets.hpp"
#include <cstdint>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <locale>
#include <vector>
19
1e59de90
TL
// Some MSVC versions have problems with an undefined symbol std::codecvt::id when the UTF character
// types are used as the internal character type of the facet. See
// https://social.msdn.microsoft.com/Forums/vstudio/en-US/8f40dcd8-c67f-4eba-9134-a19b9178e481/vs-2015-rc-linker-stdcodecvt-error?forum=vcgeneral
// Workaround: use int16_t instead of char16_t.
// The macro expands to 1 when _MSC_VER is in the range [1900, 1916] and to 0 for every other compiler.
#if !defined(_MSC_VER) || _MSC_VER < 1900 || _MSC_VER > 1916
#define BOOST_NOWIDE_REQUIRE_UTF_CHAR_WORKAROUND 0
#else
#define BOOST_NOWIDE_REQUIRE_UTF_CHAR_WORKAROUND 1
#endif
f67539c2
TL
28
29static const char* utf8_name =
30 "\xf0\x9d\x92\x9e-\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82.txt";
31static const std::wstring wide_name_str = boost::nowide::widen(utf8_name);
32static const wchar_t* wide_name = wide_name_str.c_str();
33
20effc67 34using cvt_type = std::codecvt<wchar_t, char, std::mbstate_t>;
f67539c2 35
1e59de90
TL
// Code unit types used to instantiate the UTF-16 and UTF-32 facets.
#if BOOST_NOWIDE_REQUIRE_UTF_CHAR_WORKAROUND
// Workaround active: use the fixed-width integer types instead of char16_t/char32_t.
// The names are qualified with std:: because only <cstdint> is included for them.
using utf16_char_t = std::int16_t;
using utf32_char_t = std::int32_t;
#else
using utf16_char_t = char16_t;
using utf32_char_t = char32_t;
#endif
43
44BOOST_NOWIDE_SUPPRESS_UTF_CODECVT_DEPRECATION_BEGIN
45using cvt_type16 = std::codecvt<utf16_char_t, char, std::mbstate_t>;
46using cvt_type32 = std::codecvt<utf32_char_t, char, std::mbstate_t>;
47using utf8_utf16_codecvt = boost::nowide::utf8_codecvt<utf16_char_t>;
48using utf8_utf32_codecvt = boost::nowide::utf8_codecvt<utf32_char_t>;
49BOOST_NOWIDE_SUPPRESS_UTF_CODECVT_DEPRECATION_END
50
51void test_codecvt_basic()
52{
53 // UTF-16
54 {
55 BOOST_NOWIDE_SUPPRESS_UTF_CODECVT_DEPRECATION_BEGIN
56 std::locale l(std::locale::classic(), new utf8_utf16_codecvt());
57 const cvt_type16& cvt = std::use_facet<cvt_type16>(l);
58 BOOST_NOWIDE_SUPPRESS_UTF_CODECVT_DEPRECATION_END
59 TEST_EQ(cvt.encoding(), 0); // Characters have a variable width
60 TEST_EQ(cvt.max_length(), 4); // At most 4 UTF-8 code units are one internal char (one or two UTF-16 code units)
61 TEST(!cvt.always_noconv()); // Always convert
62 }
63 // UTF-32
64 {
65 BOOST_NOWIDE_SUPPRESS_UTF_CODECVT_DEPRECATION_BEGIN
66 std::locale l(std::locale::classic(), new utf8_utf32_codecvt());
67 const cvt_type32& cvt = std::use_facet<cvt_type32>(l);
68 BOOST_NOWIDE_SUPPRESS_UTF_CODECVT_DEPRECATION_END
69 TEST_EQ(cvt.encoding(), 0); // Characters have a variable width
70 TEST_EQ(cvt.max_length(), 4); // At most 4 UTF-8 code units are one internal char (one UTF-32 code unit)
71 TEST(!cvt.always_noconv()); // Always convert
72 }
73}
74
75void test_codecvt_unshift()
76{
77 char buf[256];
78 // UTF-16
79 {
80 const auto name16 =
81 boost::nowide::utf::convert_string<utf16_char_t>(utf8_name, utf8_name + std::strlen(utf8_name));
82
83 utf8_utf16_codecvt cvt16;
84 // Unshift on initial state does nothing
85 std::mbstate_t mb{};
86 char* to_next;
87 BOOST_NOWIDE_SUPPRESS_UTF_CODECVT_DEPRECATION_BEGIN
88 const cvt_type16& cvt = cvt16;
89 TEST_EQ(cvt.unshift(mb, buf, std::end(buf), to_next), cvt_type16::ok);
90 TEST(to_next == buf);
91 const utf16_char_t* from_next;
92 // Convert into a to small buffer
93 TEST_EQ(cvt.out(mb, &name16.front(), &name16.back(), from_next, buf, buf + 1, to_next), cvt_type16::partial);
94 TEST(from_next == &name16[1]);
95 TEST(to_next == buf);
96 // Unshift on non-default state is not possible
97 TEST_EQ(cvt.unshift(mb, buf, std::end(buf), to_next), cvt_type16::error);
98 BOOST_NOWIDE_SUPPRESS_UTF_CODECVT_DEPRECATION_END
99 }
100 // UTF-32
101 {
102 const auto name32 =
103 boost::nowide::utf::convert_string<utf32_char_t>(utf8_name, utf8_name + std::strlen(utf8_name));
104
105 utf8_utf32_codecvt cvt32;
106 // Unshift on initial state does nothing
107 std::mbstate_t mb{};
108 char* to_next;
109 BOOST_NOWIDE_SUPPRESS_UTF_CODECVT_DEPRECATION_BEGIN
110 const cvt_type32& cvt = cvt32;
111 TEST_EQ(cvt.unshift(mb, buf, std::end(buf), to_next), cvt_type32::noconv);
112 TEST(to_next == buf);
113 const utf32_char_t* from_next;
114 // Convert into a too small buffer
115 TEST_EQ(cvt.out(mb, &name32.front(), &name32.back(), from_next, buf, buf + 1, to_next), cvt_type32::partial);
116 TEST(from_next == &name32.front()); // Noting consumed
117 TEST(to_next == buf);
118 TEST(std::mbsinit(&mb) != 0); // State unchanged --> Unshift does nothing
119 TEST_EQ(cvt.unshift(mb, buf, std::end(buf), to_next), cvt_type32::noconv);
120 TEST(to_next == buf);
121 BOOST_NOWIDE_SUPPRESS_UTF_CODECVT_DEPRECATION_END
122 }
123}
124
f67539c2
TL
125void test_codecvt_in_n_m(const cvt_type& cvt, size_t n, size_t m)
126{
127 const wchar_t* wptr = wide_name;
128 size_t wlen = std::wcslen(wide_name);
129 size_t u8len = std::strlen(utf8_name);
130 const char* from = utf8_name;
1e59de90 131 const char* from_end = from;
f67539c2
TL
132 const char* real_end = utf8_name + u8len;
133 const char* from_next = from;
20effc67 134 std::mbstate_t mb{};
f67539c2
TL
135 while(from_next < real_end)
136 {
1e59de90 137 if(from == from_end)
f67539c2 138 {
1e59de90
TL
139 from_end = from + n;
140 if(from_end > real_end)
141 from_end = real_end;
f67539c2
TL
142 }
143
144 wchar_t buf[128];
145 wchar_t* to = buf;
146 wchar_t* to_end = to + m;
147 wchar_t* to_next = to;
148
149 std::mbstate_t mb2 = mb;
1e59de90 150 std::codecvt_base::result r = cvt.in(mb, from, from_end, from_next, to, to_end, to_next);
f67539c2 151
1e59de90
TL
152 int count = cvt.length(mb2, from, from_end, to_end - to);
153 TEST_EQ(std::memcmp(&mb, &mb2, sizeof(mb)), 0);
154 TEST_EQ(count, from_next - from);
f67539c2
TL
155
156 if(r == cvt_type::partial)
157 {
1e59de90
TL
158 from_end += n;
159 if(from_end > real_end)
160 from_end = real_end;
f67539c2 161 } else
1e59de90 162 TEST_EQ(r, cvt_type::ok);
f67539c2
TL
163 while(to != to_next)
164 {
165 TEST(*wptr == *to);
166 wptr++;
167 to++;
168 }
169 to = to_next;
170 from = from_next;
171 }
172 TEST(wptr == wide_name + wlen);
173 TEST(from == real_end);
174}
175
176void test_codecvt_out_n_m(const cvt_type& cvt, size_t n, size_t m)
177{
178 const char* nptr = utf8_name;
179 size_t wlen = std::wcslen(wide_name);
180 size_t u8len = std::strlen(utf8_name);
181
20effc67 182 std::mbstate_t mb{};
f67539c2
TL
183
184 const wchar_t* from_next = wide_name;
185 const wchar_t* real_from_end = wide_name + wlen;
186
187 char buf[256];
188 char* to = buf;
189 char* to_next = to;
190 char* to_end = to + n;
191 char* real_to_end = buf + sizeof(buf);
192
193 while(from_next < real_from_end)
194 {
195 const wchar_t* from = from_next;
196 const wchar_t* from_end = from + m;
197 if(from_end > real_from_end)
198 from_end = real_from_end;
199 if(to_end == to)
200 {
201 to_end = to + n;
202 }
203
204 std::codecvt_base::result r = cvt.out(mb, from, from_end, from_next, to, to_end, to_next);
205 if(r == cvt_type::partial)
206 {
207 // If those are equal, then "partial" probably means: Need more input
208 // Otherwise "Need more output"
209 if(from_next != from_end)
210 {
211 TEST(to_end - to_next < cvt.max_length());
212 to_end += n;
1e59de90 213 TEST(to_end <= real_to_end); // Should always be big enough
f67539c2
TL
214 }
215 } else
216 {
1e59de90 217 TEST_EQ(r, cvt_type::ok);
f67539c2
TL
218 }
219
220 while(to != to_next)
221 {
1e59de90 222 TEST_EQ(*nptr, *to);
f67539c2
TL
223 nptr++;
224 to++;
225 }
226 from = from_next;
227 }
228 TEST(nptr == utf8_name + u8len);
229 TEST(from_next == real_from_end);
1e59de90
TL
230 const auto expected = (sizeof(wchar_t) == 2) ? cvt_type::ok : cvt_type::noconv; // UTF-32 is not state-dependent
231 TEST_EQ(cvt.unshift(mb, to, to + n, to_next), expected);
f67539c2
TL
232 TEST(to_next == to);
233}
234
235void test_codecvt_conv()
236{
237 std::cout << "Conversions " << std::endl;
238 std::locale l(std::locale::classic(), new boost::nowide::utf8_codecvt<wchar_t>());
239
240 const cvt_type& cvt = std::use_facet<cvt_type>(l);
241 const size_t utf8_len = std::strlen(utf8_name);
242 const size_t wide_len = std::wcslen(wide_name);
243
244 for(size_t i = 1; i <= utf8_len + 1; i++)
245 {
246 for(size_t j = 1; j <= wide_len + 1; j++)
247 {
248 try
249 {
250 test_codecvt_in_n_m(cvt, i, j);
251 test_codecvt_out_n_m(cvt, i, j);
1e59de90 252 } catch(...) // LCOV_EXCL_LINE
f67539c2 253 {
1e59de90
TL
254 std::cerr << "Wlen=" << j << " Nlen=" << i << std::endl; // LCOV_EXCL_LINE
255 throw; // LCOV_EXCL_LINE
f67539c2
TL
256 }
257 }
258 }
259}
260
261void test_codecvt_err()
262{
263 std::cout << "Errors " << std::endl;
264 std::locale l(std::locale::classic(), new boost::nowide::utf8_codecvt<wchar_t>());
265
266 const cvt_type& cvt = std::use_facet<cvt_type>(l);
267
268 std::cout << "- UTF-8" << std::endl;
269 {
270 {
271 wchar_t buf[4];
272 wchar_t* const to = buf;
273 wchar_t* const to_end = buf + 4;
274 const char* err_utf = "1\xFF\xFF\xd7\xa9";
20effc67 275 std::mbstate_t mb{};
f67539c2
TL
276 const char* from = err_utf;
277 const char* from_end = from + std::strlen(from);
278 const char* from_next = from;
279 wchar_t* to_next = to;
1e59de90 280 TEST_EQ(cvt.in(mb, from, from_end, from_next, to, to_end, to_next), cvt_type::ok);
f67539c2
TL
281 TEST(from_next == from + 5);
282 TEST(to_next == to + 4);
283 TEST(std::wstring(to, to_end) == boost::nowide::widen(err_utf));
284 }
285 {
286 wchar_t buf[4];
287 wchar_t* const to = buf;
288 wchar_t* const to_end = buf + 4;
289 const char* err_utf = "1\xd7"; // 1 valid, 1 incomplete UTF-8 char
20effc67 290 std::mbstate_t mb{};
f67539c2
TL
291 const char* from = err_utf;
292 const char* from_end = from + std::strlen(from);
293 const char* from_next = from;
294 wchar_t* to_next = to;
1e59de90 295 TEST_EQ(cvt.in(mb, from, from_end, from_next, to, to_end, to_next), cvt_type::partial);
f67539c2
TL
296 TEST(from_next == from + 1);
297 TEST(to_next == to + 1);
298 TEST(std::wstring(to, to_next) == std::wstring(L"1"));
299 }
300 {
301 char buf[4] = {};
302 char* const to = buf;
303 char* const to_end = buf + 4;
304 char* to_next = to;
305 const wchar_t* err_utf = L"\xD800"; // Trailing UTF-16 surrogate
20effc67 306 std::mbstate_t mb{};
f67539c2
TL
307 const wchar_t* from = err_utf;
308 const wchar_t* from_end = from + 1;
309 const wchar_t* from_next = from;
310 cvt_type::result res = cvt.out(mb, from, from_end, from_next, to, to_end, to_next);
311#ifdef BOOST_MSVC
312#pragma warning(disable : 4127) // Constant expression detected
313#endif
314 if(sizeof(wchar_t) == 2)
315 {
1e59de90 316 TEST_EQ(res, cvt_type::partial);
f67539c2
TL
317 TEST(from_next == from_end);
318 TEST(to_next == to);
1e59de90 319 TEST_EQ(buf[0], 0);
f67539c2
TL
320 } else
321 {
1e59de90 322 TEST_EQ(res, cvt_type::ok);
f67539c2
TL
323 TEST(from_next == from_end);
324 TEST(to_next == to + 3);
325 // surrogate is invalid
1e59de90 326 TEST_EQ(std::string(to, to_next), boost::nowide::narrow(wreplacement_str));
f67539c2
TL
327 }
328 }
329 }
330
331 std::cout << "- UTF-16/32" << std::endl;
332 {
333 char buf[32];
334 char* to = buf;
335 char* to_end = buf + 32;
336 char* to_next = to;
337 wchar_t err_buf[3] = {'1', 0xDC9E, 0}; // second surrogate not works both for UTF-16 and 32
338 const wchar_t* err_utf = err_buf;
339 {
20effc67 340 std::mbstate_t mb{};
f67539c2
TL
341 const wchar_t* from = err_utf;
342 const wchar_t* from_end = from + std::wcslen(from);
343 const wchar_t* from_next = from;
1e59de90 344 TEST_EQ(cvt.out(mb, from, from_end, from_next, to, to_end, to_next), cvt_type::ok);
f67539c2
TL
345 TEST(from_next == from + 2);
346 TEST(to_next == to + 4);
1e59de90 347 TEST_EQ(std::string(to, to_next), "1" + boost::nowide::narrow(wreplacement_str));
f67539c2
TL
348 }
349 }
350}
351
352std::wstring codecvt_to_wide(const std::string& s)
353{
354 std::locale l(std::locale::classic(), new boost::nowide::utf8_codecvt<wchar_t>());
355
356 const cvt_type& cvt = std::use_facet<cvt_type>(l);
357
20effc67 358 std::mbstate_t mb{};
f67539c2
TL
359 const char* const from = s.c_str();
360 const char* const from_end = from + s.size();
361 const char* from_next = from;
362
363 std::vector<wchar_t> buf(s.size() + 2); // +1 for possible incomplete char, +1 for NULL
364 wchar_t* const to = &buf[0];
365 wchar_t* const to_end = to + buf.size();
366 wchar_t* to_next = to;
367
1e59de90 368 const auto expected_consumed = cvt.length(mb, from, from_end, buf.size());
f67539c2 369 cvt_type::result res = cvt.in(mb, from, from_end, from_next, to, to_end, to_next);
1e59de90 370 TEST_EQ(expected_consumed, from_next - from);
f67539c2
TL
371 if(res == cvt_type::partial)
372 {
373 TEST(to_next < to_end);
374 *(to_next++) = BOOST_NOWIDE_REPLACEMENT_CHARACTER;
375 } else
1e59de90 376 TEST_EQ(res, cvt_type::ok);
f67539c2
TL
377
378 return std::wstring(to, to_next);
379}
380
381std::string codecvt_to_narrow(const std::wstring& s)
382{
383 std::locale l(std::locale::classic(), new boost::nowide::utf8_codecvt<wchar_t>());
384
385 const cvt_type& cvt = std::use_facet<cvt_type>(l);
386
20effc67 387 std::mbstate_t mb{};
f67539c2
TL
388 const wchar_t* const from = s.c_str();
389 const wchar_t* const from_end = from + s.size();
390 const wchar_t* from_next = from;
391
392 std::vector<char> buf((s.size() + 1) * 4 + 1); // +1 for possible incomplete char, +1 for NULL
393 char* const to = &buf[0];
394 char* const to_end = to + buf.size();
395 char* to_next = to;
396
397 cvt_type::result res = cvt.out(mb, from, from_end, from_next, to, to_end, to_next);
398 if(res == cvt_type::partial)
399 {
400 TEST(to_next < to_end);
401 return std::string(to, to_next) + boost::nowide::narrow(wreplacement_str);
402 } else
1e59de90 403 TEST_EQ(res, cvt_type::ok);
f67539c2
TL
404
405 return std::string(to, to_next);
406}
407
408void test_codecvt_subst()
409{
410 std::cout << "Substitutions " << std::endl;
411 run_all(codecvt_to_wide, codecvt_to_narrow);
412}
413
1e59de90 414// coverity [root_function]
f67539c2
TL
415void test_main(int, char**, char**)
416{
1e59de90
TL
417 test_codecvt_basic();
418 test_codecvt_unshift();
f67539c2
TL
419 test_codecvt_conv();
420 test_codecvt_err();
421 test_codecvt_subst();
422}