]>
Commit | Line | Data |
---|---|---|
f67539c2 TL |
1 | // |
2 | // Copyright (c) 2012 Artyom Beilis (Tonkikh) | |
3 | // | |
4 | // Distributed under the Boost Software License, Version 1.0. (See | |
20effc67 | 5 | // accompanying file LICENSE or copy at |
f67539c2 TL |
6 | // http://www.boost.org/LICENSE_1_0.txt) |
7 | // | |
8 | #ifndef BOOST_NOWIDE_TEST_SETS_HPP_INCLUDED | |
9 | #define BOOST_NOWIDE_TEST_SETS_HPP_INCLUDED | |
10 | ||
11 | #include <boost/nowide/config.hpp> | |
12 | #include <iostream> | |
13 | #include <string> | |
14 | ||
// One test case pairing a narrow string with a wide string.
// Kept as a plain aggregate so tables can be written as {utf8, wide};
// the member order therefore must not change.
struct utf8_to_wide
{
    const char* utf8;    // narrow-character side of the pair
    const wchar_t* wide; // wide-character side of the pair
};
20 | ||
// One test case pairing a wide string with a narrow string.
// Same two members as utf8_to_wide but in the opposite order, so tables
// are written as {wide, utf8}; the member order must not change.
struct wide_to_utf8
{
    const wchar_t* wide; // wide-character side of the pair
    const char* utf8;    // narrow-character side of the pair
};
26 | ||
27 | #if defined(BOOST_MSVC) && BOOST_MSVC < 1700 | |
28 | #pragma warning(disable : 4428) // universal-character-name encountered in source | |
29 | #endif | |
30 | ||
31 | const std::wstring wreplacement_str(1, wchar_t(BOOST_NOWIDE_REPLACEMENT_CHARACTER)); | |
32 | ||
33 | // clang-format off | |
34 | const utf8_to_wide roundtrip_tests[] = { | |
35 | {"", L""}, | |
1e59de90 TL |
36 | // Ascii |
37 | {"a", L"a"}, | |
38 | // 2 Octet | |
39 | {"\xc3\xb1", L"\u00F1"}, | |
40 | // 3 Octet | |
41 | {"\xe2\x82\xa1", L"\u20A1"}, | |
42 | // 4 Octet | |
43 | {"\xf0\x90\x8c\xbc", L"\U0001033C"}, | |
44 | // Last valid codepoint | |
45 | {"\xf4\x8f\xbf\xbf", L"\U0010FFFF"}, | |
46 | // Misc | |
f67539c2 TL |
47 | {"\xf0\x9d\x92\x9e-\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82.txt", |
48 | L"\U0001D49E-\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042.txt"}, | |
49 | {"\xd7\xa9-\xd0\xbc-\xce\xbd.txt", | |
50 | L"\u05e9-\u043c-\u03bd.txt"}, | |
51 | {"\xd7\xa9\xd7\x9c\xd7\x95\xd7\x9d", | |
52 | L"\u05e9\u05dc\u05d5\u05dd"}, | |
53 | }; | |
54 | ||
55 | const utf8_to_wide invalid_utf8_tests[] = { | |
1e59de90 TL |
56 | // 2 Octet |
57 | {"\xc3\x28", L"\ufffd"}, | |
58 | {"\xa0\xa1", L"\ufffd\ufffd"}, | |
59 | // 3 Octet | |
60 | {"\xe2\x28\xa1", L"\ufffd\ufffd"}, | |
61 | {"\xe2\x82\x28", L"\ufffd"}, | |
62 | // 4 Octet | |
63 | {"\xf0\x28\x8c\xbc", L"\ufffd\ufffd\ufffd"}, | |
64 | {"\xf0\x90\x28\xbc", L"\ufffd\ufffd"}, | |
65 | {"\xf0\x90\x8c\x28", L"\ufffd"}, | |
66 | // 5 and 6 byte possible but invalid UTF | |
67 | {"\xf8\xa1\xa1\xa1\xa1", L"\ufffd\ufffd\ufffd\ufffd\ufffd"}, | |
68 | {"\xfc\xa1\xa1\xa1\xa1\xa1", L"\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd"}, | |
69 | // First invalid codepoint | |
70 | {"\xf4\x90\x80\x80", L"\ufffd\ufffd\ufffd\ufffd"}, | |
71 | // Overlong ascii (0x2F), | |
72 | {"\xc0\xaf", L"\ufffd\ufffd"}, | |
73 | {"\xe0\x80\xaf", L"\ufffd\ufffd\ufffd"}, | |
74 | {"\xf0\x80\x80\xaf", L"\ufffd\ufffd\ufffd\ufffd"}, | |
75 | {"\xf8\x80\x80\x80\xaf", L"\ufffd\ufffd\ufffd\ufffd\ufffd"}, | |
76 | {"\xfc\x80\x80\x80\x80\xaf", L"\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd"}, | |
77 | // Misc | |
f67539c2 TL |
78 | {"\xFF\xFF", L"\ufffd\ufffd"}, |
79 | {"\xd7\xa9\xFF", L"\u05e9\ufffd"}, | |
80 | {"\xd7", L"\ufffd"}, | |
81 | {"\xFF\xd7\xa9", L"\ufffd\u05e9"}, | |
82 | {"\xFF\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82", L"\uFFFD\u043F\u0440\u0438\u0432\u0435\u0442"}, | |
83 | {"\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82\xFF", L"\u043F\u0440\u0438\u0432\u0435\u0442\uFFFD"}, | |
84 | {"\xE3\x82\xFF\xE3\x81\x82", L"\ufffd\u3042"}, | |
85 | {"\xE3\xFF\x84\xE3\x81\x82", L"\ufffd\ufffd\u3042"}, | |
86 | }; | |
87 | ||
88 | const wide_to_utf8 invalid_wide_tests[] = { | |
89 | {L"\xDC01\x05e9", "\xEF\xBF\xBD\xd7\xa9"}, | |
90 | {L"\x05e9\xD800", "\xd7\xa9\xEF\xBF\xBD"}, | |
91 | {L"\xDC00\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042", | |
92 | "\xEF\xBF\xBD \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"}, | |
93 | {L"\u3084\u3042\xDC00\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042", | |
94 | "\xE3\x82\x84\xE3\x81\x82\xEF\xBF\xBD \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"}, | |
95 | }; | |
96 | ||
97 | ||
98 | const wide_to_utf8 invalid_utf16_tests[] = { | |
99 | {L"\xD800\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042", | |
100 | "\xEF\xBF\xBD\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"}, | |
101 | {L"\u3084\u3042\xD800\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042", | |
102 | "\xE3\x82\x84\xE3\x81\x82\xEF\xBF\xBD\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"}, | |
103 | }; | |
104 | ||
105 | const wide_to_utf8 invalid_utf32_tests[] = { | |
106 | {L"\xD800\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042", | |
107 | "\xEF\xBF\xBD \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"}, | |
108 | {L"\u3084\u3042\xD800\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042", | |
109 | "\xE3\x82\x84\xE3\x81\x82\xEF\xBF\xBD \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"}, | |
110 | }; | |
111 | ||
112 | // clang-format on | |
113 | ||
114 | #ifdef BOOST_MSVC | |
115 | #pragma warning(push) | |
116 | #pragma warning(disable : 4127) // Constant expression detected | |
117 | #endif | |
118 | ||
// Returns the element count of a built-in array.
// The count is deduced from the array type, so the argument itself is never read
// and the result can be used in constant expressions.
template<typename Element, size_t Count>
constexpr size_t array_size(const Element (&)[Count])
{
    return Count;
}
124 | ||
125 | void run_all(std::wstring (*to_wide)(const std::string&), std::string (*to_narrow)(const std::wstring&)) | |
126 | { | |
127 | for(size_t i = 0; i < array_size(roundtrip_tests); i++) | |
128 | { | |
129 | std::cout << " Roundtrip " << i << std::endl; | |
130 | TEST(roundtrip_tests[i].utf8 == to_narrow(roundtrip_tests[i].wide)); | |
131 | TEST(to_wide(roundtrip_tests[i].utf8) == roundtrip_tests[i].wide); | |
132 | } | |
133 | ||
134 | for(size_t i = 0; i < array_size(invalid_utf8_tests); i++) | |
135 | { | |
136 | std::cout << " Invalid UTF8 " << i << std::endl; | |
1e59de90 | 137 | const auto f3 = to_wide(invalid_utf8_tests[i].utf8); |
f67539c2 TL |
138 | TEST(to_wide(invalid_utf8_tests[i].utf8) == invalid_utf8_tests[i].wide); |
139 | } | |
140 | ||
141 | for(size_t i = 0; i < array_size(invalid_wide_tests); i++) | |
142 | { | |
143 | std::cout << " Invalid Wide " << i << std::endl; | |
144 | TEST(to_narrow(invalid_wide_tests[i].wide) == invalid_wide_tests[i].utf8); | |
145 | } | |
146 | ||
147 | size_t total = 0; | |
148 | const wide_to_utf8* ptr = 0; | |
149 | if(sizeof(wchar_t) == 2) | |
150 | { | |
151 | ptr = invalid_utf16_tests; | |
152 | total = array_size(invalid_utf16_tests); | |
153 | } else | |
154 | { | |
155 | ptr = invalid_utf32_tests; | |
156 | total = array_size(invalid_utf32_tests); | |
157 | } | |
158 | for(size_t i = 0; i < total; i++) | |
159 | { | |
160 | std::cout << " Invalid UTF16/32 " << i << std::endl; | |
161 | TEST(to_narrow(ptr[i].wide) == ptr[i].utf8); | |
162 | } | |
163 | } | |
164 | ||
165 | #endif | |
166 | ||
167 | #ifdef BOOST_MSVC | |
168 | #pragma warning(pop) | |
169 | #endif |