]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/libs/beast/test/beast/websocket/utf8_checker.cpp
update sources to v12.2.3
[ceph.git] / ceph / src / boost / libs / beast / test / beast / websocket / utf8_checker.cpp
1 //
2 // Copyright (c) 2016-2017 Vinnie Falco (vinnie dot falco at gmail dot com)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 //
7 // Official repository: https://github.com/boostorg/beast
8 //
9
10 // Test that header file is self-contained.
11 #include <boost/beast/websocket/detail/utf8_checker.hpp>
12
13 #include <boost/beast/core/buffers_suffix.hpp>
14 #include <boost/beast/core/multi_buffer.hpp>
15 #include <boost/beast/unit_test/suite.hpp>
16 #include <array>
17
18 namespace boost {
19 namespace beast {
20 namespace websocket {
21 namespace detail {
22
23 class utf8_checker_test : public beast::unit_test::suite
24 {
25 public:
26 void
27 testOneByteSequence()
28 {
29 // valid single-char code points
30 for(unsigned char c = 0; c < 128; ++c)
31 {
32 utf8_checker u;
33 BEAST_EXPECT(u.write(&c, 1));
34 BEAST_EXPECT(u.finish());
35 }
36
37 // invalid lead bytes
38 for(unsigned char c = 128; c < 192; ++c)
39 {
40 utf8_checker u;
41 BEAST_EXPECT(! u.write(&c, 1));
42 }
43
44 // two byte sequences
45 for(unsigned char c = 192; c < 224; ++c)
46 {
47 // fail fast
48 utf8_checker u;
49 if (c < 194)
50 BEAST_EXPECT(! u.write(&c, 1));
51 else
52 {
53 BEAST_EXPECT(u.write(&c, 1));
54 BEAST_EXPECT(! u.finish());
55 }
56 }
57
58 // three byte sequences
59 for(unsigned char c = 224; c < 240; ++c)
60 {
61 // fail fast
62 utf8_checker u;
63 if (c == 224)
64 BEAST_EXPECT(! u.write(&c, 1));
65 else
66 {
67 BEAST_EXPECT(u.write(&c, 1));
68 BEAST_EXPECT(! u.finish());
69 }
70 }
71
72 // four byte sequences
73 for(unsigned char c = 240; c < 245; ++c)
74 {
75 // fail fast
76 utf8_checker u;
77 if (c == 240)
78 BEAST_EXPECT(! u.write(&c, 1));
79 else
80 {
81 BEAST_EXPECT(u.write(&c, 1));
82 BEAST_EXPECT(! u.finish());
83 }
84 }
85
86 // invalid lead bytes
87 for(unsigned char c = 245; c; ++c)
88 {
89 utf8_checker u;
90 BEAST_EXPECT(! u.write(&c, 1));
91 }
92 }
93
94 void
95 testTwoByteSequence()
96 {
97 // Autobahn 6.18.1
98 {
99 utf8_checker u;
100 BEAST_EXPECT(! u.write(boost::asio::buffer("\xc1\xbf", 2)));
101 }
102
103 utf8_checker u;
104 std::uint8_t buf[2];
105 // First byte valid range 194-223
106 for(auto i : {194, 223})
107 {
108 buf[0] = static_cast<std::uint8_t>(i);
109
110 // Second byte valid range 128-191
111 for(auto j : {128, 191})
112 {
113 buf[1] = static_cast<std::uint8_t>(j);
114 BEAST_EXPECT(u.write(buf, 2));
115 BEAST_EXPECT(u.finish());
116 }
117
118 // Second byte invalid range 0-127
119 for(auto j : {0, 127})
120 {
121 buf[1] = static_cast<std::uint8_t>(j);
122 BEAST_EXPECT(! u.write(buf, 2));
123 u.reset();
124 }
125
126 // Second byte invalid range 192-255
127 for(auto j : {192, 255})
128 {
129 buf[1] = static_cast<std::uint8_t>(j);
130 BEAST_EXPECT(! u.write(buf, 2));
131 u.reset();
132 }
133
134 // Segmented sequence second byte invalid
135 BEAST_EXPECT(u.write(buf, 1));
136 BEAST_EXPECT(! u.write(&buf[1], 1));
137 u.reset();
138 }
139 }
140
141 void
142 testThreeByteSequence()
143 {
144 {
145 utf8_checker u;
146 BEAST_EXPECT(u.write(boost::asio::buffer("\xef\xbf\xbf", 3)));
147 BEAST_EXPECT(u.finish());
148 }
149 utf8_checker u;
150 std::uint8_t buf[3];
151 // First byte valid range 224-239
152 for(auto i : {224, 239})
153 {
154 buf[0] = static_cast<std::uint8_t>(i);
155
156 // Second byte valid range 128-191 or 160-191 or 128-159
157 std::int32_t const b = (i == 224 ? 160 : 128);
158 std::int32_t const e = (i == 237 ? 159 : 191);
159 for(auto j : {b, e})
160 {
161 buf[1] = static_cast<std::uint8_t>(j);
162
163 // Third byte valid range 128-191
164 for(auto k : {128, 191})
165 {
166 buf[2] = static_cast<std::uint8_t>(k);
167 BEAST_EXPECT(u.write(buf, 3));
168 BEAST_EXPECT(u.finish());
169 // Segmented sequence
170 if (i == 224)
171 BEAST_EXPECT(! u.write(buf, 1));
172 else
173 {
174 BEAST_EXPECT(u.write(buf, 1));
175 BEAST_EXPECT(u.write(&buf[1], 2));
176 }
177 u.reset();
178 // Segmented sequence
179 BEAST_EXPECT(u.write(buf, 2));
180 BEAST_EXPECT(u.write(&buf[2], 1));
181 u.reset();
182
183 if (i == 224)
184 {
185 // Second byte invalid range 0-159
186 for (auto l : {0, 159})
187 {
188 buf[1] = static_cast<std::uint8_t>(l);
189 BEAST_EXPECT(! u.write(buf, 3));
190 u.reset();
191 // Segmented sequence second byte invalid
192 BEAST_EXPECT(!u.write(buf, 2));
193 u.reset();
194 }
195 // Second byte invalid range 192-255
196 for(auto l : {192, 255})
197 {
198 buf[1] = static_cast<std::uint8_t>(l);
199 BEAST_EXPECT(! u.write(buf, 3));
200 u.reset();
201 // Segmented sequence second byte invalid
202 BEAST_EXPECT(!u.write(buf, 2));
203 u.reset();
204 }
205 buf[1] = static_cast<std::uint8_t>(j);
206 }
207 else if (i == 237)
208 {
209 // Second byte invalid range 0-127
210 for(auto l : {0, 127})
211 {
212 buf[1] = static_cast<std::uint8_t>(l);
213 BEAST_EXPECT(! u.write(buf, 3));
214 u.reset();
215 // Segmented sequence second byte invalid
216 BEAST_EXPECT(!u.write(buf, 2));
217 u.reset();
218 }
219
220 // Second byte invalid range 160-255
221 for(auto l : {160, 255})
222 {
223 buf[1] = static_cast<std::uint8_t>(l);
224 BEAST_EXPECT(! u.write(buf, 3));
225 u.reset();
226 // Segmented sequence second byte invalid
227 BEAST_EXPECT(! u.write(buf, 2));
228 u.reset();
229 }
230 buf[1] = static_cast<std::uint8_t>(j);
231 }
232 }
233
234 // Third byte invalid range 0-127
235 for(auto k : {0, 127})
236 {
237 buf[2] = static_cast<std::uint8_t>(k);
238 BEAST_EXPECT(! u.write(buf, 3));
239 u.reset();
240 }
241
242 // Third byte invalid range 192-255
243 for(auto k : {192, 255})
244 {
245 buf[2] = static_cast<std::uint8_t>(k);
246 BEAST_EXPECT(! u.write(buf, 3));
247 u.reset();
248 }
249
250 // Segmented sequence third byte invalid
251 BEAST_EXPECT(u.write(buf, 2));
252 BEAST_EXPECT(! u.write(&buf[2], 1));
253 u.reset();
254 }
255
256 // Second byte invalid range 0-127 or 0-159
257 for(auto j : {0, b - 1})
258 {
259 buf[1] = static_cast<std::uint8_t>(j);
260 BEAST_EXPECT(! u.write(buf, 3));
261 u.reset();
262 }
263
264 // Second byte invalid range 160-255 or 192-255
265 for(auto j : {e + 1, 255})
266 {
267 buf[1] = static_cast<std::uint8_t>(j);
268 BEAST_EXPECT(! u.write(buf, 3));
269 u.reset();
270 }
271
272 // Segmented sequence second byte invalid
273 if (i == 224)
274 BEAST_EXPECT(! u.write(buf, 1));
275 else
276 {
277 BEAST_EXPECT(u.write(buf, 1));
278 BEAST_EXPECT(!u.write(&buf[1], 1));
279 }
280 u.reset();
281 }
282 }
283
284 void
285 testFourByteSequence()
286 {
287 using boost::asio::const_buffer;
288 utf8_checker u;
289 std::uint8_t buf[4];
290 // First byte valid range 240-244
291 for(auto i : {240, 244})
292 {
293 buf[0] = static_cast<std::uint8_t>(i);
294
295 std::int32_t const b = (i == 240 ? 144 : 128);
296 std::int32_t const e = (i == 244 ? 143 : 191);
297 for(auto j = b; j <= e; ++j)
298 {
299 buf[1] = static_cast<std::uint8_t>(j);
300
301 // Second byte valid range 144-191 or 128-191 or 128-143
302 for(auto k : {128, 191})
303 {
304 buf[2] = static_cast<std::uint8_t>(k);
305
306 // Third byte valid range 128-191
307 for(auto n : {128, 191})
308 {
309 // Fourth byte valid range 128-191
310 buf[3] = static_cast<std::uint8_t>(n);
311 BEAST_EXPECT(u.write(buf, 4));
312 BEAST_EXPECT(u.finish());
313 // Segmented sequence
314 if (i == 240)
315 BEAST_EXPECT(! u.write(buf, 1));
316 else
317 {
318 BEAST_EXPECT(u.write(buf, 1));
319 BEAST_EXPECT(u.write(&buf[1], 3));
320 }
321 u.reset();
322 // Segmented sequence
323 BEAST_EXPECT(u.write(buf, 2));
324 BEAST_EXPECT(u.write(&buf[2], 2));
325 u.reset();
326 // Segmented sequence
327 BEAST_EXPECT(u.write(buf, 3));
328 BEAST_EXPECT(u.write(&buf[3], 1));
329 u.reset();
330
331 if (i == 240)
332 {
333 // Second byte invalid range 0-143
334 for(auto r : {0, 143})
335 {
336 buf[1] = static_cast<std::uint8_t>(r);
337 BEAST_EXPECT(! u.write(buf, 4));
338 u.reset();
339 // Segmented sequence second byte invalid
340 BEAST_EXPECT(! u.write(buf, 2));
341 u.reset();
342 }
343
344 // Second byte invalid range 192-255
345 for(auto r : {192, 255})
346 {
347 buf[1] = static_cast<std::uint8_t>(r);
348 BEAST_EXPECT(! u.write(buf, 4));
349 u.reset();
350 // Segmented sequence second byte invalid
351 BEAST_EXPECT(!u.write(buf, 2));
352 u.reset();
353 }
354 buf[1] = static_cast<std::uint8_t>(j);
355 }
356 else if (i == 244)
357 {
358 // Second byte invalid range 0-127
359 for(auto r : {0, 127})
360 {
361 buf[1] = static_cast<std::uint8_t>(r);
362 BEAST_EXPECT(! u.write(buf, 4));
363 u.reset();
364 // Segmented sequence second byte invalid
365 BEAST_EXPECT(! u.write(buf, 2));
366 u.reset();
367 }
368 // Second byte invalid range 144-255
369 for(auto r : {144, 255})
370 {
371 buf[1] = static_cast<std::uint8_t>(r);
372 BEAST_EXPECT(! u.write(buf, 4));
373 u.reset();
374 // Segmented sequence second byte invalid
375 BEAST_EXPECT(! u.write(buf, 2));
376 u.reset();
377 }
378 buf[1] = static_cast<std::uint8_t>(j);
379 }
380 }
381
382 // Fourth byte invalid ranges 0-127, 192-255
383 for(auto r : {0, 127, 192, 255})
384 {
385 buf[3] = static_cast<std::uint8_t>(r);
386 BEAST_EXPECT(! u.write(buf, 4));
387 u.reset();
388 }
389
390 // Segmented sequence fourth byte invalid
391 BEAST_EXPECT(u.write(buf, 3));
392 BEAST_EXPECT(! u.write(&buf[3], 1));
393 u.reset();
394 }
395
396 // Third byte invalid ranges 0-127, 192-255
397 for(auto r : {0, 127, 192, 255})
398 {
399 buf[2] = static_cast<std::uint8_t>(r);
400 BEAST_EXPECT(! u.write(buf, 4));
401 u.reset();
402 }
403
404 // Segmented sequence third byte invalid
405 BEAST_EXPECT(u.write(buf, 2));
406 BEAST_EXPECT(! u.write(&buf[2], 1));
407 u.reset();
408 }
409
410 // Second byte invalid range 0-127 or 0-143
411 for(auto r : {0, b - 1})
412 {
413 buf[1] = static_cast<std::uint8_t>(r);
414 BEAST_EXPECT(! u.write(buf, 4));
415 u.reset();
416 }
417
418 // Second byte invalid range 144-255 or 192-255
419 for(auto r : {e + 1, 255})
420 {
421 buf[1] = static_cast<std::uint8_t>(r);
422 BEAST_EXPECT(! u.write(buf, 4));
423 u.reset();
424 }
425
426 // Segmented sequence second byte invalid
427 if (i == 240)
428 BEAST_EXPECT(! u.write(buf, 1));
429 else
430 {
431 BEAST_EXPECT(u.write(buf, 1));
432 BEAST_EXPECT(! u.write(&buf[1], 1));
433 }
434 u.reset();
435 }
436
437 // First byte invalid range 245-255
438 for(auto r : {245, 255})
439 {
440 buf[0] = static_cast<std::uint8_t>(r);
441 BEAST_EXPECT(! u.write(buf, 4));
442 u.reset();
443 }
444 }
445
446 void
447 testWithStreamBuffer()
448 {
449 {
450 // Valid UTF8 encoded text
451 std::vector<std::vector<std::uint8_t>> const data{{
452 0x48,0x65,0x69,0x7A,0xC3,0xB6,0x6C,0x72,0xC3,0xBC,0x63,0x6B,
453 0x73,0x74,0x6F,0xC3,0x9F,0x61,0x62,0x64,0xC3,0xA4,0x6D,0x70,
454 0x66,0x75,0x6E,0x67
455 }, {
456 0xCE,0x93,0xCE,0xB1,0xCE,0xB6,0xCE,0xAD,0xCE,0xB5,0xCF,0x82,
457 0x20,0xCE,0xBA,0xCE,0xB1,0xE1,0xBD,0xB6,0x20,0xCE,0xBC,0xCF,
458 0x85,0xCF,0x81,0xCF,0x84,0xCE,0xB9,0xE1,0xBD,0xB2,0xCF,0x82,
459 0x20,0xCE,0xB4,0xE1,0xBD,0xB2,0xCE,0xBD,0x20,0xCE,0xB8,0xE1,
460 0xBD,0xB0,0x20,0xCE,0xB2,0xCF,0x81,0xE1,0xBF,0xB6,0x20,0xCF,
461 0x80,0xCE,0xB9,0xE1,0xBD,0xB0,0x20,0xCF,0x83,0xCF,0x84,0xE1,
462 0xBD,0xB8,0x20,0xCF,0x87,0xCF,0x81,0xCF,0x85,0xCF,0x83,0xCE,
463 0xB1,0xCF,0x86,0xE1,0xBD,0xB6,0x20,0xCE,0xBE,0xCE,0xAD,0xCF,
464 0x86,0xCF,0x89,0xCF,0x84,0xCE,0xBF
465 }, {
466 0xC3,0x81,0x72,0x76,0xC3,0xAD,0x7A,0x74,0xC5,0xB1,0x72,0xC5,
467 0x91,0x20,0x74,0xC3,0xBC,0x6B,0xC3,0xB6,0x72,0x66,0xC3,0xBA,
468 0x72,0xC3,0xB3,0x67,0xC3,0xA9,0x70
469 }, {
470 240, 144, 128, 128
471 }
472 };
473 utf8_checker u;
474 for(auto const& s : data)
475 {
476 static std::size_t constexpr size = 3;
477 std::size_t n = s.size();
478 buffers_suffix<
479 boost::asio::const_buffer> cb{
480 boost::asio::const_buffer(s.data(), n)};
481 multi_buffer b;
482 while(n)
483 {
484 auto const amount = (std::min)(n, size);
485 b.commit(boost::asio::buffer_copy(
486 b.prepare(amount), cb));
487 cb.consume(amount);
488 n -= amount;
489 }
490 BEAST_EXPECT(u.write(b.data()));
491 BEAST_EXPECT(u.finish());
492 }
493 }
494 }
495
496 void
497 testBranches()
498 {
499 // switch to slow loop from alignment loop
500 {
501 char buf[32];
502 for(unsigned i = 0; i < sizeof(buf); i += 2)
503 {
504 buf[i ] = '\xc2';
505 buf[i+1] = '\x80';
506 }
507 auto p = reinterpret_cast<char const*>(sizeof(std::size_t) * (
508 (std::uintptr_t(buf) + sizeof(std::size_t) - 1) /
509 sizeof(std::size_t))) + 2;
510 utf8_checker u;
511 BEAST_EXPECT(u.write(
512 reinterpret_cast<std::uint8_t const*>(p),
513 sizeof(buf)-(p-buf)));
514 BEAST_EXPECT(u.finish());
515 }
516
517 // invalid code point in the last dword of a fast run
518 {
519 char buf[20];
520 auto p = reinterpret_cast<char*>(sizeof(std::size_t) * (
521 (std::uintptr_t(buf) + sizeof(std::size_t) - 1) /
522 sizeof(std::size_t)));
523 BOOST_ASSERT(p + 12 <= buf + sizeof(buf));
524 auto const in = p;
525 *p++ = '*'; *p++ = '*'; *p++ = '*'; *p++ = '*';
526 *p++ = '*'; *p++ = '*'; *p++ = '*'; *p++ = '*';
527 p[0] = '\x80'; // invalid
528 p[1] = '*';
529 p[2] = '*';
530 p[3] = '*';
531 utf8_checker u;
532 BEAST_EXPECT(! u.write(reinterpret_cast<
533 std::uint8_t const*>(in), 12));
534 }
535 }
536
537 void
538 run() override
539 {
540 testOneByteSequence();
541 testTwoByteSequence();
542 testThreeByteSequence();
543 testFourByteSequence();
544 testWithStreamBuffer();
545 testBranches();
546 }
547 };
548
549 BEAST_DEFINE_TESTSUITE(beast,websocket,utf8_checker);
550
551 } // detail
552 } // websocket
553 } // beast
554 } // boost