]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/libs/beast/test/beast/websocket/utf8_checker.cpp
import new upstream nautilus stable release 14.2.8
[ceph.git] / ceph / src / boost / libs / beast / test / beast / websocket / utf8_checker.cpp
1 //
2 // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 //
7 // Official repository: https://github.com/boostorg/beast
8 //
9
10 // Test that header file is self-contained.
11 #include <boost/beast/websocket/detail/utf8_checker.hpp>
12
13 #include <boost/beast/core/buffers_suffix.hpp>
14 #include <boost/beast/core/multi_buffer.hpp>
15 #include <boost/beast/_experimental/unit_test/suite.hpp>
16 #include <array>
17
18 namespace boost {
19 namespace beast {
20 namespace websocket {
21 namespace detail {
22
23 class utf8_checker_test : public beast::unit_test::suite
24 {
25 public:
26 void
27 testOneByteSequence()
28 {
29 // valid single-char code points
30 for(unsigned char c = 0; c < 128; ++c)
31 {
32 utf8_checker u;
33 BEAST_EXPECT(u.write(&c, 1));
34 BEAST_EXPECT(u.finish());
35 }
36
37 // invalid lead bytes
38 for(unsigned char c = 128; c < 192; ++c)
39 {
40 utf8_checker u;
41 BEAST_EXPECT(! u.write(&c, 1));
42 }
43
44 // two byte sequences
45 for(unsigned char c = 192; c < 224; ++c)
46 {
47 // fail fast
48 utf8_checker u;
49 if (c < 194)
50 BEAST_EXPECT(! u.write(&c, 1));
51 else
52 {
53 BEAST_EXPECT(u.write(&c, 1));
54 BEAST_EXPECT(! u.finish());
55 }
56 }
57
58 // three byte sequences
59 for(unsigned char c = 224; c < 240; ++c)
60 {
61 utf8_checker u;
62 BEAST_EXPECT(u.write(&c, 1));
63 BEAST_EXPECT(! u.finish());
64 }
65
66 // four byte sequences
67 for(unsigned char c = 240; c < 245; ++c)
68 {
69 // fail fast
70 utf8_checker u;
71 BEAST_EXPECT(u.write(&c, 1));
72 BEAST_EXPECT(! u.finish());
73 }
74
75 // invalid lead bytes
76 for(unsigned char c = 245; c; ++c)
77 {
78 utf8_checker u;
79 BEAST_EXPECT(! u.write(&c, 1));
80 }
81 }
82
83 void
84 testTwoByteSequence()
85 {
86 // Autobahn 6.18.1
87 {
88 utf8_checker u;
89 BEAST_EXPECT(! u.write(net::buffer("\xc1\xbf", 2)));
90 }
91
92 utf8_checker u;
93 std::uint8_t buf[2];
94 // First byte valid range 194-223
95 for(auto i : {194, 223})
96 {
97 buf[0] = static_cast<std::uint8_t>(i);
98
99 // Second byte valid range 128-191
100 for(auto j : {128, 191})
101 {
102 buf[1] = static_cast<std::uint8_t>(j);
103 BEAST_EXPECT(u.write(buf, 2));
104 BEAST_EXPECT(u.finish());
105 }
106
107 // Second byte invalid range 0-127
108 for(auto j : {0, 127})
109 {
110 buf[1] = static_cast<std::uint8_t>(j);
111 BEAST_EXPECT(! u.write(buf, 2));
112 u.reset();
113 }
114
115 // Second byte invalid range 192-255
116 for(auto j : {192, 255})
117 {
118 buf[1] = static_cast<std::uint8_t>(j);
119 BEAST_EXPECT(! u.write(buf, 2));
120 u.reset();
121 }
122
123 // Segmented sequence second byte invalid
124 BEAST_EXPECT(u.write(buf, 1));
125 BEAST_EXPECT(! u.write(&buf[1], 1));
126 u.reset();
127 }
128 }
129
130 void
131 testThreeByteSequence()
132 {
133 {
134 utf8_checker u;
135 BEAST_EXPECT(u.write(net::buffer("\xef\xbf\xbf", 3)));
136 BEAST_EXPECT(u.finish());
137 }
138 utf8_checker u;
139 std::uint8_t buf[3];
140 // First byte valid range 224-239
141 for(auto i : {224, 239})
142 {
143 buf[0] = static_cast<std::uint8_t>(i);
144
145 // Second byte valid range 128-191 or 160-191 or 128-159
146 std::int32_t const b = (i == 224 ? 160 : 128);
147 std::int32_t const e = (i == 237 ? 159 : 191);
148 for(auto j : {b, e})
149 {
150 buf[1] = static_cast<std::uint8_t>(j);
151
152 // Third byte valid range 128-191
153 for(auto k : {128, 191})
154 {
155 buf[2] = static_cast<std::uint8_t>(k);
156 BEAST_EXPECT(u.write(buf, 3));
157 BEAST_EXPECT(u.finish());
158 // Segmented sequence
159 if (i == 224)
160 {
161 BEAST_EXPECT(u.write(buf, 1));
162 BEAST_EXPECT(!u.finish());
163 }
164 else
165 {
166 BEAST_EXPECT(u.write(buf, 1));
167 BEAST_EXPECT(u.write(&buf[1], 2));
168 }
169 u.reset();
170 // Segmented sequence
171 BEAST_EXPECT(u.write(buf, 2));
172 BEAST_EXPECT(u.write(&buf[2], 1));
173 u.reset();
174
175 if (i == 224)
176 {
177 // Second byte invalid range 0-159
178 for (auto l : {0, 159})
179 {
180 buf[1] = static_cast<std::uint8_t>(l);
181 BEAST_EXPECT(! u.write(buf, 3));
182 u.reset();
183 // Segmented sequence second byte invalid
184 BEAST_EXPECT(!u.write(buf, 2));
185 u.reset();
186 }
187 // Second byte invalid range 192-255
188 for(auto l : {192, 255})
189 {
190 buf[1] = static_cast<std::uint8_t>(l);
191 BEAST_EXPECT(! u.write(buf, 3));
192 u.reset();
193 // Segmented sequence second byte invalid
194 BEAST_EXPECT(!u.write(buf, 2));
195 u.reset();
196 }
197 buf[1] = static_cast<std::uint8_t>(j);
198 }
199 else if (i == 237)
200 {
201 // Second byte invalid range 0-127
202 for(auto l : {0, 127})
203 {
204 buf[1] = static_cast<std::uint8_t>(l);
205 BEAST_EXPECT(! u.write(buf, 3));
206 u.reset();
207 // Segmented sequence second byte invalid
208 BEAST_EXPECT(!u.write(buf, 2));
209 u.reset();
210 }
211
212 // Second byte invalid range 160-255
213 for(auto l : {160, 255})
214 {
215 buf[1] = static_cast<std::uint8_t>(l);
216 BEAST_EXPECT(! u.write(buf, 3));
217 u.reset();
218 // Segmented sequence second byte invalid
219 BEAST_EXPECT(! u.write(buf, 2));
220 u.reset();
221 }
222 buf[1] = static_cast<std::uint8_t>(j);
223 }
224 }
225
226 // Third byte invalid range 0-127
227 for(auto k : {0, 127})
228 {
229 buf[2] = static_cast<std::uint8_t>(k);
230 BEAST_EXPECT(! u.write(buf, 3));
231 u.reset();
232 }
233
234 // Third byte invalid range 192-255
235 for(auto k : {192, 255})
236 {
237 buf[2] = static_cast<std::uint8_t>(k);
238 BEAST_EXPECT(! u.write(buf, 3));
239 u.reset();
240 }
241
242 // Segmented sequence third byte invalid
243 BEAST_EXPECT(u.write(buf, 2));
244 BEAST_EXPECT(! u.write(&buf[2], 1));
245 u.reset();
246 }
247
248 // Second byte invalid range 0-127 or 0-159
249 for(auto j : {0, b - 1})
250 {
251 buf[1] = static_cast<std::uint8_t>(j);
252 BEAST_EXPECT(! u.write(buf, 3));
253 u.reset();
254 }
255
256 // Second byte invalid range 160-255 or 192-255
257 for(auto j : {e + 1, 255})
258 {
259 buf[1] = static_cast<std::uint8_t>(j);
260 BEAST_EXPECT(! u.write(buf, 3));
261 u.reset();
262 }
263
264 // Segmented sequence second byte invalid
265 if (i == 224) {
266 BEAST_EXPECT(u.write(buf, 1));
267 BEAST_EXPECT(!u.finish());
268 }
269 else
270 {
271 BEAST_EXPECT(u.write(buf, 1));
272 BEAST_EXPECT(!u.write(&buf[1], 1));
273 }
274 u.reset();
275 }
276 }
277
278 void
279 testFourByteSequence()
280 {
281 using net::const_buffer;
282 utf8_checker u;
283 std::uint8_t buf[4];
284 // First byte valid range 240-244
285 for(auto i : {240, 244})
286 {
287 buf[0] = static_cast<std::uint8_t>(i);
288
289 std::int32_t const b = (i == 240 ? 144 : 128);
290 std::int32_t const e = (i == 244 ? 143 : 191);
291 for(auto j = b; j <= e; ++j)
292 {
293 buf[1] = static_cast<std::uint8_t>(j);
294
295 // Second byte valid range 144-191 or 128-191 or 128-143
296 for(auto k : {128, 191})
297 {
298 buf[2] = static_cast<std::uint8_t>(k);
299
300 // Third byte valid range 128-191
301 for(auto n : {128, 191})
302 {
303 // Fourth byte valid range 128-191
304 buf[3] = static_cast<std::uint8_t>(n);
305 BEAST_EXPECT(u.write(buf, 4));
306 BEAST_EXPECT(u.finish());
307 // Segmented sequence
308 BEAST_EXPECT(u.write(buf, 1));
309 BEAST_EXPECT(u.write(&buf[1], 3));
310 u.reset();
311 // Segmented sequence
312 BEAST_EXPECT(u.write(buf, 2));
313 BEAST_EXPECT(u.write(&buf[2], 2));
314 u.reset();
315 // Segmented sequence
316 BEAST_EXPECT(u.write(buf, 3));
317 BEAST_EXPECT(u.write(&buf[3], 1));
318 u.reset();
319
320 if (i == 240)
321 {
322 // Second byte invalid range 0-143
323 for(auto r : {0, 143})
324 {
325 buf[1] = static_cast<std::uint8_t>(r);
326 BEAST_EXPECT(! u.write(buf, 4));
327 u.reset();
328 // Segmented sequence second byte invalid
329 BEAST_EXPECT(! u.write(buf, 2));
330 u.reset();
331 }
332
333 // Second byte invalid range 192-255
334 for(auto r : {192, 255})
335 {
336 buf[1] = static_cast<std::uint8_t>(r);
337 BEAST_EXPECT(! u.write(buf, 4));
338 u.reset();
339 // Segmented sequence second byte invalid
340 BEAST_EXPECT(!u.write(buf, 2));
341 u.reset();
342 }
343 buf[1] = static_cast<std::uint8_t>(j);
344 }
345 else if (i == 244)
346 {
347 // Second byte invalid range 0-127
348 for(auto r : {0, 127})
349 {
350 buf[1] = static_cast<std::uint8_t>(r);
351 BEAST_EXPECT(! u.write(buf, 4));
352 u.reset();
353 // Segmented sequence second byte invalid
354 BEAST_EXPECT(! u.write(buf, 2));
355 u.reset();
356 }
357 // Second byte invalid range 144-255
358 for(auto r : {144, 255})
359 {
360 buf[1] = static_cast<std::uint8_t>(r);
361 BEAST_EXPECT(! u.write(buf, 4));
362 u.reset();
363 // Segmented sequence second byte invalid
364 BEAST_EXPECT(! u.write(buf, 2));
365 u.reset();
366 }
367 buf[1] = static_cast<std::uint8_t>(j);
368 }
369 }
370
371 // Fourth byte invalid ranges 0-127, 192-255
372 for(auto r : {0, 127, 192, 255})
373 {
374 buf[3] = static_cast<std::uint8_t>(r);
375 BEAST_EXPECT(! u.write(buf, 4));
376 u.reset();
377 }
378
379 // Segmented sequence fourth byte invalid
380 BEAST_EXPECT(u.write(buf, 3));
381 BEAST_EXPECT(! u.write(&buf[3], 1));
382 u.reset();
383 }
384
385 // Third byte invalid ranges 0-127, 192-255
386 for(auto r : {0, 127, 192, 255})
387 {
388 buf[2] = static_cast<std::uint8_t>(r);
389 BEAST_EXPECT(! u.write(buf, 4));
390 u.reset();
391 }
392
393 // Segmented sequence third byte invalid
394 BEAST_EXPECT(u.write(buf, 2));
395 BEAST_EXPECT(! u.write(&buf[2], 1));
396 u.reset();
397 }
398
399 // Second byte invalid range 0-127 or 0-143
400 for(auto r : {0, b - 1})
401 {
402 buf[1] = static_cast<std::uint8_t>(r);
403 BEAST_EXPECT(! u.write(buf, 4));
404 u.reset();
405 }
406
407 // Second byte invalid range 144-255 or 192-255
408 for(auto r : {e + 1, 255})
409 {
410 buf[1] = static_cast<std::uint8_t>(r);
411 BEAST_EXPECT(! u.write(buf, 4));
412 u.reset();
413 }
414
415 // Segmented sequence second byte invalid
416 BEAST_EXPECT(u.write(buf, 1));
417 BEAST_EXPECT(! u.write(&buf[1], 1));
418
419 u.reset();
420 }
421
422 // First byte invalid range 245-255
423 for(auto r : {245, 255})
424 {
425 buf[0] = static_cast<std::uint8_t>(r);
426 BEAST_EXPECT(! u.write(buf, 4));
427 u.reset();
428 }
429 }
430
431 void
432 testWithStreamBuffer()
433 {
434 {
435 // Valid UTF8 encoded text
436 std::vector<std::vector<std::uint8_t>> const data{{
437 0x48,0x65,0x69,0x7A,0xC3,0xB6,0x6C,0x72,0xC3,0xBC,0x63,0x6B,
438 0x73,0x74,0x6F,0xC3,0x9F,0x61,0x62,0x64,0xC3,0xA4,0x6D,0x70,
439 0x66,0x75,0x6E,0x67
440 }, {
441 0xCE,0x93,0xCE,0xB1,0xCE,0xB6,0xCE,0xAD,0xCE,0xB5,0xCF,0x82,
442 0x20,0xCE,0xBA,0xCE,0xB1,0xE1,0xBD,0xB6,0x20,0xCE,0xBC,0xCF,
443 0x85,0xCF,0x81,0xCF,0x84,0xCE,0xB9,0xE1,0xBD,0xB2,0xCF,0x82,
444 0x20,0xCE,0xB4,0xE1,0xBD,0xB2,0xCE,0xBD,0x20,0xCE,0xB8,0xE1,
445 0xBD,0xB0,0x20,0xCE,0xB2,0xCF,0x81,0xE1,0xBF,0xB6,0x20,0xCF,
446 0x80,0xCE,0xB9,0xE1,0xBD,0xB0,0x20,0xCF,0x83,0xCF,0x84,0xE1,
447 0xBD,0xB8,0x20,0xCF,0x87,0xCF,0x81,0xCF,0x85,0xCF,0x83,0xCE,
448 0xB1,0xCF,0x86,0xE1,0xBD,0xB6,0x20,0xCE,0xBE,0xCE,0xAD,0xCF,
449 0x86,0xCF,0x89,0xCF,0x84,0xCE,0xBF
450 }, {
451 0xC3,0x81,0x72,0x76,0xC3,0xAD,0x7A,0x74,0xC5,0xB1,0x72,0xC5,
452 0x91,0x20,0x74,0xC3,0xBC,0x6B,0xC3,0xB6,0x72,0x66,0xC3,0xBA,
453 0x72,0xC3,0xB3,0x67,0xC3,0xA9,0x70
454 }, {
455 240, 144, 128, 128
456 }
457 };
458 utf8_checker u;
459 for(auto const& s : data)
460 {
461 static std::size_t constexpr size = 3;
462 std::size_t n = s.size();
463 buffers_suffix<
464 net::const_buffer> cb{
465 net::const_buffer(s.data(), n)};
466 multi_buffer b;
467 while(n)
468 {
469 auto const amount = (std::min)(n, size);
470 b.commit(net::buffer_copy(
471 b.prepare(amount), cb));
472 cb.consume(amount);
473 n -= amount;
474 }
475 BEAST_EXPECT(u.write(b.data()));
476 BEAST_EXPECT(u.finish());
477 }
478 }
479 }
480
481 void
482 testBranches()
483 {
484 // switch to slow loop from alignment loop
485 {
486 char buf[32];
487 for(unsigned i = 0; i < sizeof(buf); i += 2)
488 {
489 buf[i ] = '\xc2';
490 buf[i+1] = '\x80';
491 }
492 auto p = reinterpret_cast<char const*>(sizeof(std::size_t) * (
493 (std::uintptr_t(buf) + sizeof(std::size_t) - 1) /
494 sizeof(std::size_t))) + 2;
495 utf8_checker u;
496 BEAST_EXPECT(u.write(
497 reinterpret_cast<std::uint8_t const*>(p),
498 sizeof(buf)-(p-buf)));
499 BEAST_EXPECT(u.finish());
500 }
501
502 // invalid code point in the last dword of a fast run
503 {
504 char buf[20];
505 auto p = reinterpret_cast<char*>(sizeof(std::size_t) * (
506 (std::uintptr_t(buf) + sizeof(std::size_t) - 1) /
507 sizeof(std::size_t)));
508 BOOST_ASSERT(p + 12 <= buf + sizeof(buf));
509 auto const in = p;
510 *p++ = '*'; *p++ = '*'; *p++ = '*'; *p++ = '*';
511 *p++ = '*'; *p++ = '*'; *p++ = '*'; *p++ = '*';
512 p[0] = '\x80'; // invalid
513 p[1] = '*';
514 p[2] = '*';
515 p[3] = '*';
516 utf8_checker u;
517 BEAST_EXPECT(! u.write(reinterpret_cast<
518 std::uint8_t const*>(in), 12));
519 }
520 }
521
522 void
523 AutodeskTests()
524 {
525 std::vector<std::vector<std::uint8_t>> const data{
526 { 's','t','a','r','t', 0xE0 },
527 { 0xA6, 0x81, 'e','n','d' } };
528 utf8_checker u;
529 for(auto const& s : data)
530 {
531 std::size_t n = s.size();
532 buffers_suffix<net::const_buffer> cb{net::const_buffer(s.data(), n)};
533 multi_buffer b;
534 while(n)
535 {
536 auto const amount = (std::min)(n, std::size_t(3)/*size*/);
537 b.commit(net::buffer_copy(b.prepare(amount), cb));
538 cb.consume(amount);
539 n -= amount;
540 }
541 BEAST_EXPECT(u.write(b.data()));
542 }
543 BEAST_EXPECT(u.finish());
544 }
545
546 void
547 AutobahnTest(std::vector<std::vector<std::uint8_t>>&& data, std::vector<bool> result)
548 {
549 BEAST_EXPECT(data.size() == result.size());
550 utf8_checker u;
551 for(std::size_t i = 0; i < data.size(); ++i)
552 {
553 auto const& s = data[i];
554
555 std::size_t n = s.size();
556 buffers_suffix<net::const_buffer> cb{net::const_buffer(s.data(), n)};
557 multi_buffer b;
558 while(n)
559 {
560 auto const amount = (std::min)(n, std::size_t(3)/*size*/);
561 b.commit(net::buffer_copy(b.prepare(amount), cb));
562 cb.consume(amount);
563 n -= amount;
564 }
565 BEAST_EXPECT(u.write(b.data()) == result[i]);
566 }
567 }
568
569 void
570 run() override
571 {
572 testOneByteSequence();
573 testTwoByteSequence();
574 testThreeByteSequence();
575 testFourByteSequence();
576 testWithStreamBuffer();
577 testBranches();
578 AutodeskTests();
579 // 6.4.2
580 AutobahnTest(std::vector<std::vector<std::uint8_t>>{
581 { 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5, 0xF4 },
582 { 0x90 }, { 0x80, 0x80, 0x65, 0x64, 0x69, 0x74, 0x65, 0x64 } },
583 { true, false, false});
584 // 6.4.4
585 AutobahnTest(std::vector<std::vector<std::uint8_t>>{
586 { 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5, 0xF4 },
587 { 0x90 } },
588 { true, false });
589 }
590 };
591
592 BEAST_DEFINE_TESTSUITE(beast,websocket,utf8_checker);
593
594 } // detail
595 } // websocket
596 } // beast
597 } // boost