]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/boost/beast/http/detail/basic_parser.hpp
update sources to v12.2.3
[ceph.git] / ceph / src / boost / boost / beast / http / detail / basic_parser.hpp
1 //
2 // Copyright (c) 2016-2017 Vinnie Falco (vinnie dot falco at gmail dot com)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 //
7 // Official repository: https://github.com/boostorg/beast
8 //
9
10 #ifndef BOOST_BEAST_HTTP_DETAIL_BASIC_PARSER_HPP
11 #define BOOST_BEAST_HTTP_DETAIL_BASIC_PARSER_HPP
12
#include <boost/beast/core/static_string.hpp>
#include <boost/beast/core/string.hpp>
#include <boost/beast/core/detail/cpu_info.hpp>
#include <boost/beast/http/error.hpp>
#include <boost/beast/http/detail/rfc7230.hpp>
#include <boost/config.hpp>
#include <boost/version.hpp>
#include <algorithm>
#include <cstddef>
#include <limits>
#include <utility>
23
24 namespace boost {
25 namespace beast {
26 namespace http {
27 namespace detail {
28
29 class basic_parser_base
30 {
31 protected:
// Limit, in characters, on the size of the obs-fold buffer.
//
// NOTE(review): presumably this is the capacity `N` of the
// static_string handed to parse_field() for unfolding a field
// value that spans several lines -- confirm at the call site,
// nothing in this header ties the two together.
//
// https://stackoverflow.com/questions/686217/maximum-on-http-header-values
//
static std::size_t constexpr max_obs_fold = 4096;
37
// Enumerates the stages of parsing a message.
//
// `nothing_yet` is pinned to 0; the remaining enumerators take
// the following consecutive values in declaration order.
//
// None of the helpers declared in this class read or write a
// value of this type.
// NOTE(review): presumably the derived parser stores its progress
// in a `state` and the `...0` variants mark the first visit of the
// matching stage -- confirm against the parser implementation.
enum class state
{
    nothing_yet = 0,
    start_line,
    fields,
    body0,
    body,
    body_to_eof0,
    body_to_eof,
    chunk_header0,
    chunk_header,
    chunk_body,
    complete
};
52
// Returns `true` if `c` is accepted as a request-target character.
//
// Every octet is accepted except the control characters and space
// (0x00 through 0x20) and DEL (0x7F). Octets with the high bit set
// (0x80 through 0xFF) are accepted.
static
bool
is_pathchar(char c)
{
    // Work on the unsigned octet so that the result does not
    // depend on whether plain `char` is signed.
    auto const octet = static_cast<unsigned char>(c);
    return octet > 0x20 && octet != 0x7f;
}
80
// Convert one hexadecimal digit to its numeric value.
//
// @param d Receives the value 0..15 when `c` is one of
//          '0'-'9', 'a'-'f' or 'A'-'F'; otherwise it is set
//          to 0xFF.
// @param c The character to convert.
// @return  `true` if `c` is a hexadecimal digit.
static
inline
bool
unhex(unsigned char& d, char c)
{
    // One entry per octet. Every row must end in a comma: without
    // it the last entry of a row and the first entry of the next
    // fuse into the expression `-1 -1`, which shortens the table
    // and makes some non-hex octets look like valid digits.
    static signed char constexpr tab[256] = {
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 0
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 16
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 32
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1, // 48
        -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 64
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 80
        -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 96
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 112

        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 128
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 144
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 160
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 176
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 192
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 208
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 224
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  // 240
    };
    // Index with the unsigned octet so that high-bit characters
    // do not produce a negative subscript.
    d = static_cast<unsigned char>(
        tab[static_cast<unsigned char>(c)]);
    // -1 in the table becomes 0xFF, the "not a digit" marker.
    return d != static_cast<unsigned char>(-1);
}
109
// Returns `true` if `c` is one of the decimal digits '0'..'9'.
static
bool
is_digit(char c)
{
    return c >= '0' && c <= '9';
}
116
// Returns `true` if `c` lies in the range space (0x20) through
// tilde (0x7E), inclusive.
static
bool
is_print(char c)
{
    return c >= ' ' && c <= '~';
}
123
// Skip leading spaces and horizontal tabs.
//
// @param it  Start of the range.
// @param end End of the range.
// @return    The first position in [it, end) that holds neither
//            ' ' nor '\t', or `end` if there is none.
template<class FwdIt>
static
FwdIt
trim_front(FwdIt it, FwdIt const& end)
{
    while(it != end && (*it == ' ' || *it == '\t'))
        ++it;
    return it;
}
137
// Skip trailing spaces and horizontal tabs.
//
// @param it    One past the last element of the range.
// @param first Start of the range.
// @return      The new end of the range: the position just after
//              the last element that is neither ' ' nor '\t', or
//              `first` if every element is whitespace.
template<class RanIt>
static
RanIt
trim_back(
    RanIt it, RanIt const& first)
{
    while(it != first && (it[-1] == ' ' || it[-1] == '\t'))
        --it;
    return it;
}
153
154 static
155 string_view
156 make_string(char const* first, char const* last)
157 {
158 return {first, static_cast<
159 std::size_t>(last - first)};
160 }
161
162 //--------------------------------------------------------------------------
163
// Placeholder for an accelerated search for the first character
// of [buf, buf_end) that falls inside one of the byte ranges in
// `ranges`.
//
// No accelerated search is implemented here: the function reports
// "not found" and hands back `buf` unchanged, so the caller must
// scan the input itself.
//
// @param buf         Start of the input.
// @param buf_end     End of the input (unused).
// @param ranges      Pairs of range bounds (unused).
// @param ranges_size Number of characters in `ranges` (unused).
// @return            `{buf, false}`.
static
std::pair<char const*, bool>
find_fast(
    char const* buf,
    char const* buf_end,
    char const* ranges,
    std::size_t ranges_size)
{
    // Silence unused-parameter warnings without relying on a
    // utility whose header this file does not include.
    static_cast<void>(buf_end);
    static_cast<void>(ranges);
    static_cast<void>(ranges_size);
    return {buf, false};
}
176
177 // VFALCO Can SIMD help this?
178 static
179 char const*
180 find_eol(
181 char const* it, char const* last,
182 error_code& ec)
183 {
184 for(;;)
185 {
186 if(it == last)
187 {
188 ec.assign(0, ec.category());
189 return nullptr;
190 }
191 if(*it == '\r')
192 {
193 if(++it == last)
194 {
195 ec.assign(0, ec.category());
196 return nullptr;
197 }
198 if(*it != '\n')
199 {
200 ec = error::bad_line_ending;
201 return nullptr;
202 }
203 ec.assign(0, ec.category());
204 return ++it;
205 }
206 // VFALCO Should we handle the legacy case
207 // for lines terminated with a single '\n'?
208 ++it;
209 }
210 }
211
// Find the end of the header section, i.e. the first occurrence
// of "\r\n\r\n" in [p, last).
//
// @param p    Start of the input.
// @param last End of the input.
// @return     The position just past the "\r\n\r\n", or `nullptr`
//             if the sequence does not occur in the input.
static
char const*
find_eom(char const* p, char const* last)
{
    for(;;)
    {
        // Compare the remaining length instead of forming `p + 4`,
        // which could point beyond one-past-the-end of the buffer.
        if(last - p < 4)
            return nullptr;
        // Examine the 4-character window back to front so that a
        // mismatch lets us skip as far ahead as possible.
        if(p[3] != '\n')
        {
            if(p[3] == '\r')
                // a match could begin at any later window position
                ++p;
            else
                // p[3] cannot be part of any match
                p += 4;
        }
        else if(p[2] != '\r')
        {
            // a lone LF at p[3]: no match overlaps this window
            p += 4;
        }
        else if(p[1] != '\n')
        {
            // "\r\n" at p[2..3] may be the start of a match
            p += 2;
        }
        else if(p[0] != '\r')
        {
            // "\n\r\n" at p[1..3]: a match may start at p[2]
            p += 2;
        }
        else
        {
            return p + 4;
        }
    }
}
245
246 //--------------------------------------------------------------------------
247
// Scan forward from `p` to the CRLF that terminates the line.
//
// Printable characters, horizontal tab and octets with the high
// bit set are skipped. The scan stops at the first control
// character: an octet below 0x20 other than tab, or DEL (0x7F).
//
// @param p          Start of the input.
// @param last       End of the input.
// @param token_last On success, set to the position of the '\r',
//                   i.e. one past the last character of the line.
//                   Not modified otherwise.
// @param ec         Set to error::need_more if the input ends
//                   before the CRLF is complete, or to
//                   error::bad_line_ending if '\r' is not followed
//                   by '\n'. It is NOT cleared on success, so the
//                   caller must pass in a clear error code.
// @return           On success, the position just past the CRLF.
//                   `nullptr` (with `ec` untouched) if the control
//                   character found is not '\r'. When `ec` is set
//                   the returned pointer is not a parse position.
static
char const*
parse_token_to_eol(
    char const* p,
    char const* last,
    char const*& token_last,
    error_code& ec)
{
    for(;; ++p)
    {
        if(p >= last)
        {
            // ran out of input before reaching a control character
            ec = error::need_more;
            return p;
        }
        // is_print() covers 0x20..0x7E; of the remaining octets
        // only the controls (minus tab) and DEL end the scan,
        // so octets >= 0x80 and tab are stepped over.
        if(BOOST_UNLIKELY(! is_print(*p)))
            if((BOOST_LIKELY(static_cast<
                    unsigned char>(*p) < '\040') &&
                BOOST_LIKELY(*p != '\011')) ||
                BOOST_UNLIKELY(*p == '\177'))
                goto found_control;
    }
found_control:
    if(BOOST_LIKELY(*p == '\r'))
    {
        if(++p >= last)
        {
            // the '\r' is the last character available
            ec = error::need_more;
            return last;
        }
        if(*p++ != '\n')
        {
            ec = error::bad_line_ending;
            return last;
        }
        // p is now past "\r\n"; step back over both characters
        token_last = p - 2;
    }
#if 0
    // VFALCO This allows `\n` by itself
    //        to terminate a line
    else if(*p == '\n')
    {
        token_last = p;
        ++p;
    }
#endif
    else
    {
        // invalid character
        return nullptr;
    }
    return p;
}
301
// Parse the whole range [it, last) as an unsigned decimal number.
//
// @param it   Start of the digits.
// @param last End of the digits.
// @param v    Receives the parsed value. It is written only when
//             the function returns `true`.
// @return     `true` if the range is non-empty, consists solely
//             of the digits '0'..'9', and the value fits in
//             `Unsigned`; otherwise `false`.
template<class Iter, class Unsigned>
static
bool
parse_dec(Iter it, Iter last, Unsigned& v)
{
    // An empty range is not a number. Checking this first also
    // guarantees that `last` itself is never dereferenced.
    if(it == last)
        return false;
    auto const limit =
        (std::numeric_limits<Unsigned>::max)();
    Unsigned tmp = 0;
    do
    {
        // For a digit this is its value 0..9; any other character
        // wraps around to something larger than 9.
        auto const d =
            static_cast<unsigned char>(*it - '0');
        if(d > 9)
            return false;
        // Reject before multiplying so that the arithmetic below
        // can never wrap around.
        if(tmp > limit / 10)
            return false;
        tmp = static_cast<Unsigned>(tmp * 10);
        if(limit - tmp < d)
            return false;
        tmp = static_cast<Unsigned>(tmp + d);
    }
    while(++it != last);
    v = tmp;
    return true;
}
322
// Parse a run of hexadecimal digits into an unsigned value.
//
// There is no end iterator: the scan stops at the first character
// that unhex() rejects, so the caller must guarantee that such a
// character follows the digits.
//
// @param it On entry, the first character to examine. On a `true`
//           return it refers to the first non-hex character.
// @param v  Receives the value. It is updated digit by digit, so
//           its content is meaningful only on a `true` return.
// @return   `true` if at least one digit was read and the value
//           fits in `Unsigned`; `false` if the first character is
//           not a hex digit or the value would overflow.
template<class Iter, class Unsigned>
static
bool
parse_hex(Iter& it, Unsigned& v)
{
    unsigned char d;
    if(! unhex(d, *it))
        return false;
    auto const limit =
        (std::numeric_limits<Unsigned>::max)();
    v = d;
    for(;;)
    {
        if(! unhex(d, *++it))
            break;
        // Detect overflow before it happens. Comparing the result
        // with the previous value is not sufficient: 16 * v can
        // wrap around and still be larger than v.
        if(v > limit / 16)
            return false;
        v = static_cast<Unsigned>(v * 16);
        if(limit - v < d)
            return false;
        v = static_cast<Unsigned>(v + d);
    }
    return true;
}
343
// Consume a CRLF at `it`.
//
// The function has no end pointer: it reads `it[0]` and, when that
// is '\r', `it[1]`, so the caller must make sure both are readable.
//
// @param it Advanced by two when "\r\n" is present, otherwise
//           left unchanged.
// @return   `true` if "\r\n" was consumed.
static
bool
parse_crlf(char const*& it)
{
    bool const matched =
        it[0] == '\r' && it[1] == '\n';
    if(matched)
        it += 2;
    return matched;
}
353
354 static
355 void
356 parse_method(
357 char const*& it, char const* last,
358 string_view& result, error_code& ec)
359 {
360 // parse token SP
361 auto const first = it;
362 for(;; ++it)
363 {
364 if(it + 1 > last)
365 {
366 ec = error::need_more;
367 return;
368 }
369 if(! detail::is_token_char(*it))
370 break;
371 }
372 if(it + 1 > last)
373 {
374 ec = error::need_more;
375 return;
376 }
377 if(*it != ' ')
378 {
379 ec = error::bad_method;
380 return;
381 }
382 if(it == first)
383 {
384 // cannot be empty
385 ec = error::bad_method;
386 return;
387 }
388 result = make_string(first, it++);
389 }
390
391 static
392 void
393 parse_target(
394 char const*& it, char const* last,
395 string_view& result, error_code& ec)
396 {
397 // parse target SP
398 auto const first = it;
399 for(;; ++it)
400 {
401 if(it + 1 > last)
402 {
403 ec = error::need_more;
404 return;
405 }
406 if(! is_pathchar(*it))
407 break;
408 }
409 if(it + 1 > last)
410 {
411 ec = error::need_more;
412 return;
413 }
414 if(*it != ' ')
415 {
416 ec = error::bad_target;
417 return;
418 }
419 if(it == first)
420 {
421 // cannot be empty
422 ec = error::bad_target;
423 return;
424 }
425 result = make_string(first, it++);
426 }
427
428 static
429 void
430 parse_version(
431 char const*& it, char const* last,
432 int& result, error_code& ec)
433 {
434 if(it + 8 > last)
435 {
436 ec = error::need_more;
437 return;
438 }
439 if(*it++ != 'H')
440 {
441 ec = error::bad_version;
442 return;
443 }
444 if(*it++ != 'T')
445 {
446 ec = error::bad_version;
447 return;
448 }
449 if(*it++ != 'T')
450 {
451 ec = error::bad_version;
452 return;
453 }
454 if(*it++ != 'P')
455 {
456 ec = error::bad_version;
457 return;
458 }
459 if(*it++ != '/')
460 {
461 ec = error::bad_version;
462 return;
463 }
464 if(! is_digit(*it))
465 {
466 ec = error::bad_version;
467 return;
468 }
469 result = 10 * (*it++ - '0');
470 if(*it++ != '.')
471 {
472 ec = error::bad_version;
473 return;
474 }
475 if(! is_digit(*it))
476 {
477 ec = error::bad_version;
478 return;
479 }
480 result += *it++ - '0';
481 }
482
483 static
484 void
485 parse_status(
486 char const*& it, char const* last,
487 unsigned short& result, error_code& ec)
488 {
489 // parse 3(digit) SP
490 if(it + 4 > last)
491 {
492 ec = error::need_more;
493 return;
494 }
495 if(! is_digit(*it))
496 {
497 ec = error::bad_status;
498 return;
499 }
500 result = 100 * (*it++ - '0');
501 if(! is_digit(*it))
502 {
503 ec = error::bad_status;
504 return;
505 }
506 result += 10 * (*it++ - '0');
507 if(! is_digit(*it))
508 {
509 ec = error::bad_status;
510 return;
511 }
512 result += *it++ - '0';
513 if(*it++ != ' ')
514 {
515 ec = error::bad_status;
516 return;
517 }
518 }
519
520 void
521 parse_reason(
522 char const*& it, char const* last,
523 string_view& result, error_code& ec)
524 {
525 auto const first = it;
526 char const* token_last = nullptr;
527 auto p = parse_token_to_eol(
528 it, last, token_last, ec);
529 if(ec)
530 return;
531 if(! p)
532 {
533 ec = error::bad_reason;
534 return;
535 }
536 result = make_string(first, token_last);
537 it = p;
538 }
539
// Parse one header field: name, ':', optional whitespace, value,
// and the terminating CRLF, including obsolete line folding.
//
// A value that sits on a single line is returned as a view of the
// input. A value that continues on further lines (each starting
// with a space or tab) is assembled in `buf`, with the pieces
// joined by a single space, and returned as a view of `buf`.
//
// The character after the final CRLF must be available, because
// it decides whether the value continues on the next line.
//
// @param p     On entry, the start of the field. When a value is
//              returned, it refers to the character after the
//              CRLF that ends the field.
// @param last  End of the input.
// @param name  Receives the field name (a view of the input).
// @param value Receives the field value with leading and trailing
//              spaces and tabs removed. Refers to the input or to
//              the storage of `buf`, see above.
// @param buf   Scratch storage for unfolding; overwritten when the
//              value is folded.
// @param ec    Set to error::need_more when the input ends early,
//              error::bad_field for an empty or invalid name,
//              error::bad_value for an invalid character in the
//              value, error::header_limit when std::length_error
//              is caught while filling `buf`, or any error
//              reported by parse_token_to_eol(). Not modified on
//              success.
template<std::size_t N>
void
parse_field(
    char const*& p,
    char const* last,
    string_view& name,
    string_view& value,
    static_string<N>& buf,
    error_code& ec)
{
/*  header-field    = field-name ":" OWS field-value OWS

    field-name      = token
    field-value     = *( field-content / obs-fold )
    field-content   = field-vchar [ 1*( SP / HTAB ) field-vchar ]
    field-vchar     = VCHAR / obs-text

    obs-fold        = CRLF 1*( SP / HTAB )
                    ; obsolete line folding
                    ; see Section 3.2.4

    token           = 1*<any CHAR except CTLs or separators>
    CHAR            = <any US-ASCII character (octets 0 - 127)>
    sep             = "(" | ")" | "<" | ">" | "@"
                    | "," | ";" | ":" | "\" | <">
                    | "/" | "[" | "]" | "?" | "="
                    | "{" | "}" | SP | HT
*/
    // Lookup table indexed by octet: 1 for characters allowed in
    // a field name, 0 for everything else (32 entries per line).
    static char const* is_token =
        "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
        "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
        "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
        "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
        "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
        "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
        "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
        "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";

    // name
    // Pairs of inclusive range bounds handed to find_fast().
    BOOST_ALIGNMENT(16) static const char ranges1[] =
        "\x00 "  /* control chars and up to SP */
        "\"\""   /* 0x22 */
        "()"     /* 0x28,0x29 */
        ",,"     /* 0x2c */
        "//"     /* 0x2f */
        ":@"     /* 0x3a-0x40 */
        "[]"     /* 0x5b-0x5d */
        "{\377"; /* 0x7b-0xff */
    auto first = p;
    bool found;
    // find_fast() as defined in this class always returns
    // {p, false}, so the loop below performs the whole scan.
    std::tie(p, found) = find_fast(
        p, last, ranges1, sizeof(ranges1)-1);
    if(! found && p >= last)
    {
        ec = error::need_more;
        return;
    }
    // advance to the ':' that ends the name, validating each char
    for(;;)
    {
        if(*p == ':')
            break;
        if(! is_token[static_cast<
            unsigned char>(*p)])
        {
            ec = error::bad_field;
            return;
        }
        ++p;
        if(p >= last)
        {
            ec = error::need_more;
            return;
        }
    }
    if(p == first)
    {
        // empty name
        ec = error::bad_field;
        return;
    }
    name = make_string(first, p);
    ++p; // eat ':'
    char const* token_last = nullptr;
    // First pass: read lines until one yields a non-empty value.
    // If the field ends on that line, the value is returned as a
    // view of the input and `buf` is never touched.
    for(;;)
    {
        // eat leading ' ' and '\t'
        for(;;++p)
        {
            if(p + 1 > last)
            {
                ec = error::need_more;
                return;
            }
            if(! (*p == ' ' || *p == '\t'))
                break;
        }
        // parse to CRLF
        first = p;
        p = parse_token_to_eol(p, last, token_last, ec);
        if(ec)
            return;
        if(! p)
        {
            ec = error::bad_value;
            return;
        }
        // Look 1 char past the CRLF to handle obs-fold.
        if(p + 1 > last)
        {
            ec = error::need_more;
            return;
        }
        token_last =
            trim_back(token_last, first);
        if(*p != ' ' && *p != '\t')
        {
            // next line does not continue the value: done
            value = make_string(first, token_last);
            return;
        }
        ++p;
        // The value is folded. If this line contributed text,
        // switch to the buffered path; if it was empty, read the
        // continuation line as though it were the first.
        if(token_last != first)
            break;
    }
    // Second pass: the value spans several lines, so collect the
    // pieces in `buf`, starting with the text already found.
    buf.resize(0);
    buf.append(first, token_last);
    BOOST_ASSERT(! buf.empty());
    // The handler below catches std::length_error and reports it
    // as error::header_limit.
    // NOTE(review): presumably append()/push_back() throw it when
    // the capacity N of `buf` would be exceeded -- confirm against
    // static_string.
    try
    {
        for(;;)
        {
            // eat leading ' ' and '\t'
            for(;;++p)
            {
                if(p + 1 > last)
                {
                    ec = error::need_more;
                    return;
                }
                if(! (*p == ' ' || *p == '\t'))
                    break;
            }
            // parse to CRLF
            first = p;
            p = parse_token_to_eol(p, last, token_last, ec);
            if(ec)
                return;
            if(! p)
            {
                ec = error::bad_value;
                return;
            }
            // Look 1 char past the CRLF to handle obs-fold.
            if(p + 1 > last)
            {
                ec = error::need_more;
                return;
            }
            token_last = trim_back(token_last, first);
            if(first != token_last)
            {
                // join non-empty pieces with a single space
                buf.push_back(' ');
                buf.append(first, token_last);
            }
            if(*p != ' ' && *p != '\t')
            {
                // no further continuation: the value is whatever
                // has been collected, and it refers to `buf`
                value = {buf.data(), buf.size()};
                return;
            }
            ++p;
        }
    }
    catch(std::length_error const&)
    {
        ec = error::header_limit;
        return;
    }
}
717
// Validate and skip over the chunk extensions that follow a chunk
// size.
//
// Nothing is extracted; the function only checks the syntax and
// moves `it` forward.
//
// @param it   On entry, the first character after the chunk size.
//             On return without error it refers to the first
//             character that is not part of an extension.
// @param last End of the input.
// @param ec   Set to error::need_more when the input ends inside
//             an extension, or to error::bad_chunk_extension on a
//             syntax error. Not modified on success, so the caller
//             must pass in a clear error code.
void
parse_chunk_extensions(
    char const*& it,
    char const* last,
    error_code& ec)
{
/*
    chunk-ext       = *( BWS  ";" BWS chunk-ext-name [ BWS  "=" BWS chunk-ext-val ] )
    BWS             = *( SP / HTAB ) ; "Bad White Space"
    chunk-ext-name  = token
    chunk-ext-val   = token / quoted-string
    token           = 1*tchar
    quoted-string   = DQUOTE *( qdtext / quoted-pair ) DQUOTE
    qdtext          = HTAB / SP / "!" / %x23-5B ; '#'-'[' / %x5D-7E ; ']'-'~' / obs-text
    quoted-pair     = "\" ( HTAB / SP / VCHAR / obs-text )
    obs-text        = %x80-FF

    https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4667
*/
// Start of one extension; re-entered after each complete
// name[=value] pair.
loop:
    if(it == last)
    {
        ec = error::need_more;
        return;
    }
    // anything other than whitespace or ';' ends the extensions
    if(*it != ' ' && *it != '\t' && *it != ';')
        return;
    // BWS
    if(*it == ' ' || *it == '\t')
    {
        for(;;)
        {
            ++it;
            if(it == last)
            {
                ec = error::need_more;
                return;
            }
            if(*it != ' ' && *it != '\t')
                break;
        }
    }
    // ';'
    if(*it != ';')
    {
        // whitespace must be followed by the ';' of an extension
        ec = error::bad_chunk_extension;
        return;
    }
// Entered with `it` on a ';', either from above or directly after
// a name that has no value.
semi:
    ++it; // skip ';'
    // BWS
    for(;;)
    {
        if(it == last)
        {
            ec = error::need_more;
            return;
        }
        if(*it != ' ' && *it != '\t')
            break;
        ++it;
    }
    // chunk-ext-name
    if(! detail::is_token_char(*it))
    {
        // the name needs at least one token character
        ec = error::bad_chunk_extension;
        return;
    }
    for(;;)
    {
        ++it;
        if(it == last)
        {
            ec = error::need_more;
            return;
        }
        if(! detail::is_token_char(*it))
            break;
    }
    // BWS [ ";" / "=" ]
    {
        // remembers whether whitespace followed the name
        bool bws;
        if(*it == ' ' || *it == '\t')
        {
            for(;;)
            {
                ++it;
                if(it == last)
                {
                    ec = error::need_more;
                    return;
                }
                if(*it != ' ' && *it != '\t')
                    break;
            }
            bws = true;
        }
        else
        {
            bws = false;
        }
        // a ';' here starts the next extension
        if(*it == ';')
            goto semi;
        if(*it != '=')
        {
            // The name has no value and no extension follows.
            // That is the normal end of the extensions unless
            // whitespace was consumed after the name.
            if(bws)
                ec = error::bad_chunk_extension;
            return;
        }
        ++it; // skip '='
    }
    // BWS
    for(;;)
    {
        if(it == last)
        {
            ec = error::need_more;
            return;
        }
        if(*it != ' ' && *it != '\t')
            break;
        ++it;
    }
    // chunk-ext-val
    if(*it != '"')
    {
        // token
        if(! detail::is_token_char(*it))
        {
            ec = error::bad_chunk_extension;
            return;
        }
        for(;;)
        {
            ++it;
            if(it == last)
            {
                ec = error::need_more;
                return;
            }
            if(! detail::is_token_char(*it))
                break;
        }
    }
    else
    {
        // quoted-string
        // The contents are not validated: every character up to
        // the closing quote is skipped.
        for(;;)
        {
            ++it;
            if(it == last)
            {
                ec = error::need_more;
                return;
            }
            if(*it == '"')
                break;
            if(*it == '\\')
            {
                // quoted-pair: step onto the escaped character so
                // that an escaped '"' does not close the string
                ++it;
                if(it == last)
                {
                    ec = error::need_more;
                    return;
                }
            }
        }
        // skip the closing quote
        ++it;
    }
    goto loop;
}
889 };
890
891 } // detail
892 } // http
893 } // beast
894 } // boost
895
896 #endif