]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/boost/beast/http/detail/basic_parser.ipp
import new upstream nautilus stable release 14.2.8
[ceph.git] / ceph / src / boost / boost / beast / http / detail / basic_parser.ipp
1 //
2 // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 //
7 // Official repository: https://github.com/boostorg/beast
8 //
9
10 #ifndef BOOST_BEAST_HTTP_DETAIL_BASIC_PARSER_IPP
11 #define BOOST_BEAST_HTTP_DETAIL_BASIC_PARSER_IPP
12
13 #include <boost/beast/http/detail/basic_parser.hpp>
14 #include <limits>
15
16 namespace boost {
17 namespace beast {
18 namespace http {
19 namespace detail {
20
21 char const*
22 basic_parser_base::
23 trim_front(char const* it, char const* end)
24 {
25 while(it != end)
26 {
27 if(*it != ' ' && *it != '\t')
28 break;
29 ++it;
30 }
31 return it;
32 }
33
34 char const*
35 basic_parser_base::
36 trim_back(
37 char const* it, char const* first)
38 {
39 while(it != first)
40 {
41 auto const c = it[-1];
42 if(c != ' ' && c != '\t')
43 break;
44 --it;
45 }
46 return it;
47 }
48
49 bool
50 basic_parser_base::
51 is_pathchar(char c)
52 {
53 // VFALCO This looks the same as the one below...
54
55 // TEXT = <any OCTET except CTLs, and excluding LWS>
56 static bool constexpr tab[256] = {
57 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0
58 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16
59 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 32
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 48
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 64
62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80
63 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 96
64 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // 112
65 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 128
66 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 144
67 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 160
68 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 176
69 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 192
70 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 208
71 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 224
72 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 240
73 };
74 return tab[static_cast<unsigned char>(c)];
75 }
76
77 bool
78 basic_parser_base::
79 unhex(unsigned char& d, char c)
80 {
81 static signed char constexpr tab[256] = {
82 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 0
83 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 16
84 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 32
85 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1, // 48
86 -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 64
87 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 80
88 -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 96
89 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 112
90
91 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 128
92 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 144
93 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 160
94 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 176
95 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 192
96 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 208
97 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 224
98 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 240
99 };
100 d = static_cast<unsigned char>(
101 tab[static_cast<unsigned char>(c)]);
102 return d != static_cast<unsigned char>(-1);
103 }
104
105 //--------------------------------------------------------------------------
106
107 std::pair<char const*, bool>
108 basic_parser_base::
109 find_fast(
110 char const* buf,
111 char const* buf_end,
112 char const* ranges,
113 size_t ranges_size)
114 {
115 bool found = false;
116 boost::ignore_unused(buf_end, ranges, ranges_size);
117 return {buf, found};
118 }
119
120 // VFALCO Can SIMD help this?
121 char const*
122 basic_parser_base::
123 find_eol(
124 char const* it, char const* last,
125 error_code& ec)
126 {
127 for(;;)
128 {
129 if(it == last)
130 {
131 ec = {};
132 return nullptr;
133 }
134 if(*it == '\r')
135 {
136 if(++it == last)
137 {
138 ec = {};
139 return nullptr;
140 }
141 if(*it != '\n')
142 {
143 ec = error::bad_line_ending;
144 return nullptr;
145 }
146 ec = {};
147 return ++it;
148 }
149 // VFALCO Should we handle the legacy case
150 // for lines terminated with a single '\n'?
151 ++it;
152 }
153 }
154
155 bool
156 basic_parser_base::
157 parse_dec(
158 string_view s,
159 std::uint64_t& v)
160 {
161 char const* it = s.data();
162 char const* last = it + s.size();
163 if(it == last)
164 return false;
165 std::uint64_t tmp = 0;
166 do
167 {
168 if((! is_digit(*it)) ||
169 tmp > (std::numeric_limits<std::uint64_t>::max)() / 10)
170 return false;
171 tmp *= 10;
172 std::uint64_t const d = *it - '0';
173 if((std::numeric_limits<std::uint64_t>::max)() - tmp < d)
174 return false;
175 tmp += d;
176 }
177 while(++it != last);
178 v = tmp;
179 return true;
180 }
181
182 bool
183 basic_parser_base::
184 parse_hex(char const*& it, std::uint64_t& v)
185 {
186 unsigned char d;
187 if(! unhex(d, *it))
188 return false;
189 std::uint64_t tmp = 0;
190 do
191 {
192 if(tmp > (std::numeric_limits<std::uint64_t>::max)() / 16)
193 return false;
194 tmp *= 16;
195 if((std::numeric_limits<std::uint64_t>::max)() - tmp < d)
196 return false;
197 tmp += d;
198 }
199 while(unhex(d, *++it));
200 v = tmp;
201 return true;
202 }
203
204 char const*
205 basic_parser_base::
206 find_eom(char const* p, char const* last)
207 {
208 for(;;)
209 {
210 if(p + 4 > last)
211 return nullptr;
212 if(p[3] != '\n')
213 {
214 if(p[3] == '\r')
215 ++p;
216 else
217 p += 4;
218 }
219 else if(p[2] != '\r')
220 {
221 p += 4;
222 }
223 else if(p[1] != '\n')
224 {
225 p += 2;
226 }
227 else if(p[0] != '\r')
228 {
229 p += 2;
230 }
231 else
232 {
233 return p + 4;
234 }
235 }
236 }
237
238 //--------------------------------------------------------------------------
239
240 char const*
241 basic_parser_base::
242 parse_token_to_eol(
243 char const* p,
244 char const* last,
245 char const*& token_last,
246 error_code& ec)
247 {
248 for(;; ++p)
249 {
250 if(p >= last)
251 {
252 ec = error::need_more;
253 return p;
254 }
255 if(BOOST_UNLIKELY(! is_print(*p)))
256 if((BOOST_LIKELY(static_cast<
257 unsigned char>(*p) < '\040') &&
258 BOOST_LIKELY(*p != 9)) ||
259 BOOST_UNLIKELY(*p == 127))
260 goto found_control;
261 }
262 found_control:
263 if(BOOST_LIKELY(*p == '\r'))
264 {
265 if(++p >= last)
266 {
267 ec = error::need_more;
268 return last;
269 }
270 if(*p++ != '\n')
271 {
272 ec = error::bad_line_ending;
273 return last;
274 }
275 token_last = p - 2;
276 }
277 #if 0
278 // VFALCO This allows `\n` by itself
279 // to terminate a line
280 else if(*p == '\n')
281 {
282 token_last = p;
283 ++p;
284 }
285 #endif
286 else
287 {
288 // invalid character
289 return nullptr;
290 }
291 return p;
292 }
293
294 bool
295 basic_parser_base::
296 parse_crlf(char const*& it)
297 {
298 if( it[0] != '\r' || it[1] != '\n')
299 return false;
300 it += 2;
301 return true;
302 }
303
304 void
305 basic_parser_base::
306 parse_method(
307 char const*& it, char const* last,
308 string_view& result, error_code& ec)
309 {
310 // parse token SP
311 auto const first = it;
312 for(;; ++it)
313 {
314 if(it + 1 > last)
315 {
316 ec = error::need_more;
317 return;
318 }
319 if(! detail::is_token_char(*it))
320 break;
321 }
322 if(it + 1 > last)
323 {
324 ec = error::need_more;
325 return;
326 }
327 if(*it != ' ')
328 {
329 ec = error::bad_method;
330 return;
331 }
332 if(it == first)
333 {
334 // cannot be empty
335 ec = error::bad_method;
336 return;
337 }
338 result = make_string(first, it++);
339 }
340
341 void
342 basic_parser_base::
343 parse_target(
344 char const*& it, char const* last,
345 string_view& result, error_code& ec)
346 {
347 // parse target SP
348 auto const first = it;
349 for(;; ++it)
350 {
351 if(it + 1 > last)
352 {
353 ec = error::need_more;
354 return;
355 }
356 if(! is_pathchar(*it))
357 break;
358 }
359 if(it + 1 > last)
360 {
361 ec = error::need_more;
362 return;
363 }
364 if(*it != ' ')
365 {
366 ec = error::bad_target;
367 return;
368 }
369 if(it == first)
370 {
371 // cannot be empty
372 ec = error::bad_target;
373 return;
374 }
375 result = make_string(first, it++);
376 }
377
378 void
379 basic_parser_base::
380 parse_version(
381 char const*& it, char const* last,
382 int& result, error_code& ec)
383 {
384 if(it + 8 > last)
385 {
386 ec = error::need_more;
387 return;
388 }
389 if(*it++ != 'H')
390 {
391 ec = error::bad_version;
392 return;
393 }
394 if(*it++ != 'T')
395 {
396 ec = error::bad_version;
397 return;
398 }
399 if(*it++ != 'T')
400 {
401 ec = error::bad_version;
402 return;
403 }
404 if(*it++ != 'P')
405 {
406 ec = error::bad_version;
407 return;
408 }
409 if(*it++ != '/')
410 {
411 ec = error::bad_version;
412 return;
413 }
414 if(! is_digit(*it))
415 {
416 ec = error::bad_version;
417 return;
418 }
419 result = 10 * (*it++ - '0');
420 if(*it++ != '.')
421 {
422 ec = error::bad_version;
423 return;
424 }
425 if(! is_digit(*it))
426 {
427 ec = error::bad_version;
428 return;
429 }
430 result += *it++ - '0';
431 }
432
433 void
434 basic_parser_base::
435 parse_status(
436 char const*& it, char const* last,
437 unsigned short& result, error_code& ec)
438 {
439 // parse 3(digit) SP
440 if(it + 4 > last)
441 {
442 ec = error::need_more;
443 return;
444 }
445 if(! is_digit(*it))
446 {
447 ec = error::bad_status;
448 return;
449 }
450 result = 100 * (*it++ - '0');
451 if(! is_digit(*it))
452 {
453 ec = error::bad_status;
454 return;
455 }
456 result += 10 * (*it++ - '0');
457 if(! is_digit(*it))
458 {
459 ec = error::bad_status;
460 return;
461 }
462 result += *it++ - '0';
463 if(*it++ != ' ')
464 {
465 ec = error::bad_status;
466 return;
467 }
468 }
469
470 void
471 basic_parser_base::
472 parse_reason(
473 char const*& it, char const* last,
474 string_view& result, error_code& ec)
475 {
476 auto const first = it;
477 char const* token_last = nullptr;
478 auto p = parse_token_to_eol(
479 it, last, token_last, ec);
480 if(ec)
481 return;
482 if(! p)
483 {
484 ec = error::bad_reason;
485 return;
486 }
487 result = make_string(first, token_last);
488 it = p;
489 }
490
491 void
492 basic_parser_base::
493 parse_field(
494 char const*& p,
495 char const* last,
496 string_view& name,
497 string_view& value,
498 beast::detail::char_buffer<max_obs_fold>& buf,
499 error_code& ec)
500 {
501 /* header-field = field-name ":" OWS field-value OWS
502
503 field-name = token
504 field-value = *( field-content / obs-fold )
505 field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
506 field-vchar = VCHAR / obs-text
507
508 obs-fold = CRLF 1*( SP / HTAB )
509 ; obsolete line folding
510 ; see Section 3.2.4
511
512 token = 1*<any CHAR except CTLs or separators>
513 CHAR = <any US-ASCII character (octets 0 - 127)>
514 sep = "(" | ")" | "<" | ">" | "@"
515 | "," | ";" | ":" | "\" | <">
516 | "/" | "[" | "]" | "?" | "="
517 | "{" | "}" | SP | HT
518 */
519 static char const* is_token =
520 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
521 "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
522 "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
523 "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
524 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
525 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
526 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
527 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
528
529 // name
530 BOOST_ALIGNMENT(16) static const char ranges1[] =
531 "\x00 " /* control chars and up to SP */
532 "\"\"" /* 0x22 */
533 "()" /* 0x28,0x29 */
534 ",," /* 0x2c */
535 "//" /* 0x2f */
536 ":@" /* 0x3a-0x40 */
537 "[]" /* 0x5b-0x5d */
538 "{\377"; /* 0x7b-0xff */
539 auto first = p;
540 bool found;
541 std::tie(p, found) = find_fast(
542 p, last, ranges1, sizeof(ranges1)-1);
543 if(! found && p >= last)
544 {
545 ec = error::need_more;
546 return;
547 }
548 for(;;)
549 {
550 if(*p == ':')
551 break;
552 if(! is_token[static_cast<
553 unsigned char>(*p)])
554 {
555 ec = error::bad_field;
556 return;
557 }
558 ++p;
559 if(p >= last)
560 {
561 ec = error::need_more;
562 return;
563 }
564 }
565 if(p == first)
566 {
567 // empty name
568 ec = error::bad_field;
569 return;
570 }
571 name = make_string(first, p);
572 ++p; // eat ':'
573 char const* token_last = nullptr;
574 for(;;)
575 {
576 // eat leading ' ' and '\t'
577 for(;;++p)
578 {
579 if(p + 1 > last)
580 {
581 ec = error::need_more;
582 return;
583 }
584 if(! (*p == ' ' || *p == '\t'))
585 break;
586 }
587 // parse to CRLF
588 first = p;
589 p = parse_token_to_eol(p, last, token_last, ec);
590 if(ec)
591 return;
592 if(! p)
593 {
594 ec = error::bad_value;
595 return;
596 }
597 // Look 1 char past the CRLF to handle obs-fold.
598 if(p + 1 > last)
599 {
600 ec = error::need_more;
601 return;
602 }
603 token_last =
604 trim_back(token_last, first);
605 if(*p != ' ' && *p != '\t')
606 {
607 value = make_string(first, token_last);
608 return;
609 }
610 ++p;
611 if(token_last != first)
612 break;
613 }
614 buf.clear();
615 if (!buf.try_append(first, token_last))
616 {
617 ec = error::header_limit;
618 return;
619 }
620
621 BOOST_ASSERT(! buf.empty());
622 for(;;)
623 {
624 // eat leading ' ' and '\t'
625 for(;;++p)
626 {
627 if(p + 1 > last)
628 {
629 ec = error::need_more;
630 return;
631 }
632 if(! (*p == ' ' || *p == '\t'))
633 break;
634 }
635 // parse to CRLF
636 first = p;
637 p = parse_token_to_eol(p, last, token_last, ec);
638 if(ec)
639 return;
640 if(! p)
641 {
642 ec = error::bad_value;
643 return;
644 }
645 // Look 1 char past the CRLF to handle obs-fold.
646 if(p + 1 > last)
647 {
648 ec = error::need_more;
649 return;
650 }
651 token_last = trim_back(token_last, first);
652 if(first != token_last)
653 {
654 if (!buf.try_push_back(' ') ||
655 !buf.try_append(first, token_last))
656 {
657 ec = error::header_limit;
658 return;
659 }
660 }
661 if(*p != ' ' && *p != '\t')
662 {
663 value = {buf.data(), buf.size()};
664 return;
665 }
666 ++p;
667 }
668 }
669
670
671 void
672 basic_parser_base::
673 parse_chunk_extensions(
674 char const*& it,
675 char const* last,
676 error_code& ec)
677 {
678 /*
679 chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
680 BWS = *( SP / HTAB ) ; "Bad White Space"
681 chunk-ext-name = token
682 chunk-ext-val = token / quoted-string
683 token = 1*tchar
684 quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
685 qdtext = HTAB / SP / "!" / %x23-5B ; '#'-'[' / %x5D-7E ; ']'-'~' / obs-text
686 quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
687 obs-text = %x80-FF
688
689 https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4667
690 */
691 loop:
692 if(it == last)
693 {
694 ec = error::need_more;
695 return;
696 }
697 if(*it != ' ' && *it != '\t' && *it != ';')
698 return;
699 // BWS
700 if(*it == ' ' || *it == '\t')
701 {
702 for(;;)
703 {
704 ++it;
705 if(it == last)
706 {
707 ec = error::need_more;
708 return;
709 }
710 if(*it != ' ' && *it != '\t')
711 break;
712 }
713 }
714 // ';'
715 if(*it != ';')
716 {
717 ec = error::bad_chunk_extension;
718 return;
719 }
720 semi:
721 ++it; // skip ';'
722 // BWS
723 for(;;)
724 {
725 if(it == last)
726 {
727 ec = error::need_more;
728 return;
729 }
730 if(*it != ' ' && *it != '\t')
731 break;
732 ++it;
733 }
734 // chunk-ext-name
735 if(! detail::is_token_char(*it))
736 {
737 ec = error::bad_chunk_extension;
738 return;
739 }
740 for(;;)
741 {
742 ++it;
743 if(it == last)
744 {
745 ec = error::need_more;
746 return;
747 }
748 if(! detail::is_token_char(*it))
749 break;
750 }
751 // BWS [ ";" / "=" ]
752 {
753 bool bws;
754 if(*it == ' ' || *it == '\t')
755 {
756 for(;;)
757 {
758 ++it;
759 if(it == last)
760 {
761 ec = error::need_more;
762 return;
763 }
764 if(*it != ' ' && *it != '\t')
765 break;
766 }
767 bws = true;
768 }
769 else
770 {
771 bws = false;
772 }
773 if(*it == ';')
774 goto semi;
775 if(*it != '=')
776 {
777 if(bws)
778 ec = error::bad_chunk_extension;
779 return;
780 }
781 ++it; // skip '='
782 }
783 // BWS
784 for(;;)
785 {
786 if(it == last)
787 {
788 ec = error::need_more;
789 return;
790 }
791 if(*it != ' ' && *it != '\t')
792 break;
793 ++it;
794 }
795 // chunk-ext-val
796 if(*it != '"')
797 {
798 // token
799 if(! detail::is_token_char(*it))
800 {
801 ec = error::bad_chunk_extension;
802 return;
803 }
804 for(;;)
805 {
806 ++it;
807 if(it == last)
808 {
809 ec = error::need_more;
810 return;
811 }
812 if(! detail::is_token_char(*it))
813 break;
814 }
815 }
816 else
817 {
818 // quoted-string
819 for(;;)
820 {
821 ++it;
822 if(it == last)
823 {
824 ec = error::need_more;
825 return;
826 }
827 if(*it == '"')
828 break;
829 if(*it == '\\')
830 {
831 ++it;
832 if(it == last)
833 {
834 ec = error::need_more;
835 return;
836 }
837 }
838 }
839 ++it;
840 }
841 goto loop;
842 }
843
844 } // detail
845 } // http
846 } // beast
847 } // boost
848
849 #endif