2 // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 // Official repository: https://github.com/boostorg/beast
10 #ifndef BOOST_BEAST_HTTP_DETAIL_BASIC_PARSER_IPP
11 #define BOOST_BEAST_HTTP_DETAIL_BASIC_PARSER_IPP
13 #include <boost/beast/http/detail/basic_parser.hpp>
// trim_front(it, end): takes a position `it` and a limit `end` into a
// character buffer. The one statement visible here tests the character at
// `it` against the two whitespace characters SP and HTAB.
// NOTE(review): presumably this advances past leading SP/HTAB and returns the
// first non-whitespace position; the loop and the return are not visible in
// this excerpt - confirm.
23 trim_front(char const* it, char const* end)
// True when *it is neither a space nor a horizontal tab.
27 if(*it != ' ' && *it != '\t')
// Parameter list taking a position `it` and a lower bound `first`.
// NOTE(review): presumably belongs to trim_back, which is called elsewhere in
// this file as trim_back(token_last, first) - the function name line is not
// visible in this excerpt, confirm.
37 char const* it, char const* first)
// Inspect the character immediately before `it` (not the one at `it`).
// NOTE(review): reading it[-1] needs `it` to be past `first`; the guard for
// that is not visible here.
41 auto const c = it[-1];
// True when that preceding character is neither a space nor a horizontal tab.
42 if(c != ' ' && c != '\t')
// Character-class lookup by table.
// NOTE(review): presumably the body of is_pathchar (called elsewhere in this
// file as is_pathchar(*it)); the signature is not visible in this excerpt.
53 // VFALCO This looks the same as the one below...
55 // TEXT = <any OCTET except CTLs, and excluding LWS>
// 256-entry table indexed by octet value. Entries are 0 for octets 0-32
// (control characters and SP) and for 127 (DEL); every other octet,
// 33-126 and 128-255, is 1. The trailing comment on each row is the octet
// value of that row's first entry.
56 static bool constexpr tab[256] = {
57 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0
58 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16
59 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 32
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 48
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 64
62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80
63 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 96
64 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // 112
65 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 128
66 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 144
67 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 160
68 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 176
69 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 192
70 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 208
71 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 224
72 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 240
// Index by the unsigned-char value of `c` so the index is never negative.
74 return tab[static_cast<unsigned char>(c)];
// unhex(d, c): looks `c` up as a hexadecimal digit and stores the result in
// the output parameter `d`.
//   d      receives 0-15 for '0'-'9', 'A'-'F' and 'a'-'f'; for any other
//          octet it receives the table's -1 converted to unsigned char.
//   return the visible return expression is true exactly when `c` was a
//          hexadecimal digit.
79 unhex(unsigned char& d, char c)
// 256-entry table indexed by octet value: the digit's numeric value, or -1
// for octets that are not hexadecimal digits. The trailing comment on each
// row is the octet value of that row's first entry.
81 static signed char constexpr tab[256] = {
82 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 0
83 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 16
84 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 32
85 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1, // 48
86 -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 64
87 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 80
88 -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 96
89 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 112
91 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 128
92 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 144
93 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 160
94 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 176
95 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 192
96 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 208
97 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 224
98 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 240
// Index by the unsigned-char value of `c` so the index is never negative,
// then narrow the signed table entry to unsigned char for storage in `d`.
100 d = static_cast<unsigned char>(
101 tab[static_cast<unsigned char>(c)]);
// -1 converted to unsigned char is the "not a hex digit" sentinel; valid
// digit values (0-15) never equal it.
102 return d != static_cast<unsigned char>(-1);
105 //--------------------------------------------------------------------------
// Return type: a (pointer, flag) pair.
// NOTE(review): presumably the definition of find_fast, which is called
// elsewhere in this file as find_fast(p, last, ranges1, size) with the result
// unpacked into (p, found); the name line is not visible here - confirm.
107 std::pair<char const*, bool>
// Marks buf_end, ranges and ranges_size as intentionally unused, which
// suggests this implementation does not scan the buffer against the ranges.
116 boost::ignore_unused(buf_end, ranges, ranges_size);
// Line-ending scan over [it, last) that reports failures through `ec`.
// NOTE(review): the function name and the scanning loop are not visible in
// this excerpt; only the statements below are documented.
120 // VFALCO Can SIMD help this?
// `it` is the current position, `last` the end of the available input.
124 char const* it, char const* last,
// Report a malformed line terminator through the error code.
// NOTE(review): the condition that selects this error is not visible here.
143 ec = error::bad_line_ending;
149 // VFALCO Should we handle the legacy case
150 // for lines terminated with a single '\n'?
// Decimal accumulation over the characters of `s` into a 64-bit unsigned
// value, with explicit overflow checks.
// NOTE(review): the signature, the loop, the multiply/add steps and the
// statements taken when a check fails are not visible in this excerpt.
// Iterate over the half-open character range [s.data(), s.data() + s.size()).
161 char const* it = s.data();
162 char const* last = it + s.size();
// Running value of the digits consumed so far.
165 std::uint64_t tmp = 0;
// True for a non-digit character, or when tmp is already too large to be
// multiplied by 10 without exceeding the uint64 maximum. The parentheses
// around `max` keep a function-like macro of that name from expanding.
168 if((! is_digit(*it)) ||
169 tmp > (std::numeric_limits<std::uint64_t>::max)() / 10)
// Numeric value of the current digit character.
172 std::uint64_t const d = *it - '0';
// True when adding `d` to tmp would exceed the uint64 maximum.
173 if((std::numeric_limits<std::uint64_t>::max)() - tmp < d)
// parse_hex(it, v): hexadecimal counterpart of the decimal accumulation
// above. `it` is taken by reference and is advanced as digits are consumed;
// `v` is the output parameter.
// NOTE(review): the first-digit check, the multiply/add steps, the store to
// `v` and the return statements are not visible in this excerpt.
184 parse_hex(char const*& it, std::uint64_t& v)
// Running value of the digits consumed so far.
189 std::uint64_t tmp = 0;
// True when tmp is too large to be multiplied by 16 without overflow.
192 if(tmp > (std::numeric_limits<std::uint64_t>::max)() / 16)
// True when adding the digit value `d` to tmp would overflow.
195 if((std::numeric_limits<std::uint64_t>::max)() - tmp < d)
// Advance `it` first, then decode the character it now points at into `d`;
// the loop repeats while that character is a hexadecimal digit, so `it`
// ends on the first non-hex character.
// NOTE(review): no end pointer is passed in, so the caller presumably
// guarantees a non-hex character follows the digits - confirm.
199 while(unhex(d, *++it));
// find_eom(p, last): searches the range starting at `p` and bounded by `last`.
// The visible tests compare p[2], p[1] and p[0] against '\r', '\n', '\r'.
// NOTE(review): presumably this locates the CRLF CRLF sequence that ends an
// HTTP message header; the loop, the first test of the chain (likely on
// p[3]) and the return values are not visible in this excerpt - confirm.
206 find_eom(char const* p, char const* last)
// Third character of the candidate four-byte window is not CR.
219 else if(p[2] != '\r')
// Second character of the window is not LF.
223 else if(p[1] != '\n')
// First character of the window is not CR.
227 else if(p[0] != '\r')
238 //--------------------------------------------------------------------------
// Scan of printable text up to a line ending.
// NOTE(review): presumably parse_token_to_eol, which is called elsewhere in
// this file as parse_token_to_eol(p, last, token_last, ec); the name line and
// the loop are not visible in this excerpt - confirm.
// `token_last` is passed by reference, so it is an output of this function.
245 char const*& token_last,
// Signal that more input is required.
// NOTE(review): the guarding condition is not visible here.
252 ec = error::need_more;
// Branch hinted as unlikely: the current character is not printable.
255 if(BOOST_UNLIKELY(! is_print(*p)))
// True for an octet below '\040' (octal 40 = 32, SP) other than 9 (HTAB),
// or for 127 (DEL); that is, a control character other than horizontal tab.
256 if((BOOST_LIKELY(static_cast<
257 unsigned char>(*p) < '\040') &&
258 BOOST_LIKELY(*p != 9)) ||
259 BOOST_UNLIKELY(*p == 127))
// Among those control characters, CR is hinted as the expected one.
263 if(BOOST_LIKELY(*p == '\r'))
267 ec = error::need_more;
// Report a malformed line terminator.
// NOTE(review): the condition that selects this error is not visible here.
272 ec = error::bad_line_ending;
278 // VFALCO This allows `\n` by itself
279 // to terminate a line
// parse_crlf(it): checks the two characters at `it` for the sequence CR LF.
// `it` is taken by reference.
// NOTE(review): presumably it is advanced past the pair on success; that
// statement and the return values are not visible in this excerpt.
296 parse_crlf(char const*& it)
// True when the pair is anything other than "\r\n". Because of
// short-circuit evaluation it[1] is read only when it[0] is CR. No bounds
// check is visible here, so the caller presumably guarantees two readable
// characters - confirm.
298 if( it[0] != '\r' || it[1] != '\n')
// Parses a run of token characters starting at `it` and returns it as a
// string view in `result`; failures are reported through `ec` as need_more
// or bad_method.
// NOTE(review): presumably parse_method (judging by error::bad_method); the
// name line, the loop and the guarding conditions are not visible here.
307 char const*& it, char const* last,
308 string_view& result, error_code& ec)
// Remember where the token starts.
311 auto const first = it;
316 ec = error::need_more;
// True at the first character that is not a valid token character.
319 if(! detail::is_token_char(*it))
324 ec = error::need_more;
329 ec = error::bad_method;
335 ec = error::bad_method;
// The result spans [first, it). The post-increment then moves `it` one
// character past the end of the token, presumably over the delimiter.
338 result = make_string(first, it++);
// Parses a run of path characters starting at `it` and returns it as a
// string view in `result`; failures are reported through `ec` as need_more
// or bad_target.
// NOTE(review): presumably parse_target (judging by error::bad_target); the
// name line, the loop and the guarding conditions are not visible here.
344 char const*& it, char const* last,
345 string_view& result, error_code& ec)
// Remember where the target starts.
348 auto const first = it;
353 ec = error::need_more;
// True at the first character rejected by is_pathchar.
356 if(! is_pathchar(*it))
361 ec = error::need_more;
366 ec = error::bad_target;
372 ec = error::bad_target;
// The result spans [first, it). The post-increment then moves `it` one
// character past the end of the target, presumably over the delimiter.
375 result = make_string(first, it++);
// Parses a two-digit version into the integer `result`, encoded as
// 10 * first digit + second digit. Failures are reported through `ec` as
// need_more or bad_version.
// NOTE(review): presumably parse_version, matching text of the form
// "HTTP/x.y"; the name line and every character comparison that selects
// bad_version are not visible in this excerpt - confirm.
381 char const*& it, char const* last,
382 int& result, error_code& ec)
386 ec = error::need_more;
391 ec = error::bad_version;
396 ec = error::bad_version;
401 ec = error::bad_version;
406 ec = error::bad_version;
411 ec = error::bad_version;
416 ec = error::bad_version;
// First digit becomes the tens place; `it` advances past it.
419 result = 10 * (*it++ - '0');
422 ec = error::bad_version;
427 ec = error::bad_version;
// Second digit is added as the ones place; `it` advances past it.
430 result += *it++ - '0';
// Parses three consecutive decimal digits into `result` as
// 100 * d1 + 10 * d2 + d3. Failures are reported through `ec` as need_more
// or bad_status.
// NOTE(review): presumably parse_status; the name line and the conditions
// guarding each error assignment are not visible in this excerpt - confirm.
436 char const*& it, char const* last,
437 unsigned short& result, error_code& ec)
442 ec = error::need_more;
447 ec = error::bad_status;
// Hundreds digit; `it` advances past it.
450 result = 100 * (*it++ - '0');
453 ec = error::bad_status;
// Tens digit; `it` advances past it.
456 result += 10 * (*it++ - '0');
459 ec = error::bad_status;
// Ones digit; `it` advances past it.
462 result += *it++ - '0';
465 ec = error::bad_status;
// Extracts the text from `it` up to the end of the line as a string view in
// `result`, delegating the scan to parse_token_to_eol.
// NOTE(review): presumably parse_reason (judging by error::bad_reason); the
// name line and the conditions guarding the error are not visible here.
473 char const*& it, char const* last,
474 string_view& result, error_code& ec)
// Start of the text being extracted.
476 auto const first = it;
// Filled in by parse_token_to_eol, which takes it by reference.
477 char const* token_last = nullptr;
478 auto p = parse_token_to_eol(
479 it, last, token_last, ec);
484 ec = error::bad_reason;
// The result spans [first, token_last).
487 result = make_string(first, token_last);
// Parses one header field into `name` and `value`, handling obsolete line
// folding (obs-fold) by reassembling the folded pieces in `buf`. Failures
// are reported through `ec` as need_more, bad_field, bad_value or
// header_limit.
// NOTE(review): presumably parse_field; the name line, most of the
// parameter list, the loops and the returns are not visible in this
// excerpt - confirm.
// `buf` is a character buffer whose capacity parameter is max_obs_fold.
498 beast::detail::char_buffer<max_obs_fold>& buf,
501 /* header-field = field-name ":" OWS field-value OWS
504 field-value = *( field-content / obs-fold )
505 field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
506 field-vchar = VCHAR / obs-text
508 obs-fold = CRLF 1*( SP / HTAB )
509 ; obsolete line folding
512 token = 1*<any CHAR except CTLs or separators>
513 CHAR = <any US-ASCII character (octets 0 - 127)>
514 sep = "(" | ")" | "<" | ">" | "@"
515 | "," | ";" | ":" | "\" | <">
516 | "/" | "[" | "]" | "?" | "="
517 | "{" | "}" | SP | HT
// 256-entry table written as a string literal of \0 and \1 bytes, indexed by
// octet value; \1 marks the octets of the `token` rule above (letters,
// digits and ! # $ % & ' * + - . ^ _ ` | ~).
519 static char const* is_token =
520 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
521 "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
522 "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
523 "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
524 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
525 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
526 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
527 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
// Character ranges handed to find_fast as consecutive byte pairs; per the
// row comments each pair is a low-high span of octets. Only the first and
// last rows are visible in this excerpt.
530 BOOST_ALIGNMENT(16) static const char ranges1[] =
531 "\x00 " /* control chars and up to SP */
538 "{\377"; /* 0x7b-0xff */
// sizeof - 1 excludes the string literal's terminating NUL from the length.
541 std::tie(p, found) = find_fast(
542 p, last, ranges1, sizeof(ranges1)-1);
// Nothing matched and the input is exhausted: ask for more data.
543 if(! found && p >= last)
545 ec = error::need_more;
// A character that is not a token character ends the field name scan.
552 if(! is_token[static_cast<
555 ec = error::bad_field;
561 ec = error::need_more;
568 ec = error::bad_field;
// The field name spans [first, p).
571 name = make_string(first, p);
// Filled in by parse_token_to_eol below.
573 char const* token_last = nullptr;
576 // eat leading ' ' and '\t'
581 ec = error::need_more;
// True at the first character that is neither SP nor HTAB.
584 if(! (*p == ' ' || *p == '\t'))
// Scan the value text up to the end of the line.
589 p = parse_token_to_eol(p, last, token_last, ec);
594 ec = error::bad_value;
597 // Look 1 char past the CRLF to handle obs-fold.
600 ec = error::need_more;
// Strip trailing SP/HTAB from the value.
// NOTE(review): as shown, the result of trim_back is not stored, unlike the
// second pass below which assigns it to token_last. The start of this
// statement may simply be missing from this excerpt - confirm.
604 trim_back(token_last, first);
// The next line does not begin with SP/HTAB, so the value is not folded.
605 if(*p != ' ' && *p != '\t')
// Unfolded case: the value is a view of [first, token_last) in the input.
607 value = make_string(first, token_last);
// Only a non-empty first piece is copied into the fold buffer.
611 if(token_last != first)
// try_append reports failure when the piece does not fit in `buf`.
615 if (!buf.try_append(first, token_last))
617 ec = error::header_limit;
621 BOOST_ASSERT(! buf.empty());
624 // eat leading ' ' and '\t'
629 ec = error::need_more;
632 if(! (*p == ' ' || *p == '\t'))
// Scan the continuation line up to its line ending.
637 p = parse_token_to_eol(p, last, token_last, ec);
642 ec = error::bad_value;
645 // Look 1 char past the CRLF to handle obs-fold.
648 ec = error::need_more;
// Strip trailing SP/HTAB from the continuation piece.
651 token_last = trim_back(token_last, first);
// Skip continuation lines that are empty after trimming.
652 if(first != token_last)
// Join folded pieces with a single space; either append failing means the
// reassembled value exceeds the buffer.
654 if (!buf.try_push_back(' ') ||
655 !buf.try_append(first, token_last))
657 ec = error::header_limit;
// The next line does not begin with SP/HTAB, so folding has ended.
661 if(*p != ' ' && *p != '\t')
// Folded case: the value is a view of the bytes accumulated in `buf`, so it
// refers to `buf` rather than to the input.
663 value = {buf.data(), buf.size()};
// parse_chunk_extensions: walks the chunk-extension syntax quoted below,
// starting at `it`. Failures are reported through `ec` as need_more or
// bad_chunk_extension.
// NOTE(review): the parameter list, the loops, the handling of '=', of
// quoted strings and of the returns are not visible in this excerpt; the
// comments below describe only the statements shown.
673 parse_chunk_extensions(
// Grammar reference (RFC 7230, amended by the erratum linked below).
679 chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
680 BWS = *( SP / HTAB ) ; "Bad White Space"
681 chunk-ext-name = token
682 chunk-ext-val = token / quoted-string
684 quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
685 qdtext = HTAB / SP / "!" / %x23-5B ; '#'-'[' / %x5D-7E ; ']'-'~' / obs-text
686 quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
689 https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4667
694 ec = error::need_more;
// True when the character is none of SP, HTAB or ';', the only characters
// that can begin a chunk-ext per the grammar above.
697 if(*it != ' ' && *it != '\t' && *it != ';')
// Optional whitespace (BWS) ahead of the ';'.
700 if(*it == ' ' || *it == '\t')
707 ec = error::need_more;
// First character that is neither SP nor HTAB.
710 if(*it != ' ' && *it != '\t')
717 ec = error::bad_chunk_extension;
727 ec = error::need_more;
730 if(*it != ' ' && *it != '\t')
// The character at this point must be a token character; otherwise the
// extension is malformed.
735 if(! detail::is_token_char(*it))
737 ec = error::bad_chunk_extension;
745 ec = error::need_more;
// First character that is not a token character.
748 if(! detail::is_token_char(*it))
// Optional whitespace (BWS) after the token just scanned.
754 if(*it == ' ' || *it == '\t')
761 ec = error::need_more;
764 if(*it != ' ' && *it != '\t')
778 ec = error::bad_chunk_extension;
788 ec = error::need_more;
791 if(*it != ' ' && *it != '\t')
// A second token is validated the same way as the first.
// NOTE(review): presumably this is chunk-ext-val in its token form - confirm.
799 if(! detail::is_token_char(*it))
801 ec = error::bad_chunk_extension;
809 ec = error::need_more;
// First character that is not a token character.
812 if(! detail::is_token_char(*it))
824 ec = error::need_more;
834 ec = error::need_more;