2 // Copyright (c) 2016-2017 Vinnie Falco (vinnie dot falco at gmail dot com)
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 // Official repository: https://github.com/boostorg/beast
10 #ifndef BOOST_BEAST_HTTP_DETAIL_BASIC_PARSER_HPP
11 #define BOOST_BEAST_HTTP_DETAIL_BASIC_PARSER_HPP
13 #include <boost/beast/core/static_string.hpp>
14 #include <boost/beast/core/string.hpp>
15 #include <boost/beast/core/detail/cpu_info.hpp>
16 #include <boost/beast/http/error.hpp>
17 #include <boost/beast/http/detail/rfc7230.hpp>
18 #include <boost/config.hpp>
19 #include <boost/version.hpp>
// Declares basic_parser_base.
// NOTE(review): only the class head and this constant are visible in the
// excerpt; the helper routines that follow are presumably its static
// members -- confirm against the full header.
29 class basic_parser_base
32 // limit on the size of the obs-fold buffer
34 // https://stackoverflow.com/questions/686217/maximum-on-http-header-values
// Compile-time std::size_t constant with the value 4096.
36 static std::size_t constexpr max_obs_fold = 4096;
// Character-class lookup backed by a 256-entry boolean table.
//
// The argument is converted to unsigned char before indexing, which keeps
// the index inside 0..255 for any char value.
//
// Per the table below the result is false for octets 0x00-0x20 (control
// characters, HTAB and SP) and for 0x7F (DEL), and true for every other
// octet, including all of 0x80-0xFF.
//
// NOTE(review): the function signature is not visible in this excerpt;
// presumably this is the body of is_pathchar, which the request-target
// parser calls -- confirm.
57 // VFALCO This looks the same as the one below...
59 // TEXT = <any OCTET except CTLs, and excluding LWS>
60 static bool constexpr tab[256] = {
61 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0
62 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16
63 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 32
64 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 48
65 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 64
66 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80
67 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 96
68 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // 112
69 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 128
70 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 144
71 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 160
72 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 176
73 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 192
74 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 208
75 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 224
76 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 240
78 return tab[static_cast<unsigned char>(c)];
// Converts a single hexadecimal digit to its numeric value.
//
// @param d  Receives the digit value (0-15) when `c` is a hex digit;
//           otherwise receives 0xFF, the table's -1 sentinel converted to
//           unsigned char.
// @param c  The character to classify. '0'-'9', 'A'-'F' and 'a'-'f' are
//           the only accepted values.
// @return   true if `c` is a hexadecimal digit, false otherwise.
static
bool
unhex(unsigned char& d, char c)
{
    // Every row must end in a comma. Without one, two neighbouring rows
    // fuse into the expression `-1 -1` (== -2), which shifts all later
    // entries, leaves the tail of the array value-initialised to 0, and
    // makes non-hex octets such as 0x7F look like valid digits.
    static signed char constexpr tab[256] = {
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, //   0
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, //  16
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, //  32
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1, //  48
        -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, //  64
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, //  80
        -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, //  96
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 112

        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 128
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 144
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 160
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 176
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 192
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 208
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 224
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  // 240
    };
    // Index through unsigned char so high-bit characters select entries
    // 128..255 rather than a negative offset.
    d = static_cast<unsigned char>(
        tab[static_cast<unsigned char>(c)]);
    return d != static_cast<unsigned char>(-1);
}
// Decimal-digit test done with one comparison: subtracting '0' and
// converting to unsigned char wraps any char value below '0' to a large
// number, so `< 10` holds only for '0' through '9'.
// NOTE(review): the enclosing signature is not visible in this excerpt;
// presumably is_digit(char), which parse_dec calls -- confirm.
114 return static_cast<unsigned char>(c-'0') < 10;
// Range test done with one comparison: subtracting 32 and converting to
// unsigned char wraps any char value below 32 to a large number, so `< 95`
// holds only for the octets 32 (SP) through 126 ('~').
// NOTE(review): the enclosing signature is not visible in this excerpt;
// presumably is_print(char), which the token-to-EOL scanner calls --
// confirm.
121 return static_cast<unsigned char>(c-32) < 95;
// trim_front: takes an iterator `it` by value and the range end by const
// reference. The visible test singles out a character that is neither SP
// nor HTAB.
// NOTE(review): the loop and the return statement are not visible in this
// excerpt; presumably the function returns the first position that is not
// SP/HTAB -- confirm.
124 template<class FwdIt>
127 trim_front(FwdIt it, FwdIt const& end)
131 if(*it != ' ' && *it != '\t')
// Backward counterpart of the front trim: takes an iterator `it` and the
// start of the range `first`. The visible code reads the character just
// before `it` (it[-1], hence the random-access iterator parameter) and
// tests whether it is neither SP nor HTAB.
// NOTE(review): the function name, loop and return statement are not
// visible in this excerpt; presumably this is trim_back, which the field
// parser calls with (token_last, first) -- confirm.
138 template<class RanIt>
142 RanIt it, RanIt const& first)
146 auto const c = it[-1];
147 if(c != ' ' && c != '\t')
// Builds the return value from a pointer and a length: `first`, and the
// distance `last - first` converted to std::size_t.
// `last` must not precede `first`; a negative distance would wrap when
// converted to std::size_t.
// NOTE(review): the return type is not visible in this excerpt; callers
// below assign the result to a string_view.
156 make_string(char const* first, char const* last)
158 return {first, static_cast<
159 std::size_t>(last - first)};
162 //--------------------------------------------------------------------------
// Returns a (char const*, bool) pair.
// The visible statement marks buf_end, ranges and ranges_size as
// deliberately unused, so they raise no unused-variable warnings on this
// code path.
// NOTE(review): the function name and parameter list are not visible in
// this excerpt; presumably this is find_fast, which the field parser calls
// and unpacks with std::tie(p, found) -- confirm.
165 std::pair<char const*, bool>
173 boost::ignore_unused(buf_end, ranges, ranges_size);
// Line-ending search over the range [it, last) that reports through `ec`.
// The visible outcomes either clear `ec` (value 0, category unchanged) or
// set it to error::bad_line_ending.
// NOTE(review): the function name, the conditions guarding each outcome
// and the return values are not visible in this excerpt; presumably this
// is find_eol -- confirm.
177 // VFALCO Can SIMD help this?
181 char const* it, char const* last,
188 ec.assign(0, ec.category());
195 ec.assign(0, ec.category());
200 ec = error::bad_line_ending;
203 ec.assign(0, ec.category());
206 // VFALCO Should we handle the legacy case
207 // for lines terminated with a single '\n'?
// find_eom: examines the bytes at `p` within [p, last). The visible
// comparisons test p[2] against CR, p[1] against LF and p[0] against CR,
// in that order.
// NOTE(review): presumably this searches for the CR LF CR LF sequence that
// ends a header block; the surrounding loop, the first branch of the chain
// and the return values are not visible in this excerpt -- confirm.
214 find_eom(char const* p, char const* last)
227 else if(p[2] != '\r')
231 else if(p[1] != '\n')
235 else if(p[0] != '\r')
246 //--------------------------------------------------------------------------
// Scans text up to the end of a line, reporting the end of the token
// through the `token_last` reference parameter and errors through `ec`.
//
// Visible behaviour:
//  - error::need_more is set at two points, presumably when the input runs
//    out before a decision can be made.
//  - A byte failing is_print() is inspected further: the inner condition is
//    true for bytes below octal 040 (SP) other than octal 011 (HTAB), and
//    for octal 177 (DEL).
//  - A CR is the expected (BOOST_LIKELY) terminator; error::bad_line_ending
//    is set on one of the paths that follow it.
//
// NOTE(review): the function name, remaining parameters, loop structure
// and return values are not visible in this excerpt; presumably this is
// parse_token_to_eol, which the reason-phrase and field parsers call --
// confirm.
253 char const*& token_last,
260 ec = error::need_more;
263 if(BOOST_UNLIKELY(! is_print(*p)))
264 if((BOOST_LIKELY(static_cast<
265 unsigned char>(*p) < '\040') &&
266 BOOST_LIKELY(*p != '\011')) ||
267 BOOST_UNLIKELY(*p == '\177'))
271 if(BOOST_LIKELY(*p == '\r'))
275 ec = error::need_more;
280 ec = error::bad_line_ending;
286 // VFALCO This allows `\n` by itself
287 // to terminate a line
// parse_dec: reads decimal digits from [it, last) into the unsigned
// accumulator `v`.
//
// Visible behaviour: the iterator is advanced before each is_digit() test,
// the digit value is computed as `*it - '0'`, and `v` is compared against
// (max - 10) / 10 for the Unsigned type. The parentheses around
// std::numeric_limits<...>::max stop a function-like `max` macro from
// expanding.
//
// NOTE(review): presumably the comparison is an overflow guard applied
// before `v` is multiplied by ten and `d` added; the accumulation itself,
// the handling of the first digit and the return values are not visible
// in this excerpt -- confirm.
302 template<class Iter, class Unsigned>
305 parse_dec(Iter it, Iter last, Unsigned& v)
312 if(! is_digit(*++it))
314 auto const d = *it - '0';
315 if(v > ((std::numeric_limits<
316 Unsigned>::max)() - 10) / 10)
// parse_hex: reads hexadecimal digits through the iterator reference `it`
// into the unsigned accumulator `v`.
// The visible test advances `it` and then asks unhex() to decode the
// character into `d`; a false result marks a non-hex character.
// NOTE(review): the accumulation into `v`, any overflow check and the
// return values are not visible in this excerpt.
323 template<class Iter, class Unsigned>
326 parse_hex(Iter& it, Unsigned& v)
334 if(! unhex(d, *++it))
// parse_crlf: tests whether the two bytes at `it` are CR followed by LF.
// No bounds check is visible here, so the caller is presumably responsible
// for making sure both bytes are readable.
// NOTE(review): the return values and any advance of `it` (taken by
// reference) are not visible in this excerpt.
346 parse_crlf(char const*& it)
348 if( it[0] != '\r' || it[1] != '\n')
// Parses the request method from [it, last).
//
// `it` is taken by reference and advanced. `first` remembers where the
// token started. Characters are tested with detail::is_token_char().
// Errors are reported through `ec`: error::need_more or error::bad_method.
// On the final visible line `result` becomes the view [first, it), and
// `it` is then post-incremented by one character.
//
// NOTE(review): the function name and the conditions selecting each error
// are not visible in this excerpt; presumably the extra increment steps
// over the delimiter that ended the token -- confirm.
357 char const*& it, char const* last,
358 string_view& result, error_code& ec)
361 auto const first = it;
366 ec = error::need_more;
369 if(! detail::is_token_char(*it))
374 ec = error::need_more;
379 ec = error::bad_method;
385 ec = error::bad_method;
388 result = make_string(first, it++);
// Parses the request target from [it, last).
//
// `it` is taken by reference and advanced. `first` remembers where the
// target started. Characters are tested with is_pathchar().
// Errors are reported through `ec`: error::need_more or error::bad_target.
// On the final visible line `result` becomes the view [first, it), and
// `it` is then post-incremented by one character.
//
// NOTE(review): the function name and the conditions selecting each error
// are not visible in this excerpt; presumably the extra increment steps
// over the delimiter that ended the target -- confirm.
394 char const*& it, char const* last,
395 string_view& result, error_code& ec)
398 auto const first = it;
403 ec = error::need_more;
406 if(! is_pathchar(*it))
411 ec = error::need_more;
416 ec = error::bad_target;
422 ec = error::bad_target;
425 result = make_string(first, it++);
// Parses the HTTP version from [it, last) into the integer `result`.
//
// The two visible assignments encode the version as ten times the first
// digit plus the second digit; each digit is consumed with `*it++`.
// Errors are reported through `ec`: error::need_more once, and
// error::bad_version on every other visible failure path.
//
// NOTE(review): the function name and the individual checks that precede
// each error::bad_version are not visible in this excerpt.
431 char const*& it, char const* last,
432 int& result, error_code& ec)
436 ec = error::need_more;
441 ec = error::bad_version;
446 ec = error::bad_version;
451 ec = error::bad_version;
456 ec = error::bad_version;
461 ec = error::bad_version;
466 ec = error::bad_version;
469 result = 10 * (*it++ - '0');
472 ec = error::bad_version;
477 ec = error::bad_version;
480 result += *it++ - '0';
// Parses a three-digit status code from [it, last) into `result`.
//
// The visible assignments weight the consumed digits by 100, 10 and 1 in
// turn; each digit is consumed with `*it++`.
// Errors are reported through `ec`: error::need_more once, and
// error::bad_status on the other visible failure paths, including one
// after the third digit.
//
// NOTE(review): the function name and the checks that precede each error
// are not visible in this excerpt.
486 char const*& it, char const* last,
487 unsigned short& result, error_code& ec)
492 ec = error::need_more;
497 ec = error::bad_status;
500 result = 100 * (*it++ - '0');
503 ec = error::bad_status;
506 result += 10 * (*it++ - '0');
509 ec = error::bad_status;
512 result += *it++ - '0';
515 ec = error::bad_status;
// Parses the reason phrase from [it, last).
//
// The scan is delegated to parse_token_to_eol(), which reports the end of
// the phrase through `token_last`. `result` becomes the view
// [first, token_last), where `first` is the value of `it` on entry.
// error::bad_reason is set on one visible failure path.
//
// NOTE(review): the function name, the condition that selects
// error::bad_reason and the update of `it` from `p` are not visible in
// this excerpt.
522 char const*& it, char const* last,
523 string_view& result, error_code& ec)
525 auto const first = it;
526 char const* token_last = nullptr;
527 auto p = parse_token_to_eol(
528 it, last, token_last, ec);
533 ec = error::bad_reason;
536 result = make_string(first, token_last);
// Parses one header field (name, colon, value) following the grammar
// quoted below, producing `name` and `value` and reporting errors through
// `ec`.
//
// Visible behaviour:
//  - The field name is scanned with find_fast() over the `ranges1` byte
//    ranges and checked against the 256-entry `is_token` table. Errors are
//    error::need_more and error::bad_field. `name` becomes the view
//    [first, p).
//  - Leading SP/HTAB before the value are skipped, then
//    parse_token_to_eol() locates the end of the line; error::bad_value is
//    set on one failure path.
//  - One character past the CRLF is examined. If it is neither SP nor
//    HTAB, `value` is the view [first, token_last) taken directly from the
//    input.
//  - Otherwise the value is folded (obs-fold): each line's piece is
//    appended to `buf`, and `value` finally refers to buf.data() and
//    buf.size().
//  - std::length_error is caught and reported as error::header_limit.
//
// NOTE(review): the function name, most of the parameter list and the
// loop/branch structure are not visible in this excerpt. Presumably the
// std::length_error comes from buf.append() exceeding the static_string
// capacity N -- confirm.
540 template<std::size_t N>
547 static_string<N>& buf,
550 /* header-field = field-name ":" OWS field-value OWS
553 field-value = *( field-content / obs-fold )
554 field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
555 field-vchar = VCHAR / obs-text
557 obs-fold = CRLF 1*( SP / HTAB )
558 ; obsolete line folding
561 token = 1*<any CHAR except CTLs or separators>
562 CHAR = <any US-ASCII character (octets 0 - 127)>
563 sep = "(" | ")" | "<" | ">" | "@"
564 | "," | ";" | ":" | "\" | <">
565 | "/" | "[" | "]" | "?" | "="
566 | "{" | "}" | SP | HT
568 static char const* is_token =
569 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
570 "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
571 "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
572 "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
573 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
574 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
575 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
576 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
579 BOOST_ALIGNMENT(16) static const char ranges1[] =
580 "\x00 " /* control chars and up to SP */
587 "{\377"; /* 0x7b-0xff */
590 std::tie(p, found) = find_fast(
591 p, last, ranges1, sizeof(ranges1)-1);
592 if(! found && p >= last)
594 ec = error::need_more;
601 if(! is_token[static_cast<
604 ec = error::bad_field;
610 ec = error::need_more;
617 ec = error::bad_field;
620 name = make_string(first, p);
622 char const* token_last = nullptr;
625 // eat leading ' ' and '\t'
630 ec = error::need_more;
633 if(! (*p == ' ' || *p == '\t'))
638 p = parse_token_to_eol(p, last, token_last, ec);
643 ec = error::bad_value;
646 // Look 1 char past the CRLF to handle obs-fold.
649 ec = error::need_more;
653 trim_back(token_last, first);
654 if(*p != ' ' && *p != '\t')
656 value = make_string(first, token_last);
660 if(token_last != first)
664 buf.append(first, token_last);
665 BOOST_ASSERT(! buf.empty());
670 // eat leading ' ' and '\t'
675 ec = error::need_more;
678 if(! (*p == ' ' || *p == '\t'))
683 p = parse_token_to_eol(p, last, token_last, ec);
688 ec = error::bad_value;
691 // Look 1 char past the CRLF to handle obs-fold.
694 ec = error::need_more;
697 token_last = trim_back(token_last, first);
698 if(first != token_last)
701 buf.append(first, token_last);
703 if(*p != ' ' && *p != '\t')
705 value = {buf.data(), buf.size()};
711 catch(std::length_error const&)
713 ec = error::header_limit;
// parse_chunk_extensions: validates chunk extensions against the grammar
// quoted below.
//
// Visible behaviour: the code repeatedly tests for SP/HTAB (the optional
// "bad whitespace" in the grammar) and for ';', and checks name and value
// characters with detail::is_token_char(). Errors are reported through
// `ec`: error::need_more at the visible out-of-input points and
// error::bad_chunk_extension at the visible syntax failures.
//
// NOTE(review): the parameter list, the loop structure and the
// quoted-string handling are not visible, and the function continues past
// the end of this excerpt.
719 parse_chunk_extensions(
725 chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
726 BWS = *( SP / HTAB ) ; "Bad White Space"
727 chunk-ext-name = token
728 chunk-ext-val = token / quoted-string
730 quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
731 qdtext = HTAB / SP / "!" / %x23-5B ; '#'-'[' / %x5D-7E ; ']'-'~' / obs-text
732 quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
735 https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4667
740 ec = error::need_more;
743 if(*it != ' ' && *it != '\t' && *it != ';')
746 if(*it == ' ' || *it == '\t')
753 ec = error::need_more;
756 if(*it != ' ' && *it != '\t')
763 ec = error::bad_chunk_extension;
773 ec = error::need_more;
776 if(*it != ' ' && *it != '\t')
781 if(! detail::is_token_char(*it))
783 ec = error::bad_chunk_extension;
791 ec = error::need_more;
794 if(! detail::is_token_char(*it))
800 if(*it == ' ' || *it == '\t')
807 ec = error::need_more;
810 if(*it != ' ' && *it != '\t')
824 ec = error::bad_chunk_extension;
834 ec = error::need_more;
837 if(*it != ' ' && *it != '\t')
845 if(! detail::is_token_char(*it))
847 ec = error::bad_chunk_extension;
855 ec = error::need_more;
858 if(! detail::is_token_char(*it))
870 ec = error::need_more;
880 ec = error::need_more;