// // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com) // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // // Official repository: https://github.com/boostorg/beast // #ifndef BOOST_BEAST_HTTP_DETAIL_BASIC_PARSER_IPP #define BOOST_BEAST_HTTP_DETAIL_BASIC_PARSER_IPP #include #include namespace boost { namespace beast { namespace http { namespace detail { char const* basic_parser_base:: trim_front(char const* it, char const* end) { while(it != end) { if(*it != ' ' && *it != '\t') break; ++it; } return it; } char const* basic_parser_base:: trim_back( char const* it, char const* first) { while(it != first) { auto const c = it[-1]; if(c != ' ' && c != '\t') break; --it; } return it; } bool basic_parser_base:: is_pathchar(char c) { // VFALCO This looks the same as the one below... // TEXT = static bool constexpr tab[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 32 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 48 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 64 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 96 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // 112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 176 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 192 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 208 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 224 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 240 }; return tab[static_cast(c)]; } bool basic_parser_base:: unhex(unsigned char& d, char c) { static signed char constexpr tab[256] = { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 0 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 16 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 32 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1, // 48 -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 80 -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 96 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 112 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 128 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 144 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 160 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 176 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 192 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 208 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 224 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 240 }; d = static_cast( tab[static_cast(c)]); return d != static_cast(-1); } //-------------------------------------------------------------------------- std::pair basic_parser_base:: find_fast( char const* buf, char const* buf_end, char const* ranges, size_t ranges_size) { bool found = false; boost::ignore_unused(buf_end, ranges, ranges_size); return {buf, found}; } // VFALCO Can SIMD help this? char const* basic_parser_base:: find_eol( char const* it, char const* last, error_code& ec) { for(;;) { if(it == last) { ec = {}; return nullptr; } if(*it == '\r') { if(++it == last) { ec = {}; return nullptr; } if(*it != '\n') { ec = error::bad_line_ending; return nullptr; } ec = {}; return ++it; } // VFALCO Should we handle the legacy case // for lines terminated with a single '\n'? ++it; } } bool basic_parser_base:: parse_dec( string_view s, std::uint64_t& v) { char const* it = s.data(); char const* last = it + s.size(); if(it == last) return false; std::uint64_t tmp = 0; do { if((! is_digit(*it)) || tmp > (std::numeric_limits::max)() / 10) return false; tmp *= 10; std::uint64_t const d = *it - '0'; if((std::numeric_limits::max)() - tmp < d) return false; tmp += d; } while(++it != last); v = tmp; return true; } bool basic_parser_base:: parse_hex(char const*& it, std::uint64_t& v) { unsigned char d; if(! unhex(d, *it)) return false; std::uint64_t tmp = 0; do { if(tmp > (std::numeric_limits::max)() / 16) return false; tmp *= 16; if((std::numeric_limits::max)() - tmp < d) return false; tmp += d; } while(unhex(d, *++it)); v = tmp; return true; } char const* basic_parser_base:: find_eom(char const* p, char const* last) { for(;;) { if(p + 4 > last) return nullptr; if(p[3] != '\n') { if(p[3] == '\r') ++p; else p += 4; } else if(p[2] != '\r') { p += 4; } else if(p[1] != '\n') { p += 2; } else if(p[0] != '\r') { p += 2; } else { return p + 4; } } } //-------------------------------------------------------------------------- char const* basic_parser_base:: parse_token_to_eol( char const* p, char const* last, char const*& token_last, error_code& ec) { for(;; ++p) { if(p >= last) { ec = error::need_more; return p; } if(BOOST_UNLIKELY(! is_print(*p))) if((BOOST_LIKELY(static_cast< unsigned char>(*p) < '\040') && BOOST_LIKELY(*p != 9)) || BOOST_UNLIKELY(*p == 127)) goto found_control; } found_control: if(BOOST_LIKELY(*p == '\r')) { if(++p >= last) { ec = error::need_more; return last; } if(*p++ != '\n') { ec = error::bad_line_ending; return last; } token_last = p - 2; } #if 0 // VFALCO This allows `\n` by itself // to terminate a line else if(*p == '\n') { token_last = p; ++p; } #endif else { // invalid character return nullptr; } return p; } bool basic_parser_base:: parse_crlf(char const*& it) { if( it[0] != '\r' || it[1] != '\n') return false; it += 2; return true; } void basic_parser_base:: parse_method( char const*& it, char const* last, string_view& result, error_code& ec) { // parse token SP auto const first = it; for(;; ++it) { if(it + 1 > last) { ec = error::need_more; return; } if(! detail::is_token_char(*it)) break; } if(it + 1 > last) { ec = error::need_more; return; } if(*it != ' ') { ec = error::bad_method; return; } if(it == first) { // cannot be empty ec = error::bad_method; return; } result = make_string(first, it++); } void basic_parser_base:: parse_target( char const*& it, char const* last, string_view& result, error_code& ec) { // parse target SP auto const first = it; for(;; ++it) { if(it + 1 > last) { ec = error::need_more; return; } if(! is_pathchar(*it)) break; } if(it + 1 > last) { ec = error::need_more; return; } if(*it != ' ') { ec = error::bad_target; return; } if(it == first) { // cannot be empty ec = error::bad_target; return; } result = make_string(first, it++); } void basic_parser_base:: parse_version( char const*& it, char const* last, int& result, error_code& ec) { if(it + 8 > last) { ec = error::need_more; return; } if(*it++ != 'H') { ec = error::bad_version; return; } if(*it++ != 'T') { ec = error::bad_version; return; } if(*it++ != 'T') { ec = error::bad_version; return; } if(*it++ != 'P') { ec = error::bad_version; return; } if(*it++ != '/') { ec = error::bad_version; return; } if(! is_digit(*it)) { ec = error::bad_version; return; } result = 10 * (*it++ - '0'); if(*it++ != '.') { ec = error::bad_version; return; } if(! is_digit(*it)) { ec = error::bad_version; return; } result += *it++ - '0'; } void basic_parser_base:: parse_status( char const*& it, char const* last, unsigned short& result, error_code& ec) { // parse 3(digit) SP if(it + 4 > last) { ec = error::need_more; return; } if(! is_digit(*it)) { ec = error::bad_status; return; } result = 100 * (*it++ - '0'); if(! is_digit(*it)) { ec = error::bad_status; return; } result += 10 * (*it++ - '0'); if(! is_digit(*it)) { ec = error::bad_status; return; } result += *it++ - '0'; if(*it++ != ' ') { ec = error::bad_status; return; } } void basic_parser_base:: parse_reason( char const*& it, char const* last, string_view& result, error_code& ec) { auto const first = it; char const* token_last = nullptr; auto p = parse_token_to_eol( it, last, token_last, ec); if(ec) return; if(! p) { ec = error::bad_reason; return; } result = make_string(first, token_last); it = p; } void basic_parser_base:: parse_field( char const*& p, char const* last, string_view& name, string_view& value, beast::detail::char_buffer& buf, error_code& ec) { /* header-field = field-name ":" OWS field-value OWS field-name = token field-value = *( field-content / obs-fold ) field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] field-vchar = VCHAR / obs-text obs-fold = CRLF 1*( SP / HTAB ) ; obsolete line folding ; see Section 3.2.4 token = 1* CHAR = sep = "(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\" | <"> | "/" | "[" | "]" | "?" | "=" | "{" | "}" | SP | HT */ static char const* is_token = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0" "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1" "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0" "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; // name BOOST_ALIGNMENT(16) static const char ranges1[] = "\x00 " /* control chars and up to SP */ "\"\"" /* 0x22 */ "()" /* 0x28,0x29 */ ",," /* 0x2c */ "//" /* 0x2f */ ":@" /* 0x3a-0x40 */ "[]" /* 0x5b-0x5d */ "{\377"; /* 0x7b-0xff */ auto first = p; bool found; std::tie(p, found) = find_fast( p, last, ranges1, sizeof(ranges1)-1); if(! found && p >= last) { ec = error::need_more; return; } for(;;) { if(*p == ':') break; if(! is_token[static_cast< unsigned char>(*p)]) { ec = error::bad_field; return; } ++p; if(p >= last) { ec = error::need_more; return; } } if(p == first) { // empty name ec = error::bad_field; return; } name = make_string(first, p); ++p; // eat ':' char const* token_last = nullptr; for(;;) { // eat leading ' ' and '\t' for(;;++p) { if(p + 1 > last) { ec = error::need_more; return; } if(! (*p == ' ' || *p == '\t')) break; } // parse to CRLF first = p; p = parse_token_to_eol(p, last, token_last, ec); if(ec) return; if(! p) { ec = error::bad_value; return; } // Look 1 char past the CRLF to handle obs-fold. if(p + 1 > last) { ec = error::need_more; return; } token_last = trim_back(token_last, first); if(*p != ' ' && *p != '\t') { value = make_string(first, token_last); return; } ++p; if(token_last != first) break; } buf.clear(); if (!buf.try_append(first, token_last)) { ec = error::header_limit; return; } BOOST_ASSERT(! buf.empty()); for(;;) { // eat leading ' ' and '\t' for(;;++p) { if(p + 1 > last) { ec = error::need_more; return; } if(! (*p == ' ' || *p == '\t')) break; } // parse to CRLF first = p; p = parse_token_to_eol(p, last, token_last, ec); if(ec) return; if(! p) { ec = error::bad_value; return; } // Look 1 char past the CRLF to handle obs-fold. if(p + 1 > last) { ec = error::need_more; return; } token_last = trim_back(token_last, first); if(first != token_last) { if (!buf.try_push_back(' ') || !buf.try_append(first, token_last)) { ec = error::header_limit; return; } } if(*p != ' ' && *p != '\t') { value = {buf.data(), buf.size()}; return; } ++p; } } void basic_parser_base:: parse_chunk_extensions( char const*& it, char const* last, error_code& ec) { /* chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] ) BWS = *( SP / HTAB ) ; "Bad White Space" chunk-ext-name = token chunk-ext-val = token / quoted-string token = 1*tchar quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE qdtext = HTAB / SP / "!" / %x23-5B ; '#'-'[' / %x5D-7E ; ']'-'~' / obs-text quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text ) obs-text = %x80-FF https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4667 */ loop: if(it == last) { ec = error::need_more; return; } if(*it != ' ' && *it != '\t' && *it != ';') return; // BWS if(*it == ' ' || *it == '\t') { for(;;) { ++it; if(it == last) { ec = error::need_more; return; } if(*it != ' ' && *it != '\t') break; } } // ';' if(*it != ';') { ec = error::bad_chunk_extension; return; } semi: ++it; // skip ';' // BWS for(;;) { if(it == last) { ec = error::need_more; return; } if(*it != ' ' && *it != '\t') break; ++it; } // chunk-ext-name if(! detail::is_token_char(*it)) { ec = error::bad_chunk_extension; return; } for(;;) { ++it; if(it == last) { ec = error::need_more; return; } if(! detail::is_token_char(*it)) break; } // BWS [ ";" / "=" ] { bool bws; if(*it == ' ' || *it == '\t') { for(;;) { ++it; if(it == last) { ec = error::need_more; return; } if(*it != ' ' && *it != '\t') break; } bws = true; } else { bws = false; } if(*it == ';') goto semi; if(*it != '=') { if(bws) ec = error::bad_chunk_extension; return; } ++it; // skip '=' } // BWS for(;;) { if(it == last) { ec = error::need_more; return; } if(*it != ' ' && *it != '\t') break; ++it; } // chunk-ext-val if(*it != '"') { // token if(! detail::is_token_char(*it)) { ec = error::bad_chunk_extension; return; } for(;;) { ++it; if(it == last) { ec = error::need_more; return; } if(! detail::is_token_char(*it)) break; } } else { // quoted-string for(;;) { ++it; if(it == last) { ec = error::need_more; return; } if(*it == '"') break; if(*it == '\\') { ++it; if(it == last) { ec = error::need_more; return; } } } ++it; } goto loop; } } // detail } // http } // beast } // boost #endif