2 // Copyright (c) 2016-2017 Vinnie Falco (vinnie dot falco at gmail dot com)
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 // Official repository: https://github.com/boostorg/beast
10 #ifndef BOOST_BEAST_HTTP_BASIC_PARSER_HPP
11 #define BOOST_BEAST_HTTP_BASIC_PARSER_HPP
13 #include <boost/beast/core/detail/config.hpp>
14 #include <boost/beast/core/error.hpp>
15 #include <boost/beast/core/string.hpp>
16 #include <boost/beast/http/field.hpp>
17 #include <boost/beast/http/verb.hpp>
18 #include <boost/beast/http/detail/basic_parser.hpp>
19 #include <boost/asio/buffer.hpp>
20 #include <boost/optional.hpp>
21 #include <boost/assert.hpp>
24 #include <type_traits>
31 /** A parser for decoding HTTP/1 wire format messages.
33 This parser is designed to efficiently parse messages in the
34 HTTP/1 wire format. It allocates no memory when input is
35 presented as a single contiguous buffer, and uses minimal
36 state. It will handle chunked encoding and it understands
37 the semantics of the Connection, Content-Length, and Upgrade
39 The parser is optimized for the case where the input buffer
40 sequence consists of a single contiguous buffer. The
41 @ref flat_buffer class is provided, which guarantees
42 that the input sequence of the stream buffer will be represented
43 by exactly one contiguous buffer. To ensure the optimum performance
44 of the parser, use @ref flat_buffer with HTTP algorithms
45 such as @ref read, @ref read_some, @ref async_read, and @ref async_read_some.
46 Alternatively, the caller may use custom techniques to ensure that
47 the structured portion of the HTTP message (header or chunk header)
48 is contained in a linear buffer.
50 The interface uses CRTP (Curiously Recurring Template Pattern).
51 To use this class directly, derive from @ref basic_parser. When
52 bytes are presented, the implementation will make a series of zero
53 or more calls to derived class member functions (termed "callbacks"
54 in this context) matching a specific signature.
56 Every callback must be provided by the derived class, or else
57 a compilation error will be generated. This exemplar shows
58 the signature and description of the callbacks required in
60 For each callback, the function will ensure that `!ec` is `true`
61 if there was no error or set to the appropriate error code if
62 there was one. If an error is set, the value is propagated to
63 the caller of the parser.
65 @par Derived Class Requirements
67 template<bool isRequest>
69 : public basic_parser<isRequest, derived<isRequest>>
72 // The friend declaration is needed,
73 // otherwise the callbacks must be made public.
74 friend class basic_parser<isRequest, derived>;
76 /// Called after receiving the request-line (isRequest == true).
79 verb method, // The method verb, verb::unknown if no match
80 string_view method_str, // The method as a string
81 string_view target, // The request-target
82 int version, // The HTTP-version
83 error_code& ec); // The error returned to the caller, if any
85 /// Called after receiving the start-line (isRequest == false).
88 int code, // The status-code
89 string_view reason, // The obsolete reason-phrase
90 int version, // The HTTP-version
91 error_code& ec); // The error returned to the caller, if any
93 /// Called after receiving a header field.
96 field f, // The known-field enumeration constant
97 string_view name, // The field name string.
98 string_view value, // The field value
99 error_code& ec); // The error returned to the caller, if any
101 /// Called after the complete header is received.
104 error_code& ec); // The error returned to the caller, if any
106 /// Called just before processing the body, if a body exists.
110 std::uint64_t> const&
111 content_length, // Content length if known, else `boost::none`
112 error_code& ec); // The error returned to the caller, if any
114 /// Called for each piece of the body, if a body exists.
116 //! This is used when there is no chunked transfer coding.
118 //! The function returns the number of bytes consumed from the
119 //! input buffer. Any input octets not consumed will be
120 //! presented on subsequent calls.
124 string_view s, // A portion of the body
125 error_code& ec); // The error returned to the caller, if any
127 /// Called for each chunk header.
129 on_chunk_header_impl(
130 std::uint64_t size, // The size of the upcoming chunk,
131 // or zero for the last chunk
132 string_view extension, // The chunk extensions (may be empty)
133 error_code& ec); // The error returned to the caller, if any
135 /// Called to deliver the chunk body.
137 //! This is used when there is a chunked transfer coding. The
138 //! implementation will automatically remove the encoding before
139 //! calling this function.
141 //! The function returns the number of bytes consumed from the
142 //! input buffer. Any input octets not consumed will be
143 //! presented on subsequent calls.
147 std::uint64_t remain, // The number of bytes remaining in the chunk,
148 // including what is being passed here,
149 // or zero for the last chunk
150 string_view body, // The next piece of the chunk body
151 error_code& ec); // The error returned to the caller, if any
153 /// Called when the complete message is parsed.
155 on_finish_impl(error_code& ec);
162 @tparam isRequest A `bool` indicating whether the parser will be
163 presented with a request or a response message.
165 @tparam Derived The derived class type. This is part of the
166 Curiously Recurring Template Pattern interface.
168 @note If the parser encounters a field value with obs-fold
169 longer than 4 kilobytes in length, an error is generated.
171 template<bool isRequest, class Derived>
173 : private detail::basic_parser_base
175 template<bool OtherIsRequest, class OtherDerived>
176 friend class basic_parser;
178 // limit on the size of the stack flat buffer
179 static std::size_t constexpr max_stack_buffer = 8192;
// The constants below are single-bit masks. They are combined in the
// `f_` member and tested with expressions of the form `(f_ & flagX) != 0`.
181 // Message will be complete after reading header
182 static unsigned constexpr flagSkipBody = 1<< 0;
184 // Consume input buffers across semantic boundaries
185 static unsigned constexpr flagEager = 1<< 1;
187 // The parser has read at least one byte
188 static unsigned constexpr flagGotSome = 1<< 2;
190 // Message semantics indicate a body is expected.
191 // cleared if flagSkipBody set
193 static unsigned constexpr flagHasBody = 1<< 3;
// NOTE(review): presumably records that the start-line declared HTTP/1.1;
// confirm against the implementation file.
195 static unsigned constexpr flagHTTP11 = 1<< 4;
// The message semantics require an end of file notification to know
// where the end of the body lies.
196 static unsigned constexpr flagNeedEOF = 1<< 5;
// NOTE(review): presumably set while a CRLF is still expected in the
// chunked body framing; confirm against the implementation file.
197 static unsigned constexpr flagExpectCRLF = 1<< 6;
// NOTE(review): presumably the Connection field carried "close"; confirm.
198 static unsigned constexpr flagConnectionClose = 1<< 7;
// The message is an upgrade message.
199 static unsigned constexpr flagConnectionUpgrade = 1<< 8;
// NOTE(review): presumably the Connection field carried "keep-alive"; confirm.
200 static unsigned constexpr flagConnectionKeepAlive = 1<< 9;
// NOTE(review): presumably a Content-Length field was received; confirm.
201 static unsigned constexpr flagContentLength = 1<< 10;
// The last value for Transfer-Encoding is "chunked".
202 static unsigned constexpr flagChunked = 1<< 11;
// NOTE(review): presumably an Upgrade field was received; confirm.
203 static unsigned constexpr flagUpgrade = 1<< 12;
// NOTE(review): presumably the zero-sized last chunk has been seen; confirm.
204 static unsigned constexpr flagFinalChunk = 1<< 13;
208 default_body_limit(std::true_type)
210 // limit for requests
211 return 1 * 1024 * 1024; // 1MB
216 default_body_limit(std::false_type)
218 // limit for responses
219 return 8 * 1024 * 1024; // 8MB
222 std::uint64_t body_limit_ =
223 default_body_limit(is_request{}); // max payload body
224 std::uint64_t len_ = 0; // size of chunk or body
225 std::unique_ptr<char[]> buf_; // temp storage
226 std::size_t buf_len_ = 0; // size of buf_
227 std::size_t skip_ = 0; // resume search here
228 std::uint32_t header_limit_ = 8192; // max header size
229 unsigned short status_ = 0; // response status
230 state state_ = state::nothing_yet; // initial state
231 unsigned f_ = 0; // flags
234 /// Default constructor
235 basic_parser() = default;
238 basic_parser(basic_parser &&) = default;
241 basic_parser& operator=(basic_parser &&) = default;
247 After the move, the only valid operation on the
248 moved-from object is destruction.
250 template<class OtherDerived>
251 basic_parser(basic_parser<isRequest, OtherDerived>&&);
254 /// `true` if this parser parses requests, `false` for responses.
256 std::integral_constant<bool, isRequest>;
259 ~basic_parser() = default;
262 basic_parser(basic_parser const&) = delete;
265 basic_parser& operator=(basic_parser const&) = delete;
267 /** Returns a reference to this object as a @ref basic_parser.
269 This is used to pass a derived class where a base class is
270 expected, to choose a correct function overload when the
271 resolution would be ambiguous.
279 /** Returns a constant reference to this object as a @ref basic_parser.
281 This is used to pass a derived class where a base class is
282 expected, to choose a correct function overload when the
283 resolution would be ambiguous.
291 /// Returns `true` if the parser has received at least one byte of input.
295 return state_ != state::nothing_yet;
298 /** Returns `true` if the message is complete.
300 The message is complete after the full header is produced
301 and one of the following is true:
303 @li The skip body option was set.
305 @li The semantics of the message indicate there is no body.
307 @li The semantics of the message indicate a body is expected,
308 and the entire body was parsed.
313 return state_ == state::complete;
316 /** Returns `true` if the parser has produced the full header.
319 is_header_done() const
321 return state_ > state::fields;
324 /** Returns `true` if the message is an upgrade message.
326 @note The return value is undefined unless
327 @ref is_header_done would return `true`.
332 return (f_ & flagConnectionUpgrade) != 0;
335 /** Returns `true` if the last value for Transfer-Encoding is "chunked".
337 @note The return value is undefined unless
338 @ref is_header_done would return `true`.
343 return (f_ & flagChunked) != 0;
346 /** Returns `true` if the message has keep-alive connection semantics.
348 This function always returns `false` if @ref need_eof would return
351 @note The return value is undefined unless
352 @ref is_header_done would return `true`.
357 /** Returns the optional value of Content-Length if known.
359 @note The return value is undefined unless
360 @ref is_header_done would return `true`.
362 boost::optional<std::uint64_t>
363 content_length() const;
365 /** Returns `true` if the message semantics require an end of file.
367 Depending on the contents of the header, the parser may
368 require an end of file notification to know where the end
369 of the body lies. If this function returns `true` it will be
370 necessary to call @ref put_eof when there will never be additional
376 return (f_ & flagNeedEOF) != 0;
379 /** Set the limit on the payload body.
381 This function sets the maximum allowed size of the payload body,
382 before any encodings except chunked have been removed. Depending
383 on the message semantics, one of these cases will apply:
385 @li The Content-Length is specified and exceeds the limit. In
386 this case the result @ref error::body_limit is returned
387 immediately after the header is parsed.
389 @li The Content-Length is unspecified and the chunked encoding
390 is not specified as the last encoding. In this case the end of
391 message is determined by the end of file indicator on the
392 associated stream or input source. If a sufficient number of
393 body payload octets are presented to the parser to exceed the
394 configured limit, the parse fails with the result
395 @ref error::body_limit
397 @li The Transfer-Encoding specifies the chunked encoding as the
398 last encoding. In this case, when the number of payload body
399 octets produced by removing the chunked encoding exceeds
400 the configured limit, the parse fails with the result
401 @ref error::body_limit.
403 Setting the limit after any body octets have been parsed
404 results in undefined behavior.
406 The default limit is 1MB for requests and 8MB for responses.
408 @param v The payload body limit to set
411 body_limit(std::uint64_t v)
416 /** Set a limit on the total size of the header.
418 This function sets the maximum allowed size of the header
419 including all field name, value, and delimiter characters
420 and also including the CRLF sequences in the serialized
421 input. If the end of the header is not found within the
422 limit of the header size, the error @ref error::header_limit
423 is returned by @ref put.
425 Setting the limit after any header octets have been parsed
426 results in undefined behavior.
429 header_limit(std::uint32_t v)
434 /// Returns `true` if the eager parse option is set.
438 return (f_ & flagEager) != 0;
441 /** Set the eager parse option.
443 Normally the parser returns after successfully parsing a structured
444 element (header, chunk header, or chunk body) even if there are octets
445 remaining in the input. This is necessary when attempting to parse the
446 header first, or when the caller wants to inspect information which may
447 be invalidated by subsequent parsing, such as a chunk extension. The
448 `eager` option controls whether the parser keeps going after parsing
449 a structured element if there are octets remaining in the buffer and no
450 error occurs. This option is automatically set or cleared during certain
451 stream operations to improve performance with no change in functionality.
453 The default setting is `false`.
455 @param v `true` to set the eager parse option or `false` to disable it.
466 /// Returns `true` if the skip parse option is set.
470 return (f_ & flagSkipBody) != 0;
473 /** Set the skip parse option.
475 This option controls whether or not the parser expects to see an HTTP
476 body, regardless of the presence or absence of certain fields such as
477 Content-Length or a chunked Transfer-Encoding. Depending on the request,
478 some responses do not carry a body. For example, a 200 response to a
479 CONNECT request from a tunneling proxy, or a response to a HEAD request.
480 In these cases, callers may use this function to inform the parser that
481 no body is expected. The parser will consider the message complete
482 after the header has been received.
484 @param v `true` to set the skip body option or `false` to disable it.
486 @note This function must be called before any bytes are processed.
491 /** Write a buffer sequence to the parser.
493 This function attempts to incrementally parse the HTTP
494 message data stored in the caller provided buffers. Upon
495 success, a positive return value indicates that the parser
496 made forward progress, consuming that number of
499 In some cases there may be an insufficient number of octets
500 in the input buffer in order to make forward progress. This
501 is indicated by the code @ref error::need_more. When
502 this happens, the caller should place additional bytes into
503 the buffer sequence and call @ref put again.
505 The error code @ref error::need_more is special. When this
506 error is returned, a subsequent call to @ref put may succeed
507 if the buffers have been updated. Otherwise, upon error
508 the parser may not be restarted.
510 @param buffers An object meeting the requirements of
511 @b ConstBufferSequence that represents the next chunk of
512 message data. If the length of this buffer sequence is
513 one, the implementation will not allocate additional memory.
514 The class @ref beast::flat_buffer is provided as one way to
515 meet this requirement.
517 @param ec Set to the error, if any occurred.
519 @return The number of octets consumed in the buffer
520 sequence. The caller should remove these octets even if the
523 template<class ConstBufferSequence>
525 put(ConstBufferSequence const& buffers, error_code& ec);
527 #if ! BOOST_BEAST_DOXYGEN
529 put(boost::asio::const_buffer const& buffer,
533 /** Inform the parser that the end of stream was reached.
535 In certain cases, HTTP needs to know where the end of
536 the stream is. For example, sometimes servers send
537 responses without Content-Length and expect the client
538 to consume input (for the body) until EOF. Callbacks
539 and errors will still be processed as usual.
541 This is typically called when a read from the
542 underlying stream object sets the error code to
543 `boost::asio::error::eof`.
545 @note Only valid after parsing a complete header.
547 @param ec Set to the error, if any occurred.
550 put_eof(error_code& ec);
557 return *static_cast<Derived*>(this);
560 template<class ConstBufferSequence>
562 put_from_stack(std::size_t size,
563 ConstBufferSequence const& buffers,
568 char const* p, std::size_t n,
573 char const*& p, char const* last,
574 error_code& ec, std::true_type);
578 char const*& p, char const* last,
579 error_code& ec, std::false_type);
583 char const*& p, char const* last,
588 error_code& ec, std::true_type);
592 error_code& ec, std::false_type);
595 parse_body(char const*& p,
596 std::size_t n, error_code& ec);
599 parse_body_to_eof(char const*& p,
600 std::size_t n, error_code& ec);
603 parse_chunk_header(char const*& p,
604 std::size_t n, error_code& ec);
607 parse_chunk_body(char const*& p,
608 std::size_t n, error_code& ec);
612 string_view value, error_code& ec);
619 #include <boost/beast/http/impl/basic_parser.ipp>