2 // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 // Official repository: https://github.com/boostorg/beast
10 #ifndef BOOST_BEAST_HTTP_BASIC_PARSER_HPP
11 #define BOOST_BEAST_HTTP_BASIC_PARSER_HPP
13 #include <boost/beast/core/detail/config.hpp>
14 #include <boost/beast/core/error.hpp>
15 #include <boost/beast/core/string.hpp>
16 #include <boost/beast/http/field.hpp>
17 #include <boost/beast/http/verb.hpp>
18 #include <boost/beast/http/detail/basic_parser.hpp>
19 #include <boost/asio/buffer.hpp>
20 #include <boost/optional.hpp>
21 #include <boost/assert.hpp>
24 #include <type_traits>
31 /** A parser for decoding HTTP/1 wire format messages.
33 This parser is designed to efficiently parse messages in the
34 HTTP/1 wire format. It allocates no memory when input is
35 presented as a single contiguous buffer, and uses minimal
36 state. It will handle chunked encoding and it understands
37 the semantics of the Connection, Content-Length, and Upgrade
39 The parser is optimized for the case where the input buffer
40 sequence consists of a single contiguous buffer. The
41 @ref beast::basic_flat_buffer class is provided, which guarantees
42 that the input sequence of the stream buffer will be represented
43 by exactly one contiguous buffer. To ensure the optimum performance
44 of the parser, use @ref beast::basic_flat_buffer with HTTP algorithms
45 such as @ref read, @ref read_some, @ref async_read, and @ref async_read_some.
46 Alternatively, the caller may use custom techniques to ensure that
47 the structured portion of the HTTP message (header or chunk header)
48 is contained in a linear buffer.
50 The interface to the parser uses virtual member functions.
51 To use this class, derive your type from @ref basic_parser. When
52 bytes are presented, the implementation will make a series of zero
53 or more calls to virtual functions, which the derived class must
56 Every virtual function must be provided by the derived class,
57 or else a compilation error will be generated. The implementation
58 will make sure that `ec` is clear before each virtual function
59 is invoked. If a virtual function sets an error, it is propagated
60 out of the parser to the caller.
62 @tparam isRequest A `bool` indicating whether the parser will be
63 presented with a request or a response message.
65 @note If the parser encounters a field value with obs-fold
66 longer than 4 kilobytes in length, an error is generated.
68 template<bool isRequest>
70 : private detail::basic_parser_base
72 boost::optional<std::uint64_t>
74 boost::optional<std::uint64_t>(
75 default_body_limit(is_request{})); // max payload body
76 std::uint64_t len_ = 0; // size of chunk or body
77 std::uint64_t len0_ = 0; // content length if known
78 std::unique_ptr<char[]> buf_; // temp storage
79 std::size_t buf_len_ = 0; // size of buf_
80 std::size_t skip_ = 0; // resume search here
81 std::uint32_t header_limit_ = 8192; // max header size
82 unsigned short status_ = 0; // response status
83 state state_ = state::nothing_yet; // initial state
84 unsigned f_ = 0; // flags
86 // limit on the size of the stack flat buffer
87 static std::size_t constexpr max_stack_buffer = 8192;
// Bit masks for the parser's flag word `f_`. Each constant below
// occupies its own bit (bits 0 through 13), so they can be combined
// with `|` and tested with `&`.
89 // Message will be complete after reading header
90 static unsigned constexpr flagSkipBody = 1<< 0;
92 // Consume input buffers across semantic boundaries
93 static unsigned constexpr flagEager = 1<< 1;
95 // The parser has read at least one byte
96 static unsigned constexpr flagGotSome = 1<< 2;
98 // Message semantics indicate a body is expected.
99 // cleared if flagSkipBody set
101 static unsigned constexpr flagHasBody = 1<< 3;
// The flags below carry no individual comments in this file. Three of
// them are tested by accessors declared further down in this class:
//   flagNeedEOF           - message semantics require an end of file
//   flagConnectionUpgrade - the message is an upgrade message
//   flagChunked           - last Transfer-Encoding value is "chunked"
// NOTE(review): the remaining flags are presumably recorded while the
// start-line and header fields are parsed; confirm their exact meaning
// against the implementation files included at the end of this header.
103 static unsigned constexpr flagHTTP11 = 1<< 4;
104 static unsigned constexpr flagNeedEOF = 1<< 5;
105 static unsigned constexpr flagExpectCRLF = 1<< 6;
106 static unsigned constexpr flagConnectionClose = 1<< 7;
107 static unsigned constexpr flagConnectionUpgrade = 1<< 8;
108 static unsigned constexpr flagConnectionKeepAlive = 1<< 9;
109 static unsigned constexpr flagContentLength = 1<< 10;
110 static unsigned constexpr flagChunked = 1<< 11;
111 static unsigned constexpr flagUpgrade = 1<< 12;
112 static unsigned constexpr flagFinalChunk = 1<< 13;
116 default_body_limit(std::true_type)
118 // limit for requests
119 return 1 * 1024 * 1024; // 1MB
124 default_body_limit(std::false_type)
126 // limit for responses
127 return 8 * 1024 * 1024; // 8MB
130 template<bool OtherIsRequest>
131 friend class basic_parser;
133 friend class basic_parser_test;
136 /// Default constructor
137 basic_parser() = default;
143 After the move, the only valid operation on the
144 moved-from object is destruction.
146 basic_parser(basic_parser &&) = default;
149 basic_parser& operator=(basic_parser &&) = default;
152 /// `true` if this parser parses requests, `false` for responses.
154 std::integral_constant<bool, isRequest>;
157 virtual ~basic_parser() = default;
160 basic_parser(basic_parser const&) = delete;
163 basic_parser& operator=(basic_parser const&) = delete;
165 /// Returns `true` if the parser has received at least one byte of input.
169 return state_ != state::nothing_yet;
172 /** Returns `true` if the message is complete.
174 The message is complete after the full header is produced
175 and one of the following is true:
177 @li The skip body option was set.
179 @li The semantics of the message indicate there is no body.
181 @li The semantics of the message indicate a body is expected,
182 and the entire body was parsed.
187 return state_ == state::complete;
190 /** Returns `true` if the parser has produced the full header.
193 is_header_done() const
195 return state_ > state::fields;
198 /** Returns `true` if the message is an upgrade message.
200 @note The return value is undefined unless
201 @ref is_header_done would return `true`.
206 return (f_ & flagConnectionUpgrade) != 0;
209 /** Returns `true` if the last value for Transfer-Encoding is "chunked".
211 @note The return value is undefined unless
212 @ref is_header_done would return `true`.
217 return (f_ & flagChunked) != 0;
220 /** Returns `true` if the message has keep-alive connection semantics.
222 This function always returns `false` if @ref need_eof would return
225 @note The return value is undefined unless
226 @ref is_header_done would return `true`.
231 /** Returns the optional value of Content-Length if known.
233 @note The return value is undefined unless
234 @ref is_header_done would return `true`.
236 boost::optional<std::uint64_t>
237 content_length() const;
239 /** Returns the remaining content length if known
241 If the message header specifies a Content-Length,
242 the return value will be the number of bytes remaining
243 in the payload body that have not yet been parsed.
245 @note The return value is undefined unless
246 @ref is_header_done would return `true`.
248 boost::optional<std::uint64_t>
249 content_length_remaining() const;
251 /** Returns `true` if the message semantics require an end of file.
253 Depending on the contents of the header, the parser may
254 require an end of file notification to know where the end
255 of the body lies. If this function returns `true` it will be
256 necessary to call @ref put_eof when there will never be additional
262 return (f_ & flagNeedEOF) != 0;
265 /** Set the limit on the payload body.
267 This function sets the maximum allowed size of the payload body,
268 before any encodings except chunked have been removed. Depending
269 on the message semantics, one of these cases will apply:
271 @li The Content-Length is specified and exceeds the limit. In
272 this case the result @ref error::body_limit is returned
273 immediately after the header is parsed.
275 @li The Content-Length is unspecified and the chunked encoding
276 is not specified as the last encoding. In this case the end of
277 message is determined by the end of file indicator on the
278 associated stream or input source. If a sufficient number of
279 body payload octets are presented to the parser to exceed the
280 configured limit, the parse fails with the result
281 @ref error::body_limit.
283 @li The Transfer-Encoding specifies the chunked encoding as the
284 last encoding. In this case, when the number of payload body
285 octets produced by removing the chunked encoding exceeds
286 the configured limit, the parse fails with the result
287 @ref error::body_limit.
289 Setting the limit after any body octets have been parsed
290 results in undefined behavior.
292 The default limit is 1MB for requests and 8MB for responses.
294 @param v An optional integral value representing the body limit.
295 If this is equal to `boost::none`, then the body limit is disabled.
298 body_limit(boost::optional<std::uint64_t> v)
303 /** Set a limit on the total size of the header.
305 This function sets the maximum allowed size of the header
306 including all field name, value, and delimiter characters
307 and also including the CRLF sequences in the serialized
308 input. If the end of the header is not found within the
309 limit of the header size, the error @ref error::header_limit
310 is returned by @ref put.
312 Setting the limit after any header octets have been parsed
313 results in undefined behavior.
316 header_limit(std::uint32_t v)
321 /// Returns `true` if the eager parse option is set.
325 return (f_ & flagEager) != 0;
328 /** Set the eager parse option.
330 Normally the parser returns after successfully parsing a structured
331 element (header, chunk header, or chunk body) even if there are octets
332 remaining in the input. This is necessary when attempting to parse the
333 header first, or when the caller wants to inspect information which may
334 be invalidated by subsequent parsing, such as a chunk extension. The
335 `eager` option controls whether the parser keeps going after parsing
336 a structured element if there are octets remaining in the buffer and no
337 error occurs. This option is automatically set or cleared during certain
338 stream operations to improve performance with no change in functionality.
340 The default setting is `false`.
342 @param v `true` to set the eager parse option or `false` to disable it.
353 /// Returns `true` if the skip parse option is set.
357 return (f_ & flagSkipBody) != 0;
360 /** Set the skip parse option.
362 This option controls whether or not the parser expects to see an HTTP
363 body, regardless of the presence or absence of certain fields such as
364 Content-Length or a chunked Transfer-Encoding. Depending on the request,
365 some responses do not carry a body. For example, a 200 response to a
366 CONNECT request from a tunneling proxy, or a response to a HEAD request.
367 In these cases, callers may use this function to inform the parser that
368 no body is expected. The parser will consider the message complete
369 after the header has been received.
371 @param v `true` to set the skip body option or `false` to disable it.
373 @note This function must be called before any bytes are processed.
378 /** Write a buffer sequence to the parser.
380 This function attempts to incrementally parse the HTTP
381 message data stored in the caller provided buffers. Upon
382 success, a positive return value indicates that the parser
383 made forward progress, consuming that number of
386 In some cases there may be an insufficient number of octets
387 in the input buffer in order to make forward progress. This
388 is indicated by the code @ref error::need_more. When
389 this happens, the caller should place additional bytes into
390 the buffer sequence and call @ref put again.
392 The error code @ref error::need_more is special. When this
393 error is returned, a subsequent call to @ref put may succeed
394 if the buffers have been updated. Otherwise, upon error
395 the parser may not be restarted.
397 @param buffers An object meeting the requirements of
398 <em>ConstBufferSequence</em> that represents the next chunk of
399 message data. If the length of this buffer sequence is
400 one, the implementation will not allocate additional memory.
401 The class @ref beast::basic_flat_buffer is provided as one way to
402 meet this requirement.
404 @param ec Set to the error, if any occurred.
406 @return The number of octets consumed in the buffer
407 sequence. The caller should remove these octets even if the
410 template<class ConstBufferSequence>
412 put(ConstBufferSequence const& buffers, error_code& ec);
414 #if ! BOOST_BEAST_DOXYGEN
416 put(net::const_buffer buffer,
420 /** Inform the parser that the end of stream was reached.
422 In certain cases, HTTP needs to know where the end of
423 the stream is. For example, sometimes servers send
424 responses without Content-Length and expect the client
425 to consume input (for the body) until EOF. Callbacks
426 and errors will still be processed as usual.
428 This is typically called when a read from the
429 underlying stream object sets the error code to
432 @note Only valid after parsing a complete header.
434 @param ec Set to the error, if any occurred.
437 put_eof(error_code& ec);
440 /** Called after receiving the request-line.
442 This virtual function is invoked after receiving a request-line
443 when parsing HTTP requests.
444 It can only be called when `isRequest == true`.
446 @param method The verb enumeration. If the method string is not
447 one of the predefined strings, this value will be @ref verb::unknown.
449 @param method_str The unmodified string representing the verb.
451 @param target The request-target.
453 @param version The HTTP-version. This will be 10 for HTTP/1.0,
456 @param ec An output parameter which the function may set to indicate
457 an error. The error will be clear before this function is invoked.
463 string_view method_str,
468 /** Called after receiving the status-line.
470 This virtual function is invoked after receiving a status-line
471 when parsing HTTP responses.
472 It can only be called when `isRequest == false`.
474 @param code The numeric status code.
476 @param reason The reason-phrase. Note that this value is
477 now obsolete, and only provided for historical or diagnostic
480 @param version The HTTP-version. This will be 10 for HTTP/1.0,
483 @param ec An output parameter which the function may set to indicate
484 an error. The error will be clear before this function is invoked.
494 /** Called once for each complete field in the HTTP header.
496 This virtual function is invoked for each field that is received
497 while parsing an HTTP message.
499 @param name The known field enum value. If the name of the field
500 is not recognized, this value will be @ref field::unknown.
502 @param name_string The exact name of the field as received from
503 the input, represented as a string.
505 @param value A string holding the value of the field.
507 @param ec An output parameter which the function may set to indicate
508 an error. The error will be clear before this function is invoked.
514 string_view name_string,
518 /** Called once after the complete HTTP header is received.
520 This virtual function is invoked once, after the complete HTTP
521 header is received while parsing a message.
523 @param ec An output parameter which the function may set to indicate
524 an error. The error will be clear before this function is invoked.
528 on_header_impl(error_code& ec) = 0;
530 /** Called once before the body is processed.
532 This virtual function is invoked once, before the content body is
533 processed (but after the complete header is received).
535 @param content_length A value representing the content length in
536 bytes if the length is known (this can include a zero length).
537 Otherwise, the value will be `boost::none`.
539 @param ec An output parameter which the function may set to indicate
540 an error. The error will be clear before this function is invoked.
545 boost::optional<std::uint64_t> const& content_length,
548 /** Called each time additional data is received representing the content body.
550 This virtual function is invoked for each piece of the body which is
551 received while parsing a message. This function is only used when
552 no chunked transfer encoding is present.
554 @param body A string holding the additional body contents. This may
555 contain nulls or unprintable characters.
557 @param ec An output parameter which the function may set to indicate
558 an error. The error will be clear before this function is invoked.
560 @see on_chunk_body_impl
568 /** Called each time a new chunk header of a chunk encoded body is received.
570 This function is invoked each time a new chunk header is received.
571 The function is only used when the chunked transfer encoding is present.
573 @param size The size of this chunk, in bytes.
575 @param extensions A string containing the entire chunk extensions.
576 This may be empty, indicating no extensions are present.
578 @param ec An output parameter which the function may set to indicate
579 an error. The error will be clear before this function is invoked.
583 on_chunk_header_impl(
585 string_view extensions,
588 /** Called each time additional data is received representing part of a body chunk.
590 This virtual function is invoked for each piece of the body which is
591 received while parsing a message. This function is only used when
592 the chunked transfer encoding is present.
594 @param remain The number of bytes remaining in this chunk. This includes
595 the contents of passed `body`. If this value is zero, then this represents
598 @param body A string holding the additional body contents. This may
599 contain nulls or unprintable characters.
601 @param ec An output parameter which the function may set to indicate
602 an error. The error will be clear before this function is invoked.
604 @return This function should return the number of bytes actually consumed
605 from the `body` value. Any bytes that are not consumed on this call
606 will be presented in a subsequent call.
613 std::uint64_t remain,
617 /** Called once when the complete message is received.
619 This virtual function is invoked once, after successfully parsing
620 a complete HTTP message.
622 @param ec An output parameter which the function may set to indicate
623 an error. The error will be clear before this function is invoked.
627 on_finish_impl(error_code& ec) = 0;
631 boost::optional<std::uint64_t>
632 content_length_unchecked() const;
634 template<class ConstBufferSequence>
638 ConstBufferSequence const& buffers,
643 char const* p, std::size_t n,
648 char const*& p, char const* last,
649 error_code& ec, std::true_type);
653 char const*& p, char const* last,
654 error_code& ec, std::false_type);
658 char const*& p, char const* last,
663 error_code& ec, std::true_type);
667 error_code& ec, std::false_type);
670 parse_body(char const*& p,
671 std::size_t n, error_code& ec);
674 parse_body_to_eof(char const*& p,
675 std::size_t n, error_code& ec);
678 parse_chunk_header(char const*& p,
679 std::size_t n, error_code& ec);
682 parse_chunk_body(char const*& p,
683 std::size_t n, error_code& ec);
687 string_view value, error_code& ec);
694 #include <boost/beast/http/impl/basic_parser.hpp>
695 #ifdef BOOST_BEAST_HEADER_ONLY
696 #include <boost/beast/http/impl/basic_parser.ipp>