2 // Copyright (c) 2013-2017 Vinnie Falco (vinnie dot falco at gmail dot com)
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
8 #ifndef BEAST_HTTP_BASIC_PARSER_HPP
9 #define BEAST_HTTP_BASIC_PARSER_HPP
11 #include <beast/config.hpp>
12 #include <beast/core/error.hpp>
13 #include <beast/http/detail/basic_parser.hpp>
14 #include <boost/asio/buffer.hpp>
15 #include <boost/optional.hpp>
16 #include <boost/assert.hpp>
17 #include <boost/utility/string_ref.hpp>
24 /** Describes the parser's current state.
26 The state is expressed as the type of data that
27 @ref basic_parser is expecting to see in subsequently
30 enum class parse_state
32 /// Expecting one or more header octets
35 /// Expecting one or more body octets
38 /// Expecting zero or more body octets followed by EOF
41 /// Expecting additional chunk header octets
44 /// Expecting one or more chunk body octets
47 /** The parsing is complete.
49 The parse is considered complete when the full header
50 is received and either the full body is received, or
51 the semantics of the message indicate that no body
52 is expected. This includes the case where the caller
53 has indicated to the parser that no body is expected,
54 for example when receiving a response to a HEAD request.
59 /** A parser for decoding HTTP/1 wire format messages.
61 This parser is designed to efficiently parse messages in the
62 HTTP/1 wire format. It allocates no memory when input is
63 presented as a single contiguous buffer, and uses minimal
64 state. It will handle chunked encoding and it understands
65 the semantics of the Connection, Content-Length, and Upgrade
68 The interface uses CRTP (Curiously Recurring Template Pattern).
69 To use this class, derive from @ref basic_parser. When bytes
70 are presented, the implementation will make a series of zero
71 or more calls to derived class member functions (referred to
72 as "callbacks" from here on) matching a specific signature.
74 Every callback must be provided by the derived class, or else
75 a compilation error will be generated. This exemplar shows
76 the signature and description of the callbacks required in
82 template<bool isRequest>
84 : basic_parser<isRequest, derived<isRequest>>
86 // The type used when providing a mutable
87 // buffer sequence in which to store body data.
89 using mutable_buffers_type = ...;
91 // When isRequest == true, called
92 // after the Request Line is received.
96 boost::string_ref const& method,
97 boost::string_ref const& path,
101 // When isRequest == false, called
102 // after the Status Line is received.
107 boost::string_ref const& reason,
111 // Called after receiving a field/value pair.
115 boost::string_ref const& name,
116 boost::string_ref const& value,
119 // Called after the header is complete.
125 // Called once before the body, if any, is started.
126 // This will only be called if the semantics of the
127 // message indicate that a body exists, including
128 // an indicated body of zero length.
133 // Called zero or more times to provide body data.
135 // Only used if isDirect == false
139 boost::string_ref const& s,
142 // Called zero or more times to retrieve a mutable
143 // buffer sequence in which to store body data.
145 // Only used if isDirect == true
151 // Called after body data has been stored in the
152 // buffer returned by the previous call to on_prepare.
154 // Only used if isDirect == true
160 // If the Transfer-Encoding is specified, and the
161 // last item in the list of encodings is "chunked",
162 // called after receiving a chunk header or a final
167 std::uint64_t length, // Length of this chunk
168 boost::string_ref const& ext, // The chunk extensions, if any
171 // Called once when the message is complete.
172 // This will be called even if there is no body.
175 on_complete(error_code& ec);
179 If a callback sets the error code, the error will be propagated
180 to the caller of the parser. Behavior of parsing after an error
181 is returned is undefined.
183 When the parser state is positioned to read bytes belonging to
184 the body, calling @ref write will implicitly
185 cause a buffer copy (because bytes are first transferred to the
186 dynamic buffer). To avoid this copy, the additional functions
187 @ref copy_body, @ref prepare_body, and @ref commit_body are
188 provided to allow the caller to read bytes directly into buffers
189 supplied by the parser.
191 The parser is optimized for the case where the input buffer
192 sequence consists of a single contiguous buffer. The
193 @ref beast::flat_streambuf class is provided, which guarantees
194 that the input sequence of the stream buffer will be represented
195 by exactly one contiguous buffer. To ensure the optimum performance
196 of the parser, use @ref beast::flat_streambuf with HTTP algorithms
197 such as @ref beast::http::read, @ref beast::http::read_some,
198 @ref beast::http::async_read, and @ref beast::http::async_read_some.
199 Alternatively, the caller may use custom techniques to ensure that
200 the structured portion of the HTTP message (header or chunk header)
201 is contained in a linear buffer.
203 @tparam isRequest A `bool` indicating whether the parser will be
204 presented with a request or a response message.
206 @tparam isDirect A `bool` indicating whether the parser interface
207 supports reading body data directly into parser-provided buffers.
209 @tparam Derived The derived class type. This is part of the
210 Curiously Recurring Template Pattern interface.
212 template<bool isRequest, bool isDirect, class Derived>
214 : private detail::basic_parser_base
216 template<bool OtherIsRequest,
217 bool OtherIsDirect, class OtherDerived>
218 friend class basic_parser;
220 // Message will be complete after reading header
221 static unsigned constexpr flagSkipBody = 1<< 0;
225 static unsigned constexpr flagOnBody = 1<< 1;
227 // The parser has read at least one byte
228 static unsigned constexpr flagGotSome = 1<< 2;
230 // Message semantics indicate a body is expected.
231 // cleared if flagSkipBody set
233 static unsigned constexpr flagHasBody = 1<< 3;
235 static unsigned constexpr flagHTTP11 = 1<< 4;
236 static unsigned constexpr flagNeedEOF = 1<< 5;
237 static unsigned constexpr flagExpectCRLF = 1<< 6;
238 static unsigned constexpr flagFinalChunk = 1<< 7;
239 static unsigned constexpr flagConnectionClose = 1<< 8;
240 static unsigned constexpr flagConnectionUpgrade = 1<< 9;
241 static unsigned constexpr flagConnectionKeepAlive = 1<< 10;
242 static unsigned constexpr flagContentLength = 1<< 11;
243 static unsigned constexpr flagChunked = 1<< 12;
244 static unsigned constexpr flagUpgrade = 1<< 13;
246 std::uint64_t len_; // size of chunk or body
247 std::unique_ptr<char[]> buf_;
248 std::size_t buf_len_ = 0;
249 std::size_t skip_ = 0; // search from here
250 std::size_t x_; // scratch variable
251 unsigned f_ = 0; // flags
252 parse_state state_ = parse_state::header;
253 boost::string_ref ext_;
254 boost::string_ref body_;
257 /// Copy constructor (disallowed)
258 basic_parser(basic_parser const&) = delete;
260 /// Copy assignment (disallowed)
261 basic_parser& operator=(basic_parser const&) = delete;
263 /// Default constructor
264 basic_parser() = default;
266 /// `true` if this parser parses requests, `false` for responses.
267 static bool constexpr is_request = isRequest;
270 ~basic_parser() = default;
274 After the move, the only valid operation on the
275 moved-from object is destruction.
277 template<bool OtherIsDirect, class OtherDerived>
278 basic_parser(basic_parser<
279 isRequest, OtherIsDirect, OtherDerived>&&);
281 /** Set the skip body option.
283 The option controls whether or not the parser expects to
284 see an HTTP body, regardless of the presence or absence of
285 certain fields such as Content-Length.
287 Depending on the request, some responses do not carry a body.
288 For example, a 200 response to a CONNECT request from a
289 tunneling proxy. In these cases, callers may use this function
290 to inform the parser that no body is expected. The parser will
291 consider the message complete after the header has been received.
293 @note This function must be called before any bytes are processed.
298 /** Returns the current parser state.
300 The parser state indicates what octets the parser
301 expects to see next in the input stream.
309 /// Returns `true` if the parser has received at least one byte of input.
313 return (f_ & flagGotSome) != 0;
316 /// Returns `true` if the complete header has been parsed.
320 return state_ != parse_state::header;
323 /** Returns `true` if a Content-Length is specified.
325 @note Only valid after parsing a complete header.
328 got_content_length() const
330 return (f_ & flagContentLength) != 0;
333 /** Returns `true` if the message is complete.
335 The message is complete after a full header is
336 parsed and one of the following is true:
338 @li @ref skip_body was called
340 @li The semantics of the message indicate there is no body.
342 @li The semantics of the message indicate a body is
343 expected, and the entire body was received.
348 return state_ == parse_state::complete;
351 /** Returns `true` if the message is an upgrade message.
353 @note Only valid after parsing a complete header.
358 return (f_ & flagConnectionUpgrade) != 0;
361 /** Returns `true` if keep-alive is specified
363 @note Only valid after parsing a complete header.
366 is_keep_alive() const;
368 /** Returns `true` if the chunked Transfer-Encoding is specified.
370 @note Only valid after parsing a complete header.
375 return (f_ & flagChunked) != 0;
378 /** Write part of a buffer sequence to the parser.
380 This function attempts to parse the HTTP message
381 stored in the caller provided buffers. Upon success,
382 a positive return value indicates that the parser
383 made forward progress, consuming that number of
386 A return value of zero indicates that the parser
387 requires additional input. In this case the caller
388 should append additional bytes to the input buffer
389 sequence and call @ref write again.
391 @param buffers An object meeting the requirements of
392 @b ConstBufferSequence that represents the message.
394 @param ec Set to the error, if any occurred.
396 @return The number of bytes consumed in the buffer
399 template<class ConstBufferSequence>
401 write(ConstBufferSequence const& buffers, error_code& ec);
405 write(boost::asio::const_buffers_1 const& buffer,
409 /** Inform the parser that the end of stream was reached.
411 In certain cases, HTTP needs to know where the end of
412 the stream is. For example, sometimes servers send
413 responses without Content-Length and expect the client
414 to consume input (for the body) until EOF. Callbacks
415 and errors will still be processed as usual.
417 This is typically called when a read from the
418 underlying stream object sets the error code to
419 `boost::asio::error::eof`.
421 @note Only valid after parsing a complete header.
423 @param ec Set to the error, if any occurred.
426 write_eof(error_code& ec);
428 /** Returns the number of bytes remaining in the body or chunk.
430 If a Content-Length is specified and the parser state
431 is equal to @ref beast::http::parse_state::body, this will return
432 the number of bytes remaining in the body. If the
433 chunked Transfer-Encoding is indicated and the parser
434 state is equal to @ref beast::http::parse_state::chunk_body, this
435 will return the number of bytes remaining in the chunk.
436 Otherwise, the function behavior is undefined.
442 state_ == parse_state::body ||
443 state_ == parse_state::chunk_body);
447 /** Returns the body data parsed in the last call to @ref write.
449 This buffer is invalidated after any call to @ref write
452 @note If the last call to @ref write came from the input
453 area of a @b DynamicBuffer object, a call to the dynamic
454 buffer's `consume` function may invalidate this return
457 boost::string_ref const&
460 // This function is not available when isDirect==true
461 static_assert(! isDirect, "");
465 /** Returns the chunk extension parsed in the last call to @ref write.
467 This buffer is invalidated after any call to @ref write
470 @note If the last call to @ref write came from the input
471 area of a @b DynamicBuffer object, a call to the dynamic
472 buffer's `consume` function may invalidate this return
475 boost::string_ref const&
476 chunk_extension() const
478 // This function is not available when isDirect==true
479 static_assert(! isDirect, "");
483 /** Returns the optional value of Content-Length if known.
485 @note The return value is undefined unless a complete
486 header has been parsed.
488 boost::optional<std::uint64_t>
489 content_length() const
491 BOOST_ASSERT(got_header());
492 if(! (f_ & flagContentLength))
497 /** Copy leftover body data from the dynamic buffer.
499 @note This member function is only available when
502 @return The number of bytes processed from the dynamic
503 buffer. The caller should remove these bytes by calling
504 `consume` on the buffer.
506 template<class DynamicBuffer>
508 copy_body(DynamicBuffer& dynabuf);
510 /** Returns a set of buffers for storing body data.
512 @note This member function is only available when
515 @param limit The maximum size, in bytes, of the
516 returned buffer sequence. The actual size
517 of the buffer sequence may be lower than this number.
519 template<class MutableBufferSequence>
521 prepare_body(boost::optional<
522 MutableBufferSequence>& buffers, std::size_t limit);
524 /** Commit body data.
526 @note This member function is only available when
530 commit_body(std::size_t n);
532 /** Indicate that body octets have been consumed.
535 consume(std::size_t n)
537 BOOST_ASSERT(n <= len_);
539 state_ == parse_state::body ||
540 state_ == parse_state::chunk_body);
544 if(state_ == parse_state::body)
545 state_ = parse_state::complete;
547 state_ = parse_state::chunk_header;
551 /** Consume all remaining body data.
553 This function instructs the parser to advance the
554 state past any expected body octets. Callers who
555 wish to read and process the body themselves will
559 consume_body(error_code& ec);
566 return *static_cast<Derived*>(this);
569 template<class ConstBufferSequence>
572 ConstBufferSequence const& buffers);
575 do_write(boost::asio::const_buffers_1 const& buffer,
576 error_code& ec, std::true_type);
579 do_write(boost::asio::const_buffers_1 const& buffer,
580 error_code& ec, std::false_type);
583 parse_startline(char const*& it,
584 int& version, int& status,
585 error_code& ec, std::true_type);
588 parse_startline(char const*& it,
589 int& version, int& status,
590 error_code& ec, std::false_type);
593 parse_fields(char const*& it,
594 char const* last, error_code& ec);
598 boost::string_ref const& name,
599 boost::string_ref const& value,
603 parse_header(char const* p,
604 std::size_t n, error_code& ec);
607 do_header(int, std::true_type);
610 do_header(int status, std::false_type);
613 maybe_do_body_direct();
616 maybe_do_body_indirect(error_code& ec);
619 parse_chunk_header(char const* p,
620 std::size_t n, error_code& ec);
623 parse_body(char const* p,
624 std::size_t n, error_code& ec);
627 parse_body_to_eof(char const* p,
628 std::size_t n, error_code& ec);
631 parse_chunk_body(char const* p,
632 std::size_t n, error_code& ec);
635 do_complete(error_code& ec);
641 #include <beast/http/impl/basic_parser.ipp>