]> git.proxmox.com Git - ceph.git/blob - ceph/src/Beast/include/beast/http/basic_parser.hpp
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / Beast / include / beast / http / basic_parser.hpp
1 //
2 // Copyright (c) 2013-2017 Vinnie Falco (vinnie dot falco at gmail dot com)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 //
7
8 #ifndef BEAST_HTTP_BASIC_PARSER_HPP
9 #define BEAST_HTTP_BASIC_PARSER_HPP
10
11 #include <beast/config.hpp>
12 #include <beast/core/error.hpp>
13 #include <beast/http/detail/basic_parser.hpp>
14 #include <boost/asio/buffer.hpp>
15 #include <boost/optional.hpp>
16 #include <boost/assert.hpp>
17 #include <boost/utility/string_ref.hpp>
18 #include <memory>
19 #include <utility>
20
21 namespace beast {
22 namespace http {
23
/** Describes the parser's current state.

    The state is expressed as the type of data that
    @ref basic_parser is expecting to see in subsequently
    provided octets.
*/
enum class parse_state
{
    /// Expecting one or more header octets
    header = 0,

    /// Expecting one or more body octets
    body = 1,

    /// Expecting zero or more body octets followed by EOF
    body_to_eof = 2,

    /// Expecting additional chunk header octets
    chunk_header = 3,

    /// Expecting one or more chunk body octets
    chunk_body = 4,

    /** The parsing is complete.

        The parse is considered complete when the full header
        is received and either the full body is received, or
        the semantics of the message indicate that no body
        is expected. This includes the case where the caller
        has indicated to the parser that no body is expected,
        for example when receiving a response to a HEAD request.
    */
    complete = 5
};
58
59 /** A parser for decoding HTTP/1 wire format messages.
60
61 This parser is designed to efficiently parse messages in the
62 HTTP/1 wire format. It allocates no memory when input is
63 presented as a single contiguous buffer, and uses minimal
64 state. It will handle chunked encoding and it understands
65 the semantics of the Connection, Content-Length, and Upgrade
66 fields.
67
68 The interface uses CRTP (Curiously Recurring Template Pattern).
69 To use this class, derive from @ref basic_parser. When bytes
70 are presented, the implementation will make a series of zero
71 or more calls to derived class member functions (referred to
72 as "callbacks" from here on) matching a specific signature.
73
74 Every callback must be provided by the derived class, or else
75 a compilation error will be generated. This exemplar shows
76 the signature and description of the callbacks required in
77 the derived class.
78
79 @par Derived Example
80
81 @code
82 template<bool isRequest, bool isDirect>
83 struct derived
84 : basic_parser<isRequest, isDirect, derived<isRequest, isDirect>>
85 {
86 // The type used when providing a mutable
87 // buffer sequence in which to store body data.
88 //
89 using mutable_buffers_type = ...;
90
91 // When isRequest == true, called
92 // after the Request Line is received.
93 //
94 void
95 on_request(
96 boost::string_ref const& method,
97 boost::string_ref const& path,
98 int version,
99 error_code& ec);
100
101 // When isRequest == false, called
102 // after the Status Line is received.
103 //
104 void
105 on_response(
106 int status,
107 boost::string_ref const& reason,
108 int version,
109 error_code& ec);
110
111 // Called after receiving a field/value pair.
112 //
113 void
114 on_field(
115 boost::string_ref const& name,
116 boost::string_ref const& value,
117 error_code& ec);
118
119 // Called after the header is complete.
120 //
121 void
122 on_header(
123 error_code& ec);
124
125 // Called once before the body, if any, is started.
126 // This will only be called if the semantics of the
127 // message indicate that a body exists, including
128 // an indicated body of zero length.
129 //
130 void
131 on_body();
132
133 // Called zero or more times to provide body data.
134 //
135 // Only used if isDirect == false
136 //
137 void
138 on_data(
139 boost::string_ref const& s,
140 error_code& ec);
141
142 // Called zero or more times to retrieve a mutable
143 // buffer sequence in which to store body data.
144 //
145 // Only used if isDirect == true
146 //
147 mutable_buffers_type
148 on_prepare(
149 std::size_t n);
150
151 // Called after body data has been stored in the
152 // buffer returned by the previous call to on_prepare.
153 //
154 // Only used if isDirect == true
155 //
156 void
157 on_commit(
158 std::size_t n);
159
160 // If the Transfer-Encoding is specified, and the
161 // last item in the list of encodings is "chunked",
162 // called after receiving a chunk header or a final
163 // chunk.
164 //
165 void
166 on_chunk(
167 std::uint64_t length, // Length of this chunk
168 boost::string_ref const& ext, // The chunk extensions, if any
169 error_code& ec);
170
171 // Called once when the message is complete.
172 // This will be called even if there is no body.
173 //
174 void
175 on_complete(error_code& ec);
176 };
177 @endcode
178
179 If a callback sets the error code, the error will be propagated
180 to the caller of the parser. Behavior of parsing after an error
181 is returned is undefined.
182
183 When the parser state is positioned to read bytes belonging to
184 the body, calling @ref write will implicitly
185 cause a buffer copy (because bytes are first transferred to the
186 dynamic buffer). To avoid this copy, the additional functions
187 @ref copy_body, @ref prepare_body, and @ref commit_body are
188 provided to allow the caller to read bytes directly into buffers
189 supplied by the parser.
190
191 The parser is optimized for the case where the input buffer
192 sequence consists of a single contiguous buffer. The
193 @ref beast::flat_streambuf class is provided, which guarantees
194 that the input sequence of the stream buffer will be represented
195 by exactly one contiguous buffer. To ensure the optimum performance
196 of the parser, use @ref beast::flat_streambuf with HTTP algorithms
197 such as @ref beast::http::read, @ref beast::http::read_some,
198 @ref beast::http::async_read, and @ref beast::http::async_read_some.
199 Alternatively, the caller may use custom techniques to ensure that
200 the structured portion of the HTTP message (header or chunk header)
201 is contained in a linear buffer.
202
203 @tparam isRequest A `bool` indicating whether the parser will be
204 presented with a request or a response message.
205
206 @tparam isDirect A `bool` indicating whether the parser interface
207 supports reading body data directly into parser-provided buffers.
208
209 @tparam Derived The derived class type. This is part of the
210 Curiously Recurring Template Pattern interface.
211 */
212 template<bool isRequest, bool isDirect, class Derived>
213 class basic_parser
214 : private detail::basic_parser_base
215 {
216 template<bool OtherIsRequest,
217 bool OtherIsDirect, class OtherDerived>
218 friend class basic_parser;
219
220 // Message will be complete after reading header
221 static unsigned constexpr flagSkipBody = 1<< 0;
222
223
224
225 static unsigned constexpr flagOnBody = 1<< 1;
226
227 // The parser has read at least one byte
228 static unsigned constexpr flagGotSome = 1<< 2;
229
230 // Message semantics indicate a body is expected.
231 // cleared if flagSkipBody set
232 //
233 static unsigned constexpr flagHasBody = 1<< 3;
234
235 static unsigned constexpr flagHTTP11 = 1<< 4;
236 static unsigned constexpr flagNeedEOF = 1<< 5;
237 static unsigned constexpr flagExpectCRLF = 1<< 6;
238 static unsigned constexpr flagFinalChunk = 1<< 7;
239 static unsigned constexpr flagConnectionClose = 1<< 8;
240 static unsigned constexpr flagConnectionUpgrade = 1<< 9;
241 static unsigned constexpr flagConnectionKeepAlive = 1<< 10;
242 static unsigned constexpr flagContentLength = 1<< 11;
243 static unsigned constexpr flagChunked = 1<< 12;
244 static unsigned constexpr flagUpgrade = 1<< 13;
245
246 std::uint64_t len_; // size of chunk or body
247 std::unique_ptr<char[]> buf_;
248 std::size_t buf_len_ = 0;
249 std::size_t skip_ = 0; // search from here
250 std::size_t x_; // scratch variable
251 unsigned f_ = 0; // flags
252 parse_state state_ = parse_state::header;
253 boost::string_ref ext_;
254 boost::string_ref body_;
255
256 public:
257 /// Copy constructor (disallowed)
258 basic_parser(basic_parser const&) = delete;
259
260 /// Copy assignment (disallowed)
261 basic_parser& operator=(basic_parser const&) = delete;
262
263 /// Default constructor
264 basic_parser() = default;
265
266 /// `true` if this parser parses requests, `false` for responses.
267 static bool constexpr is_request = isRequest;
268
269 /// Destructor
270 ~basic_parser() = default;
271
272 /** Move constructor
273
274 After the move, the only valid operation on the
275 moved-from object is destruction.
276 */
277 template<bool OtherIsDirect, class OtherDerived>
278 basic_parser(basic_parser<
279 isRequest, OtherIsDirect, OtherDerived>&&);
280
281 /** Set the skip body option.
282
283 The option controls whether or not the parser expects to
284 see an HTTP body, regardless of the presence or absence of
285 certain fields such as Content-Length.
286
287 Depending on the request, some responses do not carry a body.
288 For example, a 200 response to a CONNECT request from a
289 tunneling proxy. In these cases, callers may use this function
290 inform the parser that no body is expected. The parser will
291 consider the message complete after the header has been received.
292
293 @note This function must called before any bytes are processed.
294 */
295 void
296 skip_body();
297
298 /** Returns the current parser state.
299
300 The parser state indicates what octets the parser
301 expects to see next in the input stream.
302 */
303 parse_state
304 state() const
305 {
306 return state_;
307 }
308
309 /// Returns `true` if the parser has received at least one byte of input.
310 bool
311 got_some() const
312 {
313 return (f_ & flagGotSome) != 0;
314 }
315
316 /// Returns `true` if the complete header has been parsed.
317 bool
318 got_header() const
319 {
320 return state_ != parse_state::header;
321 }
322
323 /** Returns `true` if a Content-Length is specified.
324
325 @note Only valid after parsing a complete header.
326 */
327 bool
328 got_content_length() const
329 {
330 return (f_ & flagContentLength) != 0;
331 }
332
333 /** Returns `true` if the message is complete.
334
335 The message is complete after a full header is
336 parsed and one of the following is true:
337
338 @li @ref skip_body was called
339
340 @li The semantics of the message indicate there is no body.
341
342 @li The semantics of the message indicate a body is
343 expected, and the entire body was received.
344 */
345 bool
346 is_complete() const
347 {
348 return state_ == parse_state::complete;
349 }
350
351 /** Returns `true` if the message is an upgrade message.
352
353 @note Only valid after parsing a complete header.
354 */
355 bool
356 is_upgrade() const
357 {
358 return (f_ & flagConnectionUpgrade) != 0;
359 }
360
361 /** Returns `true` if keep-alive is specified
362
363 @note Only valid after parsing a complete header.
364 */
365 bool
366 is_keep_alive() const;
367
368 /** Returns `true` if the chunked Transfer-Encoding is specified.
369
370 @note Only valid after parsing a complete header.
371 */
372 bool
373 is_chunked() const
374 {
375 return (f_ & flagChunked) != 0;
376 }
377
378 /** Write part of a buffer sequence to the parser.
379
380 This function attempts to parse the HTTP message
381 stored in the caller provided buffers. Upon success,
382 a positive return value indicates that the parser
383 made forward progress, consuming that number of
384 bytes.
385
386 A return value of zero indicates that the parser
387 requires additional input. In this case the caller
388 should append additional bytes to the input buffer
389 sequence and call @ref write again.
390
391 @param buffers An object meeting the requirements of
392 @b ConstBufferSequence that represents the message.
393
394 @param ec Set to the error, if any occurred.
395
396 @return The number of bytes consumed in the buffer
397 sequence.
398 */
399 template<class ConstBufferSequence>
400 std::size_t
401 write(ConstBufferSequence const& buffers, error_code& ec);
402
403 #if ! BEAST_DOXYGEN
404 std::size_t
405 write(boost::asio::const_buffers_1 const& buffer,
406 error_code& ec);
407 #endif
408
409 /** Inform the parser that the end of stream was reached.
410
411 In certain cases, HTTP needs to know where the end of
412 the stream is. For example, sometimes servers send
413 responses without Content-Length and expect the client
414 to consume input (for the body) until EOF. Callbacks
415 and errors will still be processed as usual.
416
417 This is typically called when a read from the
418 underlying stream object sets the error code to
419 `boost::asio::error::eof`.
420
421 @note Only valid after parsing a complete header.
422
423 @param ec Set to the error, if any occurred.
424 */
425 void
426 write_eof(error_code& ec);
427
428 /** Returns the number of bytes remaining in the body or chunk.
429
430 If a Content-Length is specified and the parser state
431 is equal to @ref beast::http::parse_state::body, this will return
432 the number of bytes remaining in the body. If the
433 chunked Transfer-Encoding is indicated and the parser
434 state is equal to @ref beast::http::parse_state::chunk_body, this
435 will return the number of bytes remaining in the chunk.
436 Otherwise, the function behavior is undefined.
437 */
438 std::uint64_t
439 size() const
440 {
441 BOOST_ASSERT(
442 state_ == parse_state::body ||
443 state_ == parse_state::chunk_body);
444 return len_;
445 }
446
447 /** Returns the body data parsed in the last call to @ref write.
448
449 This buffer is invalidated after any call to @ref write
450 or @ref write_eof.
451
452 @note If the last call to @ref write came from the input
453 area of a @b DynamicBuffer object, a call to the dynamic
454 buffer's `consume` function may invalidate this return
455 value.
456 */
457 boost::string_ref const&
458 body() const
459 {
460 // This function not available when isDirect==true
461 static_assert(! isDirect, "");
462 return body_;
463 }
464
465 /** Returns the chunk extension parsed in the last call to @ref write.
466
467 This buffer is invalidated after any call to @ref write
468 or @ref write_eof.
469
470 @note If the last call to @ref write came from the input
471 area of a @b DynamicBuffer object, a call to the dynamic
472 buffer's `consume` function may invalidate this return
473 value.
474 */
475 boost::string_ref const&
476 chunk_extension() const
477 {
478 // This function not available when isDirect==true
479 static_assert(! isDirect, "");
480 return ext_;
481 }
482
483 /** Returns the optional value of Content-Length if known.
484
485 @note The return value is undefined unless a complete
486 header has been parsed.
487 */
488 boost::optional<std::uint64_t>
489 content_length() const
490 {
491 BOOST_ASSERT(got_header());
492 if(! (f_ & flagContentLength))
493 return boost::none;
494 return len_;
495 }
496
497 /** Copy leftover body data from the dynamic buffer.
498
499 @note This member function is only available when
500 `isDirect==true`.
501
502 @return The number of bytes processed from the dynamic
503 buffer. The caller should remove these bytes by calling
504 `consume` on the buffer.
505 */
506 template<class DynamicBuffer>
507 std::size_t
508 copy_body(DynamicBuffer& dynabuf);
509
510 /** Returns a set of buffers for storing body data.
511
512 @note This member function is only available when
513 `isDirect==true`.
514
515 @param limit The maximum number of bytes in the
516 size of the returned buffer sequence. The actual size
517 of the buffer sequence may be lower than this number.
518 */
519 template<class MutableBufferSequence>
520 void
521 prepare_body(boost::optional<
522 MutableBufferSequence>& buffers, std::size_t limit);
523
524 /** Commit body data.
525
526 @note This member function is only available when
527 `isDirect==true`.
528 */
529 void
530 commit_body(std::size_t n);
531
532 /** Indicate that body octets have been consumed.
533 */
534 void
535 consume(std::size_t n)
536 {
537 BOOST_ASSERT(n <= len_);
538 BOOST_ASSERT(
539 state_ == parse_state::body ||
540 state_ == parse_state::chunk_body);
541 len_ -= n;
542 if(len_ == 0)
543 {
544 if(state_ == parse_state::body)
545 state_ = parse_state::complete;
546 else
547 state_ = parse_state::chunk_header;
548 }
549 }
550
551 /** Consume all remaining body data.
552
553 This function instructs the parser to advance the
554 state past any expected body octets. Callers who
555 wish to read and process the body themselves will
556 call this function.
557 */
558 void
559 consume_body(error_code& ec);
560
561 private:
562 inline
563 Derived&
564 impl()
565 {
566 return *static_cast<Derived*>(this);
567 }
568
569 template<class ConstBufferSequence>
570 boost::string_ref
571 maybe_flatten(
572 ConstBufferSequence const& buffers);
573
574 std::size_t
575 do_write(boost::asio::const_buffers_1 const& buffer,
576 error_code& ec, std::true_type);
577
578 std::size_t
579 do_write(boost::asio::const_buffers_1 const& buffer,
580 error_code& ec, std::false_type);
581
582 void
583 parse_startline(char const*& it,
584 int& version, int& status,
585 error_code& ec, std::true_type);
586
587 void
588 parse_startline(char const*& it,
589 int& version, int& status,
590 error_code& ec, std::false_type);
591
592 void
593 parse_fields(char const*& it,
594 char const* last, error_code& ec);
595
596 void
597 do_field(
598 boost::string_ref const& name,
599 boost::string_ref const& value,
600 error_code& ec);
601
602 std::size_t
603 parse_header(char const* p,
604 std::size_t n, error_code& ec);
605
606 void
607 do_header(int, std::true_type);
608
609 void
610 do_header(int status, std::false_type);
611
612 void
613 maybe_do_body_direct();
614
615 void
616 maybe_do_body_indirect(error_code& ec);
617
618 std::size_t
619 parse_chunk_header(char const* p,
620 std::size_t n, error_code& ec);
621
622 std::size_t
623 parse_body(char const* p,
624 std::size_t n, error_code& ec);
625
626 std::size_t
627 parse_body_to_eof(char const* p,
628 std::size_t n, error_code& ec);
629
630 std::size_t
631 parse_chunk_body(char const* p,
632 std::size_t n, error_code& ec);
633
634 void
635 do_complete(error_code& ec);
636 };
637
638 } // http
639 } // beast
640
641 #include <beast/http/impl/basic_parser.ipp>
642
643 #endif