]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/boost/beast/http/basic_parser.hpp
update sources to v12.2.3
[ceph.git] / ceph / src / boost / boost / beast / http / basic_parser.hpp
1 //
2 // Copyright (c) 2016-2017 Vinnie Falco (vinnie dot falco at gmail dot com)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 //
7 // Official repository: https://github.com/boostorg/beast
8 //
9
10 #ifndef BOOST_BEAST_HTTP_BASIC_PARSER_HPP
11 #define BOOST_BEAST_HTTP_BASIC_PARSER_HPP
12
13 #include <boost/beast/core/detail/config.hpp>
14 #include <boost/beast/core/error.hpp>
15 #include <boost/beast/core/string.hpp>
16 #include <boost/beast/http/field.hpp>
17 #include <boost/beast/http/verb.hpp>
18 #include <boost/beast/http/detail/basic_parser.hpp>
19 #include <boost/asio/buffer.hpp>
20 #include <boost/optional.hpp>
21 #include <boost/assert.hpp>
22 #include <limits>
23 #include <memory>
24 #include <type_traits>
25 #include <utility>
26
27 namespace boost {
28 namespace beast {
29 namespace http {
30
31 /** A parser for decoding HTTP/1 wire format messages.
32
33 This parser is designed to efficiently parse messages in the
34 HTTP/1 wire format. It allocates no memory when input is
35 presented as a single contiguous buffer, and uses minimal
36 state. It will handle chunked encoding and it understands
37 the semantics of the Connection, Content-Length, and Upgrade
38 fields.
39 The parser is optimized for the case where the input buffer
40 sequence consists of a single contiguous buffer. The
41 @ref flat_buffer class is provided, which guarantees
42 that the input sequence of the stream buffer will be represented
43 by exactly one contiguous buffer. To ensure the optimum performance
44 of the parser, use @ref flat_buffer with HTTP algorithms
45 such as @ref read, @ref read_some, @ref async_read, and @ref async_read_some.
46 Alternatively, the caller may use custom techniques to ensure that
47 the structured portion of the HTTP message (header or chunk header)
48 is contained in a linear buffer.
49
50 The interface uses CRTP (Curiously Recurring Template Pattern).
51 To use this class directly, derive from @ref basic_parser. When
52 bytes are presented, the implementation will make a series of zero
53 or more calls to derived class member functions (termed "callbacks"
54 in this context) matching a specific signature.
55
56 Every callback must be provided by the derived class, or else
57 a compilation error will be generated. This exemplar shows
58 the signature and description of the callbacks required in
59 the derived class.
60 For each callback, the function will ensure that `!ec` is `true`
61 if there was no error or set to the appropriate error code if
62 there was one. If an error is set, the value is propagated to
63 the caller of the parser.
64
65 @par Derived Class Requirements
66 @code
67 template<bool isRequest>
68 class derived
69 : public basic_parser<isRequest, derived<isRequest>>
70 {
71 private:
72 // The friend declaration is needed,
73 // otherwise the callbacks must be made public.
74 friend class basic_parser<isRequest, derived>;
75
76 /// Called after receiving the request-line (isRequest == true).
77 void
78 on_request_impl(
79 verb method, // The method verb, verb::unknown if no match
80 string_view method_str, // The method as a string
81 string_view target, // The request-target
82 int version, // The HTTP-version
83 error_code& ec); // The error returned to the caller, if any
84
85 /// Called after receiving the start-line (isRequest == false).
86 void
87 on_response_impl(
88 int code, // The status-code
89 string_view reason, // The obsolete reason-phrase
90 int version, // The HTTP-version
91 error_code& ec); // The error returned to the caller, if any
92
93 /// Called after receiving a header field.
94 void
95 on_field_impl(
96 field f, // The known-field enumeration constant
97 string_view name, // The field name string.
98 string_view value, // The field value
99 error_code& ec); // The error returned to the caller, if any
100
101 /// Called after the complete header is received.
102 void
103 on_header_impl(
104 error_code& ec); // The error returned to the caller, if any
105
106 /// Called just before processing the body, if a body exists.
107 void
108 on_body_init_impl(
109 boost::optional<
110 std::uint64_t> const&
111 content_length, // Content length if known, else `boost::none`
112 error_code& ec); // The error returned to the caller, if any
113
114 /// Called for each piece of the body, if a body exists.
115 //!
116 //! This is used when there is no chunked transfer coding.
117 //!
118 //! The function returns the number of bytes consumed from the
119 //! input buffer. Any input octets not consumed will be
120 //! presented on subsequent calls.
121 //!
122 std::size_t
123 on_body_impl(
124 string_view s, // A portion of the body
125 error_code& ec); // The error returned to the caller, if any
126
127 /// Called for each chunk header.
128 void
129 on_chunk_header_impl(
130 std::uint64_t size, // The size of the upcoming chunk,
131 // or zero for the last chunk
132 string_view extension, // The chunk extensions (may be empty)
133 error_code& ec); // The error returned to the caller, if any
134
135 /// Called to deliver the chunk body.
136 //!
137 //! This is used when there is a chunked transfer coding. The
138 //! implementation will automatically remove the encoding before
139 //! calling this function.
140 //!
141 //! The function returns the number of bytes consumed from the
142 //! input buffer. Any input octets not consumed will be
143 //! presented on subsequent calls.
144 //!
145 std::size_t
146 on_chunk_body_impl(
147 std::uint64_t remain, // The number of bytes remaining in the chunk,
148 // including what is being passed here.
149 // or zero for the last chunk
150 string_view body, // The next piece of the chunk body
151 error_code& ec); // The error returned to the caller, if any
152
153 /// Called when the complete message is parsed.
154 void
155 on_finish_impl(error_code& ec);
156
157 public:
158 derived() = default;
159 };
160 @endcode
161
162 @tparam isRequest A `bool` indicating whether the parser will be
163 presented with a request or a response message.
164
165 @tparam Derived The derived class type. This is part of the
166 Curiously Recurring Template Pattern interface.
167
168 @note If the parser encounters a field value with obs-fold
169 longer than 4 kilobytes in length, an error is generated.
170 */
171 template<bool isRequest, class Derived>
172 class basic_parser
173 : private detail::basic_parser_base
174 {
175 template<bool OtherIsRequest, class OtherDerived> // befriends every instantiation of this class template
176 friend class basic_parser; // NOTE(review): presumably lets the converting move constructor reach another instantiation's private members - confirm
177
178 // Upper bound on the size of the stack flat buffer
179 static constexpr std::size_t max_stack_buffer = 8192;
180
181 // Set when the message will be complete after reading the header
182 static constexpr unsigned flagSkipBody = 1u << 0;
183
184 // Set to consume input buffers across semantic boundaries
185 static constexpr unsigned flagEager = 1u << 1;
186
187 // Set once the parser has read at least one byte
188 static constexpr unsigned flagGotSome = 1u << 2;
189
190 // Set when message semantics indicate a body is expected;
191 // cleared if flagSkipBody is set.
192 //
193 static constexpr unsigned flagHasBody = 1u << 3;
194
195 static constexpr unsigned flagHTTP11 = 1u << 4;
196 static constexpr unsigned flagNeedEOF = 1u << 5;
197 static constexpr unsigned flagExpectCRLF = 1u << 6;
198 static constexpr unsigned flagConnectionClose = 1u << 7;
199 static constexpr unsigned flagConnectionUpgrade = 1u << 8;
200 static constexpr unsigned flagConnectionKeepAlive = 1u << 9;
201 static constexpr unsigned flagContentLength = 1u << 10;
202 static constexpr unsigned flagChunked = 1u << 11;
203 static constexpr unsigned flagUpgrade = 1u << 12;
204 static constexpr unsigned flagFinalChunk = 1u << 13;
205
206 static constexpr
207 std::uint64_t
208 default_body_limit(std::true_type)
209 {
210 // default cap applied to request bodies
211 return std::uint64_t{1024} * 1024; // 1MB
212 }
213
214 static constexpr
215 std::uint64_t
216 default_body_limit(std::false_type)
217 {
218 // default cap applied to response bodies
219 return std::uint64_t{8} * 1024 * 1024; // 8MB
220 }
221
222 std::uint64_t body_limit_{
223 default_body_limit(is_request{})}; // largest payload body accepted
224 std::uint64_t len_{0}; // size of chunk or body
225 std::unique_ptr<char[]> buf_; // temporary storage
226 std::size_t buf_len_{0}; // size of buf_
227 std::size_t skip_{0}; // where to resume searching
228 std::uint32_t header_limit_{8192}; // largest header accepted
229 unsigned short status_{0}; // response status
230 state state_{state::nothing_yet}; // begins in the initial state
231 unsigned f_{0}; // flag bits
232
233 protected:
234 /// Default constructor (protected, so only derived classes can construct a parser)
235 basic_parser() = default;
236
237 /// Move constructor (defaulted)
238 basic_parser(basic_parser &&) = default;
239
240 /// Move assignment (defaulted)
241 basic_parser& operator=(basic_parser &&) = default;
242
243 /** Converting move constructor from a parser with a different derived type
244
245 @note
246
247 After the move, the only valid operation on the
248 moved-from object is destruction.
249 */
250 template<class OtherDerived>
251 basic_parser(basic_parser<isRequest, OtherDerived>&&);
252
253 public:
254 /// `std::true_type` if this parser parses requests, `std::false_type` for responses.
255 using is_request =
256 std::integral_constant<bool, isRequest>;
257
258 /// Destructor (defaulted, non-virtual)
259 ~basic_parser() = default;
260
261 /// Copy constructor (deleted: a parser cannot be copied)
262 basic_parser(basic_parser const&) = delete;
263
264 /// Copy assignment (deleted: a parser cannot be copied)
265 basic_parser& operator=(basic_parser const&) = delete;
266
267 /** Access this object through its @ref basic_parser base type.
268
269 Call this when a derived parser must be passed where the base
270 class is expected, so that the intended function overload is
271 selected even if resolution would otherwise be ambiguous.
272 */
273 basic_parser&
274 base()
275 {
276 return static_cast<basic_parser&>(*this);
277 }
278
279 /** Access this object through its @ref basic_parser base type (const).
280
281 Call this when a derived parser must be passed where the base
282 class is expected, so that the intended function overload is
283 selected even if resolution would otherwise be ambiguous.
284 */
285 basic_parser const&
286 base() const
287 {
288 return static_cast<basic_parser const&>(*this);
289 }
290
291 /// Reports whether the parser has left its initial state, i.e. at least one byte of input was received.
292 bool
293 got_some() const
294 {
295 return !(state_ == state::nothing_yet);
296 }
297
298 /** Reports whether the whole message has been parsed.
299
300 A message counts as complete once the full header has been
301 produced and at least one of these conditions holds:
302
303 @li The skip body option was set.
304
305 @li The semantics of the message indicate there is no body.
306
307 @li The semantics of the message indicate a body is expected,
308 and the entire body was parsed.
309 */
310 bool
311 is_done() const
312 {
313 return state::complete == state_;
314 }
315
316 /** Reports whether the parser has produced the full header.
317 */
318 bool
319 is_header_done() const
320 {
321 return state::fields < state_;
322 }
323
324 /** Reports whether the message is an upgrade message.
325
326 @note The result is undefined unless
327 @ref is_header_done would return `true`.
328 */
329 bool
330 upgrade() const
331 {
332 return 0 != (f_ & flagConnectionUpgrade);
333 }
334
335 /** Reports whether the last value for Transfer-Encoding is "chunked".
336
337 @note The result is undefined unless
338 @ref is_header_done would return `true`.
339 */
340 bool
341 chunked() const
342 {
343 return 0 != (f_ & flagChunked);
344 }
345
346 /** Returns `true` if the message has keep-alive connection semantics.
347
348 This function always returns `false` if @ref need_eof would return
349 `false`. NOTE(review): this reads inverted; `true` is probably meant - confirm in the implementation.
350
351 @note The return value is undefined unless
352 @ref is_header_done would return `true`.
353 */
354 bool
355 keep_alive() const;
356
357 /** Returns the optional value of Content-Length if known (presumably an empty optional otherwise - confirm).
358
359 @note The return value is undefined unless
360 @ref is_header_done would return `true`.
361 */
362 boost::optional<std::uint64_t>
363 content_length() const;
364
365 /** Reports whether the message semantics require an end of file.
366
367 Depending on the contents of the header, the parser may
368 require an end of file notification to know where the end
369 of the body lies. When this function returns `true`, the caller
370 must call @ref put_eof once there will never be additional
371 data from the input.
372 */
373 bool
374 need_eof() const
375 {
376 return 0 != (f_ & flagNeedEOF);
377 }
378
379 /** Set the limit on the payload body.
380
381 This function sets the maximum allowed size of the payload body,
382 before any encodings except chunked have been removed. Depending
383 on the message semantics, one of these cases will apply:
384
385 @li The Content-Length is specified and exceeds the limit. In
386 this case the result @ref error::body_limit is returned
387 immediately after the header is parsed.
388
389 @li The Content-Length is unspecified and the chunked encoding
390 is not specified as the last encoding. In this case the end of
391 message is determined by the end of file indicator on the
392 associated stream or input source. If a sufficient number of
393 body payload octets are presented to the parser to exceed the
394 configured limit, the parse fails with the result
395 @ref error::body_limit
396
397 @li The Transfer-Encoding specifies the chunked encoding as the
398 last encoding. In this case, when the number of payload body
399 octets produced by removing the chunked encoding exceeds
400 the configured limit, the parse fails with the result
401 @ref error::body_limit.
402
403 Setting the limit after any body octets have been parsed
404 results in undefined behavior.
405
406 The default limit is 1MB for requests and 8MB for responses.
407
408 @param v The payload body limit to set
409 */
410 void
411 body_limit(std::uint64_t v)
412 {
413 this->body_limit_ = v; // replaces the current payload body limit
414 }
415
416 /** Set a limit on the total size of the header.
417
418 This function sets the maximum allowed size of the header
419 including all field name, value, and delimiter characters
420 and also including the CRLF sequences in the serialized
421 input. If the end of the header is not found within the
422 limit of the header size, the error @ref error::header_limit
423 is returned by @ref put.
424
425 Setting the limit after any header octets have been parsed
426 results in undefined behavior.
427 */
428 void
429 header_limit(std::uint32_t v)
430 {
431 this->header_limit_ = v; // replaces the current header size limit
432 }
433
434 /// Reports whether the eager parse option is currently set.
435 bool
436 eager() const
437 {
438 return 0 != (f_ & flagEager);
439 }
440
441 /** Set the eager parse option.
442
443 Normally the parser returns after successfully parsing a structured
444 element (header, chunk header, or chunk body) even if there are octets
445 remaining in the input. This is necessary when attempting to parse the
446 header first, or when the caller wants to inspect information which may
447 be invalidated by subsequent parsing, such as a chunk extension. The
448 `eager` option controls whether the parser keeps going after parsing
449 a structured element if there are octets remaining in the buffer and no
450 error occurs. This option is automatically set or cleared during certain
451 stream operations to improve performance with no change in functionality.
452
453 The default setting is `false`.
454
455 @param v `true` to set the eager parse option or `false` to disable it.
456 */
457 void
458 eager(bool v)
459 {
460 if(! v)
461 f_ &= ~flagEager; // clear only the eager bit, other flags untouched
462 else
463 f_ |= flagEager; // raise the eager bit
464 }
465
466 /// Reports whether the skip parse option is currently set.
467 bool
468 skip() const
469 {
470 return 0 != (f_ & flagSkipBody);
471 }
472
473 /** Set the skip parse option.
474
475 This option controls whether or not the parser expects to see an HTTP
476 body, regardless of the presence or absence of certain fields such as
477 Content-Length or a chunked Transfer-Encoding. Depending on the request,
478 some responses do not carry a body. For example, a 200 response to a
479 CONNECT request from a tunneling proxy, or a response to a HEAD request.
480 In these cases, callers may use this function to inform the parser that
481 no body is expected. The parser will consider the message complete
482 after the header has been received.
483
484 @param v `true` to set the skip body option or `false` to disable it.
485
486 @note This function must be called before any bytes are processed.
487 */
488 void
489 skip(bool v);
490
491 /** Write a buffer sequence to the parser.
492
493 This function attempts to incrementally parse the HTTP
494 message data stored in the caller-provided buffers. Upon
495 success, a positive return value indicates that the parser
496 made forward progress, consuming that number of
497 bytes.
498
499 In some cases there may be an insufficient number of octets
500 in the input buffer in order to make forward progress. This
501 is indicated by the code @ref error::need_more. When
502 this happens, the caller should place additional bytes into
503 the buffer sequence and call @ref put again.
504
505 The error code @ref error::need_more is special. When this
506 error is returned, a subsequent call to @ref put may succeed
507 if the buffers have been updated. Otherwise, upon error
508 the parser may not be restarted.
509
510 @param buffers An object meeting the requirements of
511 @b ConstBufferSequence that represents the next chunk of
512 message data. If the length of this buffer sequence is
513 one, the implementation will not allocate additional memory.
514 The class @ref beast::flat_buffer is provided as one way to
515 meet this requirement.
516
517 @param ec Set to the error, if any occurred.
518
519 @return The number of octets consumed in the buffer
520 sequence. The caller should remove these octets even if the
521 error is set.
522 */
523 template<class ConstBufferSequence>
524 std::size_t
525 put(ConstBufferSequence const& buffers, error_code& ec);
526 // Overload taking a single buffer; declared only when BOOST_BEAST_DOXYGEN is zero or undefined.
527 #if ! BOOST_BEAST_DOXYGEN
528 std::size_t
529 put(boost::asio::const_buffer const& buffer,
530 error_code& ec);
531 #endif // ! BOOST_BEAST_DOXYGEN
532
533 /** Inform the parser that the end of stream was reached.
534
535 In certain cases, HTTP needs to know where the end of
536 the stream is. For example, sometimes servers send
537 responses without Content-Length and expect the client
538 to consume input (for the body) until EOF. Callbacks
539 and errors will still be processed as usual.
540
541 This is typically called when a read from the
542 underlying stream object sets the error code to
543 `boost::asio::error::eof`.
544
545 @note Only valid after parsing a complete header. See also @ref need_eof.
546
547 @param ec Set to the error, if any occurred.
548 */
549 void
550 put_eof(error_code& ec);
551
552 private:
553 inline
554 Derived&
555 impl()
556 {
557 return static_cast<Derived&>(*this); // CRTP: view this object as the derived parser
558 }
559 // Private helpers are only declared here. NOTE(review): definitions presumably live in impl/basic_parser.ipp - confirm.
560 template<class ConstBufferSequence>
561 std::size_t
562 put_from_stack(std::size_t size,
563 ConstBufferSequence const& buffers,
564 error_code& ec);
565 // NOTE(review): `p` and `n` look like a pointer to, and byte count of, the input - confirm.
566 void
567 maybe_need_more(
568 char const* p, std::size_t n,
569 error_code& ec);
570 // Two overloads selected by a tag argument. NOTE(review): presumably std::true_type is the request form, as in default_body_limit - confirm.
571 void
572 parse_start_line(
573 char const*& p, char const* last,
574 error_code& ec, std::true_type);
575
576 void
577 parse_start_line(
578 char const*& p, char const* last,
579 error_code& ec, std::false_type);
580
581 void
582 parse_fields(
583 char const*& p, char const* last,
584 error_code& ec);
585 // Two overloads selected by a tag argument, the same pattern as parse_start_line.
586 void
587 finish_header(
588 error_code& ec, std::true_type);
589
590 void
591 finish_header(
592 error_code& ec, std::false_type);
593 // The parse_* helpers below take `p` by reference. NOTE(review): presumably it is advanced past consumed input - confirm.
594 void
595 parse_body(char const*& p,
596 std::size_t n, error_code& ec);
597
598 void
599 parse_body_to_eof(char const*& p,
600 std::size_t n, error_code& ec);
601
602 void
603 parse_chunk_header(char const*& p,
604 std::size_t n, error_code& ec);
605
606 void
607 parse_chunk_body(char const*& p,
608 std::size_t n, error_code& ec);
609
610 void
611 do_field(field f,
612 string_view value, error_code& ec);
613 };
614
615 } // http
616 } // beast
617 } // boost
618
619 #include <boost/beast/http/impl/basic_parser.ipp>
620
621 #endif