]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/boost/beast/http/basic_parser.hpp
import new upstream nautilus stable release 14.2.8
[ceph.git] / ceph / src / boost / boost / beast / http / basic_parser.hpp
1 //
2 // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 //
7 // Official repository: https://github.com/boostorg/beast
8 //
9
10 #ifndef BOOST_BEAST_HTTP_BASIC_PARSER_HPP
11 #define BOOST_BEAST_HTTP_BASIC_PARSER_HPP
12
13 #include <boost/beast/core/detail/config.hpp>
14 #include <boost/beast/core/error.hpp>
15 #include <boost/beast/core/string.hpp>
16 #include <boost/beast/http/field.hpp>
17 #include <boost/beast/http/verb.hpp>
18 #include <boost/beast/http/detail/basic_parser.hpp>
19 #include <boost/asio/buffer.hpp>
20 #include <boost/optional.hpp>
21 #include <boost/assert.hpp>
22 #include <limits>
23 #include <memory>
24 #include <type_traits>
25 #include <utility>
26
27 namespace boost {
28 namespace beast {
29 namespace http {
30
31 /** A parser for decoding HTTP/1 wire format messages.
32
33 This parser is designed to efficiently parse messages in the
34 HTTP/1 wire format. It allocates no memory when input is
35 presented as a single contiguous buffer, and uses minimal
36 state. It will handle chunked encoding and it understands
37 the semantics of the Connection, Content-Length, and Upgrade
38 fields.
39 The parser is optimized for the case where the input buffer
40 sequence consists of a single contiguous buffer. The
41 @ref beast::basic_flat_buffer class is provided, which guarantees
42 that the input sequence of the stream buffer will be represented
43 by exactly one contiguous buffer. To ensure the optimum performance
44 of the parser, use @ref beast::basic_flat_buffer with HTTP algorithms
45 such as @ref read, @ref read_some, @ref async_read, and @ref async_read_some.
46 Alternatively, the caller may use custom techniques to ensure that
47 the structured portion of the HTTP message (header or chunk header)
48 is contained in a linear buffer.
49
50 The interface to the parser uses virtual member functions.
51 To use this class, derive your type from @ref basic_parser. When
52 bytes are presented, the implementation will make a series of zero
53 or more calls to virtual functions, which the derived class must
54 implement.
55
56 Every virtual function must be provided by the derived class,
57 or else a compilation error will be generated. The implementation
58 will make sure that `ec` is clear before each virtual function
59 is invoked. If a virtual function sets an error, it is propagated
60 out of the parser to the caller.
61
62 @tparam isRequest A `bool` indicating whether the parser will be
63 presented with a request or a response message.
64
65 @note If the parser encounters a field value with obs-fold
66 longer than 4 kilobytes in length, an error is generated.
67 */
68 template<bool isRequest>
69 class basic_parser
70 : private detail::basic_parser_base
71 {
// `is_request` is declared further down in the public section; it can be
// named here because default member initializers are compiled in the
// complete-class context.
72 std::uint64_t body_limit_ =
73 default_body_limit(is_request{}); // max payload body
74 std::uint64_t len_ = 0; // size of chunk or body
75 std::uint64_t len0_ = 0; // content length if known
76 std::unique_ptr<char[]> buf_; // temp storage
77 std::size_t buf_len_ = 0; // size of buf_
78 std::size_t skip_ = 0; // resume search here
79 std::uint32_t header_limit_ = 8192; // max header size
80 unsigned short status_ = 0; // response status
81 state state_ = state::nothing_yet; // initial state
82 unsigned f_ = 0; // flags
83
84 // limit on the size of the stack flat buffer
85 static std::size_t constexpr max_stack_buffer = 8192;
86
87 // Message will be complete after reading header
88 static unsigned constexpr flagSkipBody = 1<< 0;
89
90 // Consume input buffers across semantic boundaries
91 static unsigned constexpr flagEager = 1<< 1;
92
93 // The parser has read at least one byte
94 static unsigned constexpr flagGotSome = 1<< 2;
95
96 // Message semantics indicate a body is expected.
97 // cleared if flagSkipBody set
98 //
99 static unsigned constexpr flagHasBody = 1<< 3;
100
// Further bit positions within f_. Of these, flagNeedEOF,
// flagConnectionUpgrade and flagChunked are read by the inline accessors
// need_eof(), upgrade() and chunked() below; the others are not
// referenced anywhere in this header.
101 static unsigned constexpr flagHTTP11 = 1<< 4;
102 static unsigned constexpr flagNeedEOF = 1<< 5;
103 static unsigned constexpr flagExpectCRLF = 1<< 6;
104 static unsigned constexpr flagConnectionClose = 1<< 7;
105 static unsigned constexpr flagConnectionUpgrade = 1<< 8;
106 static unsigned constexpr flagConnectionKeepAlive = 1<< 9;
107 static unsigned constexpr flagContentLength = 1<< 10;
108 static unsigned constexpr flagChunked = 1<< 11;
109 static unsigned constexpr flagUpgrade = 1<< 12;
110 static unsigned constexpr flagFinalChunk = 1<< 13;
111
// Default for body_limit_, selected at compile time by the is_request tag.
112 static constexpr
113 std::uint64_t
114 default_body_limit(std::true_type)
115 {
116 // limit for requests
117 return 1 * 1024 * 1024; // 1MB
118 }
119
120 static constexpr
121 std::uint64_t
122 default_body_limit(std::false_type)
123 {
124 // limit for responses
125 return 8 * 1024 * 1024; // 8MB
126 }
127
// Every specialization of basic_parser may access the private members
// of every other specialization.
128 template<bool OtherIsRequest>
129 friend class basic_parser;
130
131 friend class basic_parser_test;
132
133 protected:
134 /// Default constructor
135 basic_parser() = default;
136
137 /** Move constructor
138
139 @note
140
141 After the move, the only valid operation on the
142 moved-from object is destruction.
143 */
144 basic_parser(basic_parser &&) = default;
145
146 /// Move assignment
147 basic_parser& operator=(basic_parser &&) = default;
148
149 public:
150 /// `true` if this parser parses requests, `false` for responses.
151 using is_request =
152 std::integral_constant<bool, isRequest>;
153
154 /// Destructor
155 virtual ~basic_parser() = default;
156
157 /// Copy constructor
158 basic_parser(basic_parser const&) = delete;
159
160 /// Copy assignment
161 basic_parser& operator=(basic_parser const&) = delete;
162
163 /// Returns `true` if the parser has received at least one byte of input.
164 bool
165 got_some() const
166 {
167 return state_ != state::nothing_yet;
168 }
169
170 /** Returns `true` if the message is complete.
171
172 The message is complete after the full header is produced
173 and one of the following is true:
174
175 @li The skip body option was set.
176
177 @li The semantics of the message indicate there is no body.
178
179 @li The semantics of the message indicate a body is expected,
180 and the entire body was parsed.
181 */
182 bool
183 is_done() const
184 {
185 return state_ == state::complete;
186 }
187
188 /** Returns `true` if the parser has produced the full header.
189 */
190 bool
191 is_header_done() const
192 {
193 return state_ > state::fields;
194 }
195
196 /** Returns `true` if the message is an upgrade message.
197
198 @note The return value is undefined unless
199 @ref is_header_done would return `true`.
200 */
201 bool
202 upgrade() const
203 {
204 return (f_ & flagConnectionUpgrade) != 0;
205 }
206
207 /** Returns `true` if the last value for Transfer-Encoding is "chunked".
208
209 @note The return value is undefined unless
210 @ref is_header_done would return `true`.
211 */
212 bool
213 chunked() const
214 {
215 return (f_ & flagChunked) != 0;
216 }
217
218 /** Returns `true` if the message has keep-alive connection semantics.
219
220 This function always returns `false` if @ref need_eof would return
221 `false`.
222
223 @note The return value is undefined unless
224 @ref is_header_done would return `true`.
225 */
// NOTE(review): the sentence above looks inverted. A message whose end is
// only marked by end of file cannot keep the connection alive, so this
// presumably should read "if need_eof would return `true`" — confirm
// against the out-of-line definition before changing the published text.
226 bool
227 keep_alive() const;
228
229 /** Returns the optional value of Content-Length if known.
230
231 @note The return value is undefined unless
232 @ref is_header_done would return `true`.
233 */
234 boost::optional<std::uint64_t>
235 content_length() const;
236
237 /** Returns the remaining content length if known
238
239 If the message header specifies a Content-Length,
240 the return value will be the number of bytes remaining
241 in the payload body that have not yet been parsed.
242
243 @note The return value is undefined unless
244 @ref is_header_done would return `true`.
245 */
246 boost::optional<std::uint64_t>
247 content_length_remaining() const;
248
249 /** Returns `true` if the message semantics require an end of file.
250
251 Depending on the contents of the header, the parser may
252 require an end of file notification to know where the end
253 of the body lies. If this function returns `true` it will be
254 necessary to call @ref put_eof when there will never be additional
255 data from the input.
256 */
257 bool
258 need_eof() const
259 {
260 return (f_ & flagNeedEOF) != 0;
261 }
262
263 /** Set the limit on the payload body.
264
265 This function sets the maximum allowed size of the payload body,
266 before any encodings except chunked have been removed. Depending
267 on the message semantics, one of these cases will apply:
268
269 @li The Content-Length is specified and exceeds the limit. In
270 this case the result @ref error::body_limit is returned
271 immediately after the header is parsed.
272
273 @li The Content-Length is unspecified and the chunked encoding
274 is not specified as the last encoding. In this case the end of
275 message is determined by the end of file indicator on the
276 associated stream or input source. If a sufficient number of
277 body payload octets are presented to the parser to exceed the
278 configured limit, the parse fails with the result
279 @ref error::body_limit
280
281 @li The Transfer-Encoding specifies the chunked encoding as the
282 last encoding. In this case, when the number of payload body
283 octets produced by removing the chunked encoding exceeds
284 the configured limit, the parse fails with the result
285 @ref error::body_limit.
286
287 Setting the limit after any body octets have been parsed
288 results in undefined behavior.
289
290 The default limit is 1MB for requests and 8MB for responses.
291
292 @param v The payload body limit to set
293 */
294 void
295 body_limit(std::uint64_t v)
296 {
297 body_limit_ = v;
298 }
299
300 /** Set a limit on the total size of the header.
301
302 This function sets the maximum allowed size of the header
303 including all field name, value, and delimiter characters
304 and also including the CRLF sequences in the serialized
305 input. If the end of the header is not found within the
306 limit of the header size, the error @ref error::header_limit
307 is returned by @ref put.
308
309 Setting the limit after any header octets have been parsed
310 results in undefined behavior.
311 */
312 void
313 header_limit(std::uint32_t v)
314 {
315 header_limit_ = v;
316 }
317
318 /// Returns `true` if the eager parse option is set.
319 bool
320 eager() const
321 {
322 return (f_ & flagEager) != 0;
323 }
324
325 /** Set the eager parse option.
326
327 Normally the parser returns after successfully parsing a structured
328 element (header, chunk header, or chunk body) even if there are octets
329 remaining in the input. This is necessary when attempting to parse the
330 header first, or when the caller wants to inspect information which may
331 be invalidated by subsequent parsing, such as a chunk extension. The
332 `eager` option controls whether the parser keeps going after parsing
333 a structured element if there are octets remaining in the buffer and no
334 error occurs. This option is automatically set or cleared during certain
335 stream operations to improve performance with no change in functionality.
336
337 The default setting is `false`.
338
339 @param v `true` to set the eager parse option or `false` to disable it.
340 */
341 void
342 eager(bool v)
343 {
344 if(v)
345 f_ |= flagEager;
346 else
347 f_ &= ~flagEager;
348 }
349
350 /// Returns `true` if the skip parse option is set.
351 bool
352 skip() const
353 {
354 return (f_ & flagSkipBody) != 0;
355 }
356
357 /** Set the skip parse option.
358
359 This option controls whether or not the parser expects to see an HTTP
360 body, regardless of the presence or absence of certain fields such as
361 Content-Length or a chunked Transfer-Encoding. Depending on the request,
362 some responses do not carry a body. For example, a 200 response to a
363 CONNECT request from a tunneling proxy, or a response to a HEAD request.
364 In these cases, callers may use this function to inform the parser that
365 no body is expected. The parser will consider the message complete
366 after the header has been received.
367
368 @param v `true` to set the skip body option or `false` to disable it.
369
370 @note This function must be called before any bytes are processed.
371 */
372 void
373 skip(bool v);
374
375 /** Write a buffer sequence to the parser.
376
377 This function attempts to incrementally parse the HTTP
378 message data stored in the caller provided buffers. Upon
379 success, a positive return value indicates that the parser
380 made forward progress, consuming that number of
381 bytes.
382
383 In some cases there may be an insufficient number of octets
384 in the input buffer in order to make forward progress. This
385 is indicated by the code @ref error::need_more. When
386 this happens, the caller should place additional bytes into
387 the buffer sequence and call @ref put again.
388
389 The error code @ref error::need_more is special. When this
390 error is returned, a subsequent call to @ref put may succeed
391 if the buffers have been updated. Otherwise, upon error
392 the parser may not be restarted.
393
394 @param buffers An object meeting the requirements of
395 <em>ConstBufferSequence</em> that represents the next chunk of
396 message data. If the length of this buffer sequence is
397 one, the implementation will not allocate additional memory.
398 The class @ref beast::basic_flat_buffer is provided as one way to
399 meet this requirement.
400
401 @param ec Set to the error, if any occurred.
402
403 @return The number of octets consumed in the buffer
404 sequence. The caller should remove these octets even if the
405 error is set.
406 */
407 template<class ConstBufferSequence>
408 std::size_t
409 put(ConstBufferSequence const& buffers, error_code& ec);
410
// Overload for a single buffer; hidden from the generated documentation.
411 #if ! BOOST_BEAST_DOXYGEN
412 std::size_t
413 put(net::const_buffer buffer,
414 error_code& ec);
415 #endif
416
417 /** Inform the parser that the end of stream was reached.
418
419 In certain cases, HTTP needs to know where the end of
420 the stream is. For example, sometimes servers send
421 responses without Content-Length and expect the client
422 to consume input (for the body) until EOF. Callbacks
423 and errors will still be processed as usual.
424
425 This is typically called when a read from the
426 underlying stream object sets the error code to
427 `net::error::eof`.
428
429 @note Only valid after parsing a complete header.
430
431 @param ec Set to the error, if any occurred.
432 */
433 void
434 put_eof(error_code& ec);
435
436 protected:
437 /** Called after receiving the request-line.
438
439 This virtual function is invoked after receiving a request-line
440 when parsing HTTP requests.
441 It can only be called when `isRequest == true`.
442
443 @param method The verb enumeration. If the method string is not
444 one of the predefined strings, this value will be @ref verb::unknown.
445
446 @param method_str The unmodified string representing the verb.
447
448 @param target The request-target.
449
450 @param version The HTTP-version. This will be 10 for HTTP/1.0,
451 and 11 for HTTP/1.1.
452
453 @param ec An output parameter which the function may set to indicate
454 an error. The error will be clear before this function is invoked.
455 */
456 virtual
457 void
458 on_request_impl(
459 verb method,
460 string_view method_str,
461 string_view target,
462 int version,
463 error_code& ec) = 0;
464
465 /** Called after receiving the status-line.
466
467 This virtual function is invoked after receiving a status-line
468 when parsing HTTP responses.
469 It can only be called when `isRequest == false`.
470
471 @param code The numeric status code.
472
473 @param reason The reason-phrase. Note that this value is
474 now obsolete, and only provided for historical or diagnostic
475 purposes.
476
477 @param version The HTTP-version. This will be 10 for HTTP/1.0,
478 and 11 for HTTP/1.1.
479
480 @param ec An output parameter which the function may set to indicate
481 an error. The error will be clear before this function is invoked.
482 */
483 virtual
484 void
485 on_response_impl(
486 int code,
487 string_view reason,
488 int version,
489 error_code& ec) = 0;
490
491 /** Called once for each complete field in the HTTP header.
492
493 This virtual function is invoked for each field that is received
494 while parsing an HTTP message.
495
496 @param name The known field enum value. If the name of the field
497 is not recognized, this value will be @ref field::unknown.
498
499 @param name_string The exact name of the field as received from
500 the input, represented as a string.
501
502 @param value A string holding the value of the field.
503
504 @param ec An output parameter which the function may set to indicate
505 an error. The error will be clear before this function is invoked.
506 */
507 virtual
508 void
509 on_field_impl(
510 field name,
511 string_view name_string,
512 string_view value,
513 error_code& ec) = 0;
514
515 /** Called once after the complete HTTP header is received.
516
517 This virtual function is invoked once, after the complete HTTP
518 header is received while parsing a message.
519
520 @param ec An output parameter which the function may set to indicate
521 an error. The error will be clear before this function is invoked.
522 */
523 virtual
524 void
525 on_header_impl(error_code& ec) = 0;
526
527 /** Called once before the body is processed.
528
529 This virtual function is invoked once, before the content body is
530 processed (but after the complete header is received).
531
532 @param content_length A value representing the content length in
533 bytes if the length is known (this can include a zero length).
534 Otherwise, the value will be `boost::none`.
535
536 @param ec An output parameter which the function may set to indicate
537 an error. The error will be clear before this function is invoked.
538 */
539 virtual
540 void
541 on_body_init_impl(
542 boost::optional<std::uint64_t> const& content_length,
543 error_code& ec) = 0;
544
545 /** Called each time additional data is received representing the content body.
546
547 This virtual function is invoked for each piece of the body which is
548 received while parsing of a message. This function is only used when
549 no chunked transfer encoding is present.
550
551 @param body A string holding the additional body contents. This may
552 contain nulls or unprintable characters.
553
554 @param ec An output parameter which the function may set to indicate
555 an error. The error will be clear before this function is invoked.
556
@return Presumably the number of bytes consumed from `body`, mirroring
what is documented for on_chunk_body_impl — TODO confirm against the
caller in the implementation.

557 @see on_chunk_body_impl
558 */
559 virtual
560 std::size_t
561 on_body_impl(
562 string_view body,
563 error_code& ec) = 0;
564
565 /** Called each time a new chunk header of a chunk encoded body is received.
566
567 This function is invoked each time a new chunk header is received.
568 The function is only used when the chunked transfer encoding is present.
569
570 @param size The size of this chunk, in bytes.
571
572 @param extensions A string containing the entire chunk extensions.
573 This may be empty, indicating no extensions are present.
574
575 @param ec An output parameter which the function may set to indicate
576 an error. The error will be clear before this function is invoked.
577 */
578 virtual
579 void
580 on_chunk_header_impl(
581 std::uint64_t size,
582 string_view extensions,
583 error_code& ec) = 0;
584
585 /** Called each time additional data is received representing part of a body chunk.
586
587 This virtual function is invoked for each piece of the body which is
588 received while parsing of a message. This function is only used when
589 the chunked transfer encoding is present.
590
591 @param remain The number of bytes remaining in this chunk. This includes
592 the contents of passed `body`. If this value is zero, then this represents
593 the final chunk.
594
595 @param body A string holding the additional body contents. This may
596 contain nulls or unprintable characters.
597
598 @param ec An output parameter which the function may set to indicate
599 an error. The error will be clear before this function is invoked.
600
601 @return This function should return the number of bytes actually consumed
602 from the `body` value. Any bytes that are not consumed on this call
603 will be presented in a subsequent call.
604
605 @see on_body_impl
606 */
607 virtual
608 std::size_t
609 on_chunk_body_impl(
610 std::uint64_t remain,
611 string_view body,
612 error_code& ec) = 0;
613
614 /** Called once when the complete message is received.
615
616 This virtual function is invoked once, after successfully parsing
617 a complete HTTP message.
618
619 @param ec An output parameter which the function may set to indicate
620 an error. The error will be clear before this function is invoked.
621 */
622 virtual
623 void
624 on_finish_impl(error_code& ec) = 0;
625
626 private:
// Internal helpers; only declared here, with no bodies in this header.
// The paired overloads that differ solely in a trailing std::true_type /
// std::false_type parameter are tag-dispatched — presumably on
// is_request, as default_body_limit is above; confirm in the
// implementation files.
627 template<class ConstBufferSequence>
628 std::size_t
629 put_from_stack(
630 std::size_t size,
631 ConstBufferSequence const& buffers,
632 error_code& ec);
633
634 void
635 maybe_need_more(
636 char const* p, std::size_t n,
637 error_code& ec);
638
639 void
640 parse_start_line(
641 char const*& p, char const* last,
642 error_code& ec, std::true_type);
643
644 void
645 parse_start_line(
646 char const*& p, char const* last,
647 error_code& ec, std::false_type);
648
649 void
650 parse_fields(
651 char const*& p, char const* last,
652 error_code& ec);
653
654 void
655 finish_header(
656 error_code& ec, std::true_type);
657
658 void
659 finish_header(
660 error_code& ec, std::false_type);
661
662 void
663 parse_body(char const*& p,
664 std::size_t n, error_code& ec);
665
666 void
667 parse_body_to_eof(char const*& p,
668 std::size_t n, error_code& ec);
669
670 void
671 parse_chunk_header(char const*& p,
672 std::size_t n, error_code& ec);
673
674 void
675 parse_chunk_body(char const*& p,
676 std::size_t n, error_code& ec);
677
678 void
679 do_field(field f,
680 string_view value, error_code& ec);
681 };
682
683 } // http
684 } // beast
685 } // boost
686
687 #include <boost/beast/http/impl/basic_parser.hpp>
688 #ifdef BOOST_BEAST_HEADER_ONLY
689 #include <boost/beast/http/impl/basic_parser.ipp>
690 #endif
691
692 #endif