]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // |
2 | // Copyright (c) 2013-2017 Vinnie Falco (vinnie dot falco at gmail dot com) | |
3 | // | |
4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | |
5 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
6 | // | |
7 | ||
8 | #ifndef BEAST_HTTP_BASIC_PARSER_HPP | |
9 | #define BEAST_HTTP_BASIC_PARSER_HPP | |
10 | ||
11 | #include <beast/config.hpp> | |
12 | #include <beast/core/error.hpp> | |
13 | #include <beast/http/detail/basic_parser.hpp> | |
14 | #include <boost/asio/buffer.hpp> | |
15 | #include <boost/optional.hpp> | |
16 | #include <boost/assert.hpp> | |
17 | #include <boost/utility/string_ref.hpp> | |
18 | #include <memory> | |
19 | #include <utility> | |
20 | ||
21 | namespace beast { | |
22 | namespace http { | |
23 | ||
24 | /** Describes the parser's current state. | |
25 | ||
26 | The state is expressed as the type of data that | |
27 | @ref basic_parser is expecting to see in subsequently | |
28 | provided octets. | |
29 | */ | |
30 | enum class parse_state | |
31 | { | |
32 | /// Expecting one or more header octets (the state of a newly constructed parser) | |
33 | header = 0, | |
34 | ||
35 | /// Expecting one or more body octets | |
36 | body = 1, | |
37 | ||
38 | /// Expecting zero or more body octets followed by EOF | |
39 | body_to_eof = 2, | |
40 | ||
41 | /// Expecting additional chunk header octets | |
42 | chunk_header = 3, | |
43 | ||
44 | /// Expecting one or more chunk body octets | |
45 | chunk_body = 4, | |
46 | ||
47 | /** Parsing of the message is complete. | |
48 | ||
49 | The parse is considered complete when the full header | |
50 | is received and either the full body is received, or | |
51 | the semantics of the message indicate that no body | |
52 | is expected. This includes the case where the caller | |
53 | has indicated to the parser that no body is expected, | |
54 | for example when receiving a response to a HEAD request. | |
55 | */ | |
56 | complete = 5 | |
57 | }; | |
58 | ||
59 | /** A parser for decoding HTTP/1 wire format messages. | |
60 | ||
61 | This parser is designed to efficiently parse messages in the | |
62 | HTTP/1 wire format. It allocates no memory when input is | |
63 | presented as a single contiguous buffer, and uses minimal | |
64 | state. It will handle chunked encoding and it understands | |
65 | the semantics of the Connection, Content-Length, and Upgrade | |
66 | fields. | |
67 | ||
68 | The interface uses CRTP (Curiously Recurring Template Pattern). | |
69 | To use this class, derive from @ref basic_parser. When bytes | |
70 | are presented, the implementation will make a series of zero | |
71 | or more calls to derived class member functions (referred to | |
72 | as "callbacks" from here on) matching a specific signature. | |
73 | ||
74 | Every callback must be provided by the derived class, or else | |
75 | a compilation error will be generated. This exemplar shows | |
76 | the signature and description of the callbacks required in | |
77 | the derived class. | |
78 | ||
79 | @par Derived Example | |
80 | ||
81 | @code | |
82 | template<bool isRequest, bool isDirect> | |
83 | struct derived | |
84 | : basic_parser<isRequest, isDirect, derived<isRequest, isDirect>> | |
85 | { | |
86 | // The type used when providing a mutable | |
87 | // buffer sequence in which to store body data. | |
88 | // | |
89 | using mutable_buffers_type = ...; | |
90 | ||
91 | // When isRequest == true, called | |
92 | // after the Request Line is received. | |
93 | // | |
94 | void | |
95 | on_request( | |
96 | boost::string_ref const& method, | |
97 | boost::string_ref const& path, | |
98 | int version, | |
99 | error_code& ec); | |
100 | ||
101 | // When isRequest == false, called | |
102 | // after the Status Line is received. | |
103 | // | |
104 | void | |
105 | on_response( | |
106 | int status, | |
107 | boost::string_ref const& reason, | |
108 | int version, | |
109 | error_code& ec); | |
110 | ||
111 | // Called after receiving a field/value pair. | |
112 | // | |
113 | void | |
114 | on_field( | |
115 | boost::string_ref const& name, | |
116 | boost::string_ref const& value, | |
117 | error_code& ec); | |
118 | ||
119 | // Called after the header is complete. | |
120 | // | |
121 | void | |
122 | on_header( | |
123 | error_code& ec); | |
124 | ||
125 | // Called once before the body, if any, is started. | |
126 | // This will only be called if the semantics of the | |
127 | // message indicate that a body exists, including | |
128 | // an indicated body of zero length. | |
129 | // | |
130 | void | |
131 | on_body(); | |
132 | ||
133 | // Called zero or more times to provide body data. | |
134 | // | |
135 | // Only used if isDirect == false | |
136 | // | |
137 | void | |
138 | on_data( | |
139 | boost::string_ref const& s, | |
140 | error_code& ec); | |
141 | ||
142 | // Called zero or more times to retrieve a mutable | |
143 | // buffer sequence in which to store body data. | |
144 | // | |
145 | // Only used if isDirect == true | |
146 | // | |
147 | mutable_buffers_type | |
148 | on_prepare( | |
149 | std::size_t n); | |
150 | ||
151 | // Called after body data has been stored in the | |
152 | // buffer returned by the previous call to on_prepare. | |
153 | // | |
154 | // Only used if isDirect == true | |
155 | // | |
156 | void | |
157 | on_commit( | |
158 | std::size_t n); | |
159 | ||
160 | // If the Transfer-Encoding is specified, and the | |
161 | // last item in the list of encodings is "chunked", | |
162 | // called after receiving a chunk header or a final | |
163 | // chunk. | |
164 | // | |
165 | void | |
166 | on_chunk( | |
167 | std::uint64_t length, // Length of this chunk | |
168 | boost::string_ref const& ext, // The chunk extensions, if any | |
169 | error_code& ec); | |
170 | ||
171 | // Called once when the message is complete. | |
172 | // This will be called even if there is no body. | |
173 | // | |
174 | void | |
175 | on_complete(error_code& ec); | |
176 | }; | |
177 | @endcode | |
178 | ||
179 | If a callback sets the error code, the error will be propagated | |
180 | to the caller of the parser. Behavior of parsing after an error | |
181 | is returned is undefined. | |
182 | ||
183 | When the parser state is positioned to read bytes belonging to | |
184 | the body, calling @ref write will implicitly | |
185 | cause a buffer copy (because bytes are first transferred to the | |
186 | dynamic buffer). To avoid this copy, the additional functions | |
187 | @ref copy_body, @ref prepare_body, and @ref commit_body are | |
188 | provided to allow the caller to read bytes directly into buffers | |
189 | supplied by the parser. | |
190 | ||
191 | The parser is optimized for the case where the input buffer | |
192 | sequence consists of a single contiguous buffer. The | |
193 | @ref beast::flat_streambuf class is provided, which guarantees | |
194 | that the input sequence of the stream buffer will be represented | |
195 | by exactly one contiguous buffer. To ensure the optimum performance | |
196 | of the parser, use @ref beast::flat_streambuf with HTTP algorithms | |
197 | such as @ref beast::http::read, @ref beast::http::read_some, | |
198 | @ref beast::http::async_read, and @ref beast::http::async_read_some. | |
199 | Alternatively, the caller may use custom techniques to ensure that | |
200 | the structured portion of the HTTP message (header or chunk header) | |
201 | is contained in a linear buffer. | |
202 | ||
203 | @tparam isRequest A `bool` indicating whether the parser will be | |
204 | presented with a request or a response message. | |
205 | ||
206 | @tparam isDirect A `bool` indicating whether the parser interface | |
207 | supports reading body data directly into parser-provided buffers. | |
208 | ||
209 | @tparam Derived The derived class type. This is part of the | |
210 | Curiously Recurring Template Pattern interface. | |
211 | */ | |
212 | template<bool isRequest, bool isDirect, class Derived> | |
213 | class basic_parser | |
214 | : private detail::basic_parser_base | |
215 | { | |
216 | template<bool OtherIsRequest, | |
217 | bool OtherIsDirect, class OtherDerived> | |
218 | friend class basic_parser; | |
219 | ||
220 | // Message will be complete after reading header | |
221 | static unsigned constexpr flagSkipBody = 1<< 0; | |
222 | ||
223 | ||
224 | ||
225 | static unsigned constexpr flagOnBody = 1<< 1; | |
226 | ||
227 | // The parser has read at least one byte | |
228 | static unsigned constexpr flagGotSome = 1<< 2; | |
229 | ||
230 | // Message semantics indicate a body is expected. | |
231 | // Cleared if flagSkipBody is set. | |
232 | // | |
233 | static unsigned constexpr flagHasBody = 1<< 3; | |
234 | ||
235 | static unsigned constexpr flagHTTP11 = 1<< 4; | |
236 | static unsigned constexpr flagNeedEOF = 1<< 5; | |
237 | static unsigned constexpr flagExpectCRLF = 1<< 6; | |
238 | static unsigned constexpr flagFinalChunk = 1<< 7; | |
239 | static unsigned constexpr flagConnectionClose = 1<< 8; | |
240 | static unsigned constexpr flagConnectionUpgrade = 1<< 9; | |
241 | static unsigned constexpr flagConnectionKeepAlive = 1<< 10; | |
242 | static unsigned constexpr flagContentLength = 1<< 11; | |
243 | static unsigned constexpr flagChunked = 1<< 12; | |
244 | static unsigned constexpr flagUpgrade = 1<< 13; | |
245 | ||
246 | std::uint64_t len_; // bytes remaining in the chunk or body | |
247 | std::unique_ptr<char[]> buf_; | |
248 | std::size_t buf_len_ = 0; | |
249 | std::size_t skip_ = 0; // search from here | |
250 | std::size_t x_; // scratch variable | |
251 | unsigned f_ = 0; // bit set of the flag* constants above | |
252 | parse_state state_ = parse_state::header; | |
253 | boost::string_ref ext_; | |
254 | boost::string_ref body_; | |
255 | ||
256 | public: | |
257 | /// Copy constructor (disallowed) | |
258 | basic_parser(basic_parser const&) = delete; | |
259 | ||
260 | /// Copy assignment (disallowed) | |
261 | basic_parser& operator=(basic_parser const&) = delete; | |
262 | ||
263 | /// Default constructor | |
264 | basic_parser() = default; | |
265 | ||
266 | /// `true` if this parser parses requests, `false` for responses. | |
267 | static bool constexpr is_request = isRequest; | |
268 | ||
269 | /// Destructor | |
270 | ~basic_parser() = default; | |
271 | ||
272 | /** Move constructor | |
273 | ||
274 | After the move, the only valid operation on the | |
275 | moved-from object is destruction. | |
276 | */ | |
277 | template<bool OtherIsDirect, class OtherDerived> | |
278 | basic_parser(basic_parser< | |
279 | isRequest, OtherIsDirect, OtherDerived>&&); | |
280 | ||
281 | /** Set the skip body option. | |
282 | ||
283 | The option controls whether or not the parser expects to | |
284 | see an HTTP body, regardless of the presence or absence of | |
285 | certain fields such as Content-Length. | |
286 | ||
287 | Depending on the request, some responses do not carry a body. | |
288 | For example, a 200 response to a CONNECT request from a | |
289 | tunneling proxy. In these cases, callers may use this function | |
290 | to inform the parser that no body is expected. The parser will | |
291 | consider the message complete after the header has been received. | |
292 | ||
293 | @note This function must be called before any bytes are processed. | |
294 | */ | |
295 | void | |
296 | skip_body(); | |
297 | ||
298 | /** Returns the current parser state. | |
299 | ||
300 | The parser state indicates what octets the parser | |
301 | expects to see next in the input stream. | |
302 | */ | |
303 | parse_state | |
304 | state() const | |
305 | { | |
306 | return state_; | |
307 | } | |
308 | ||
309 | /// Returns `true` if the parser has received at least one byte of input. | |
310 | bool | |
311 | got_some() const | |
312 | { | |
313 | return (f_ & flagGotSome) != 0; | |
314 | } | |
315 | ||
316 | /// Returns `true` if the complete header has been parsed. | |
317 | bool | |
318 | got_header() const | |
319 | { | |
320 | return state_ != parse_state::header; | |
321 | } | |
322 | ||
323 | /** Returns `true` if a Content-Length is specified. | |
324 | ||
325 | @note Only valid after parsing a complete header. | |
326 | */ | |
327 | bool | |
328 | got_content_length() const | |
329 | { | |
330 | return (f_ & flagContentLength) != 0; | |
331 | } | |
332 | ||
333 | /** Returns `true` if the message is complete. | |
334 | ||
335 | The message is complete after a full header is | |
336 | parsed and one of the following is true: | |
337 | ||
338 | @li @ref skip_body was called | |
339 | ||
340 | @li The semantics of the message indicate there is no body. | |
341 | ||
342 | @li The semantics of the message indicate a body is | |
343 | expected, and the entire body was received. | |
344 | */ | |
345 | bool | |
346 | is_complete() const | |
347 | { | |
348 | return state_ == parse_state::complete; | |
349 | } | |
350 | ||
351 | /** Returns `true` if the message is an upgrade message. | |
352 | ||
353 | @note Only valid after parsing a complete header. | |
354 | */ | |
355 | bool | |
356 | is_upgrade() const | |
357 | { | |
358 | return (f_ & flagConnectionUpgrade) != 0; | |
359 | } | |
360 | ||
361 | /** Returns `true` if keep-alive is specified | |
362 | ||
363 | @note Only valid after parsing a complete header. | |
364 | */ | |
365 | bool | |
366 | is_keep_alive() const; | |
367 | ||
368 | /** Returns `true` if the chunked Transfer-Encoding is specified. | |
369 | ||
370 | @note Only valid after parsing a complete header. | |
371 | */ | |
372 | bool | |
373 | is_chunked() const | |
374 | { | |
375 | return (f_ & flagChunked) != 0; | |
376 | } | |
377 | ||
378 | /** Write part of a buffer sequence to the parser. | |
379 | ||
380 | This function attempts to parse the HTTP message | |
381 | stored in the caller provided buffers. Upon success, | |
382 | a positive return value indicates that the parser | |
383 | made forward progress, consuming that number of | |
384 | bytes. | |
385 | ||
386 | A return value of zero indicates that the parser | |
387 | requires additional input. In this case the caller | |
388 | should append additional bytes to the input buffer | |
389 | sequence and call @ref write again. | |
390 | ||
391 | @param buffers An object meeting the requirements of | |
392 | @b ConstBufferSequence that represents the message. | |
393 | ||
394 | @param ec Set to the error, if any occurred. | |
395 | ||
396 | @return The number of bytes consumed in the buffer | |
397 | sequence. | |
398 | */ | |
399 | template<class ConstBufferSequence> | |
400 | std::size_t | |
401 | write(ConstBufferSequence const& buffers, error_code& ec); | |
402 | ||
403 | #if ! BEAST_DOXYGEN | |
404 | std::size_t | |
405 | write(boost::asio::const_buffers_1 const& buffer, | |
406 | error_code& ec); | |
407 | #endif | |
408 | ||
409 | /** Inform the parser that the end of stream was reached. | |
410 | ||
411 | In certain cases, HTTP needs to know where the end of | |
412 | the stream is. For example, sometimes servers send | |
413 | responses without Content-Length and expect the client | |
414 | to consume input (for the body) until EOF. Callbacks | |
415 | and errors will still be processed as usual. | |
416 | ||
417 | This is typically called when a read from the | |
418 | underlying stream object sets the error code to | |
419 | `boost::asio::error::eof`. | |
420 | ||
421 | @note Only valid after parsing a complete header. | |
422 | ||
423 | @param ec Set to the error, if any occurred. | |
424 | */ | |
425 | void | |
426 | write_eof(error_code& ec); | |
427 | ||
428 | /** Returns the number of bytes remaining in the body or chunk. | |
429 | ||
430 | If a Content-Length is specified and the parser state | |
431 | is equal to @ref beast::http::parse_state::body, this will return | |
432 | the number of bytes remaining in the body. If the | |
433 | chunked Transfer-Encoding is indicated and the parser | |
434 | state is equal to @ref beast::http::parse_state::chunk_body, this | |
435 | will return the number of bytes remaining in the chunk. | |
436 | Otherwise, the function behavior is undefined. | |
437 | */ | |
438 | std::uint64_t | |
439 | size() const | |
440 | { | |
441 | BOOST_ASSERT( | |
442 | state_ == parse_state::body || | |
443 | state_ == parse_state::chunk_body); | |
444 | return len_; | |
445 | } | |
446 | ||
447 | /** Returns the body data parsed in the last call to @ref write. | |
448 | ||
449 | This buffer is invalidated after any call to @ref write | |
450 | or @ref write_eof. | |
451 | ||
452 | @note If the last call to @ref write came from the input | |
453 | area of a @b DynamicBuffer object, a call to the dynamic | |
454 | buffer's `consume` function may invalidate this return | |
455 | value. | |
456 | */ | |
457 | boost::string_ref const& | |
458 | body() const | |
459 | { | |
460 | // This function is not available when isDirect==true | |
461 | static_assert(! isDirect, ""); | |
462 | return body_; | |
463 | } | |
464 | ||
465 | /** Returns the chunk extension parsed in the last call to @ref write. | |
466 | ||
467 | This buffer is invalidated after any call to @ref write | |
468 | or @ref write_eof. | |
469 | ||
470 | @note If the last call to @ref write came from the input | |
471 | area of a @b DynamicBuffer object, a call to the dynamic | |
472 | buffer's `consume` function may invalidate this return | |
473 | value. | |
474 | */ | |
475 | boost::string_ref const& | |
476 | chunk_extension() const | |
477 | { | |
478 | // This function is not available when isDirect==true | |
479 | static_assert(! isDirect, ""); | |
480 | return ext_; | |
481 | } | |
482 | ||
483 | /** Returns the optional value of Content-Length if known. | |
484 | ||
485 | @note The return value is undefined unless a complete | |
486 | header has been parsed. | |
487 | */ | |
488 | boost::optional<std::uint64_t> | |
489 | content_length() const | |
490 | { | |
491 | BOOST_ASSERT(got_header()); | |
492 | if(! (f_ & flagContentLength)) | |
493 | return boost::none; | |
494 | return len_; | |
495 | } | |
496 | ||
497 | /** Copy leftover body data from the dynamic buffer. | |
498 | ||
499 | @note This member function is only available when | |
500 | `isDirect==true`. | |
501 | ||
502 | @return The number of bytes processed from the dynamic | |
503 | buffer. The caller should remove these bytes by calling | |
504 | `consume` on the buffer. | |
505 | */ | |
506 | template<class DynamicBuffer> | |
507 | std::size_t | |
508 | copy_body(DynamicBuffer& dynabuf); | |
509 | ||
510 | /** Returns a set of buffers for storing body data. | |
511 | ||
512 | @note This member function is only available when | |
513 | `isDirect==true`. | |
514 | ||
515 | @param limit The maximum total size, in bytes, of the | |
516 | returned buffer sequence. The actual size of the | |
517 | buffer sequence may be lower than this number. | |
518 | */ | |
519 | template<class MutableBufferSequence> | |
520 | void | |
521 | prepare_body(boost::optional< | |
522 | MutableBufferSequence>& buffers, std::size_t limit); | |
523 | ||
524 | /** Commit body data. | |
525 | ||
526 | @note This member function is only available when | |
527 | `isDirect==true`. | |
528 | */ | |
529 | void | |
530 | commit_body(std::size_t n); | |
531 | ||
532 | /** Indicate that `n` body octets have been consumed; once none remain the state advances to `complete` (body) or `chunk_header` (chunk body). | |
533 | */ | |
534 | void | |
535 | consume(std::size_t n) | |
536 | { | |
537 | BOOST_ASSERT(n <= len_); | |
538 | BOOST_ASSERT( | |
539 | state_ == parse_state::body || | |
540 | state_ == parse_state::chunk_body); | |
541 | len_ -= n; | |
542 | if(len_ == 0) | |
543 | { | |
544 | if(state_ == parse_state::body) | |
545 | state_ = parse_state::complete; | |
546 | else | |
547 | state_ = parse_state::chunk_header; | |
548 | } | |
549 | } | |
550 | ||
551 | /** Consume all remaining body data. | |
552 | ||
553 | This function instructs the parser to advance the | |
554 | state past any expected body octets. Callers who | |
555 | wish to read and process the body themselves will | |
556 | call this function. | |
557 | */ | |
558 | void | |
559 | consume_body(error_code& ec); | |
560 | ||
561 | private: | |
562 | inline | |
563 | Derived& | |
564 | impl() | |
565 | { | |
566 | return *static_cast<Derived*>(this); | |
567 | } | |
568 | ||
569 | template<class ConstBufferSequence> | |
570 | boost::string_ref | |
571 | maybe_flatten( | |
572 | ConstBufferSequence const& buffers); | |
573 | ||
574 | std::size_t | |
575 | do_write(boost::asio::const_buffers_1 const& buffer, | |
576 | error_code& ec, std::true_type); | |
577 | ||
578 | std::size_t | |
579 | do_write(boost::asio::const_buffers_1 const& buffer, | |
580 | error_code& ec, std::false_type); | |
581 | ||
582 | void | |
583 | parse_startline(char const*& it, | |
584 | int& version, int& status, | |
585 | error_code& ec, std::true_type); | |
586 | ||
587 | void | |
588 | parse_startline(char const*& it, | |
589 | int& version, int& status, | |
590 | error_code& ec, std::false_type); | |
591 | ||
592 | void | |
593 | parse_fields(char const*& it, | |
594 | char const* last, error_code& ec); | |
595 | ||
596 | void | |
597 | do_field( | |
598 | boost::string_ref const& name, | |
599 | boost::string_ref const& value, | |
600 | error_code& ec); | |
601 | ||
602 | std::size_t | |
603 | parse_header(char const* p, | |
604 | std::size_t n, error_code& ec); | |
605 | ||
606 | void | |
607 | do_header(int, std::true_type); | |
608 | ||
609 | void | |
610 | do_header(int status, std::false_type); | |
611 | ||
612 | void | |
613 | maybe_do_body_direct(); | |
614 | ||
615 | void | |
616 | maybe_do_body_indirect(error_code& ec); | |
617 | ||
618 | std::size_t | |
619 | parse_chunk_header(char const* p, | |
620 | std::size_t n, error_code& ec); | |
621 | ||
622 | std::size_t | |
623 | parse_body(char const* p, | |
624 | std::size_t n, error_code& ec); | |
625 | ||
626 | std::size_t | |
627 | parse_body_to_eof(char const* p, | |
628 | std::size_t n, error_code& ec); | |
629 | ||
630 | std::size_t | |
631 | parse_chunk_body(char const* p, | |
632 | std::size_t n, error_code& ec); | |
633 | ||
634 | void | |
635 | do_complete(error_code& ec); | |
636 | }; | |
637 | ||
638 | } // http | |
639 | } // beast | |
640 | ||
641 | #include <beast/http/impl/basic_parser.ipp> | |
642 | ||
643 | #endif |