1 /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
3 * Additional changes are licensed under the same terms as NGINX and
4 * copyright Joyent, Inc. and other Node contributors. All rights reserved.
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 #include "http_parser.h"
33 # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
/* Smaller of a and b. Whichever argument is selected is evaluated twice. */
# define MIN(a,b) ((b) > (a) ? (a) : (b))
/* Element count of a true array (must not be applied to a pointer). */
# define ARRAY_SIZE(a) (sizeof(a) / sizeof(*(a)))
/* Test bit i of the byte-wise bitmap a: byte i / 8, bit i % 8. Yields 1 or 0. */
# define BIT_AT(a, i)                                                  \
  ((int) (((unsigned int) (a)[(unsigned int) (i) >> 3] >>             \
           ((unsigned int) (i) & 7)) & 1u))
/* a[i] when i (taken as unsigned) is within the array, otherwise v. */
# define ELEM_AT(a, i, v) ((unsigned int) (i) >= ARRAY_SIZE(a) ? (v) : (a)[(i)])
54 #define SET_ERRNO(e) \
56 parser->http_errno = (e); \
59 #define CURRENT_STATE() p_state
60 #define UPDATE_STATE(V) p_state = (enum state) (V);
63 parser->state = CURRENT_STATE(); \
71 # define LIKELY(X) __builtin_expect(!!(X), 1)
72 # define UNLIKELY(X) __builtin_expect(!!(X), 0)
74 # define LIKELY(X) (X)
75 # define UNLIKELY(X) (X)
79 /* Run the notify callback FOR, returning ER if it fails */
80 #define CALLBACK_NOTIFY_(FOR, ER) \
82 assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
84 if (LIKELY(settings->on_##FOR)) { \
85 parser->state = CURRENT_STATE(); \
86 if (UNLIKELY(0 != settings->on_##FOR(parser))) { \
87 SET_ERRNO(HPE_CB_##FOR); \
89 UPDATE_STATE(parser->state); \
91 /* We either errored above or got paused; get out */ \
92 if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
98 /* Run the notify callback FOR and consume the current byte */
99 #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)
101 /* Run the notify callback FOR and don't consume the current byte */
102 #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)
104 /* Run data callback FOR with LEN bytes, returning ER if it fails */
105 #define CALLBACK_DATA_(FOR, LEN, ER) \
107 assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
110 if (LIKELY(settings->on_##FOR)) { \
111 parser->state = CURRENT_STATE(); \
113 settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
114 SET_ERRNO(HPE_CB_##FOR); \
116 UPDATE_STATE(parser->state); \
118 /* We either errored above or got paused; get out */ \
119 if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
127 /* Run the data callback FOR and consume the current byte */
128 #define CALLBACK_DATA(FOR) \
129 CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
131 /* Run the data callback FOR and don't consume the current byte */
132 #define CALLBACK_DATA_NOADVANCE(FOR) \
133 CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
135 /* Set the mark FOR; non-destructive if mark is already set */
143 /* Don't allow the total size of the HTTP headers (including the status
144 * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect
145 * embedders against denial-of-service attacks where the attacker feeds
146 * us a never-ending header that the embedder keeps buffering.
148 * This check is arguably the responsibility of embedders but we're doing
149 * it on the embedder's behalf because most won't bother and this way we
150 * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger
151 * than any reasonable request or response so this should never affect
152 * day-to-day operation.
154 #define COUNT_HEADER_SIZE(V) \
156 parser->nread += (V); \
157 if (UNLIKELY(parser->nread > (HTTP_MAX_HEADER_SIZE))) { \
158 SET_ERRNO(HPE_HEADER_OVERFLOW); \
164 #define PROXY_CONNECTION "proxy-connection"
165 #define CONNECTION "connection"
166 #define CONTENT_LENGTH "content-length"
167 #define TRANSFER_ENCODING "transfer-encoding"
168 #define UPGRADE "upgrade"
169 #define CHUNKED "chunked"
170 #define KEEP_ALIVE "keep-alive"
171 #define CLOSE "close"
174 static const char *method_strings
[] =
176 #define XX(num, name, string) #string,
/* Token lookup table, following RFC 2616:
 *
 *        token       = 1*<any CHAR except CTLs or separators>
 *     separators     = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 *
 * Indexed by byte value. A non-zero entry is the lowercased form of a valid
 * token character; zero marks control characters, separators, DEL and every
 * byte from 0x80 upwards (those trailing entries are zero-initialized
 * implicitly).
 */
static const char tokens[256] = {
  /* 0x00-0x0f: nul .. si (control characters) */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10-0x1f: dle .. us (control characters) */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20-0x2f: space and punctuation up to the slash */
  0, '!', 0, '#', '$', '%', '&', '\'', 0, 0, '*', '+', 0, '-', '.', 0,
  /* 0x30-0x3f: digits, then colon .. question mark */
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0, 0, 0, 0, 0, 0,
  /* 0x40-0x4f: at-sign, then A .. O (stored lowercased) */
  0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x50-0x5f: P .. Z (stored lowercased), brackets, backslash, caret, underscore */
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, '^', '_',
  /* 0x60-0x6f: backtick, then a .. o */
  '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x70-0x7f: p .. z, braces, pipe, tilde, del */
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, '|', 0, '~', 0
};
/* Hexadecimal digit values, indexed by byte value.
 *
 * '0'-'9' map to 0-9, 'a'-'f' and 'A'-'F' map to 10-15, and every other byte
 * maps to -1. All 256 entries are spelled out: with only the first 128
 * initialized, bytes 0x80-0xFF would default to 0 and be indistinguishable
 * from the digit '0' instead of carrying the -1 "not a hex digit" marker.
 * NOTE(review): callers presumably index with (unsigned char) and compare
 * against -1 -- confirm at the call sites.
 */
static const int8_t unhex[256] = {
  /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x30 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
  /* 0x40 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x50 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x60 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x70 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xa0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xb0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xc0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xd0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xe0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xf0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
236 #if HTTP_PARSER_STRICT
243 static const uint8_t normal_url_char
[32] = {
244 /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
245 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
246 /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
247 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
248 /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
249 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
250 /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
251 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
252 /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
253 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
254 /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
255 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
256 /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
257 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
258 /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
259 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
260 /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
261 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
262 /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
263 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
264 /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
265 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
266 /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
267 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
268 /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
269 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
270 /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
271 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
272 /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
273 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
274 /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
275 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
280 { s_dead
= 1 /* important that this is > 0 */
289 , s_res_first_http_major
291 , s_res_first_http_minor
293 , s_res_first_status_code
297 , s_res_line_almost_done
302 , s_req_spaces_before_url
305 , s_req_schema_slash_slash
308 , s_req_server_with_at
310 , s_req_query_string_start
312 , s_req_fragment_start
319 , s_req_first_http_major
321 , s_req_first_http_minor
323 , s_req_line_almost_done
325 , s_header_field_start
327 , s_header_value_discard_ws
328 , s_header_value_discard_ws_almost_done
329 , s_header_value_discard_lws
330 , s_header_value_start
334 , s_header_almost_done
339 , s_chunk_size_almost_done
341 , s_headers_almost_done
344 /* Important: 's_headers_done' must be the last 'header' state. All
345 * states beyond this must be 'body' states. It is used for overflow
346 * checking. See the PARSING_HEADER() macro.
350 , s_chunk_data_almost_done
354 , s_body_identity_eof
/* Non-zero while `state` is at or before s_headers_done. The argument is
 * parenthesized so that an expression (e.g. a conditional) compares as a
 * whole rather than having only its last operand compared. */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
369 , h_matching_connection
370 , h_matching_proxy_connection
371 , h_matching_content_length
372 , h_matching_transfer_encoding
377 , h_transfer_encoding
380 , h_matching_transfer_encoding_chunked
381 , h_matching_connection_token_start
382 , h_matching_connection_keep_alive
383 , h_matching_connection_close
384 , h_matching_connection_upgrade
385 , h_matching_connection_token
387 , h_transfer_encoding_chunked
388 , h_connection_keep_alive
390 , h_connection_upgrade
396 , s_http_userinfo_start
399 , s_http_host_v6_start
403 , s_http_host_v6_zone_start
404 , s_http_host_v6_zone
405 , s_http_host_port_start
409 /* Macros for character classes; depends on strict-mode */
/* ASCII case folding: sets bit 0x20, which maps 'A'-'Z' onto 'a'-'z'. Other
 * bytes are altered too, so only compare the result against letter ranges.
 * Both the argument and the expansion are parenthesized so the macro is safe
 * with expression arguments and inside larger expressions. */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
/* Non-zero for ASCII letters of either case. */
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
/* Non-zero for ASCII decimal digits. */
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
/* Non-zero for ASCII letters and decimal digits. */
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
/* Non-zero for ASCII hexadecimal digits of either case. */
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
417 #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
418 (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
420 #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
421 (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
422 (c) == '$' || (c) == ',')
/* Look up c in the tokens table. The whole argument is converted to
 * unsigned char before indexing, so an expression argument cannot produce
 * an index outside 0..255. */
#define STRICT_TOKEN(c)     (tokens[(unsigned char)(c)])
426 #if HTTP_PARSER_STRICT
427 #define TOKEN(c) (tokens[(unsigned char)c])
428 #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
429 #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
431 #define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
432 #define IS_URL_CHAR(c) \
433 (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
434 #define IS_HOST_CHAR(c) \
435 (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
/**
 * Verify that a char is a valid visible (printable) US-ASCII
 * character or %x80-FF. CR, LF and horizontal tab (9) are accepted as well;
 * all other control characters and DEL (127) are rejected.
 * CR and LF are expected to be defined elsewhere in this file.
 * The argument is parenthesized at every use so expression arguments are
 * evaluated as a whole.
 **/
#define IS_HEADER_CHAR(ch)                                                     \
  ((ch) == CR || (ch) == LF || (ch) == 9 ||                                    \
   ((unsigned char)(ch) > 31 && (ch) != 127))
445 #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
448 #if HTTP_PARSER_STRICT
449 # define STRICT_CHECK(cond) \
452 SET_ERRNO(HPE_STRICT); \
456 # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
458 # define STRICT_CHECK(cond)
459 # define NEW_MESSAGE() start_state
463 /* Map errno values to strings for human-readable output */
464 #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
467 const char *description
;
468 } http_strerror_tab
[] = {
469 HTTP_ERRNO_MAP(HTTP_STRERROR_GEN
)
471 #undef HTTP_STRERROR_GEN
473 int http_message_needs_eof(const http_parser
*parser
);
477 * This is designed to be shared by http_parser_execute() for URL validation,
478 * hence it has a state transition + byte-for-byte interface. In addition, it
479 * is meant to be embedded in http_parser_parse_url(), which does the dirty
 * work of turning state transitions into URL components for its API.
482 * This function should only be invoked with non-space characters. It is
483 * assumed that the caller cares about (and can detect) the transition between
484 * URL and non-URL states by looking for these.
487 parse_url_char(enum state s
, const char ch
)
489 if (ch
== ' ' || ch
== '\r' || ch
== '\n') {
493 #if HTTP_PARSER_STRICT
494 if (ch
== '\t' || ch
== '\f') {
500 case s_req_spaces_before_url
:
501 /* Proxied requests are followed by scheme of an absolute URI (alpha).
502 * All methods except CONNECT are followed by '/' or '*'.
505 if (ch
== '/' || ch
== '*') {
521 return s_req_schema_slash
;
526 case s_req_schema_slash
:
528 return s_req_schema_slash_slash
;
533 case s_req_schema_slash_slash
:
535 return s_req_server_start
;
540 case s_req_server_with_at
:
546 case s_req_server_start
:
553 return s_req_query_string_start
;
557 return s_req_server_with_at
;
560 if (IS_USERINFO_CHAR(ch
) || ch
== '[' || ch
== ']') {
567 if (IS_URL_CHAR(ch
)) {
573 return s_req_query_string_start
;
576 return s_req_fragment_start
;
581 case s_req_query_string_start
:
582 case s_req_query_string
:
583 if (IS_URL_CHAR(ch
)) {
584 return s_req_query_string
;
589 /* allow extra '?' in query string */
590 return s_req_query_string
;
593 return s_req_fragment_start
;
598 case s_req_fragment_start
:
599 if (IS_URL_CHAR(ch
)) {
600 return s_req_fragment
;
605 return s_req_fragment
;
614 if (IS_URL_CHAR(ch
)) {
630 /* We should never fall out of the switch above unless there's an error */
634 size_t http_parser_execute (http_parser
*parser
,
635 const http_parser_settings
*settings
,
641 const char *p
= data
;
642 const char *header_field_mark
= 0;
643 const char *header_value_mark
= 0;
644 const char *url_mark
= 0;
645 const char *body_mark
= 0;
646 const char *status_mark
= 0;
647 enum state p_state
= (enum state
) parser
->state
;
648 const unsigned int lenient
= parser
->lenient_http_headers
;
650 /* We're in an error state. Don't bother doing anything. */
651 if (HTTP_PARSER_ERRNO(parser
) != HPE_OK
) {
656 switch (CURRENT_STATE()) {
657 case s_body_identity_eof
:
658 /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
661 CALLBACK_NOTIFY_NOADVANCE(message_complete
);
665 case s_start_req_or_res
:
671 SET_ERRNO(HPE_INVALID_EOF_STATE
);
677 if (CURRENT_STATE() == s_header_field
)
678 header_field_mark
= data
;
679 if (CURRENT_STATE() == s_header_value
)
680 header_value_mark
= data
;
681 switch (CURRENT_STATE()) {
684 case s_req_schema_slash
:
685 case s_req_schema_slash_slash
:
686 case s_req_server_start
:
688 case s_req_server_with_at
:
689 case s_req_query_string_start
:
690 case s_req_query_string
:
691 case s_req_fragment_start
:
702 for (p
=data
; p
!= data
+ len
; p
++) {
705 if (PARSING_HEADER(CURRENT_STATE()))
706 COUNT_HEADER_SIZE(1);
709 switch (CURRENT_STATE()) {
712 /* this state is used after a 'Connection: close' message
713 * the parser will error out if it reads another message
715 if (LIKELY(ch
== CR
|| ch
== LF
))
718 SET_ERRNO(HPE_CLOSED_CONNECTION
);
721 case s_start_req_or_res
:
723 if (ch
== CR
|| ch
== LF
)
726 parser
->content_length
= ULLONG_MAX
;
729 UPDATE_STATE(s_res_or_resp_H
);
731 CALLBACK_NOTIFY(message_begin
);
733 parser
->type
= HTTP_REQUEST
;
734 UPDATE_STATE(s_start_req
);
741 case s_res_or_resp_H
:
743 parser
->type
= HTTP_RESPONSE
;
744 UPDATE_STATE(s_res_HT
);
746 if (UNLIKELY(ch
!= 'E')) {
747 SET_ERRNO(HPE_INVALID_CONSTANT
);
751 parser
->type
= HTTP_REQUEST
;
752 parser
->method
= HTTP_HEAD
;
754 UPDATE_STATE(s_req_method
);
761 parser
->content_length
= ULLONG_MAX
;
765 UPDATE_STATE(s_res_H
);
773 SET_ERRNO(HPE_INVALID_CONSTANT
);
777 CALLBACK_NOTIFY(message_begin
);
782 STRICT_CHECK(ch
!= 'T');
783 UPDATE_STATE(s_res_HT
);
787 STRICT_CHECK(ch
!= 'T');
788 UPDATE_STATE(s_res_HTT
);
792 STRICT_CHECK(ch
!= 'P');
793 UPDATE_STATE(s_res_HTTP
);
797 STRICT_CHECK(ch
!= '/');
798 UPDATE_STATE(s_res_first_http_major
);
801 case s_res_first_http_major
:
802 if (UNLIKELY(ch
< '0' || ch
> '9')) {
803 SET_ERRNO(HPE_INVALID_VERSION
);
807 parser
->http_major
= ch
- '0';
808 UPDATE_STATE(s_res_http_major
);
811 /* major HTTP version or dot */
812 case s_res_http_major
:
815 UPDATE_STATE(s_res_first_http_minor
);
820 SET_ERRNO(HPE_INVALID_VERSION
);
824 parser
->http_major
*= 10;
825 parser
->http_major
+= ch
- '0';
827 if (UNLIKELY(parser
->http_major
> 999)) {
828 SET_ERRNO(HPE_INVALID_VERSION
);
835 /* first digit of minor HTTP version */
836 case s_res_first_http_minor
:
837 if (UNLIKELY(!IS_NUM(ch
))) {
838 SET_ERRNO(HPE_INVALID_VERSION
);
842 parser
->http_minor
= ch
- '0';
843 UPDATE_STATE(s_res_http_minor
);
846 /* minor HTTP version or end of request line */
847 case s_res_http_minor
:
850 UPDATE_STATE(s_res_first_status_code
);
854 if (UNLIKELY(!IS_NUM(ch
))) {
855 SET_ERRNO(HPE_INVALID_VERSION
);
859 parser
->http_minor
*= 10;
860 parser
->http_minor
+= ch
- '0';
862 if (UNLIKELY(parser
->http_minor
> 999)) {
863 SET_ERRNO(HPE_INVALID_VERSION
);
870 case s_res_first_status_code
:
877 SET_ERRNO(HPE_INVALID_STATUS
);
880 parser
->status_code
= ch
- '0';
881 UPDATE_STATE(s_res_status_code
);
885 case s_res_status_code
:
890 UPDATE_STATE(s_res_status_start
);
893 UPDATE_STATE(s_res_line_almost_done
);
896 UPDATE_STATE(s_header_field_start
);
899 SET_ERRNO(HPE_INVALID_STATUS
);
905 parser
->status_code
*= 10;
906 parser
->status_code
+= ch
- '0';
908 if (UNLIKELY(parser
->status_code
> 999)) {
909 SET_ERRNO(HPE_INVALID_STATUS
);
916 case s_res_status_start
:
919 UPDATE_STATE(s_res_line_almost_done
);
924 UPDATE_STATE(s_header_field_start
);
929 UPDATE_STATE(s_res_status
);
936 UPDATE_STATE(s_res_line_almost_done
);
937 CALLBACK_DATA(status
);
942 UPDATE_STATE(s_header_field_start
);
943 CALLBACK_DATA(status
);
949 case s_res_line_almost_done
:
950 STRICT_CHECK(ch
!= LF
);
951 UPDATE_STATE(s_header_field_start
);
956 if (ch
== CR
|| ch
== LF
)
959 parser
->content_length
= ULLONG_MAX
;
961 if (UNLIKELY(!IS_ALPHA(ch
))) {
962 SET_ERRNO(HPE_INVALID_METHOD
);
966 parser
->method
= (enum http_method
) 0;
969 case 'A': parser
->method
= HTTP_ACL
; break;
970 case 'B': parser
->method
= HTTP_BIND
; break;
971 case 'C': parser
->method
= HTTP_CONNECT
; /* or COPY, CHECKOUT */ break;
972 case 'D': parser
->method
= HTTP_DELETE
; break;
973 case 'G': parser
->method
= HTTP_GET
; break;
974 case 'H': parser
->method
= HTTP_HEAD
; break;
975 case 'L': parser
->method
= HTTP_LOCK
; /* or LINK */ break;
976 case 'M': parser
->method
= HTTP_MKCOL
; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
977 case 'N': parser
->method
= HTTP_NOTIFY
; break;
978 case 'O': parser
->method
= HTTP_OPTIONS
; break;
979 case 'P': parser
->method
= HTTP_POST
;
980 /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
982 case 'R': parser
->method
= HTTP_REPORT
; /* or REBIND */ break;
983 case 'S': parser
->method
= HTTP_SUBSCRIBE
; /* or SEARCH */ break;
984 case 'T': parser
->method
= HTTP_TRACE
; break;
985 case 'U': parser
->method
= HTTP_UNLOCK
; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
987 SET_ERRNO(HPE_INVALID_METHOD
);
990 UPDATE_STATE(s_req_method
);
992 CALLBACK_NOTIFY(message_begin
);
1000 if (UNLIKELY(ch
== '\0')) {
1001 SET_ERRNO(HPE_INVALID_METHOD
);
1005 matcher
= method_strings
[parser
->method
];
1006 if (ch
== ' ' && matcher
[parser
->index
] == '\0') {
1007 UPDATE_STATE(s_req_spaces_before_url
);
1008 } else if (ch
== matcher
[parser
->index
]) {
1010 } else if (IS_ALPHA(ch
)) {
1012 switch (parser
->method
<< 16 | parser
->index
<< 8 | ch
) {
1013 #define XX(meth, pos, ch, new_meth) \
1014 case (HTTP_##meth << 16 | pos << 8 | ch): \
1015 parser->method = HTTP_##new_meth; break;
1017 XX(POST
, 1, 'U', PUT
)
1018 XX(POST
, 1, 'A', PATCH
)
1019 XX(CONNECT
, 1, 'H', CHECKOUT
)
1020 XX(CONNECT
, 2, 'P', COPY
)
1021 XX(MKCOL
, 1, 'O', MOVE
)
1022 XX(MKCOL
, 1, 'E', MERGE
)
1023 XX(MKCOL
, 2, 'A', MKACTIVITY
)
1024 XX(MKCOL
, 3, 'A', MKCALENDAR
)
1025 XX(SUBSCRIBE
, 1, 'E', SEARCH
)
1026 XX(REPORT
, 2, 'B', REBIND
)
1027 XX(POST
, 1, 'R', PROPFIND
)
1028 XX(PROPFIND
, 4, 'P', PROPPATCH
)
1029 XX(PUT
, 2, 'R', PURGE
)
1030 XX(LOCK
, 1, 'I', LINK
)
1031 XX(UNLOCK
, 2, 'S', UNSUBSCRIBE
)
1032 XX(UNLOCK
, 2, 'B', UNBIND
)
1033 XX(UNLOCK
, 3, 'I', UNLINK
)
1037 SET_ERRNO(HPE_INVALID_METHOD
);
1040 } else if (ch
== '-' &&
1041 parser
->index
== 1 &&
1042 parser
->method
== HTTP_MKCOL
) {
1043 parser
->method
= HTTP_MSEARCH
;
1045 SET_ERRNO(HPE_INVALID_METHOD
);
1053 case s_req_spaces_before_url
:
1055 if (ch
== ' ') break;
1058 if (parser
->method
== HTTP_CONNECT
) {
1059 UPDATE_STATE(s_req_server_start
);
1062 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch
));
1063 if (UNLIKELY(CURRENT_STATE() == s_dead
)) {
1064 SET_ERRNO(HPE_INVALID_URL
);
1072 case s_req_schema_slash
:
1073 case s_req_schema_slash_slash
:
1074 case s_req_server_start
:
1077 /* No whitespace allowed here */
1081 SET_ERRNO(HPE_INVALID_URL
);
1084 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch
));
1085 if (UNLIKELY(CURRENT_STATE() == s_dead
)) {
1086 SET_ERRNO(HPE_INVALID_URL
);
1095 case s_req_server_with_at
:
1097 case s_req_query_string_start
:
1098 case s_req_query_string
:
1099 case s_req_fragment_start
:
1100 case s_req_fragment
:
1104 UPDATE_STATE(s_req_http_start
);
1109 parser
->http_major
= 0;
1110 parser
->http_minor
= 9;
1111 UPDATE_STATE((ch
== CR
) ?
1112 s_req_line_almost_done
:
1113 s_header_field_start
);
1117 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch
));
1118 if (UNLIKELY(CURRENT_STATE() == s_dead
)) {
1119 SET_ERRNO(HPE_INVALID_URL
);
1126 case s_req_http_start
:
1129 UPDATE_STATE(s_req_http_H
);
1134 SET_ERRNO(HPE_INVALID_CONSTANT
);
1140 STRICT_CHECK(ch
!= 'T');
1141 UPDATE_STATE(s_req_http_HT
);
1145 STRICT_CHECK(ch
!= 'T');
1146 UPDATE_STATE(s_req_http_HTT
);
1149 case s_req_http_HTT
:
1150 STRICT_CHECK(ch
!= 'P');
1151 UPDATE_STATE(s_req_http_HTTP
);
1154 case s_req_http_HTTP
:
1155 STRICT_CHECK(ch
!= '/');
1156 UPDATE_STATE(s_req_first_http_major
);
1159 /* first digit of major HTTP version */
1160 case s_req_first_http_major
:
1161 if (UNLIKELY(ch
< '1' || ch
> '9')) {
1162 SET_ERRNO(HPE_INVALID_VERSION
);
1166 parser
->http_major
= ch
- '0';
1167 UPDATE_STATE(s_req_http_major
);
1170 /* major HTTP version or dot */
1171 case s_req_http_major
:
1174 UPDATE_STATE(s_req_first_http_minor
);
1178 if (UNLIKELY(!IS_NUM(ch
))) {
1179 SET_ERRNO(HPE_INVALID_VERSION
);
1183 parser
->http_major
*= 10;
1184 parser
->http_major
+= ch
- '0';
1186 if (UNLIKELY(parser
->http_major
> 999)) {
1187 SET_ERRNO(HPE_INVALID_VERSION
);
1194 /* first digit of minor HTTP version */
1195 case s_req_first_http_minor
:
1196 if (UNLIKELY(!IS_NUM(ch
))) {
1197 SET_ERRNO(HPE_INVALID_VERSION
);
1201 parser
->http_minor
= ch
- '0';
1202 UPDATE_STATE(s_req_http_minor
);
1205 /* minor HTTP version or end of request line */
1206 case s_req_http_minor
:
1209 UPDATE_STATE(s_req_line_almost_done
);
1214 UPDATE_STATE(s_header_field_start
);
1218 /* XXX allow spaces after digit? */
1220 if (UNLIKELY(!IS_NUM(ch
))) {
1221 SET_ERRNO(HPE_INVALID_VERSION
);
1225 parser
->http_minor
*= 10;
1226 parser
->http_minor
+= ch
- '0';
1228 if (UNLIKELY(parser
->http_minor
> 999)) {
1229 SET_ERRNO(HPE_INVALID_VERSION
);
1236 /* end of request line */
1237 case s_req_line_almost_done
:
1239 if (UNLIKELY(ch
!= LF
)) {
1240 SET_ERRNO(HPE_LF_EXPECTED
);
1244 UPDATE_STATE(s_header_field_start
);
1248 case s_header_field_start
:
1251 UPDATE_STATE(s_headers_almost_done
);
1256 /* they might be just sending \n instead of \r\n so this would be
1257 * the second \n to denote the end of headers*/
1258 UPDATE_STATE(s_headers_almost_done
);
1265 SET_ERRNO(HPE_INVALID_HEADER_TOKEN
);
1272 UPDATE_STATE(s_header_field
);
1276 parser
->header_state
= h_C
;
1280 parser
->header_state
= h_matching_proxy_connection
;
1284 parser
->header_state
= h_matching_transfer_encoding
;
1288 parser
->header_state
= h_matching_upgrade
;
1292 parser
->header_state
= h_general
;
1298 case s_header_field
:
1300 const char* start
= p
;
1301 for (; p
!= data
+ len
; p
++) {
1308 switch (parser
->header_state
) {
1314 parser
->header_state
= (c
== 'o' ? h_CO
: h_general
);
1319 parser
->header_state
= (c
== 'n' ? h_CON
: h_general
);
1326 parser
->header_state
= h_matching_connection
;
1329 parser
->header_state
= h_matching_content_length
;
1332 parser
->header_state
= h_general
;
1339 case h_matching_connection
:
1341 if (parser
->index
> sizeof(CONNECTION
)-1
1342 || c
!= CONNECTION
[parser
->index
]) {
1343 parser
->header_state
= h_general
;
1344 } else if (parser
->index
== sizeof(CONNECTION
)-2) {
1345 parser
->header_state
= h_connection
;
1349 /* proxy-connection */
1351 case h_matching_proxy_connection
:
1353 if (parser
->index
> sizeof(PROXY_CONNECTION
)-1
1354 || c
!= PROXY_CONNECTION
[parser
->index
]) {
1355 parser
->header_state
= h_general
;
1356 } else if (parser
->index
== sizeof(PROXY_CONNECTION
)-2) {
1357 parser
->header_state
= h_connection
;
1361 /* content-length */
1363 case h_matching_content_length
:
1365 if (parser
->index
> sizeof(CONTENT_LENGTH
)-1
1366 || c
!= CONTENT_LENGTH
[parser
->index
]) {
1367 parser
->header_state
= h_general
;
1368 } else if (parser
->index
== sizeof(CONTENT_LENGTH
)-2) {
1369 if (parser
->flags
& F_CONTENTLENGTH
) {
1370 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH
);
1373 parser
->header_state
= h_content_length
;
1374 parser
->flags
|= F_CONTENTLENGTH
;
1378 /* transfer-encoding */
1380 case h_matching_transfer_encoding
:
1382 if (parser
->index
> sizeof(TRANSFER_ENCODING
)-1
1383 || c
!= TRANSFER_ENCODING
[parser
->index
]) {
1384 parser
->header_state
= h_general
;
1385 } else if (parser
->index
== sizeof(TRANSFER_ENCODING
)-2) {
1386 parser
->header_state
= h_transfer_encoding
;
1392 case h_matching_upgrade
:
1394 if (parser
->index
> sizeof(UPGRADE
)-1
1395 || c
!= UPGRADE
[parser
->index
]) {
1396 parser
->header_state
= h_general
;
1397 } else if (parser
->index
== sizeof(UPGRADE
)-2) {
1398 parser
->header_state
= h_upgrade
;
1403 case h_content_length
:
1404 case h_transfer_encoding
:
1406 if (ch
!= ' ') parser
->header_state
= h_general
;
1410 assert(0 && "Unknown header_state");
1415 COUNT_HEADER_SIZE(p
- start
);
1417 if (p
== data
+ len
) {
1423 UPDATE_STATE(s_header_value_discard_ws
);
1424 CALLBACK_DATA(header_field
);
1428 SET_ERRNO(HPE_INVALID_HEADER_TOKEN
);
1432 case s_header_value_discard_ws
:
1433 if (ch
== ' ' || ch
== '\t') break;
1436 UPDATE_STATE(s_header_value_discard_ws_almost_done
);
1441 UPDATE_STATE(s_header_value_discard_lws
);
1447 case s_header_value_start
:
1451 UPDATE_STATE(s_header_value
);
1456 switch (parser
->header_state
) {
1458 parser
->flags
|= F_UPGRADE
;
1459 parser
->header_state
= h_general
;
1462 case h_transfer_encoding
:
1463 /* looking for 'Transfer-Encoding: chunked' */
1465 parser
->header_state
= h_matching_transfer_encoding_chunked
;
1467 parser
->header_state
= h_general
;
1471 case h_content_length
:
1472 if (UNLIKELY(!IS_NUM(ch
))) {
1473 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH
);
1477 parser
->content_length
= ch
- '0';
1481 /* looking for 'Connection: keep-alive' */
1483 parser
->header_state
= h_matching_connection_keep_alive
;
1484 /* looking for 'Connection: close' */
1485 } else if (c
== 'c') {
1486 parser
->header_state
= h_matching_connection_close
;
1487 } else if (c
== 'u') {
1488 parser
->header_state
= h_matching_connection_upgrade
;
1490 parser
->header_state
= h_matching_connection_token
;
1494 /* Multi-value `Connection` header */
1495 case h_matching_connection_token_start
:
1499 parser
->header_state
= h_general
;
1505 case s_header_value
:
1507 const char* start
= p
;
1508 enum header_states h_state
= (enum header_states
) parser
->header_state
;
1509 for (; p
!= data
+ len
; p
++) {
1512 UPDATE_STATE(s_header_almost_done
);
1513 parser
->header_state
= h_state
;
1514 CALLBACK_DATA(header_value
);
1519 UPDATE_STATE(s_header_almost_done
);
1520 COUNT_HEADER_SIZE(p
- start
);
1521 parser
->header_state
= h_state
;
1522 CALLBACK_DATA_NOADVANCE(header_value
);
1526 if (!lenient
&& !IS_HEADER_CHAR(ch
)) {
1527 SET_ERRNO(HPE_INVALID_HEADER_TOKEN
);
1538 size_t limit
= data
+ len
- p
;
1540 limit
= MIN(limit
, HTTP_MAX_HEADER_SIZE
);
1542 p_cr
= (const char*) memchr(p
, CR
, limit
);
1543 p_lf
= (const char*) memchr(p
, LF
, limit
);
1545 if (p_lf
!= NULL
&& p_cr
>= p_lf
)
1549 } else if (UNLIKELY(p_lf
!= NULL
)) {
1560 case h_transfer_encoding
:
1561 assert(0 && "Shouldn't get here.");
1564 case h_content_length
:
1568 if (ch
== ' ') break;
1570 if (UNLIKELY(!IS_NUM(ch
))) {
1571 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH
);
1572 parser
->header_state
= h_state
;
1576 t
= parser
->content_length
;
1580 /* Overflow? Test against a conservative limit for simplicity. */
1581 if (UNLIKELY((ULLONG_MAX
- 10) / 10 < parser
->content_length
)) {
1582 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH
);
1583 parser
->header_state
= h_state
;
1587 parser
->content_length
= t
;
1591 /* Transfer-Encoding: chunked */
1592 case h_matching_transfer_encoding_chunked
:
1594 if (parser
->index
> sizeof(CHUNKED
)-1
1595 || c
!= CHUNKED
[parser
->index
]) {
1596 h_state
= h_general
;
1597 } else if (parser
->index
== sizeof(CHUNKED
)-2) {
1598 h_state
= h_transfer_encoding_chunked
;
1602 case h_matching_connection_token_start
:
1603 /* looking for 'Connection: keep-alive' */
1605 h_state
= h_matching_connection_keep_alive
;
1606 /* looking for 'Connection: close' */
1607 } else if (c
== 'c') {
1608 h_state
= h_matching_connection_close
;
1609 } else if (c
== 'u') {
1610 h_state
= h_matching_connection_upgrade
;
1611 } else if (STRICT_TOKEN(c
)) {
1612 h_state
= h_matching_connection_token
;
1613 } else if (c
== ' ' || c
== '\t') {
1616 h_state
= h_general
;
1620 /* looking for 'Connection: keep-alive' */
1621 case h_matching_connection_keep_alive
:
1623 if (parser
->index
> sizeof(KEEP_ALIVE
)-1
1624 || c
!= KEEP_ALIVE
[parser
->index
]) {
1625 h_state
= h_matching_connection_token
;
1626 } else if (parser
->index
== sizeof(KEEP_ALIVE
)-2) {
1627 h_state
= h_connection_keep_alive
;
1631 /* looking for 'Connection: close' */
1632 case h_matching_connection_close
:
1634 if (parser
->index
> sizeof(CLOSE
)-1 || c
!= CLOSE
[parser
->index
]) {
1635 h_state
= h_matching_connection_token
;
1636 } else if (parser
->index
== sizeof(CLOSE
)-2) {
1637 h_state
= h_connection_close
;
1641 /* looking for 'Connection: upgrade' */
1642 case h_matching_connection_upgrade
:
1644 if (parser
->index
> sizeof(UPGRADE
) - 1 ||
1645 c
!= UPGRADE
[parser
->index
]) {
1646 h_state
= h_matching_connection_token
;
1647 } else if (parser
->index
== sizeof(UPGRADE
)-2) {
1648 h_state
= h_connection_upgrade
;
1652 case h_matching_connection_token
:
1654 h_state
= h_matching_connection_token_start
;
1659 case h_transfer_encoding_chunked
:
1660 if (ch
!= ' ') h_state
= h_general
;
1663 case h_connection_keep_alive
:
1664 case h_connection_close
:
1665 case h_connection_upgrade
:
1667 if (h_state
== h_connection_keep_alive
) {
1668 parser
->flags
|= F_CONNECTION_KEEP_ALIVE
;
1669 } else if (h_state
== h_connection_close
) {
1670 parser
->flags
|= F_CONNECTION_CLOSE
;
1671 } else if (h_state
== h_connection_upgrade
) {
1672 parser
->flags
|= F_CONNECTION_UPGRADE
;
1674 h_state
= h_matching_connection_token_start
;
1676 } else if (ch
!= ' ') {
1677 h_state
= h_matching_connection_token
;
1682 UPDATE_STATE(s_header_value
);
1683 h_state
= h_general
;
1687 parser
->header_state
= h_state
;
1689 COUNT_HEADER_SIZE(p
- start
);
1691 if (p
== data
+ len
)
1696 case s_header_almost_done
:
1698 if (UNLIKELY(ch
!= LF
)) {
1699 SET_ERRNO(HPE_LF_EXPECTED
);
1703 UPDATE_STATE(s_header_value_lws
);
1707 case s_header_value_lws
:
1709 if (ch
== ' ' || ch
== '\t') {
1710 UPDATE_STATE(s_header_value_start
);
1714 /* finished the header */
1715 switch (parser
->header_state
) {
1716 case h_connection_keep_alive
:
1717 parser
->flags
|= F_CONNECTION_KEEP_ALIVE
;
1719 case h_connection_close
:
1720 parser
->flags
|= F_CONNECTION_CLOSE
;
1722 case h_transfer_encoding_chunked
:
1723 parser
->flags
|= F_CHUNKED
;
1725 case h_connection_upgrade
:
1726 parser
->flags
|= F_CONNECTION_UPGRADE
;
1732 UPDATE_STATE(s_header_field_start
);
1736 case s_header_value_discard_ws_almost_done
:
1738 STRICT_CHECK(ch
!= LF
);
1739 UPDATE_STATE(s_header_value_discard_lws
);
1743 case s_header_value_discard_lws
:
1745 if (ch
== ' ' || ch
== '\t') {
1746 UPDATE_STATE(s_header_value_discard_ws
);
1749 switch (parser
->header_state
) {
1750 case h_connection_keep_alive
:
1751 parser
->flags
|= F_CONNECTION_KEEP_ALIVE
;
1753 case h_connection_close
:
1754 parser
->flags
|= F_CONNECTION_CLOSE
;
1756 case h_connection_upgrade
:
1757 parser
->flags
|= F_CONNECTION_UPGRADE
;
1759 case h_transfer_encoding_chunked
:
1760 parser
->flags
|= F_CHUNKED
;
1766 /* header value was empty */
1768 UPDATE_STATE(s_header_field_start
);
1769 CALLBACK_DATA_NOADVANCE(header_value
);
1774 case s_headers_almost_done
:
1776 STRICT_CHECK(ch
!= LF
);
1778 if (parser
->flags
& F_TRAILING
) {
1779 /* End of a chunked request */
1780 UPDATE_STATE(s_message_done
);
1781 CALLBACK_NOTIFY_NOADVANCE(chunk_complete
);
1785 /* Cannot use chunked encoding and a content-length header together
1786 per the HTTP specification. */
1787 if ((parser
->flags
& F_CHUNKED
) &&
1788 (parser
->flags
& F_CONTENTLENGTH
)) {
1789 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH
);
1793 UPDATE_STATE(s_headers_done
);
1795 /* Set this here so that on_headers_complete() callbacks can see it */
1797 ((parser
->flags
& (F_UPGRADE
| F_CONNECTION_UPGRADE
)) ==
1798 (F_UPGRADE
| F_CONNECTION_UPGRADE
) ||
1799 parser
->method
== HTTP_CONNECT
);
1801 /* Here we call the headers_complete callback. This is somewhat
1802 * different than other callbacks because if the user returns 1, we
1803 * will interpret that as saying that this message has no body. This
1804 * is needed for the annoying case of receiving a response to a HEAD
1807 * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1808 * we have to simulate it by handling a change in errno below.
1810 if (settings
->on_headers_complete
) {
1811 switch (settings
->on_headers_complete(parser
)) {
1816 parser
->upgrade
= 1;
1819 parser
->flags
|= F_SKIPBODY
;
1823 SET_ERRNO(HPE_CB_headers_complete
);
1824 RETURN(p
- data
); /* Error */
1828 if (HTTP_PARSER_ERRNO(parser
) != HPE_OK
) {
1835 case s_headers_done
:
1838 STRICT_CHECK(ch
!= LF
);
1842 hasBody
= parser
->flags
& F_CHUNKED
||
1843 (parser
->content_length
> 0 && parser
->content_length
!= ULLONG_MAX
);
1844 if (parser
->upgrade
&& (parser
->method
== HTTP_CONNECT
||
1845 (parser
->flags
& F_SKIPBODY
) || !hasBody
)) {
1846 /* Exit, the rest of the message is in a different protocol. */
1847 UPDATE_STATE(NEW_MESSAGE());
1848 CALLBACK_NOTIFY(message_complete
);
1849 RETURN((p
- data
) + 1);
1852 if (parser
->flags
& F_SKIPBODY
) {
1853 UPDATE_STATE(NEW_MESSAGE());
1854 CALLBACK_NOTIFY(message_complete
);
1855 } else if (parser
->flags
& F_CHUNKED
) {
1856 /* chunked encoding - ignore Content-Length header */
1857 UPDATE_STATE(s_chunk_size_start
);
1859 if (parser
->content_length
== 0) {
1860 /* Content-Length header given but zero: Content-Length: 0\r\n */
1861 UPDATE_STATE(NEW_MESSAGE());
1862 CALLBACK_NOTIFY(message_complete
);
1863 } else if (parser
->content_length
!= ULLONG_MAX
) {
1864 /* Content-Length header given and non-zero */
1865 UPDATE_STATE(s_body_identity
);
1867 if (!http_message_needs_eof(parser
)) {
1868 /* Assume content-length 0 - read the next */
1869 UPDATE_STATE(NEW_MESSAGE());
1870 CALLBACK_NOTIFY(message_complete
);
1872 /* Read body until EOF */
1873 UPDATE_STATE(s_body_identity_eof
);
1881 case s_body_identity
:
1883 uint64_t to_read
= MIN(parser
->content_length
,
1884 (uint64_t) ((data
+ len
) - p
));
1886 assert(parser
->content_length
!= 0
1887 && parser
->content_length
!= ULLONG_MAX
);
1889 /* The difference between advancing content_length and p is because
1890 * the latter will automatically advance on the next loop iteration.
1891 * Further, if content_length ends up at 0, we want to see the last
1892 * byte again for our message complete callback.
1895 parser
->content_length
-= to_read
;
1898 if (parser
->content_length
== 0) {
1899 UPDATE_STATE(s_message_done
);
1901 /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1903 * The alternative to doing this is to wait for the next byte to
1904 * trigger the data callback, just as in every other case. The
1905 * problem with this is that this makes it difficult for the test
1906 * harness to distinguish between complete-on-EOF and
1907 * complete-on-length. It's not clear that this distinction is
1908 * important for applications, but let's keep it for now.
1910 CALLBACK_DATA_(body
, p
- body_mark
+ 1, p
- data
);
1917 /* read until EOF */
1918 case s_body_identity_eof
:
1924 case s_message_done
:
1925 UPDATE_STATE(NEW_MESSAGE());
1926 CALLBACK_NOTIFY(message_complete
);
1927 if (parser
->upgrade
) {
1928 /* Exit, the rest of the message is in a different protocol. */
1929 RETURN((p
- data
) + 1);
1933 case s_chunk_size_start
:
1935 assert(parser
->nread
== 1);
1936 assert(parser
->flags
& F_CHUNKED
);
1938 unhex_val
= unhex
[(unsigned char)ch
];
1939 if (UNLIKELY(unhex_val
== -1)) {
1940 SET_ERRNO(HPE_INVALID_CHUNK_SIZE
);
1944 parser
->content_length
= unhex_val
;
1945 UPDATE_STATE(s_chunk_size
);
1953 assert(parser
->flags
& F_CHUNKED
);
1956 UPDATE_STATE(s_chunk_size_almost_done
);
1960 unhex_val
= unhex
[(unsigned char)ch
];
1962 if (unhex_val
== -1) {
1963 if (ch
== ';' || ch
== ' ') {
1964 UPDATE_STATE(s_chunk_parameters
);
1968 SET_ERRNO(HPE_INVALID_CHUNK_SIZE
);
1972 t
= parser
->content_length
;
1976 /* Overflow? Test against a conservative limit for simplicity. */
1977 if (UNLIKELY((ULLONG_MAX
- 16) / 16 < parser
->content_length
)) {
1978 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH
);
1982 parser
->content_length
= t
;
1986 case s_chunk_parameters
:
1988 assert(parser
->flags
& F_CHUNKED
);
1989 /* Chunk parameters are ignored. TODO check for overflow */
1991 UPDATE_STATE(s_chunk_size_almost_done
);
1997 case s_chunk_size_almost_done
:
1999 assert(parser
->flags
& F_CHUNKED
);
2000 STRICT_CHECK(ch
!= LF
);
2004 if (parser
->content_length
== 0) {
2005 parser
->flags
|= F_TRAILING
;
2006 UPDATE_STATE(s_header_field_start
);
2008 UPDATE_STATE(s_chunk_data
);
2010 CALLBACK_NOTIFY(chunk_header
);
2016 uint64_t to_read
= MIN(parser
->content_length
,
2017 (uint64_t) ((data
+ len
) - p
));
2019 assert(parser
->flags
& F_CHUNKED
);
2020 assert(parser
->content_length
!= 0
2021 && parser
->content_length
!= ULLONG_MAX
);
2023 /* See the explanation in s_body_identity for why the content
2024 * length and data pointers are managed this way.
2027 parser
->content_length
-= to_read
;
2030 if (parser
->content_length
== 0) {
2031 UPDATE_STATE(s_chunk_data_almost_done
);
2037 case s_chunk_data_almost_done
:
2038 assert(parser
->flags
& F_CHUNKED
);
2039 assert(parser
->content_length
== 0);
2040 STRICT_CHECK(ch
!= CR
);
2041 UPDATE_STATE(s_chunk_data_done
);
2042 CALLBACK_DATA(body
);
2045 case s_chunk_data_done
:
2046 assert(parser
->flags
& F_CHUNKED
);
2047 STRICT_CHECK(ch
!= LF
);
2049 UPDATE_STATE(s_chunk_size_start
);
2050 CALLBACK_NOTIFY(chunk_complete
);
2054 assert(0 && "unhandled state");
2055 SET_ERRNO(HPE_INVALID_INTERNAL_STATE
);
2060 /* Run callbacks for any marks that we have leftover after we ran out of
2061 * bytes. There should be at most one of these set, so it's OK to invoke
2062 * them in series (unset marks will not result in callbacks).
2064 * We use the NOADVANCE() variety of callbacks here because 'p' has already
2065 * overflowed 'data' and this allows us to correct for the off-by-one that
2066 * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2067 * value that's in-bounds).
2070 assert(((header_field_mark
? 1 : 0) +
2071 (header_value_mark
? 1 : 0) +
2072 (url_mark
? 1 : 0) +
2073 (body_mark
? 1 : 0) +
2074 (status_mark
? 1 : 0)) <= 1);
2076 CALLBACK_DATA_NOADVANCE(header_field
);
2077 CALLBACK_DATA_NOADVANCE(header_value
);
2078 CALLBACK_DATA_NOADVANCE(url
);
2079 CALLBACK_DATA_NOADVANCE(body
);
2080 CALLBACK_DATA_NOADVANCE(status
);
2085 if (HTTP_PARSER_ERRNO(parser
) == HPE_OK
) {
2086 SET_ERRNO(HPE_UNKNOWN
);
/*
 * http_message_needs_eof -- tells callers whether the end of the current
 * message can only be found by reading until EOF.
 *
 * Elsewhere in this file the result is used as a truth value: a false result
 * lets the parser start on the next message, a true result is followed by
 * the "read body until EOF" state.
 *
 * The checks visible below are, in order:
 *   1. whether the message is a request (parser->type == HTTP_REQUEST);
 *   2. whether the status code or F_SKIPBODY marks a response without a
 *      body (1xx, 204, 304, or a skipped body);
 *   3. whether the body length is already known (F_CHUNKED is set, or
 *      content_length differs from ULLONG_MAX).
 *
 * NOTE(review): the return type and the return statement of every branch are
 * missing from this listing. Presumably each of the three cases returns 0 and
 * only the fall-through case returns 1 -- TODO confirm against the full file.
 */
2093 /* Does the parser need to see an EOF to find the end of the message? */
2095 http_message_needs_eof (const http_parser
*parser
)
/* Check 1: requests. */
2097 if (parser
->type
== HTTP_REQUEST
) {
/* Check 2: responses that are defined to have no body. */
2101 /* See RFC 2616 section 4.4 */
2102 if (parser
->status_code
/ 100 == 1 || /* 1xx e.g. Continue */
2103 parser
->status_code
== 204 || /* No Content */
2104 parser
->status_code
== 304 || /* Not Modified */
2105 parser
->flags
& F_SKIPBODY
) { /* response to a HEAD request */
/* Check 3: the length is given by chunking or by a Content-Length value. */
2109 if ((parser
->flags
& F_CHUNKED
) || parser
->content_length
!= ULLONG_MAX
) {
/*
 * http_should_keep_alive -- reports whether the connection may be reused
 * after the current message.
 *
 * Two things are consulted:
 *   - the Connection flags recorded while parsing headers
 *     (F_CONNECTION_CLOSE, F_CONNECTION_KEEP_ALIVE), chosen according to the
 *     HTTP version numbers of the message;
 *   - http_message_needs_eof(): the final result is its negation, so a
 *     message that can only end at EOF never reports keep-alive.
 *
 * NOTE(review): the return type and the bodies of the two inner if
 * statements are missing from this listing; presumably both return 0 --
 * TODO confirm against the full file.
 */
2118 http_should_keep_alive (const http_parser
*parser
)
/* Major and minor both non-zero: look for an explicit close request. */
2120 if (parser
->http_major
> 0 && parser
->http_minor
> 0) {
2122 if (parser
->flags
& F_CONNECTION_CLOSE
) {
2126 /* HTTP/1.0 or earlier */
/* Here the test is for the absence of an explicit keep-alive request. */
2127 if (!(parser
->flags
& F_CONNECTION_KEEP_ALIVE
)) {
/* Neither check fired: keep-alive is possible only if the message end is known. */
2132 return !http_message_needs_eof(parser
);
/*
 * http_method_str -- maps an http_method value to its entry in
 * method_strings.
 *
 * ELEM_AT bounds-checks m against the size of method_strings, so a value
 * outside the table yields the literal "<unknown>" instead of indexing past
 * the end of the array.
 *
 * NOTE(review): the return type line is missing from this listing.
 */
2137 http_method_str (enum http_method m
)
2139 return ELEM_AT(method_strings
, m
, "<unknown>");
/*
 * http_parser_init -- resets *parser so that it can start on a new stream.
 *
 * The whole struct is zeroed, so any previous parsing state is discarded;
 * only the application-owned parser->data pointer is carried across.
 *
 * parser: parser object to reset.
 * t:      HTTP_REQUEST selects s_start_req, HTTP_RESPONSE selects
 *         s_start_res, any other value selects s_start_req_or_res.
 *
 * NOTE(review): the return type line is missing from this listing, and other
 * statements may be missing as well -- TODO confirm against the full file.
 */
2144 http_parser_init (http_parser
*parser
, enum http_parser_type t
)
2146 void *data
= parser
->data
; /* preserve application data */
/* Wipe everything, then put the saved user pointer back. */
2147 memset(parser
, 0, sizeof(*parser
));
2148 parser
->data
= data
;
/* Pick the initial state from the requested parser type. */
2150 parser
->state
= (t
== HTTP_REQUEST
? s_start_req
: (t
== HTTP_RESPONSE
? s_start_res
: s_start_req_or_res
));
2151 parser
->http_errno
= HPE_OK
;
/*
 * http_parser_settings_init -- zero-fills *settings.
 *
 * NOTE(review): presumably this leaves every callback pointer unset; the
 * layout of http_parser_settings is not visible here -- TODO confirm. The
 * return type line is missing from this listing.
 */
2155 http_parser_settings_init(http_parser_settings
*settings
)
2157 memset(settings
, 0, sizeof(*settings
));
/*
 * http_errno_name -- returns the name field of the http_strerror_tab entry
 * selected by err.
 *
 * The range check is an assert only, so it is compiled out when NDEBUG is
 * defined; callers must pass a value that indexes inside the table.
 *
 * NOTE(review): the return type line is missing from this listing.
 */
2161 http_errno_name(enum http_errno err
) {
2162 assert(((size_t) err
) < ARRAY_SIZE(http_strerror_tab
));
2163 return http_strerror_tab
[err
].name
;
/*
 * http_errno_description -- returns the description field of the
 * http_strerror_tab entry selected by err.
 *
 * The range check is an assert only, so it is compiled out when NDEBUG is
 * defined; callers must pass a value that indexes inside the table.
 *
 * NOTE(review): the return type line is missing from this listing.
 */
2167 http_errno_description(enum http_errno err
) {
2168 assert(((size_t) err
) < ARRAY_SIZE(http_strerror_tab
));
2169 return http_strerror_tab
[err
].description
;
/*
 * http_parse_host_char -- one step of the host-component state machine.
 *
 * s:  current state.
 * ch: next character of the host component.
 *
 * Returns the state to move to. The last statement returns
 * s_http_host_dead; presumably that is reached whenever no transition for
 * (s, ch) matched.
 *
 * NOTE(review): this listing has lost many lines of the function (the switch
 * header, most single-character tests, some returns and the tail of the
 * zone-id condition), so only the transitions that are still visible are
 * annotated below -- TODO restore from the full file.
 */
2172 static enum http_host_state
2173 http_parse_host_char(enum http_host_state s
, const char ch
) {
/*
 * Userinfo states: one test (not visible) hands over to s_http_host_start;
 * characters accepted by IS_USERINFO_CHAR stay in s_http_userinfo.
 */
2175 case s_http_userinfo
:
2176 case s_http_userinfo_start
:
2178 return s_http_host_start
;
2181 if (IS_USERINFO_CHAR(ch
)) {
2182 return s_http_userinfo
;
/*
 * Start of the host: one test (not visible) enters the v6 states; the
 * results of the two IS_HOST_CHAR tests are not visible either.
 */
2186 case s_http_host_start
:
2188 return s_http_host_v6_start
;
2191 if (IS_HOST_CHAR(ch
)) {
2198 if (IS_HOST_CHAR(ch
)) {
/* After the v6 part: the visible transition leads to the port. */
2203 case s_http_host_v6_end
:
2205 return s_http_host_port_start
;
/*
 * Inside the v6 part: one test (not visible) ends it. The s == s_http_host_v6
 * test further down suggests that this case otherwise continues into the
 * s_http_host_v6_start case -- TODO confirm.
 */
2210 case s_http_host_v6
:
2212 return s_http_host_v6_end
;
/*
 * Hex digits, ':' and '.' are accepted as v6 characters. A '%' is accepted
 * only when already in s_http_host_v6 and starts a zone id.
 */
2216 case s_http_host_v6_start
:
2217 if (IS_HEX(ch
) || ch
== ':' || ch
== '.') {
2218 return s_http_host_v6
;
2221 if (s
== s_http_host_v6
&& ch
== '%') {
2222 return s_http_host_v6_zone_start
;
/* Zone id: one test (not visible) ends the v6 part. */
2226 case s_http_host_v6_zone
:
2228 return s_http_host_v6_end
;
/* The end of the condition below is missing from this listing. */
2232 case s_http_host_v6_zone_start
:
2233 /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2234 if (IS_ALPHANUM(ch
) || ch
== '%' || ch
== '.' || ch
== '-' || ch
== '_' ||
2236 return s_http_host_v6_zone
;
/* Both port states share one transition (its condition is not visible). */
2240 case s_http_host_port
:
2241 case s_http_host_port_start
:
2243 return s_http_host_port
;
/* No transition taken. */
2251 return s_http_host_dead
;
/*
 * http_parse_host -- splits the UF_HOST span recorded in *u into userinfo,
 * host and port sub-fields.
 *
 * buf:      start of the URL text; every offset stored in u is relative to it.
 * u:        URL result; UF_HOST must already be set (asserted). On entry
 *           field_data[UF_HOST] describes the whole span to examine; it is
 *           rewritten here, and UF_PORT / UF_USERINFO are added when found.
 * found_at: non-zero starts the scan in s_http_userinfo_start, zero starts
 *           it in s_http_host_start.
 *
 * The caller in this file compares the result against 0; presumably a
 * non-zero result means the span was rejected.
 *
 * NOTE(review): the return type, the declaration of p, both switch headers,
 * the case label of the plain-host state, the update of s at the end of the
 * loop and every return statement are missing from this listing -- TODO
 * restore from the full file.
 */
2255 http_parse_host(const char * buf
, struct http_parser_url
*u
, int found_at
) {
2256 assert(u
->field_set
& (1 << UF_HOST
));
2257 enum http_host_state s
;
/* Offset one past the last byte of the span recorded for UF_HOST. */
2260 size_t buflen
= u
->field_data
[UF_HOST
].off
+ u
->field_data
[UF_HOST
].len
;
/* The host length is counted again from zero during the scan. */
2262 u
->field_data
[UF_HOST
].len
= 0;
2264 s
= found_at
? s_http_userinfo_start
: s_http_host_start
;
/* Feed the span to the state machine one byte at a time. */
2266 for (p
= buf
+ u
->field_data
[UF_HOST
].off
; p
< buf
+ buflen
; p
++) {
2267 enum http_host_state new_s
= http_parse_host_char(s
, *p
);
/* Dead state: presumably the function fails here (body not visible). */
2269 if (new_s
== s_http_host_dead
) {
/*
 * Plain host: entering the state from another one records the offset,
 * every byte in it extends the length.
 */
2275 if (s
!= s_http_host
) {
2276 u
->field_data
[UF_HOST
].off
= p
- buf
;
2278 u
->field_data
[UF_HOST
].len
++;
/* v6 host: same bookkeeping as for the plain host. */
2281 case s_http_host_v6
:
2282 if (s
!= s_http_host_v6
) {
2283 u
->field_data
[UF_HOST
].off
= p
- buf
;
2285 u
->field_data
[UF_HOST
].len
++;
/* Zone id bytes are counted as part of the host. */
2288 case s_http_host_v6_zone_start
:
2289 case s_http_host_v6_zone
:
2290 u
->field_data
[UF_HOST
].len
++;
/* Port: the first byte creates the UF_PORT field, every byte extends it. */
2293 case s_http_host_port
:
2294 if (s
!= s_http_host_port
) {
2295 u
->field_data
[UF_PORT
].off
= p
- buf
;
2296 u
->field_data
[UF_PORT
].len
= 0;
2297 u
->field_set
|= (1 << UF_PORT
);
2299 u
->field_data
[UF_PORT
].len
++;
/* Userinfo: the first byte creates UF_USERINFO, every byte extends it. */
2302 case s_http_userinfo
:
2303 if (s
!= s_http_userinfo
) {
2304 u
->field_data
[UF_USERINFO
].off
= p
- buf
;
2305 u
->field_data
[UF_USERINFO
].len
= 0;
2306 u
->field_set
|= (1 << UF_USERINFO
);
2308 u
->field_data
[UF_USERINFO
].len
++;
/*
 * NOTE(review): the action taken for the states listed below is not visible;
 * presumably ending the scan in any of them is an error -- TODO confirm.
 */
2317 /* Make sure we don't end somewhere unexpected */
2319 case s_http_host_start
:
2320 case s_http_host_v6_start
:
2321 case s_http_host_v6
:
2322 case s_http_host_v6_zone_start
:
2323 case s_http_host_v6_zone
:
2324 case s_http_host_port_start
:
2325 case s_http_userinfo
:
2326 case s_http_userinfo_start
:
/*
 * http_parser_url_init -- zero-fills *u.
 *
 * NOTE(review): the return type line is missing from this listing.
 */
2336 http_parser_url_init(struct http_parser_url
*u
) {
2337 memset(u
, 0, sizeof(*u
));
/*
 * http_parser_parse_url -- locates the components of a URL and records them
 * in *u.
 *
 * buf, buflen: URL text and its length; the scan loop is bounded by buflen.
 * is_connect:  non-zero starts parse_url_char in s_req_server_start instead
 *              of s_req_spaces_before_url, and the result must then consist
 *              of exactly UF_HOST and UF_PORT.
 * u:           output; port and field_set are cleared first. For every
 *              component found, its bit is set in field_set and
 *              field_data[] receives its offset into buf and its length.
 *
 * NOTE(review): the return type, several declarations (s, p, found_at), most
 * of the state-to-field switch, the range check on the port value and all
 * return statements are missing from this listing -- TODO restore from the
 * full file.
 */
2341 http_parser_parse_url(const char *buf
, size_t buflen
, int is_connect
,
2342 struct http_parser_url
*u
)
2346 enum http_parser_url_fields uf
, old_uf
;
2349 u
->port
= u
->field_set
= 0;
2350 s
= is_connect
? s_req_server_start
: s_req_spaces_before_url
;
/* Classify every byte and group consecutive bytes of one field together. */
2353 for (p
= buf
; p
< buf
+ buflen
; p
++) {
2354 s
= parse_url_char(s
, *p
);
2356 /* Figure out the next field that we're operating on */
2361 /* Skip delimiters */
2362 case s_req_schema_slash
:
2363 case s_req_schema_slash_slash
:
2364 case s_req_server_start
:
2365 case s_req_query_string_start
:
2366 case s_req_fragment_start
:
/* The field assignments for the states below are not visible. */
2373 case s_req_server_with_at
:
2385 case s_req_query_string
:
2389 case s_req_fragment
:
2394 assert(!"Unexpected state");
/* Same field as the previous byte: extend it by one. */
2398 /* Nothing's changed; soldier on */
2400 u
->field_data
[uf
].len
++;
/* A new field starts at this byte. */
2404 u
->field_data
[uf
].off
= p
- buf
;
2405 u
->field_data
[uf
].len
= 1;
2407 u
->field_set
|= (1 << uf
);
2411 /* host must be present if there is a schema */
2412 /* parsing http:///toto will fail */
2413 if ((u
->field_set
& (1 << UF_SCHEMA
)) &&
2414 (u
->field_set
& (1 << UF_HOST
)) == 0) {
/* Split the host span further into userinfo, host and port. */
2418 if (u
->field_set
& (1 << UF_HOST
)) {
2419 if (http_parse_host(buf
, u
, found_at
) != 0) {
2424 /* CONNECT requests can only contain "hostname:port" */
2425 if (is_connect
&& u
->field_set
!= ((1 << UF_HOST
)|(1 << UF_PORT
))) {
2429 if (u
->field_set
& (1 << UF_PORT
)) {
/*
 * NOTE(review): strtoul stops at the first non-digit, not at buflen. If the
 * port digits are the last bytes of a buffer that is not NUL-terminated, the
 * conversion may read past the end -- TODO confirm what callers guarantee.
 */
2430 /* Don't bother with endp; we've already validated the string */
2431 unsigned long v
= strtoul(buf
+ u
->field_data
[UF_PORT
].off
, NULL
, 10);
2433 /* Ports must fit in 16 bits (max value 2^16 - 1) */
2438 u
->port
= (uint16_t) v
;
/*
 * http_parser_pause -- pauses or resumes the parser.
 *
 * parser: parser whose error state is updated.
 * paused: non-zero stores HPE_PAUSED, zero stores HPE_OK.
 *
 * The update happens only while the current error state is HPE_OK or
 * HPE_PAUSED. The trailing assert(0 && ...) presumably belongs to the else
 * branch for a parser that is already in a real error state (the else line
 * is missing from this listing -- TODO confirm).
 *
 * NOTE(review): the return type line and the closing line of the block
 * comment inside the function are also missing from this listing.
 */
2445 http_parser_pause(http_parser
*parser
, int paused
) {
2446 /* Users should only be pausing/unpausing a parser that is not in an error
2447 * state. In non-debug builds, there's not much that we can do about this
2448 * other than ignore it.
2450 if (HTTP_PARSER_ERRNO(parser
) == HPE_OK
||
2451 HTTP_PARSER_ERRNO(parser
) == HPE_PAUSED
) {
2452 SET_ERRNO((paused
) ? HPE_PAUSED
: HPE_OK
);
2454 assert(0 && "Attempting to pause parser in error state");
/*
 * http_body_is_final -- true when parser->state equals s_message_done.
 *
 * NOTE(review): judging by the name, this is meant to be asked from the body
 * callback to learn whether the current piece of body is the last one --
 * confirm against the header documentation. The return type line is missing
 * from this listing.
 */
2459 http_body_is_final(const struct http_parser
*parser
) {
2460 return parser
->state
== s_message_done
;
/*
 * http_parser_version -- returns the library version packed into a single
 * integer: major * 0x10000 | minor * 0x100 | patch, i.e. the major number
 * from bit 16 upwards, the minor number from bit 8 and the patch number
 * from bit 0.
 *
 * NOTE(review): the return type line is missing from this listing.
 */
2464 http_parser_version(void) {
2465 return HTTP_PARSER_VERSION_MAJOR
* 0x10000 |
2466 HTTP_PARSER_VERSION_MINOR
* 0x00100 |
2467 HTTP_PARSER_VERSION_PATCH
* 0x00001;