4 * Copyright (c) Intel Corporation.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include "json_internal.h"
/*
 * Convert one ASCII hexadecimal digit to its numeric value.
 *
 * \param c Input byte.
 *
 * \return 0-15 for '0'-'9', 'A'-'F' and 'a'-'f'; -1 for every other byte.
 */
static int
hex_value(uint8_t c)
{
	/*
	 * Table entries are stored biased by +1. Bytes that are not listed are
	 * zero-initialized, so removing the bias below turns them into -1.
	 */
#define V(x, y) [(x)] = (y) + 1
	static const int8_t val[256] = {
		V('0', 0), V('1', 1), V('2', 2), V('3', 3), V('4', 4),
		V('5', 5), V('6', 6), V('7', 7), V('8', 8), V('9', 9),
		V('A', 0xA), V('B', 0xB), V('C', 0xC), V('D', 0xD), V('E', 0xE), V('F', 0xF),
		V('a', 0xA), V('b', 0xB), V('c', 0xC), V('d', 0xD), V('e', 0xE), V('f', 0xF),
	};
#undef V

	return val[c] - 1;
}
52 json_decode_string_escape_unicode(uint8_t **strp
, uint8_t *buf_end
, uint8_t *out
)
57 uint32_t surrogate_high
= 0;
61 assert(buf_end
> str
);
63 if (*str
++ != '\\') return SPDK_JSON_PARSE_INVALID
;
64 if (buf_end
== str
) return SPDK_JSON_PARSE_INCOMPLETE
;
66 if (*str
++ != 'u') return SPDK_JSON_PARSE_INVALID
;
67 if (buf_end
== str
) return SPDK_JSON_PARSE_INCOMPLETE
;
69 if ((v3
= hex_value(*str
++)) < 0) return SPDK_JSON_PARSE_INVALID
;
70 if (buf_end
== str
) return SPDK_JSON_PARSE_INCOMPLETE
;
72 if ((v2
= hex_value(*str
++)) < 0) return SPDK_JSON_PARSE_INVALID
;
73 if (buf_end
== str
) return SPDK_JSON_PARSE_INCOMPLETE
;
75 if ((v1
= hex_value(*str
++)) < 0) return SPDK_JSON_PARSE_INVALID
;
76 if (buf_end
== str
) return SPDK_JSON_PARSE_INCOMPLETE
;
78 if ((v0
= hex_value(*str
++)) < 0) return SPDK_JSON_PARSE_INVALID
;
79 if (buf_end
== str
) return SPDK_JSON_PARSE_INCOMPLETE
;
81 val
= v0
| (v1
<< 4) | (v2
<< 8) | (v3
<< 12);
84 /* We already parsed the high surrogate, so this should be the low part. */
85 if (!utf16_valid_surrogate_low(val
)) {
86 return SPDK_JSON_PARSE_INVALID
;
89 /* Convert UTF-16 surrogate pair into codepoint and fall through to utf8_encode. */
90 val
= utf16_decode_surrogate_pair(surrogate_high
, val
);
91 } else if (utf16_valid_surrogate_high(val
)) {
95 * We parsed a \uXXXX sequence that decoded to the first half of a
96 * UTF-16 surrogate pair, so it must be immediately followed by another
99 * Loop around to get the low half of the surrogate pair.
101 if (buf_end
== str
) return SPDK_JSON_PARSE_INCOMPLETE
;
103 } else if (utf16_valid_surrogate_low(val
)) {
105 * We found the second half of surrogate pair without the first half;
106 * this is an invalid encoding.
108 return SPDK_JSON_PARSE_INVALID
;
112 * Convert Unicode escape (or surrogate pair) to UTF-8 in place.
114 * This is safe (will not write beyond the buffer) because the \uXXXX sequence is 6 bytes
115 * (or 12 bytes for surrogate pairs), and the longest possible UTF-8 encoding of a
116 * single codepoint is 4 bytes.
119 rc
= utf8_encode_unsafe(out
, val
);
121 rc
= utf8_codepoint_len(val
);
124 return SPDK_JSON_PARSE_INVALID
;
127 *strp
= str
; /* update input pointer */
128 return rc
; /* return number of bytes decoded */
132 json_decode_string_escape_twochar(uint8_t **strp
, uint8_t *buf_end
, uint8_t *out
)
134 static const uint8_t escapes
[256] = {
144 uint8_t *str
= *strp
;
147 assert(buf_end
> str
);
148 if (buf_end
- str
< 2) {
149 return SPDK_JSON_PARSE_INCOMPLETE
;
152 assert(str
[0] == '\\');
159 *strp
+= 2; /* consumed two bytes */
160 return 1; /* produced one byte */
163 return SPDK_JSON_PARSE_INVALID
;
/*
 * Decode JSON string backslash escape.
 *
 * The short two-character form is tried first; anything it does not accept is
 * handed to the \uXXXX decoder.
 *
 * \param strp pointer to pointer to first character of escape (the backslash).
 *  *strp is also advanced to indicate how much input was consumed.
 * \param buf_end pointer to the end of the input buffer.
 * \param out destination passed through to the decoders (may be NULL).
 *
 * \return Number of bytes appended to out, or a negative SPDK_JSON_PARSE_* code.
 */
static int
json_decode_string_escape(uint8_t **strp, uint8_t *buf_end, uint8_t *out)
{
	int produced = json_decode_string_escape_twochar(strp, buf_end, out);

	if (produced <= 0) {
		produced = json_decode_string_escape_unicode(strp, buf_end, out);
	}

	return produced;
}
187 * Decode JSON string in place.
189 * \param str_start Pointer to the beginning of the string (the opening " character).
191 * \return Number of bytes in decoded string (beginning from start).
194 json_decode_string(uint8_t *str_start
, uint8_t *buf_end
, uint8_t **str_end
, uint32_t flags
)
196 uint8_t *str
= str_start
;
197 uint8_t *out
= str_start
+ 1; /* Decode string in place (skip the initial quote) */
200 if (buf_end
- str_start
< 2) {
202 * Shortest valid string (the empty string) is two bytes (""),
203 * so this can't possibly be valid
206 return SPDK_JSON_PARSE_INCOMPLETE
;
211 return SPDK_JSON_PARSE_INVALID
;
214 while (str
< buf_end
) {
218 * Update str_end to point at next input byte and return output length.
221 return out
- str_start
- 1;
222 } else if (str
[0] == '\\') {
223 rc
= json_decode_string_escape(&str
, buf_end
,
224 flags
& SPDK_JSON_PARSE_FLAG_DECODE_IN_PLACE
? out
: NULL
);
231 } else if (str
[0] <= 0x1f) {
232 /* control characters must be escaped */
234 return SPDK_JSON_PARSE_INVALID
;
236 rc
= utf8_valid(str
, buf_end
);
239 return SPDK_JSON_PARSE_INCOMPLETE
;
242 return SPDK_JSON_PARSE_INVALID
;
245 if (out
&& out
!= str
&& (flags
& SPDK_JSON_PARSE_FLAG_DECODE_IN_PLACE
)) {
246 memmove(out
, str
, rc
);
253 /* If execution gets here, we ran out of buffer. */
255 return SPDK_JSON_PARSE_INCOMPLETE
;
259 json_valid_number(uint8_t *start
, uint8_t *buf_end
)
264 if (p
>= buf_end
) return -1;
267 if (c
>= '1' && c
<= '9') goto num_int_digits
;
268 if (c
== '0') goto num_frac_or_exp
;
269 if (c
== '-') goto num_int_first_digit
;
274 if (spdk_likely(p
!= buf_end
)) {
276 if (c
== '0') goto num_frac_or_exp
;
277 if (c
>= '1' && c
<= '9') goto num_int_digits
;
283 if (spdk_likely(p
!= buf_end
)) {
285 if (c
>= '0' && c
<= '9') goto num_int_digits
;
286 if (c
== '.') goto num_frac_first_digit
;
287 if (c
== 'e' || c
== 'E') goto num_exp_sign
;
293 if (spdk_likely(p
!= buf_end
)) {
295 if (c
== '.') goto num_frac_first_digit
;
296 if (c
== 'e' || c
== 'E') goto num_exp_sign
;
301 num_frac_first_digit
:
302 if (spdk_likely(p
!= buf_end
)) {
304 if (c
>= '0' && c
<= '9') goto num_frac_digits
;
310 if (spdk_likely(p
!= buf_end
)) {
312 if (c
>= '0' && c
<= '9') goto num_frac_digits
;
313 if (c
== 'e' || c
== 'E') goto num_exp_sign
;
319 if (spdk_likely(p
!= buf_end
)) {
321 if (c
>= '0' && c
<= '9') goto num_exp_digits
;
322 if (c
== '-' || c
== '+') goto num_exp_first_digit
;
328 if (spdk_likely(p
!= buf_end
)) {
330 if (c
>= '0' && c
<= '9') goto num_exp_digits
;
336 if (spdk_likely(p
!= buf_end
)) {
338 if (c
>= '0' && c
<= '9') goto num_exp_digits
;
344 /* Valid end state */
348 /* Invalid end state */
350 /* Hit the end of the buffer - the stream is incomplete. */
351 return SPDK_JSON_PARSE_INCOMPLETE
;
354 /* Found an invalid character in an invalid end state */
355 return SPDK_JSON_PARSE_INVALID
;
359 json_valid_comment(const uint8_t *start
, const uint8_t *buf_end
)
361 const uint8_t *p
= start
;
365 if (buf_end
- p
< 2) {
366 return SPDK_JSON_PARSE_INCOMPLETE
;
370 return SPDK_JSON_PARSE_INVALID
;
374 } else if (p
[1] == '/') {
377 return SPDK_JSON_PARSE_INVALID
;
382 while (p
!= buf_end
- 1) {
383 if (p
[0] == '*' && p
[1] == '/') {
384 /* Include the terminating star and slash in the comment */
385 return p
- start
+ 2;
390 while (p
!= buf_end
) {
391 if (*p
== '\r' || *p
== '\n') {
392 /* Do not include the line terminator in the comment */
399 return SPDK_JSON_PARSE_INCOMPLETE
;
402 struct json_literal
{
403 enum spdk_json_val_type type
;
409 * JSON only defines 3 possible literals; they can be uniquely identified by bits
410 * 3 and 4 of the first character:
414 * These two bits can be used as an index into the g_json_literals array.
416 static const struct json_literal g_json_literals
[] = {
417 {SPDK_JSON_VAL_FALSE
, 5, "false"},
418 {SPDK_JSON_VAL_NULL
, 4, "null"},
419 {SPDK_JSON_VAL_TRUE
, 4, "true"},
424 match_literal(const uint8_t *start
, const uint8_t *end
, const uint8_t *literal
, size_t len
)
426 assert(end
>= start
);
427 if ((size_t)(end
- start
) < len
) {
428 return SPDK_JSON_PARSE_INCOMPLETE
;
431 if (memcmp(start
, literal
, len
) != 0) {
432 return SPDK_JSON_PARSE_INVALID
;
439 spdk_json_parse(void *json
, size_t size
, struct spdk_json_val
*values
, size_t num_values
,
440 void **end
, uint32_t flags
)
442 uint8_t *json_end
= json
+ size
;
443 enum spdk_json_val_type containers
[SPDK_JSON_MAX_NESTING_DEPTH
];
444 size_t con_value
[SPDK_JSON_MAX_NESTING_DEPTH
];
445 enum spdk_json_val_type con_type
= SPDK_JSON_VAL_INVALID
;
446 bool trailing_comma
= false;
447 size_t depth
= 0; /* index into containers */
448 size_t cur_value
= 0; /* index into values */
449 size_t con_start_value
;
450 uint8_t *data
= json
;
453 const struct json_literal
*lit
;
455 STATE_VALUE
, /* initial state */
456 STATE_VALUE_SEPARATOR
, /* value separator (comma) */
457 STATE_NAME
, /* "name": value */
458 STATE_NAME_SEPARATOR
, /* colon */
459 STATE_END
, /* parsed the complete value, so only whitespace is valid */
460 } state
= STATE_VALUE
;
462 #define ADD_VALUE(t, val_start_ptr, val_end_ptr) \
463 if (values && cur_value < num_values) { \
464 values[cur_value].type = t; \
465 values[cur_value].start = val_start_ptr; \
466 values[cur_value].len = val_end_ptr - val_start_ptr; \
470 while (data
< json_end
) {
478 /* Whitespace is allowed between any tokens. */
485 /* true, false, or null */
486 if (state
!= STATE_VALUE
) goto done_invalid
;
487 lit
= &g_json_literals
[(c
>> 3) & 3]; /* See comment above g_json_literals[] */
488 assert(lit
->str
[0] == c
);
489 rc
= match_literal(data
, json_end
, lit
->str
, lit
->len
);
490 if (rc
< 0) goto done_rc
;
491 ADD_VALUE(lit
->type
, data
, data
+ rc
);
493 state
= depth
? STATE_VALUE_SEPARATOR
: STATE_END
;
494 trailing_comma
= false;
498 if (state
!= STATE_VALUE
&& state
!= STATE_NAME
) goto done_invalid
;
499 rc
= json_decode_string(data
, json_end
, &new_data
, flags
);
505 * Start is data + 1 to skip initial quote.
506 * Length is data + rc - 1 to skip both quotes.
508 ADD_VALUE(state
== STATE_VALUE
? SPDK_JSON_VAL_STRING
: SPDK_JSON_VAL_NAME
,
509 data
+ 1, data
+ rc
- 1);
511 if (state
== STATE_NAME
) {
512 state
= STATE_NAME_SEPARATOR
;
514 state
= depth
? STATE_VALUE_SEPARATOR
: STATE_END
;
516 trailing_comma
= false;
530 if (state
!= STATE_VALUE
) goto done_invalid
;
531 rc
= json_valid_number(data
, json_end
);
532 if (rc
< 0) goto done_rc
;
533 ADD_VALUE(SPDK_JSON_VAL_NUMBER
, data
, data
+ rc
);
535 state
= depth
? STATE_VALUE_SEPARATOR
: STATE_END
;
536 trailing_comma
= false;
541 if (state
!= STATE_VALUE
) goto done_invalid
;
542 if (depth
== SPDK_JSON_MAX_NESTING_DEPTH
) {
543 rc
= SPDK_JSON_PARSE_MAX_DEPTH_EXCEEDED
;
547 con_type
= SPDK_JSON_VAL_OBJECT_BEGIN
;
550 con_type
= SPDK_JSON_VAL_ARRAY_BEGIN
;
553 con_value
[depth
] = cur_value
;
554 containers
[depth
++] = con_type
;
555 ADD_VALUE(con_type
, data
, data
+ 1);
557 trailing_comma
= false;
562 if (trailing_comma
) goto done_invalid
;
563 if (depth
== 0) goto done_invalid
;
564 con_type
= containers
[--depth
];
565 con_start_value
= con_value
[depth
];
566 if (values
&& con_start_value
< num_values
) {
567 values
[con_start_value
].len
= cur_value
- con_start_value
- 1;
570 if (state
!= STATE_NAME
&& state
!= STATE_VALUE_SEPARATOR
) {
573 if (con_type
!= SPDK_JSON_VAL_OBJECT_BEGIN
) {
576 ADD_VALUE(SPDK_JSON_VAL_OBJECT_END
, data
, data
+ 1);
578 if (state
!= STATE_VALUE
&& state
!= STATE_VALUE_SEPARATOR
) {
581 if (con_type
!= SPDK_JSON_VAL_ARRAY_BEGIN
) {
584 ADD_VALUE(SPDK_JSON_VAL_ARRAY_END
, data
, data
+ 1);
586 con_type
= depth
== 0 ? SPDK_JSON_VAL_INVALID
: containers
[depth
- 1];
588 state
= depth
? STATE_VALUE_SEPARATOR
: STATE_END
;
589 trailing_comma
= false;
593 if (state
!= STATE_VALUE_SEPARATOR
) goto done_invalid
;
595 assert(con_type
== SPDK_JSON_VAL_ARRAY_BEGIN
||
596 con_type
== SPDK_JSON_VAL_OBJECT_BEGIN
);
597 state
= con_type
== SPDK_JSON_VAL_ARRAY_BEGIN
? STATE_VALUE
: STATE_NAME
;
598 trailing_comma
= true;
602 if (state
!= STATE_NAME_SEPARATOR
) goto done_invalid
;
608 if (!(flags
& SPDK_JSON_PARSE_FLAG_ALLOW_COMMENTS
)) {
611 rc
= json_valid_comment(data
, json_end
);
612 if (rc
< 0) goto done_rc
;
613 /* Skip over comment */
621 if (state
== STATE_END
) {
626 if (state
== STATE_END
) {
627 /* Skip trailing whitespace */
628 while (data
< json_end
) {
631 if (c
== ' ' || c
== '\t' || c
== '\r' || c
== '\n') {
639 * These asserts are just for sanity checking - they are guaranteed by the allowed
643 assert(trailing_comma
== false);
644 assert(data
<= json_end
);
651 /* Invalid end state - ran out of data */
652 rc
= SPDK_JSON_PARSE_INCOMPLETE
;
662 rc
= SPDK_JSON_PARSE_INVALID
;