]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/lib/json/json_parse.c
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / spdk / lib / json / json_parse.c
1 /*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include "json_internal.h"
35
/*
 * Convert one ASCII hexadecimal digit to its numeric value.
 *
 * \param c Input byte.
 *
 * \return 0-15 for '0'-'9', 'A'-'F' and 'a'-'f'; -1 for every other byte.
 */
static int
hex_value(uint8_t c)
{
	if (c >= '0' && c <= '9') {
		return c - '0';
	}

	if (c >= 'A' && c <= 'F') {
		return c - 'A' + 0xA;
	}

	if (c >= 'a' && c <= 'f') {
		return c - 'a' + 0xA;
	}

	/* Not a hexadecimal digit. */
	return -1;
}
50
51 static int
52 json_decode_string_escape_unicode(uint8_t **strp, uint8_t *buf_end, uint8_t *out)
53 {
54 uint8_t *str = *strp;
55 int v0, v1, v2, v3;
56 uint32_t val;
57 uint32_t surrogate_high = 0;
58 int rc;
59 decode:
60 /* \uXXXX */
61 assert(buf_end > str);
62
63 if (*str++ != '\\') return SPDK_JSON_PARSE_INVALID;
64 if (buf_end == str) return SPDK_JSON_PARSE_INCOMPLETE;
65
66 if (*str++ != 'u') return SPDK_JSON_PARSE_INVALID;
67 if (buf_end == str) return SPDK_JSON_PARSE_INCOMPLETE;
68
69 if ((v3 = hex_value(*str++)) < 0) return SPDK_JSON_PARSE_INVALID;
70 if (buf_end == str) return SPDK_JSON_PARSE_INCOMPLETE;
71
72 if ((v2 = hex_value(*str++)) < 0) return SPDK_JSON_PARSE_INVALID;
73 if (buf_end == str) return SPDK_JSON_PARSE_INCOMPLETE;
74
75 if ((v1 = hex_value(*str++)) < 0) return SPDK_JSON_PARSE_INVALID;
76 if (buf_end == str) return SPDK_JSON_PARSE_INCOMPLETE;
77
78 if ((v0 = hex_value(*str++)) < 0) return SPDK_JSON_PARSE_INVALID;
79 if (buf_end == str) return SPDK_JSON_PARSE_INCOMPLETE;
80
81 val = v0 | (v1 << 4) | (v2 << 8) | (v3 << 12);
82
83 if (surrogate_high) {
84 /* We already parsed the high surrogate, so this should be the low part. */
85 if (!utf16_valid_surrogate_low(val)) {
86 return SPDK_JSON_PARSE_INVALID;
87 }
88
89 /* Convert UTF-16 surrogate pair into codepoint and fall through to utf8_encode. */
90 val = utf16_decode_surrogate_pair(surrogate_high, val);
91 } else if (utf16_valid_surrogate_high(val)) {
92 surrogate_high = val;
93
94 /*
95 * We parsed a \uXXXX sequence that decoded to the first half of a
96 * UTF-16 surrogate pair, so it must be immediately followed by another
97 * \uXXXX escape.
98 *
99 * Loop around to get the low half of the surrogate pair.
100 */
101 if (buf_end == str) return SPDK_JSON_PARSE_INCOMPLETE;
102 goto decode;
103 } else if (utf16_valid_surrogate_low(val)) {
104 /*
105 * We found the second half of surrogate pair without the first half;
106 * this is an invalid encoding.
107 */
108 return SPDK_JSON_PARSE_INVALID;
109 }
110
111 /*
112 * Convert Unicode escape (or surrogate pair) to UTF-8 in place.
113 *
114 * This is safe (will not write beyond the buffer) because the \uXXXX sequence is 6 bytes
115 * (or 12 bytes for surrogate pairs), and the longest possible UTF-8 encoding of a
116 * single codepoint is 4 bytes.
117 */
118 if (out) {
119 rc = utf8_encode_unsafe(out, val);
120 } else {
121 rc = utf8_codepoint_len(val);
122 }
123 if (rc < 0) {
124 return SPDK_JSON_PARSE_INVALID;
125 }
126
127 *strp = str; /* update input pointer */
128 return rc; /* return number of bytes decoded */
129 }
130
131 static int
132 json_decode_string_escape_twochar(uint8_t **strp, uint8_t *buf_end, uint8_t *out)
133 {
134 static const uint8_t escapes[256] = {
135 ['b'] = '\b',
136 ['f'] = '\f',
137 ['n'] = '\n',
138 ['r'] = '\r',
139 ['t'] = '\t',
140 ['/'] = '/',
141 ['"'] = '"',
142 ['\\'] = '\\',
143 };
144 uint8_t *str = *strp;
145 uint8_t c;
146
147 assert(buf_end > str);
148 if (buf_end - str < 2) {
149 return SPDK_JSON_PARSE_INCOMPLETE;
150 }
151
152 assert(str[0] == '\\');
153
154 c = escapes[str[1]];
155 if (c) {
156 if (out) {
157 *out = c;
158 }
159 *strp += 2; /* consumed two bytes */
160 return 1; /* produced one byte */
161 }
162
163 return SPDK_JSON_PARSE_INVALID;
164 }
165
/*
 * Decode a single JSON string backslash escape of either form.
 *
 * \param strp Pointer to pointer to the first character of the escape (the backslash).
 *             *strp is advanced to indicate how much input was consumed.
 * \param buf_end One past the last readable input byte.
 * \param out Destination for decoded bytes, or NULL to only measure them.
 *
 * \return Number of bytes appended to out (always positive on success), or the negative
 *         status returned by the \uXXXX decoder.
 */
static int
json_decode_string_escape(uint8_t **strp, uint8_t *buf_end, uint8_t *out)
{
	int produced = json_decode_string_escape_twochar(strp, buf_end, out);

	if (produced <= 0) {
		/* Not a two-character escape (or too short to tell): try the \uXXXX form. */
		produced = json_decode_string_escape_unicode(strp, buf_end, out);
	}

	return produced;
}
185
186 /*
187 * Decode JSON string in place.
188 *
189 * \param str_start Pointer to the beginning of the string (the opening " character).
190 *
191 * \return Number of bytes in decoded string (beginning from start).
192 */
193 static int
194 json_decode_string(uint8_t *str_start, uint8_t *buf_end, uint8_t **str_end, uint32_t flags)
195 {
196 uint8_t *str = str_start;
197 uint8_t *out = str_start + 1; /* Decode string in place (skip the initial quote) */
198 int rc;
199
200 if (buf_end - str_start < 2) {
201 /*
202 * Shortest valid string (the empty string) is two bytes (""),
203 * so this can't possibly be valid
204 */
205 *str_end = str;
206 return SPDK_JSON_PARSE_INCOMPLETE;
207 }
208
209 if (*str++ != '"') {
210 *str_end = str;
211 return SPDK_JSON_PARSE_INVALID;
212 }
213
214 while (str < buf_end) {
215 if (str[0] == '"') {
216 /*
217 * End of string.
218 * Update str_end to point at next input byte and return output length.
219 */
220 *str_end = str + 1;
221 return out - str_start - 1;
222 } else if (str[0] == '\\') {
223 rc = json_decode_string_escape(&str, buf_end,
224 flags & SPDK_JSON_PARSE_FLAG_DECODE_IN_PLACE ? out : NULL);
225 assert(rc != 0);
226 if (rc < 0) {
227 *str_end = str;
228 return rc;
229 }
230 out += rc;
231 } else if (str[0] <= 0x1f) {
232 /* control characters must be escaped */
233 *str_end = str;
234 return SPDK_JSON_PARSE_INVALID;
235 } else {
236 rc = utf8_valid(str, buf_end);
237 if (rc == 0) {
238 *str_end = str;
239 return SPDK_JSON_PARSE_INCOMPLETE;
240 } else if (rc < 0) {
241 *str_end = str;
242 return SPDK_JSON_PARSE_INVALID;
243 }
244
245 if (out && out != str && (flags & SPDK_JSON_PARSE_FLAG_DECODE_IN_PLACE)) {
246 memmove(out, str, rc);
247 }
248 out += rc;
249 str += rc;
250 }
251 }
252
253 /* If execution gets here, we ran out of buffer. */
254 *str_end = str;
255 return SPDK_JSON_PARSE_INCOMPLETE;
256 }
257
258 static int
259 json_valid_number(uint8_t *start, uint8_t *buf_end)
260 {
261 uint8_t *p = start;
262 uint8_t c;
263
264 if (p >= buf_end) return -1;
265
266 c = *p++;
267 if (c >= '1' && c <= '9') goto num_int_digits;
268 if (c == '0') goto num_frac_or_exp;
269 if (c == '-') goto num_int_first_digit;
270 p--;
271 goto done_invalid;
272
273 num_int_first_digit:
274 if (spdk_likely(p != buf_end)) {
275 c = *p++;
276 if (c == '0') goto num_frac_or_exp;
277 if (c >= '1' && c <= '9') goto num_int_digits;
278 p--;
279 }
280 goto done_invalid;
281
282 num_int_digits:
283 if (spdk_likely(p != buf_end)) {
284 c = *p++;
285 if (c >= '0' && c <= '9') goto num_int_digits;
286 if (c == '.') goto num_frac_first_digit;
287 if (c == 'e' || c == 'E') goto num_exp_sign;
288 p--;
289 }
290 goto done_valid;
291
292 num_frac_or_exp:
293 if (spdk_likely(p != buf_end)) {
294 c = *p++;
295 if (c == '.') goto num_frac_first_digit;
296 if (c == 'e' || c == 'E') goto num_exp_sign;
297 p--;
298 }
299 goto done_valid;
300
301 num_frac_first_digit:
302 if (spdk_likely(p != buf_end)) {
303 c = *p++;
304 if (c >= '0' && c <= '9') goto num_frac_digits;
305 p--;
306 }
307 goto done_invalid;
308
309 num_frac_digits:
310 if (spdk_likely(p != buf_end)) {
311 c = *p++;
312 if (c >= '0' && c <= '9') goto num_frac_digits;
313 if (c == 'e' || c == 'E') goto num_exp_sign;
314 p--;
315 }
316 goto done_valid;
317
318 num_exp_sign:
319 if (spdk_likely(p != buf_end)) {
320 c = *p++;
321 if (c >= '0' && c <= '9') goto num_exp_digits;
322 if (c == '-' || c == '+') goto num_exp_first_digit;
323 p--;
324 }
325 goto done_invalid;
326
327 num_exp_first_digit:
328 if (spdk_likely(p != buf_end)) {
329 c = *p++;
330 if (c >= '0' && c <= '9') goto num_exp_digits;
331 p--;
332 }
333 goto done_invalid;
334
335 num_exp_digits:
336 if (spdk_likely(p != buf_end)) {
337 c = *p++;
338 if (c >= '0' && c <= '9') goto num_exp_digits;
339 p--;
340 }
341 goto done_valid;
342
343 done_valid:
344 /* Valid end state */
345 return p - start;
346
347 done_invalid:
348 /* Invalid end state */
349 if (p == buf_end) {
350 /* Hit the end of the buffer - the stream is incomplete. */
351 return SPDK_JSON_PARSE_INCOMPLETE;
352 }
353
354 /* Found an invalid character in an invalid end state */
355 return SPDK_JSON_PARSE_INVALID;
356 }
357
358 static int
359 json_valid_comment(const uint8_t *start, const uint8_t *buf_end)
360 {
361 const uint8_t *p = start;
362 bool multiline;
363
364 assert(buf_end > p);
365 if (buf_end - p < 2) {
366 return SPDK_JSON_PARSE_INCOMPLETE;
367 }
368
369 if (p[0] != '/') {
370 return SPDK_JSON_PARSE_INVALID;
371 }
372 if (p[1] == '*') {
373 multiline = true;
374 } else if (p[1] == '/') {
375 multiline = false;
376 } else {
377 return SPDK_JSON_PARSE_INVALID;
378 }
379 p += 2;
380
381 if (multiline) {
382 while (p != buf_end - 1) {
383 if (p[0] == '*' && p[1] == '/') {
384 /* Include the terminating star and slash in the comment */
385 return p - start + 2;
386 }
387 p++;
388 }
389 } else {
390 while (p != buf_end) {
391 if (*p == '\r' || *p == '\n') {
392 /* Do not include the line terminator in the comment */
393 return p - start;
394 }
395 p++;
396 }
397 }
398
399 return SPDK_JSON_PARSE_INCOMPLETE;
400 }
401
402 struct json_literal {
403 enum spdk_json_val_type type;
404 uint32_t len;
405 uint8_t str[8];
406 };
407
408 /*
409 * JSON only defines 3 possible literals; they can be uniquely identified by bits
410 * 3 and 4 of the first character:
411 * 'f' = 0b11[00]110
412 * 'n' = 0b11[01]110
413 * 't' = 0b11[10]100
414 * These two bits can be used as an index into the g_json_literals array.
415 */
416 static const struct json_literal g_json_literals[] = {
417 {SPDK_JSON_VAL_FALSE, 5, "false"},
418 {SPDK_JSON_VAL_NULL, 4, "null"},
419 {SPDK_JSON_VAL_TRUE, 4, "true"},
420 {}
421 };
422
423 static int
424 match_literal(const uint8_t *start, const uint8_t *end, const uint8_t *literal, size_t len)
425 {
426 assert(end >= start);
427 if ((size_t)(end - start) < len) {
428 return SPDK_JSON_PARSE_INCOMPLETE;
429 }
430
431 if (memcmp(start, literal, len) != 0) {
432 return SPDK_JSON_PARSE_INVALID;
433 }
434
435 return len;
436 }
437
438 ssize_t
439 spdk_json_parse(void *json, size_t size, struct spdk_json_val *values, size_t num_values,
440 void **end, uint32_t flags)
441 {
442 uint8_t *json_end = json + size;
443 enum spdk_json_val_type containers[SPDK_JSON_MAX_NESTING_DEPTH];
444 size_t con_value[SPDK_JSON_MAX_NESTING_DEPTH];
445 enum spdk_json_val_type con_type = SPDK_JSON_VAL_INVALID;
446 bool trailing_comma = false;
447 size_t depth = 0; /* index into containers */
448 size_t cur_value = 0; /* index into values */
449 size_t con_start_value;
450 uint8_t *data = json;
451 uint8_t *new_data;
452 int rc = 0;
453 const struct json_literal *lit;
454 enum {
455 STATE_VALUE, /* initial state */
456 STATE_VALUE_SEPARATOR, /* value separator (comma) */
457 STATE_NAME, /* "name": value */
458 STATE_NAME_SEPARATOR, /* colon */
459 STATE_END, /* parsed the complete value, so only whitespace is valid */
460 } state = STATE_VALUE;
461
462 #define ADD_VALUE(t, val_start_ptr, val_end_ptr) \
463 if (values && cur_value < num_values) { \
464 values[cur_value].type = t; \
465 values[cur_value].start = val_start_ptr; \
466 values[cur_value].len = val_end_ptr - val_start_ptr; \
467 } \
468 cur_value++
469
470 while (data < json_end) {
471 uint8_t c = *data;
472
473 switch (c) {
474 case ' ':
475 case '\t':
476 case '\r':
477 case '\n':
478 /* Whitespace is allowed between any tokens. */
479 data++;
480 break;
481
482 case 't':
483 case 'f':
484 case 'n':
485 /* true, false, or null */
486 if (state != STATE_VALUE) goto done_invalid;
487 lit = &g_json_literals[(c >> 3) & 3]; /* See comment above g_json_literals[] */
488 assert(lit->str[0] == c);
489 rc = match_literal(data, json_end, lit->str, lit->len);
490 if (rc < 0) goto done_rc;
491 ADD_VALUE(lit->type, data, data + rc);
492 data += rc;
493 state = depth ? STATE_VALUE_SEPARATOR : STATE_END;
494 trailing_comma = false;
495 break;
496
497 case '"':
498 if (state != STATE_VALUE && state != STATE_NAME) goto done_invalid;
499 rc = json_decode_string(data, json_end, &new_data, flags);
500 if (rc < 0) {
501 data = new_data;
502 goto done_rc;
503 }
504 /*
505 * Start is data + 1 to skip initial quote.
506 * Length is data + rc - 1 to skip both quotes.
507 */
508 ADD_VALUE(state == STATE_VALUE ? SPDK_JSON_VAL_STRING : SPDK_JSON_VAL_NAME,
509 data + 1, data + rc - 1);
510 data = new_data;
511 if (state == STATE_NAME) {
512 state = STATE_NAME_SEPARATOR;
513 } else {
514 state = depth ? STATE_VALUE_SEPARATOR : STATE_END;
515 }
516 trailing_comma = false;
517 break;
518
519 case '-':
520 case '0':
521 case '1':
522 case '2':
523 case '3':
524 case '4':
525 case '5':
526 case '6':
527 case '7':
528 case '8':
529 case '9':
530 if (state != STATE_VALUE) goto done_invalid;
531 rc = json_valid_number(data, json_end);
532 if (rc < 0) goto done_rc;
533 ADD_VALUE(SPDK_JSON_VAL_NUMBER, data, data + rc);
534 data += rc;
535 state = depth ? STATE_VALUE_SEPARATOR : STATE_END;
536 trailing_comma = false;
537 break;
538
539 case '{':
540 case '[':
541 if (state != STATE_VALUE) goto done_invalid;
542 if (depth == SPDK_JSON_MAX_NESTING_DEPTH) {
543 rc = SPDK_JSON_PARSE_MAX_DEPTH_EXCEEDED;
544 goto done_rc;
545 }
546 if (c == '{') {
547 con_type = SPDK_JSON_VAL_OBJECT_BEGIN;
548 state = STATE_NAME;
549 } else {
550 con_type = SPDK_JSON_VAL_ARRAY_BEGIN;
551 state = STATE_VALUE;
552 }
553 con_value[depth] = cur_value;
554 containers[depth++] = con_type;
555 ADD_VALUE(con_type, data, data + 1);
556 data++;
557 trailing_comma = false;
558 break;
559
560 case '}':
561 case ']':
562 if (trailing_comma) goto done_invalid;
563 if (depth == 0) goto done_invalid;
564 con_type = containers[--depth];
565 con_start_value = con_value[depth];
566 if (values && con_start_value < num_values) {
567 values[con_start_value].len = cur_value - con_start_value - 1;
568 }
569 if (c == '}') {
570 if (state != STATE_NAME && state != STATE_VALUE_SEPARATOR) {
571 goto done_invalid;
572 }
573 if (con_type != SPDK_JSON_VAL_OBJECT_BEGIN) {
574 goto done_invalid;
575 }
576 ADD_VALUE(SPDK_JSON_VAL_OBJECT_END, data, data + 1);
577 } else {
578 if (state != STATE_VALUE && state != STATE_VALUE_SEPARATOR) {
579 goto done_invalid;
580 }
581 if (con_type != SPDK_JSON_VAL_ARRAY_BEGIN) {
582 goto done_invalid;
583 }
584 ADD_VALUE(SPDK_JSON_VAL_ARRAY_END, data, data + 1);
585 }
586 con_type = depth == 0 ? SPDK_JSON_VAL_INVALID : containers[depth - 1];
587 data++;
588 state = depth ? STATE_VALUE_SEPARATOR : STATE_END;
589 trailing_comma = false;
590 break;
591
592 case ',':
593 if (state != STATE_VALUE_SEPARATOR) goto done_invalid;
594 data++;
595 assert(con_type == SPDK_JSON_VAL_ARRAY_BEGIN ||
596 con_type == SPDK_JSON_VAL_OBJECT_BEGIN);
597 state = con_type == SPDK_JSON_VAL_ARRAY_BEGIN ? STATE_VALUE : STATE_NAME;
598 trailing_comma = true;
599 break;
600
601 case ':':
602 if (state != STATE_NAME_SEPARATOR) goto done_invalid;
603 data++;
604 state = STATE_VALUE;
605 break;
606
607 case '/':
608 if (!(flags & SPDK_JSON_PARSE_FLAG_ALLOW_COMMENTS)) {
609 goto done_invalid;
610 }
611 rc = json_valid_comment(data, json_end);
612 if (rc < 0) goto done_rc;
613 /* Skip over comment */
614 data += rc;
615 break;
616
617 default:
618 goto done_invalid;
619 }
620
621 if (state == STATE_END) {
622 break;
623 }
624 }
625
626 if (state == STATE_END) {
627 /* Skip trailing whitespace */
628 while (data < json_end) {
629 uint8_t c = *data;
630
631 if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
632 data++;
633 } else {
634 break;
635 }
636 }
637
638 /*
639 * These asserts are just for sanity checking - they are guaranteed by the allowed
640 * state transitions.
641 */
642 assert(depth == 0);
643 assert(trailing_comma == false);
644 assert(data <= json_end);
645 if (end) {
646 *end = data;
647 }
648 return cur_value;
649 }
650
651 /* Invalid end state - ran out of data */
652 rc = SPDK_JSON_PARSE_INCOMPLETE;
653
654 done_rc:
655 assert(rc < 0);
656 if (end) {
657 *end = data;
658 }
659 return rc;
660
661 done_invalid:
662 rc = SPDK_JSON_PARSE_INVALID;
663 goto done_rc;
664 }