]> git.proxmox.com Git - ceph.git/blame - ceph/src/spdk/lib/json/json_parse.c
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / spdk / lib / json / json_parse.c
CommitLineData
7c673cae
FG
1/*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
11fdf7f2
TL
34#include "spdk/json.h"
35
36#include "spdk_internal/utf.h"
37
38#define SPDK_JSON_MAX_NESTING_DEPTH 64
7c673cae
FG
39
/*
 * Map an ASCII hexadecimal digit to its numeric value.
 *
 * \param c Input byte.
 *
 * \return 0-15 if c is one of 0-9, A-F or a-f; -1 for any other byte.
 */
static int
hex_value(uint8_t c)
{
	/*
	 * Entries are stored biased by +1 so that the zero-initialized slots of
	 * the table (every byte that is not a hex digit) decode to -1 below.
	 */
#define V(x, y) [(x)] = (y) + 1
	static const int8_t val[256] = {
		V('0', 0), V('1', 1), V('2', 2), V('3', 3), V('4', 4),
		V('5', 5), V('6', 6), V('7', 7), V('8', 8), V('9', 9),
		V('A', 0xA), V('B', 0xB), V('C', 0xC), V('D', 0xD), V('E', 0xE), V('F', 0xF),
		V('a', 0xA), V('b', 0xB), V('c', 0xC), V('d', 0xD), V('e', 0xE), V('f', 0xF),
	};
#undef V

	return val[c] - 1;
}
54
55static int
56json_decode_string_escape_unicode(uint8_t **strp, uint8_t *buf_end, uint8_t *out)
57{
58 uint8_t *str = *strp;
59 int v0, v1, v2, v3;
60 uint32_t val;
61 uint32_t surrogate_high = 0;
62 int rc;
63decode:
64 /* \uXXXX */
65 assert(buf_end > str);
66
11fdf7f2
TL
67 if (*str++ != '\\') { return SPDK_JSON_PARSE_INVALID; }
68 if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; }
7c673cae 69
11fdf7f2
TL
70 if (*str++ != 'u') { return SPDK_JSON_PARSE_INVALID; }
71 if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; }
7c673cae 72
11fdf7f2
TL
73 if ((v3 = hex_value(*str++)) < 0) { return SPDK_JSON_PARSE_INVALID; }
74 if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; }
7c673cae 75
11fdf7f2
TL
76 if ((v2 = hex_value(*str++)) < 0) { return SPDK_JSON_PARSE_INVALID; }
77 if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; }
7c673cae 78
11fdf7f2
TL
79 if ((v1 = hex_value(*str++)) < 0) { return SPDK_JSON_PARSE_INVALID; }
80 if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; }
7c673cae 81
11fdf7f2
TL
82 if ((v0 = hex_value(*str++)) < 0) { return SPDK_JSON_PARSE_INVALID; }
83 if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; }
7c673cae
FG
84
85 val = v0 | (v1 << 4) | (v2 << 8) | (v3 << 12);
86
87 if (surrogate_high) {
88 /* We already parsed the high surrogate, so this should be the low part. */
89 if (!utf16_valid_surrogate_low(val)) {
90 return SPDK_JSON_PARSE_INVALID;
91 }
92
93 /* Convert UTF-16 surrogate pair into codepoint and fall through to utf8_encode. */
94 val = utf16_decode_surrogate_pair(surrogate_high, val);
95 } else if (utf16_valid_surrogate_high(val)) {
96 surrogate_high = val;
97
98 /*
99 * We parsed a \uXXXX sequence that decoded to the first half of a
100 * UTF-16 surrogate pair, so it must be immediately followed by another
101 * \uXXXX escape.
102 *
103 * Loop around to get the low half of the surrogate pair.
104 */
11fdf7f2 105 if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; }
7c673cae
FG
106 goto decode;
107 } else if (utf16_valid_surrogate_low(val)) {
108 /*
109 * We found the second half of surrogate pair without the first half;
110 * this is an invalid encoding.
111 */
112 return SPDK_JSON_PARSE_INVALID;
113 }
114
115 /*
116 * Convert Unicode escape (or surrogate pair) to UTF-8 in place.
117 *
118 * This is safe (will not write beyond the buffer) because the \uXXXX sequence is 6 bytes
119 * (or 12 bytes for surrogate pairs), and the longest possible UTF-8 encoding of a
120 * single codepoint is 4 bytes.
121 */
122 if (out) {
123 rc = utf8_encode_unsafe(out, val);
124 } else {
125 rc = utf8_codepoint_len(val);
126 }
127 if (rc < 0) {
128 return SPDK_JSON_PARSE_INVALID;
129 }
130
131 *strp = str; /* update input pointer */
132 return rc; /* return number of bytes decoded */
133}
134
135static int
136json_decode_string_escape_twochar(uint8_t **strp, uint8_t *buf_end, uint8_t *out)
137{
138 static const uint8_t escapes[256] = {
139 ['b'] = '\b',
140 ['f'] = '\f',
141 ['n'] = '\n',
142 ['r'] = '\r',
143 ['t'] = '\t',
144 ['/'] = '/',
145 ['"'] = '"',
146 ['\\'] = '\\',
147 };
148 uint8_t *str = *strp;
149 uint8_t c;
150
151 assert(buf_end > str);
152 if (buf_end - str < 2) {
153 return SPDK_JSON_PARSE_INCOMPLETE;
154 }
155
156 assert(str[0] == '\\');
157
158 c = escapes[str[1]];
159 if (c) {
160 if (out) {
161 *out = c;
162 }
163 *strp += 2; /* consumed two bytes */
164 return 1; /* produced one byte */
165 }
166
167 return SPDK_JSON_PARSE_INVALID;
168}
169
/*
 * Decode JSON string backslash escape.
 *
 * \param strp pointer to pointer to first character of escape (the backslash).
 *             *strp is also advanced to indicate how much input was consumed.
 * \param buf_end One past the last readable input byte.
 * \param out Destination for the decoded bytes; passed through unchanged to
 *            the escape decoders, which accept NULL.
 *
 * \return Number of bytes appended to out, or a negative SPDK_JSON_PARSE_*
 *         code if the escape is malformed or truncated.
 */
static int
json_decode_string_escape(uint8_t **strp, uint8_t *buf_end, uint8_t *out)
{
	int rc;

	/* Try the short escapes first; anything they reject is retried as \uXXXX. */
	rc = json_decode_string_escape_twochar(strp, buf_end, out);
	if (rc > 0) {
		return rc;
	}

	return json_decode_string_escape_unicode(strp, buf_end, out);
}
189
190/*
191 * Decode JSON string in place.
192 *
193 * \param str_start Pointer to the beginning of the string (the opening " character).
194 *
195 * \return Number of bytes in decoded string (beginning from start).
196 */
197static int
198json_decode_string(uint8_t *str_start, uint8_t *buf_end, uint8_t **str_end, uint32_t flags)
199{
200 uint8_t *str = str_start;
201 uint8_t *out = str_start + 1; /* Decode string in place (skip the initial quote) */
202 int rc;
203
204 if (buf_end - str_start < 2) {
205 /*
206 * Shortest valid string (the empty string) is two bytes (""),
207 * so this can't possibly be valid
208 */
209 *str_end = str;
210 return SPDK_JSON_PARSE_INCOMPLETE;
211 }
212
213 if (*str++ != '"') {
214 *str_end = str;
215 return SPDK_JSON_PARSE_INVALID;
216 }
217
218 while (str < buf_end) {
219 if (str[0] == '"') {
220 /*
221 * End of string.
222 * Update str_end to point at next input byte and return output length.
223 */
224 *str_end = str + 1;
225 return out - str_start - 1;
226 } else if (str[0] == '\\') {
227 rc = json_decode_string_escape(&str, buf_end,
228 flags & SPDK_JSON_PARSE_FLAG_DECODE_IN_PLACE ? out : NULL);
229 assert(rc != 0);
230 if (rc < 0) {
231 *str_end = str;
232 return rc;
233 }
234 out += rc;
235 } else if (str[0] <= 0x1f) {
236 /* control characters must be escaped */
237 *str_end = str;
238 return SPDK_JSON_PARSE_INVALID;
239 } else {
240 rc = utf8_valid(str, buf_end);
241 if (rc == 0) {
242 *str_end = str;
243 return SPDK_JSON_PARSE_INCOMPLETE;
244 } else if (rc < 0) {
245 *str_end = str;
246 return SPDK_JSON_PARSE_INVALID;
247 }
248
249 if (out && out != str && (flags & SPDK_JSON_PARSE_FLAG_DECODE_IN_PLACE)) {
250 memmove(out, str, rc);
251 }
252 out += rc;
253 str += rc;
254 }
255 }
256
257 /* If execution gets here, we ran out of buffer. */
258 *str_end = str;
259 return SPDK_JSON_PARSE_INCOMPLETE;
260}
261
262static int
263json_valid_number(uint8_t *start, uint8_t *buf_end)
264{
265 uint8_t *p = start;
266 uint8_t c;
267
11fdf7f2 268 if (p >= buf_end) { return -1; }
7c673cae
FG
269
270 c = *p++;
11fdf7f2
TL
271 if (c >= '1' && c <= '9') { goto num_int_digits; }
272 if (c == '0') { goto num_frac_or_exp; }
273 if (c == '-') { goto num_int_first_digit; }
7c673cae
FG
274 p--;
275 goto done_invalid;
276
277num_int_first_digit:
278 if (spdk_likely(p != buf_end)) {
279 c = *p++;
11fdf7f2
TL
280 if (c == '0') { goto num_frac_or_exp; }
281 if (c >= '1' && c <= '9') { goto num_int_digits; }
7c673cae
FG
282 p--;
283 }
284 goto done_invalid;
285
286num_int_digits:
287 if (spdk_likely(p != buf_end)) {
288 c = *p++;
11fdf7f2
TL
289 if (c >= '0' && c <= '9') { goto num_int_digits; }
290 if (c == '.') { goto num_frac_first_digit; }
291 if (c == 'e' || c == 'E') { goto num_exp_sign; }
7c673cae
FG
292 p--;
293 }
294 goto done_valid;
295
296num_frac_or_exp:
297 if (spdk_likely(p != buf_end)) {
298 c = *p++;
11fdf7f2
TL
299 if (c == '.') { goto num_frac_first_digit; }
300 if (c == 'e' || c == 'E') { goto num_exp_sign; }
7c673cae
FG
301 p--;
302 }
303 goto done_valid;
304
305num_frac_first_digit:
306 if (spdk_likely(p != buf_end)) {
307 c = *p++;
11fdf7f2 308 if (c >= '0' && c <= '9') { goto num_frac_digits; }
7c673cae
FG
309 p--;
310 }
311 goto done_invalid;
312
313num_frac_digits:
314 if (spdk_likely(p != buf_end)) {
315 c = *p++;
11fdf7f2
TL
316 if (c >= '0' && c <= '9') { goto num_frac_digits; }
317 if (c == 'e' || c == 'E') { goto num_exp_sign; }
7c673cae
FG
318 p--;
319 }
320 goto done_valid;
321
322num_exp_sign:
323 if (spdk_likely(p != buf_end)) {
324 c = *p++;
11fdf7f2
TL
325 if (c >= '0' && c <= '9') { goto num_exp_digits; }
326 if (c == '-' || c == '+') { goto num_exp_first_digit; }
7c673cae
FG
327 p--;
328 }
329 goto done_invalid;
330
331num_exp_first_digit:
332 if (spdk_likely(p != buf_end)) {
333 c = *p++;
11fdf7f2 334 if (c >= '0' && c <= '9') { goto num_exp_digits; }
7c673cae
FG
335 p--;
336 }
337 goto done_invalid;
338
339num_exp_digits:
340 if (spdk_likely(p != buf_end)) {
341 c = *p++;
11fdf7f2 342 if (c >= '0' && c <= '9') { goto num_exp_digits; }
7c673cae
FG
343 p--;
344 }
345 goto done_valid;
346
347done_valid:
348 /* Valid end state */
349 return p - start;
350
351done_invalid:
352 /* Invalid end state */
353 if (p == buf_end) {
354 /* Hit the end of the buffer - the stream is incomplete. */
355 return SPDK_JSON_PARSE_INCOMPLETE;
356 }
357
358 /* Found an invalid character in an invalid end state */
359 return SPDK_JSON_PARSE_INVALID;
360}
361
362static int
363json_valid_comment(const uint8_t *start, const uint8_t *buf_end)
364{
365 const uint8_t *p = start;
366 bool multiline;
367
368 assert(buf_end > p);
369 if (buf_end - p < 2) {
370 return SPDK_JSON_PARSE_INCOMPLETE;
371 }
372
373 if (p[0] != '/') {
374 return SPDK_JSON_PARSE_INVALID;
375 }
376 if (p[1] == '*') {
377 multiline = true;
378 } else if (p[1] == '/') {
379 multiline = false;
380 } else {
381 return SPDK_JSON_PARSE_INVALID;
382 }
383 p += 2;
384
385 if (multiline) {
386 while (p != buf_end - 1) {
387 if (p[0] == '*' && p[1] == '/') {
388 /* Include the terminating star and slash in the comment */
389 return p - start + 2;
390 }
391 p++;
392 }
393 } else {
394 while (p != buf_end) {
395 if (*p == '\r' || *p == '\n') {
396 /* Do not include the line terminator in the comment */
397 return p - start;
398 }
399 p++;
400 }
401 }
402
403 return SPDK_JSON_PARSE_INCOMPLETE;
404}
405
406struct json_literal {
407 enum spdk_json_val_type type;
408 uint32_t len;
409 uint8_t str[8];
410};
411
412/*
413 * JSON only defines 3 possible literals; they can be uniquely identified by bits
414 * 3 and 4 of the first character:
415 * 'f' = 0b11[00]110
416 * 'n' = 0b11[01]110
417 * 't' = 0b11[10]100
418 * These two bits can be used as an index into the g_json_literals array.
419 */
420static const struct json_literal g_json_literals[] = {
421 {SPDK_JSON_VAL_FALSE, 5, "false"},
422 {SPDK_JSON_VAL_NULL, 4, "null"},
423 {SPDK_JSON_VAL_TRUE, 4, "true"},
424 {}
425};
426
427static int
428match_literal(const uint8_t *start, const uint8_t *end, const uint8_t *literal, size_t len)
429{
430 assert(end >= start);
431 if ((size_t)(end - start) < len) {
432 return SPDK_JSON_PARSE_INCOMPLETE;
433 }
434
435 if (memcmp(start, literal, len) != 0) {
436 return SPDK_JSON_PARSE_INVALID;
437 }
438
439 return len;
440}
441
442ssize_t
443spdk_json_parse(void *json, size_t size, struct spdk_json_val *values, size_t num_values,
444 void **end, uint32_t flags)
445{
446 uint8_t *json_end = json + size;
447 enum spdk_json_val_type containers[SPDK_JSON_MAX_NESTING_DEPTH];
448 size_t con_value[SPDK_JSON_MAX_NESTING_DEPTH];
449 enum spdk_json_val_type con_type = SPDK_JSON_VAL_INVALID;
450 bool trailing_comma = false;
451 size_t depth = 0; /* index into containers */
452 size_t cur_value = 0; /* index into values */
453 size_t con_start_value;
454 uint8_t *data = json;
455 uint8_t *new_data;
456 int rc = 0;
457 const struct json_literal *lit;
458 enum {
459 STATE_VALUE, /* initial state */
460 STATE_VALUE_SEPARATOR, /* value separator (comma) */
461 STATE_NAME, /* "name": value */
462 STATE_NAME_SEPARATOR, /* colon */
463 STATE_END, /* parsed the complete value, so only whitespace is valid */
464 } state = STATE_VALUE;
465
466#define ADD_VALUE(t, val_start_ptr, val_end_ptr) \
467 if (values && cur_value < num_values) { \
468 values[cur_value].type = t; \
469 values[cur_value].start = val_start_ptr; \
470 values[cur_value].len = val_end_ptr - val_start_ptr; \
471 } \
472 cur_value++
473
474 while (data < json_end) {
475 uint8_t c = *data;
476
477 switch (c) {
478 case ' ':
479 case '\t':
480 case '\r':
481 case '\n':
482 /* Whitespace is allowed between any tokens. */
483 data++;
484 break;
485
486 case 't':
487 case 'f':
488 case 'n':
489 /* true, false, or null */
11fdf7f2 490 if (state != STATE_VALUE) { goto done_invalid; }
7c673cae
FG
491 lit = &g_json_literals[(c >> 3) & 3]; /* See comment above g_json_literals[] */
492 assert(lit->str[0] == c);
493 rc = match_literal(data, json_end, lit->str, lit->len);
11fdf7f2 494 if (rc < 0) { goto done_rc; }
7c673cae
FG
495 ADD_VALUE(lit->type, data, data + rc);
496 data += rc;
497 state = depth ? STATE_VALUE_SEPARATOR : STATE_END;
498 trailing_comma = false;
499 break;
500
501 case '"':
11fdf7f2 502 if (state != STATE_VALUE && state != STATE_NAME) { goto done_invalid; }
7c673cae
FG
503 rc = json_decode_string(data, json_end, &new_data, flags);
504 if (rc < 0) {
505 data = new_data;
506 goto done_rc;
507 }
508 /*
509 * Start is data + 1 to skip initial quote.
510 * Length is data + rc - 1 to skip both quotes.
511 */
512 ADD_VALUE(state == STATE_VALUE ? SPDK_JSON_VAL_STRING : SPDK_JSON_VAL_NAME,
513 data + 1, data + rc - 1);
514 data = new_data;
515 if (state == STATE_NAME) {
516 state = STATE_NAME_SEPARATOR;
517 } else {
518 state = depth ? STATE_VALUE_SEPARATOR : STATE_END;
519 }
520 trailing_comma = false;
521 break;
522
523 case '-':
524 case '0':
525 case '1':
526 case '2':
527 case '3':
528 case '4':
529 case '5':
530 case '6':
531 case '7':
532 case '8':
533 case '9':
11fdf7f2 534 if (state != STATE_VALUE) { goto done_invalid; }
7c673cae 535 rc = json_valid_number(data, json_end);
11fdf7f2 536 if (rc < 0) { goto done_rc; }
7c673cae
FG
537 ADD_VALUE(SPDK_JSON_VAL_NUMBER, data, data + rc);
538 data += rc;
539 state = depth ? STATE_VALUE_SEPARATOR : STATE_END;
540 trailing_comma = false;
541 break;
542
543 case '{':
544 case '[':
11fdf7f2 545 if (state != STATE_VALUE) { goto done_invalid; }
7c673cae
FG
546 if (depth == SPDK_JSON_MAX_NESTING_DEPTH) {
547 rc = SPDK_JSON_PARSE_MAX_DEPTH_EXCEEDED;
548 goto done_rc;
549 }
550 if (c == '{') {
551 con_type = SPDK_JSON_VAL_OBJECT_BEGIN;
552 state = STATE_NAME;
553 } else {
554 con_type = SPDK_JSON_VAL_ARRAY_BEGIN;
555 state = STATE_VALUE;
556 }
557 con_value[depth] = cur_value;
558 containers[depth++] = con_type;
559 ADD_VALUE(con_type, data, data + 1);
560 data++;
561 trailing_comma = false;
562 break;
563
564 case '}':
565 case ']':
11fdf7f2
TL
566 if (trailing_comma) { goto done_invalid; }
567 if (depth == 0) { goto done_invalid; }
7c673cae
FG
568 con_type = containers[--depth];
569 con_start_value = con_value[depth];
570 if (values && con_start_value < num_values) {
571 values[con_start_value].len = cur_value - con_start_value - 1;
572 }
573 if (c == '}') {
574 if (state != STATE_NAME && state != STATE_VALUE_SEPARATOR) {
575 goto done_invalid;
576 }
577 if (con_type != SPDK_JSON_VAL_OBJECT_BEGIN) {
578 goto done_invalid;
579 }
580 ADD_VALUE(SPDK_JSON_VAL_OBJECT_END, data, data + 1);
581 } else {
582 if (state != STATE_VALUE && state != STATE_VALUE_SEPARATOR) {
583 goto done_invalid;
584 }
585 if (con_type != SPDK_JSON_VAL_ARRAY_BEGIN) {
586 goto done_invalid;
587 }
588 ADD_VALUE(SPDK_JSON_VAL_ARRAY_END, data, data + 1);
589 }
590 con_type = depth == 0 ? SPDK_JSON_VAL_INVALID : containers[depth - 1];
591 data++;
592 state = depth ? STATE_VALUE_SEPARATOR : STATE_END;
593 trailing_comma = false;
594 break;
595
596 case ',':
11fdf7f2 597 if (state != STATE_VALUE_SEPARATOR) { goto done_invalid; }
7c673cae
FG
598 data++;
599 assert(con_type == SPDK_JSON_VAL_ARRAY_BEGIN ||
600 con_type == SPDK_JSON_VAL_OBJECT_BEGIN);
601 state = con_type == SPDK_JSON_VAL_ARRAY_BEGIN ? STATE_VALUE : STATE_NAME;
602 trailing_comma = true;
603 break;
604
605 case ':':
11fdf7f2 606 if (state != STATE_NAME_SEPARATOR) { goto done_invalid; }
7c673cae
FG
607 data++;
608 state = STATE_VALUE;
609 break;
610
611 case '/':
612 if (!(flags & SPDK_JSON_PARSE_FLAG_ALLOW_COMMENTS)) {
613 goto done_invalid;
614 }
615 rc = json_valid_comment(data, json_end);
11fdf7f2 616 if (rc < 0) { goto done_rc; }
7c673cae
FG
617 /* Skip over comment */
618 data += rc;
619 break;
620
621 default:
622 goto done_invalid;
623 }
624
625 if (state == STATE_END) {
626 break;
627 }
628 }
629
630 if (state == STATE_END) {
631 /* Skip trailing whitespace */
632 while (data < json_end) {
633 uint8_t c = *data;
634
635 if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
636 data++;
637 } else {
638 break;
639 }
640 }
641
642 /*
643 * These asserts are just for sanity checking - they are guaranteed by the allowed
644 * state transitions.
645 */
646 assert(depth == 0);
647 assert(trailing_comma == false);
648 assert(data <= json_end);
649 if (end) {
650 *end = data;
651 }
652 return cur_value;
653 }
654
655 /* Invalid end state - ran out of data */
656 rc = SPDK_JSON_PARSE_INCOMPLETE;
657
658done_rc:
659 assert(rc < 0);
660 if (end) {
661 *end = data;
662 }
663 return rc;
664
665done_invalid:
666 rc = SPDK_JSON_PARSE_INVALID;
667 goto done_rc;
668}