]>
Commit | Line | Data |
---|---|---|
f38b84ea | 1 | /* |
1600fa68 | 2 | * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. |
f38b84ea BP |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
18 | ||
19 | #include "json.h" | |
20 | ||
f38b84ea BP |
21 | #include <ctype.h> |
22 | #include <errno.h> | |
23 | #include <float.h> | |
24 | #include <limits.h> | |
f38b84ea BP |
25 | #include <string.h> |
26 | ||
27 | #include "dynamic-string.h" | |
28 | #include "hash.h" | |
29 | #include "shash.h" | |
30 | #include "unicode.h" | |
31 | #include "util.h" | |
32 | ||
/* Token kinds produced by the lexer.
 *
 * Each punctuation token is numbered with its own character code.  The
 * remaining kinds are numbered from UCHAR_MAX + 1 upward, so their values are
 * distinct from every 'unsigned char' value. */
enum json_token_type {
    T_EOF = 0,

    /* Punctuation. */
    T_BEGIN_ARRAY = '[',
    T_END_ARRAY = ']',
    T_BEGIN_OBJECT = '{',
    T_END_OBJECT = '}',
    T_NAME_SEPARATOR = ':',
    T_VALUE_SEPARATOR = ',',

    /* Keywords and literals. */
    T_FALSE = UCHAR_MAX + 1,
    T_NULL = UCHAR_MAX + 2,
    T_TRUE = UCHAR_MAX + 3,
    T_INTEGER = UCHAR_MAX + 4,
    T_REAL = UCHAR_MAX + 5,
    T_STRING = UCHAR_MAX + 6
};
49 | ||
50 | /* A JSON token. | |
51 | * | |
52 | * RFC 4627 doesn't define a lexical structure for JSON but I believe this to | |
53 | * be compliant with the standard. | |
54 | */ | |
55 | struct json_token { | |
56 | enum json_token_type type; | |
57 | union { | |
58 | double real; | |
59 | long long int integer; | |
60 | const char *string; | |
61 | } u; | |
62 | }; | |
63 | ||
/* Lexer states; the state records what kind of token, if any, is currently
 * being accumulated. */
enum json_lex_state {
    JSON_LEX_START = 0,         /* Not inside a token. */
    JSON_LEX_NUMBER,            /* Reading a number. */
    JSON_LEX_KEYWORD,           /* Reading a keyword. */
    JSON_LEX_STRING,            /* Reading a quoted string. */
    JSON_LEX_ESCAPE             /* In a quoted string just after a "\". */
};
71 | ||
/* Parser states; each one names what the parser expects to see next. */
enum json_parse_state {
    JSON_PARSE_START = 0,       /* Beginning of input. */
    JSON_PARSE_END,             /* End of input. */

    /* States used inside an object. */
    JSON_PARSE_OBJECT_INIT,     /* Expecting '}' or an object name. */
    JSON_PARSE_OBJECT_NAME,     /* Expecting an object name. */
    JSON_PARSE_OBJECT_COLON,    /* Expecting ':'. */
    JSON_PARSE_OBJECT_VALUE,    /* Expecting an object value. */
    JSON_PARSE_OBJECT_NEXT,     /* Expecting ',' or '}'. */

    /* States used inside an array. */
    JSON_PARSE_ARRAY_INIT,      /* Expecting ']' or a value. */
    JSON_PARSE_ARRAY_VALUE,     /* Expecting a value. */
    JSON_PARSE_ARRAY_NEXT       /* Expecting ',' or ']'. */
};
88 | ||
/* One entry of the parser's stack (see 'stack' in struct json_parser). */
struct json_parser_node {
    struct json *json;          /* JSON value associated with this entry. */
};
92 | ||
93 | /* A JSON parser. */ | |
94 | struct json_parser { | |
95 | int flags; | |
96 | ||
97 | /* Lexical analysis. */ | |
98 | enum json_lex_state lex_state; | |
99 | struct ds buffer; /* Buffer for accumulating token text. */ | |
0bdf342a BP |
100 | int line_number; |
101 | int column_number; | |
102 | int byte_number; | |
f38b84ea BP |
103 | |
104 | /* Parsing. */ | |
105 | enum json_parse_state parse_state; | |
106 | #define JSON_MAX_HEIGHT 1000 | |
107 | struct json_parser_node *stack; | |
108 | size_t height, allocated_height; | |
109 | char *member_name; | |
110 | ||
111 | /* Parse status. */ | |
112 | bool done; | |
113 | char *error; /* Error message, if any, null if none yet. */ | |
114 | }; | |
115 | ||
116 | static struct json *json_create(enum json_type type); | |
117 | static void json_parser_input(struct json_parser *, struct json_token *); | |
118 | ||
119 | static void json_error(struct json_parser *p, const char *format, ...) | |
120 | PRINTF_FORMAT(2, 3); | |
121 | \f | |
122 | const char * | |
123 | json_type_to_string(enum json_type type) | |
124 | { | |
125 | switch (type) { | |
126 | case JSON_NULL: | |
127 | return "null"; | |
128 | ||
129 | case JSON_FALSE: | |
130 | return "false"; | |
131 | ||
132 | case JSON_TRUE: | |
133 | return "true"; | |
134 | ||
135 | case JSON_OBJECT: | |
136 | return "object"; | |
137 | ||
138 | case JSON_ARRAY: | |
139 | return "array"; | |
140 | ||
141 | case JSON_INTEGER: | |
142 | case JSON_REAL: | |
143 | return "number"; | |
144 | ||
145 | case JSON_STRING: | |
146 | return "string"; | |
147 | ||
148 | case JSON_N_TYPES: | |
149 | default: | |
150 | return "<invalid>"; | |
151 | } | |
152 | } | |
153 | \f | |
154 | /* Functions for manipulating struct json. */ | |
155 | ||
156 | struct json * | |
157 | json_null_create(void) | |
158 | { | |
159 | return json_create(JSON_NULL); | |
160 | } | |
161 | ||
162 | struct json * | |
163 | json_boolean_create(bool b) | |
164 | { | |
165 | return json_create(b ? JSON_TRUE : JSON_FALSE); | |
166 | } | |
167 | ||
168 | struct json * | |
169 | json_string_create_nocopy(char *s) | |
170 | { | |
171 | struct json *json = json_create(JSON_STRING); | |
172 | json->u.string = s; | |
173 | return json; | |
174 | } | |
175 | ||
/* Returns a JSON string value holding a copy of 's' made with xstrdup(). */
struct json *
json_string_create(const char *s)
{
    char *copy = xstrdup(s);

    return json_string_create_nocopy(copy);
}
181 | ||
182 | struct json * | |
183 | json_array_create_empty(void) | |
184 | { | |
185 | struct json *json = json_create(JSON_ARRAY); | |
186 | json->u.array.elems = NULL; | |
187 | json->u.array.n = 0; | |
188 | json->u.array.n_allocated = 0; | |
189 | return json; | |
190 | } | |
191 | ||
192 | void | |
193 | json_array_add(struct json *array_, struct json *element) | |
194 | { | |
195 | struct json_array *array = json_array(array_); | |
196 | if (array->n >= array->n_allocated) { | |
197 | array->elems = x2nrealloc(array->elems, &array->n_allocated, | |
198 | sizeof *array->elems); | |
199 | } | |
200 | array->elems[array->n++] = element; | |
201 | } | |
202 | ||
203 | void | |
204 | json_array_trim(struct json *array_) | |
205 | { | |
206 | struct json_array *array = json_array(array_); | |
207 | if (array->n < array->n_allocated){ | |
208 | array->n_allocated = array->n; | |
209 | array->elems = xrealloc(array->elems, array->n * sizeof *array->elems); | |
210 | } | |
211 | } | |
212 | ||
213 | struct json * | |
214 | json_array_create(struct json **elements, size_t n) | |
215 | { | |
216 | struct json *json = json_create(JSON_ARRAY); | |
217 | json->u.array.elems = elements; | |
218 | json->u.array.n = n; | |
219 | json->u.array.n_allocated = n; | |
220 | return json; | |
221 | } | |
222 | ||
f6f8c3ba BP |
/* Returns a JSON array value whose only element is 'elem0'. */
struct json *
json_array_create_1(struct json *elem0)
{
    struct json **members = xmalloc(1 * sizeof *members);

    members[0] = elem0;
    return json_array_create(members, 1);
}
230 | ||
f38b84ea BP |
/* Returns a JSON array value containing 'elem0' and 'elem1', in that order. */
struct json *
json_array_create_2(struct json *elem0, struct json *elem1)
{
    enum { N_MEMBERS = 2 };
    struct json **members = xmalloc(N_MEMBERS * sizeof *members);

    members[0] = elem0;
    members[1] = elem1;
    return json_array_create(members, N_MEMBERS);
}
239 | ||
/* Returns a JSON array value containing 'elem0', 'elem1', and 'elem2', in
 * that order. */
struct json *
json_array_create_3(struct json *elem0, struct json *elem1, struct json *elem2)
{
    enum { N_MEMBERS = 3 };
    struct json **members = xmalloc(N_MEMBERS * sizeof *members);

    members[0] = elem0;
    members[1] = elem1;
    members[2] = elem2;
    return json_array_create(members, N_MEMBERS);
}
249 | ||
250 | struct json * | |
251 | json_object_create(void) | |
252 | { | |
253 | struct json *json = json_create(JSON_OBJECT); | |
254 | json->u.object = xmalloc(sizeof *json->u.object); | |
255 | shash_init(json->u.object); | |
256 | return json; | |
257 | } | |
258 | ||
259 | struct json * | |
260 | json_integer_create(long long int integer) | |
261 | { | |
262 | struct json *json = json_create(JSON_INTEGER); | |
263 | json->u.integer = integer; | |
264 | return json; | |
265 | } | |
266 | ||
267 | struct json * | |
268 | json_real_create(double real) | |
269 | { | |
270 | struct json *json = json_create(JSON_REAL); | |
271 | json->u.real = real; | |
272 | return json; | |
273 | } | |
274 | ||
275 | void | |
276 | json_object_put(struct json *json, const char *name, struct json *value) | |
277 | { | |
597cf5a1 | 278 | json_destroy(shash_replace(json->u.object, name, value)); |
f38b84ea BP |
279 | } |
280 | ||
/* Sets member 'name' of JSON object 'json' to a new JSON string that holds a
 * copy of 'value'. */
void
json_object_put_string(struct json *json, const char *name, const char *value)
{
    struct json *string = json_string_create(value);

    json_object_put(json, name, string);
}
286 | ||
287 | const char * | |
288 | json_string(const struct json *json) | |
289 | { | |
cb22974d | 290 | ovs_assert(json->type == JSON_STRING); |
f38b84ea BP |
291 | return json->u.string; |
292 | } | |
293 | ||
294 | struct json_array * | |
295 | json_array(const struct json *json) | |
296 | { | |
cb22974d | 297 | ovs_assert(json->type == JSON_ARRAY); |
ebc56baa | 298 | return CONST_CAST(struct json_array *, &json->u.array); |
f38b84ea BP |
299 | } |
300 | ||
301 | struct shash * | |
302 | json_object(const struct json *json) | |
303 | { | |
cb22974d | 304 | ovs_assert(json->type == JSON_OBJECT); |
ebc56baa | 305 | return CONST_CAST(struct shash *, json->u.object); |
f38b84ea BP |
306 | } |
307 | ||
308 | bool | |
309 | json_boolean(const struct json *json) | |
310 | { | |
cb22974d | 311 | ovs_assert(json->type == JSON_TRUE || json->type == JSON_FALSE); |
f38b84ea BP |
312 | return json->type == JSON_TRUE; |
313 | } | |
314 | ||
315 | double | |
316 | json_real(const struct json *json) | |
317 | { | |
cb22974d | 318 | ovs_assert(json->type == JSON_REAL || json->type == JSON_INTEGER); |
f38b84ea BP |
319 | return json->type == JSON_REAL ? json->u.real : json->u.integer; |
320 | } | |
321 | ||
322 | int64_t | |
323 | json_integer(const struct json *json) | |
324 | { | |
cb22974d | 325 | ovs_assert(json->type == JSON_INTEGER); |
f38b84ea BP |
326 | return json->u.integer; |
327 | } | |
328 | \f | |
329 | static void json_destroy_object(struct shash *object); | |
330 | static void json_destroy_array(struct json_array *array); | |
331 | ||
332 | /* Frees 'json' and everything it points to, recursively. */ | |
333 | void | |
334 | json_destroy(struct json *json) | |
335 | { | |
336 | if (json) { | |
337 | switch (json->type) { | |
338 | case JSON_OBJECT: | |
339 | json_destroy_object(json->u.object); | |
340 | break; | |
341 | ||
342 | case JSON_ARRAY: | |
343 | json_destroy_array(&json->u.array); | |
344 | break; | |
345 | ||
346 | case JSON_STRING: | |
347 | free(json->u.string); | |
348 | break; | |
349 | ||
350 | case JSON_NULL: | |
351 | case JSON_FALSE: | |
352 | case JSON_TRUE: | |
353 | case JSON_INTEGER: | |
354 | case JSON_REAL: | |
355 | break; | |
356 | ||
357 | case JSON_N_TYPES: | |
358 | NOT_REACHED(); | |
359 | } | |
360 | free(json); | |
361 | } | |
362 | } | |
363 | ||
364 | static void | |
365 | json_destroy_object(struct shash *object) | |
366 | { | |
367 | struct shash_node *node, *next; | |
368 | ||
369 | SHASH_FOR_EACH_SAFE (node, next, object) { | |
370 | struct json *value = node->data; | |
371 | ||
372 | json_destroy(value); | |
373 | shash_delete(object, node); | |
374 | } | |
375 | shash_destroy(object); | |
376 | free(object); | |
377 | } | |
378 | ||
379 | static void | |
380 | json_destroy_array(struct json_array *array) | |
381 | { | |
382 | size_t i; | |
383 | ||
384 | for (i = 0; i < array->n; i++) { | |
385 | json_destroy(array->elems[i]); | |
386 | } | |
387 | free(array->elems); | |
388 | } | |
389 | \f | |
390 | static struct json *json_clone_object(const struct shash *object); | |
391 | static struct json *json_clone_array(const struct json_array *array); | |
392 | ||
393 | /* Returns a deep copy of 'json'. */ | |
394 | struct json * | |
395 | json_clone(const struct json *json) | |
396 | { | |
397 | switch (json->type) { | |
398 | case JSON_OBJECT: | |
399 | return json_clone_object(json->u.object); | |
400 | ||
401 | case JSON_ARRAY: | |
402 | return json_clone_array(&json->u.array); | |
403 | ||
404 | case JSON_STRING: | |
405 | return json_string_create(json->u.string); | |
406 | ||
407 | case JSON_NULL: | |
408 | case JSON_FALSE: | |
409 | case JSON_TRUE: | |
410 | return json_create(json->type); | |
411 | ||
412 | case JSON_INTEGER: | |
413 | return json_integer_create(json->u.integer); | |
414 | ||
415 | case JSON_REAL: | |
416 | return json_real_create(json->u.real); | |
417 | ||
418 | case JSON_N_TYPES: | |
419 | default: | |
420 | NOT_REACHED(); | |
421 | } | |
422 | } | |
423 | ||
424 | static struct json * | |
425 | json_clone_object(const struct shash *object) | |
426 | { | |
427 | struct shash_node *node; | |
428 | struct json *json; | |
429 | ||
430 | json = json_object_create(); | |
431 | SHASH_FOR_EACH (node, object) { | |
432 | struct json *value = node->data; | |
433 | json_object_put(json, node->name, json_clone(value)); | |
434 | } | |
435 | return json; | |
436 | } | |
437 | ||
438 | static struct json * | |
439 | json_clone_array(const struct json_array *array) | |
440 | { | |
441 | struct json **elems; | |
442 | size_t i; | |
443 | ||
444 | elems = xmalloc(array->n * sizeof *elems); | |
445 | for (i = 0; i < array->n; i++) { | |
446 | elems[i] = json_clone(array->elems[i]); | |
447 | } | |
448 | return json_array_create(elems, array->n); | |
449 | } | |
450 | \f | |
451 | static size_t | |
452 | json_hash_object(const struct shash *object, size_t basis) | |
453 | { | |
454 | const struct shash_node **nodes; | |
455 | size_t n, i; | |
456 | ||
457 | nodes = shash_sort(object); | |
458 | n = shash_count(object); | |
459 | for (i = 0; i < n; i++) { | |
460 | const struct shash_node *node = nodes[i]; | |
461 | basis = hash_string(node->name, basis); | |
462 | basis = json_hash(node->data, basis); | |
463 | } | |
464 | return basis; | |
465 | } | |
466 | ||
467 | static size_t | |
468 | json_hash_array(const struct json_array *array, size_t basis) | |
469 | { | |
470 | size_t i; | |
471 | ||
472 | basis = hash_int(array->n, basis); | |
473 | for (i = 0; i < array->n; i++) { | |
474 | basis = json_hash(array->elems[i], basis); | |
475 | } | |
476 | return basis; | |
477 | } | |
478 | ||
479 | size_t | |
480 | json_hash(const struct json *json, size_t basis) | |
481 | { | |
482 | switch (json->type) { | |
483 | case JSON_OBJECT: | |
484 | return json_hash_object(json->u.object, basis); | |
485 | ||
486 | case JSON_ARRAY: | |
487 | return json_hash_array(&json->u.array, basis); | |
488 | ||
489 | case JSON_STRING: | |
490 | return hash_string(json->u.string, basis); | |
491 | ||
492 | case JSON_NULL: | |
493 | case JSON_FALSE: | |
494 | case JSON_TRUE: | |
495 | return hash_int(json->type << 8, basis); | |
496 | ||
497 | case JSON_INTEGER: | |
498 | return hash_int(json->u.integer, basis); | |
499 | ||
500 | case JSON_REAL: | |
501 | return hash_double(json->u.real, basis); | |
502 | ||
503 | case JSON_N_TYPES: | |
504 | default: | |
505 | NOT_REACHED(); | |
506 | } | |
507 | } | |
508 | ||
509 | static bool | |
510 | json_equal_object(const struct shash *a, const struct shash *b) | |
511 | { | |
6e57173f | 512 | struct shash_node *a_node; |
f38b84ea BP |
513 | |
514 | if (shash_count(a) != shash_count(b)) { | |
515 | return false; | |
516 | } | |
517 | ||
6e57173f BP |
518 | SHASH_FOR_EACH (a_node, a) { |
519 | struct shash_node *b_node = shash_find(b, a_node->name); | |
520 | if (!b_node || !json_equal(a_node->data, b_node->data)) { | |
f38b84ea BP |
521 | return false; |
522 | } | |
523 | } | |
524 | ||
525 | return true; | |
526 | } | |
527 | ||
528 | static bool | |
529 | json_equal_array(const struct json_array *a, const struct json_array *b) | |
530 | { | |
531 | size_t i; | |
532 | ||
533 | if (a->n != b->n) { | |
534 | return false; | |
535 | } | |
536 | ||
537 | for (i = 0; i < a->n; i++) { | |
538 | if (!json_equal(a->elems[i], b->elems[i])) { | |
539 | return false; | |
540 | } | |
541 | } | |
542 | ||
543 | return true; | |
544 | } | |
545 | ||
546 | bool | |
547 | json_equal(const struct json *a, const struct json *b) | |
548 | { | |
549 | if (a->type != b->type) { | |
550 | return false; | |
551 | } | |
552 | ||
553 | switch (a->type) { | |
554 | case JSON_OBJECT: | |
555 | return json_equal_object(a->u.object, b->u.object); | |
556 | ||
557 | case JSON_ARRAY: | |
558 | return json_equal_array(&a->u.array, &b->u.array); | |
559 | ||
560 | case JSON_STRING: | |
561 | return !strcmp(a->u.string, b->u.string); | |
562 | ||
563 | case JSON_NULL: | |
564 | case JSON_FALSE: | |
565 | case JSON_TRUE: | |
566 | return true; | |
567 | ||
568 | case JSON_INTEGER: | |
569 | return a->u.integer == b->u.integer; | |
570 | ||
571 | case JSON_REAL: | |
572 | return a->u.real == b->u.real; | |
573 | ||
574 | case JSON_N_TYPES: | |
575 | default: | |
576 | NOT_REACHED(); | |
577 | } | |
578 | } | |
579 | \f | |
580 | /* Lexical analysis. */ | |
581 | ||
582 | static void | |
583 | json_lex_keyword(struct json_parser *p) | |
584 | { | |
585 | struct json_token token; | |
586 | const char *s; | |
587 | ||
588 | s = ds_cstr(&p->buffer); | |
589 | if (!strcmp(s, "false")) { | |
590 | token.type = T_FALSE; | |
591 | } else if (!strcmp(s, "true")) { | |
592 | token.type = T_TRUE; | |
593 | } else if (!strcmp(s, "null")) { | |
594 | token.type = T_NULL; | |
595 | } else { | |
596 | json_error(p, "invalid keyword '%s'", s); | |
597 | return; | |
598 | } | |
599 | json_parser_input(p, &token); | |
600 | } | |
601 | ||
602 | static void | |
603 | json_lex_number(struct json_parser *p) | |
604 | { | |
605 | const char *cp = ds_cstr(&p->buffer); | |
606 | unsigned long long int significand = 0; | |
a105c27b | 607 | struct json_token token; |
f38b84ea BP |
608 | bool imprecise = false; |
609 | bool negative = false; | |
610 | int pow10 = 0; | |
611 | ||
612 | /* Leading minus sign. */ | |
613 | if (*cp == '-') { | |
614 | negative = true; | |
615 | cp++; | |
616 | } | |
617 | ||
618 | /* At least one integer digit, but 0 may not be used as a leading digit for | |
619 | * a longer number. */ | |
620 | significand = 0; | |
f38b84ea BP |
621 | if (*cp == '0') { |
622 | cp++; | |
623 | if (isdigit(*cp)) { | |
624 | json_error(p, "leading zeros not allowed"); | |
625 | return; | |
626 | } | |
627 | } else if (isdigit(*cp)) { | |
628 | do { | |
629 | if (significand <= ULLONG_MAX / 10) { | |
630 | significand = significand * 10 + (*cp - '0'); | |
f38b84ea BP |
631 | } else { |
632 | pow10++; | |
633 | if (*cp != '0') { | |
634 | imprecise = true; | |
635 | } | |
636 | } | |
637 | cp++; | |
638 | } while (isdigit(*cp)); | |
639 | } else { | |
640 | json_error(p, "'-' must be followed by digit"); | |
641 | return; | |
642 | } | |
643 | ||
644 | /* Optional fraction. */ | |
645 | if (*cp == '.') { | |
646 | cp++; | |
647 | if (!isdigit(*cp)) { | |
648 | json_error(p, "decimal point must be followed by digit"); | |
649 | return; | |
650 | } | |
651 | do { | |
652 | if (significand <= ULLONG_MAX / 10) { | |
653 | significand = significand * 10 + (*cp - '0'); | |
f38b84ea BP |
654 | pow10--; |
655 | } else if (*cp != '0') { | |
656 | imprecise = true; | |
657 | } | |
658 | cp++; | |
659 | } while (isdigit(*cp)); | |
660 | } | |
661 | ||
662 | /* Optional exponent. */ | |
663 | if (*cp == 'e' || *cp == 'E') { | |
664 | bool negative_exponent = false; | |
665 | int exponent; | |
666 | ||
667 | cp++; | |
668 | if (*cp == '+') { | |
669 | cp++; | |
670 | } else if (*cp == '-') { | |
671 | negative_exponent = true; | |
672 | cp++; | |
673 | } | |
674 | ||
675 | if (!isdigit(*cp)) { | |
676 | json_error(p, "exponent must contain at least one digit"); | |
677 | return; | |
678 | } | |
679 | ||
680 | exponent = 0; | |
681 | do { | |
682 | if (exponent >= INT_MAX / 10) { | |
683 | json_error(p, "exponent outside valid range"); | |
684 | return; | |
685 | } | |
686 | exponent = exponent * 10 + (*cp - '0'); | |
687 | cp++; | |
688 | } while (isdigit(*cp)); | |
689 | ||
690 | if (negative_exponent) { | |
691 | pow10 -= exponent; | |
692 | } else { | |
693 | pow10 += exponent; | |
694 | } | |
695 | } | |
696 | ||
697 | if (*cp != '\0') { | |
698 | json_error(p, "syntax error in number"); | |
699 | return; | |
700 | } | |
701 | ||
702 | /* Figure out number. | |
703 | * | |
704 | * We suppress negative zeros as a matter of policy. */ | |
705 | if (!significand) { | |
f38b84ea BP |
706 | token.type = T_INTEGER; |
707 | token.u.integer = 0; | |
708 | json_parser_input(p, &token); | |
709 | return; | |
710 | } | |
711 | ||
712 | if (!imprecise) { | |
713 | while (pow10 > 0 && significand < ULLONG_MAX / 10) { | |
714 | significand *= 10; | |
f38b84ea BP |
715 | pow10--; |
716 | } | |
717 | while (pow10 < 0 && significand % 10 == 0) { | |
718 | significand /= 10; | |
f38b84ea BP |
719 | pow10++; |
720 | } | |
721 | if (pow10 == 0 | |
722 | && significand <= (negative | |
723 | ? (unsigned long long int) LLONG_MAX + 1 | |
724 | : LLONG_MAX)) { | |
f38b84ea BP |
725 | token.type = T_INTEGER; |
726 | token.u.integer = negative ? -significand : significand; | |
727 | json_parser_input(p, &token); | |
728 | return; | |
729 | } | |
730 | } | |
731 | ||
a105c27b BP |
732 | token.type = T_REAL; |
733 | if (!str_to_double(ds_cstr(&p->buffer), &token.u.real)) { | |
734 | json_error(p, "number outside valid range"); | |
735 | return; | |
f38b84ea | 736 | } |
a105c27b BP |
737 | /* Suppress negative zero. */ |
738 | if (token.u.real == 0) { | |
739 | token.u.real = 0; | |
740 | } | |
741 | json_parser_input(p, &token); | |
f38b84ea BP |
742 | } |
743 | ||
7d23a63a BP |
/* Decodes the four hexadecimal digits at 'cp' (the part of a "\uXXXX" escape
 * after the "\u"), where 'end' points just past the last available input
 * byte.
 *
 * On success, stores the decoded value in '*valuep' and returns NULL.  On
 * failure, returns a constant error message and leaves '*valuep' untouched.
 * A decoded value of zero is rejected. */
static const char *
json_lex_4hex(const char *cp, const char *end, int *valuep)
{
    unsigned int value;

    /* Compare remaining length rather than forming 'cp + 4', which could
     * point beyond the end of the input buffer. */
    if (end - cp < 4) {
        return "quoted string ends within \\u escape";
    }

    value = hexits_value(cp, 4, NULL);
    if (value == UINT_MAX) {
        return "malformed \\u escape";
    }
    if (!value) {
        return "null bytes not supported in quoted strings";
    }
    *valuep = value;
    return NULL;
}
763 | ||
/* Decodes one "\u" escape whose hex digits start at 'cp' and appends the
 * resulting code point to 'out' as UTF-8.  If the first value is a leading
 * surrogate, a second "\uXXXX" escape holding a trailing surrogate must
 * follow immediately, and the pair is decoded as a single code point.
 * 'end' points just past the last available input byte.
 *
 * Returns a pointer just past the consumed input on success.  On failure,
 * replaces the contents of 'out' by an error message and returns NULL. */
static const char *
json_lex_unicode(const char *cp, const char *end, struct ds *out)
{
    const char *error;
    int c0, c1;

    error = json_lex_4hex(cp, end, &c0);
    if (error) {
        goto fail;
    }
    cp += 4;
    if (!uc_is_leading_surrogate(c0)) {
        ds_put_utf8(out, c0);
        return cp;
    }

    /* Check remaining length rather than forming 'cp + 2', which could point
     * beyond the end of the input buffer. */
    if (end - cp < 2 || cp[0] != '\\' || cp[1] != 'u') {
        error = "malformed escaped surrogate pair";
        goto fail;
    }
    cp += 2;

    error = json_lex_4hex(cp, end, &c1);
    if (error) {
        goto fail;
    }
    cp += 4;
    if (!uc_is_trailing_surrogate(c1)) {
        error = "second half of escaped surrogate pair is not "
                "trailing surrogate";
        goto fail;
    }

    ds_put_utf8(out, utf16_decode_surrogate_pair(c0, c1));
    return cp;

fail:
    ds_clear(out);
    ds_put_cstr(out, error);
    return NULL;
}
805 | ||
7d23a63a BP |
806 | bool |
807 | json_string_unescape(const char *in, size_t in_len, char **outp) | |
808 | { | |
809 | const char *end = in + in_len; | |
810 | bool ok = false; | |
811 | struct ds out; | |
812 | ||
813 | ds_init(&out); | |
814 | ds_reserve(&out, in_len); | |
815 | if (in_len > 0 && in[in_len - 1] == '\\') { | |
816 | ds_put_cstr(&out, "quoted string may not end with backslash"); | |
817 | goto exit; | |
818 | } | |
819 | while (in < end) { | |
820 | if (*in == '"') { | |
821 | ds_clear(&out); | |
4bda8288 | 822 | ds_put_cstr(&out, "quoted string may not include unescaped \""); |
7d23a63a BP |
823 | goto exit; |
824 | } | |
825 | if (*in != '\\') { | |
826 | ds_put_char(&out, *in++); | |
f38b84ea BP |
827 | continue; |
828 | } | |
829 | ||
7d23a63a BP |
830 | in++; |
831 | switch (*in++) { | |
f38b84ea | 832 | case '"': case '\\': case '/': |
7d23a63a | 833 | ds_put_char(&out, in[-1]); |
f38b84ea BP |
834 | break; |
835 | ||
836 | case 'b': | |
7d23a63a | 837 | ds_put_char(&out, '\b'); |
f38b84ea BP |
838 | break; |
839 | ||
840 | case 'f': | |
7d23a63a | 841 | ds_put_char(&out, '\f'); |
f38b84ea BP |
842 | break; |
843 | ||
844 | case 'n': | |
7d23a63a | 845 | ds_put_char(&out, '\n'); |
f38b84ea BP |
846 | break; |
847 | ||
848 | case 'r': | |
7d23a63a | 849 | ds_put_char(&out, '\r'); |
f38b84ea BP |
850 | break; |
851 | ||
852 | case 't': | |
7d23a63a | 853 | ds_put_char(&out, '\t'); |
f38b84ea BP |
854 | break; |
855 | ||
856 | case 'u': | |
7d23a63a BP |
857 | in = json_lex_unicode(in, end, &out); |
858 | if (!in) { | |
f38b84ea BP |
859 | goto exit; |
860 | } | |
861 | break; | |
862 | ||
863 | default: | |
7d23a63a BP |
864 | ds_clear(&out); |
865 | ds_put_format(&out, "bad escape \\%c", in[-1]); | |
f38b84ea BP |
866 | goto exit; |
867 | } | |
868 | } | |
7d23a63a BP |
869 | ok = true; |
870 | ||
871 | exit: | |
872 | *outp = ds_cstr(&out); | |
873 | return ok; | |
874 | } | |
875 | ||
876 | static void | |
877 | json_parser_input_string(struct json_parser *p, const char *s) | |
878 | { | |
879 | struct json_token token; | |
f38b84ea BP |
880 | |
881 | token.type = T_STRING; | |
7d23a63a | 882 | token.u.string = s; |
f38b84ea | 883 | json_parser_input(p, &token); |
7d23a63a | 884 | } |
f38b84ea | 885 | |
7d23a63a BP |
886 | static void |
887 | json_lex_string(struct json_parser *p) | |
888 | { | |
889 | const char *raw = ds_cstr(&p->buffer); | |
890 | if (!strchr(raw, '\\')) { | |
891 | json_parser_input_string(p, raw); | |
892 | } else { | |
893 | char *cooked; | |
894 | ||
895 | if (json_string_unescape(raw, strlen(raw), &cooked)) { | |
896 | json_parser_input_string(p, cooked); | |
897 | } else { | |
898 | json_error(p, "%s", cooked); | |
899 | } | |
900 | ||
901 | free(cooked); | |
902 | } | |
f38b84ea BP |
903 | } |
904 | ||
905 | static bool | |
0bdf342a | 906 | json_lex_input(struct json_parser *p, unsigned char c) |
f38b84ea BP |
907 | { |
908 | struct json_token token; | |
909 | ||
910 | switch (p->lex_state) { | |
911 | case JSON_LEX_START: | |
912 | switch (c) { | |
913 | case ' ': case '\t': case '\n': case '\r': | |
914 | /* Nothing to do. */ | |
915 | return true; | |
916 | ||
917 | case 'a': case 'b': case 'c': case 'd': case 'e': | |
918 | case 'f': case 'g': case 'h': case 'i': case 'j': | |
919 | case 'k': case 'l': case 'm': case 'n': case 'o': | |
920 | case 'p': case 'q': case 'r': case 's': case 't': | |
921 | case 'u': case 'v': case 'w': case 'x': case 'y': | |
922 | case 'z': | |
923 | p->lex_state = JSON_LEX_KEYWORD; | |
924 | break; | |
925 | ||
926 | case '[': case '{': case ']': case '}': case ':': case ',': | |
927 | token.type = c; | |
928 | json_parser_input(p, &token); | |
929 | return true; | |
930 | ||
931 | case '-': | |
932 | case '0': case '1': case '2': case '3': case '4': | |
933 | case '5': case '6': case '7': case '8': case '9': | |
934 | p->lex_state = JSON_LEX_NUMBER; | |
935 | break; | |
936 | ||
937 | case '"': | |
938 | p->lex_state = JSON_LEX_STRING; | |
939 | return true; | |
940 | ||
941 | default: | |
942 | if (isprint(c)) { | |
943 | json_error(p, "invalid character '%c'", c); | |
944 | } else { | |
945 | json_error(p, "invalid character U+%04x", c); | |
946 | } | |
947 | return true; | |
948 | } | |
949 | break; | |
950 | ||
951 | case JSON_LEX_KEYWORD: | |
952 | if (!isalpha((unsigned char) c)) { | |
953 | json_lex_keyword(p); | |
954 | return false; | |
955 | } | |
956 | break; | |
957 | ||
958 | case JSON_LEX_NUMBER: | |
959 | if (!strchr(".0123456789eE-+", c)) { | |
960 | json_lex_number(p); | |
961 | return false; | |
962 | } | |
963 | break; | |
964 | ||
965 | case JSON_LEX_STRING: | |
966 | if (c == '\\') { | |
967 | p->lex_state = JSON_LEX_ESCAPE; | |
968 | } else if (c == '"') { | |
969 | json_lex_string(p); | |
970 | return true; | |
971 | } else if (c < 0x20) { | |
972 | json_error(p, "U+%04X must be escaped in quoted string", c); | |
973 | return true; | |
974 | } | |
975 | break; | |
976 | ||
977 | case JSON_LEX_ESCAPE: | |
978 | p->lex_state = JSON_LEX_STRING; | |
979 | break; | |
980 | ||
981 | default: | |
982 | abort(); | |
983 | } | |
984 | ds_put_char(&p->buffer, c); | |
985 | return true; | |
986 | } | |
987 | \f | |
988 | /* Parsing. */ | |
989 | ||
990 | /* Parses 'string' as a JSON object or array and returns a newly allocated | |
991 | * 'struct json'. The caller must free the returned structure with | |
992 | * json_destroy() when it is no longer needed. | |
993 | * | |
994 | * 'string' must be encoded in UTF-8. | |
995 | * | |
996 | * If 'string' is valid JSON, then the returned 'struct json' will be either an | |
997 | * object (JSON_OBJECT) or an array (JSON_ARRAY). | |
998 | * | |
999 | * If 'string' is not valid JSON, then the returned 'struct json' will be a | |
1000 | * string (JSON_STRING) that describes the particular error encountered during | |
1001 | * parsing. (This is an acceptable means of error reporting because at its top | |
1002 | * level JSON must be either an object or an array; a bare string is not | |
1003 | * valid.) */ | |
1004 | struct json * | |
1005 | json_from_string(const char *string) | |
1006 | { | |
1007 | struct json_parser *p = json_parser_create(JSPF_TRAILER); | |
1008 | json_parser_feed(p, string, strlen(string)); | |
1009 | return json_parser_finish(p); | |
1010 | } | |
1011 | ||
/* Opens the file named 'file_name', parses its UTF-8 encoded contents as a
 * JSON object or array, and returns the result as a newly allocated 'struct
 * json', which the caller must eventually release with json_destroy().
 *
 * If the file cannot be opened, the result is a JSON string describing the
 * failure.  Otherwise the result is whatever json_from_stream() returns; see
 * json_from_string() for return value semantics.
 */
struct json *
json_from_file(const char *file_name)
{
    FILE *stream = fopen(file_name, "r");
    struct json *result;

    if (stream) {
        result = json_from_stream(stream);
        fclose(stream);
    } else {
        result = json_string_create_nocopy(
            xasprintf("error opening \"%s\": %s", file_name, strerror(errno)));
    }

    return result;
}
1036 | ||
1037 | /* Parses the contents of 'stream' as a JSON object or array, and returns a | |
1038 | * newly allocated 'struct json'. The caller must free the returned structure | |
1039 | * with json_destroy() when it is no longer needed. | |
1040 | * | |
1041 | * The file must be encoded in UTF-8. | |
1042 | * | |
1043 | * See json_from_string() for return value semantics. | |
1044 | */ | |
1045 | struct json * | |
1046 | json_from_stream(FILE *stream) | |
1047 | { | |
1048 | struct json_parser *p; | |
1049 | struct json *json; | |
f38b84ea | 1050 | |
f38b84ea BP |
1051 | p = json_parser_create(JSPF_TRAILER); |
1052 | for (;;) { | |
1053 | char buffer[BUFSIZ]; | |
1054 | size_t n; | |
1055 | ||
1056 | n = fread(buffer, 1, sizeof buffer, stream); | |
1057 | if (!n || json_parser_feed(p, buffer, n) != n) { | |
1058 | break; | |
1059 | } | |
1060 | } | |
1061 | json = json_parser_finish(p); | |
1062 | ||
f38b84ea BP |
1063 | if (ferror(stream)) { |
1064 | json_destroy(json); | |
1065 | json = json_string_create_nocopy( | |
5562d6f5 | 1066 | xasprintf("error reading JSON stream: %s", strerror(errno))); |
f38b84ea | 1067 | } |
f38b84ea BP |
1068 | |
1069 | return json; | |
1070 | } | |
1071 | ||
1072 | struct json_parser * | |
1073 | json_parser_create(int flags) | |
1074 | { | |
1075 | struct json_parser *p = xzalloc(sizeof *p); | |
1076 | p->flags = flags; | |
1077 | return p; | |
1078 | } | |
1079 | ||
1080 | size_t | |
1081 | json_parser_feed(struct json_parser *p, const char *input, size_t n) | |
1082 | { | |
1083 | size_t i; | |
1084 | for (i = 0; !p->done && i < n; ) { | |
1085 | if (json_lex_input(p, input[i])) { | |
c640c04f BP |
1086 | p->byte_number++; |
1087 | if (input[i] == '\n') { | |
1088 | p->column_number = 0; | |
1089 | p->line_number++; | |
1090 | } else { | |
1091 | p->column_number++; | |
1092 | } | |
f38b84ea BP |
1093 | i++; |
1094 | } | |
1095 | } | |
1096 | return i; | |
1097 | } | |
1098 | ||
1099 | bool | |
1100 | json_parser_is_done(const struct json_parser *p) | |
1101 | { | |
1102 | return p->done; | |
1103 | } | |
1104 | ||
1105 | struct json * | |
1106 | json_parser_finish(struct json_parser *p) | |
1107 | { | |
1108 | struct json *json; | |
1109 | ||
1110 | switch (p->lex_state) { | |
1111 | case JSON_LEX_START: | |
1112 | break; | |
1113 | ||
1114 | case JSON_LEX_STRING: | |
1115 | case JSON_LEX_ESCAPE: | |
1116 | json_error(p, "unexpected end of input in quoted string"); | |
1117 | break; | |
1118 | ||
1119 | case JSON_LEX_NUMBER: | |
1120 | case JSON_LEX_KEYWORD: | |
1121 | json_lex_input(p, ' '); | |
1122 | break; | |
1123 | } | |
1124 | ||
1125 | if (p->parse_state == JSON_PARSE_START) { | |
1126 | json_error(p, "empty input stream"); | |
1127 | } else if (p->parse_state != JSON_PARSE_END) { | |
1128 | json_error(p, "unexpected end of input"); | |
1129 | } | |
1130 | ||
1131 | if (!p->error) { | |
cb22974d BP |
1132 | ovs_assert(p->height == 1); |
1133 | ovs_assert(p->stack[0].json != NULL); | |
f38b84ea BP |
1134 | json = p->stack[--p->height].json; |
1135 | } else { | |
1136 | json = json_string_create_nocopy(p->error); | |
1137 | p->error = NULL; | |
1138 | } | |
1139 | ||
1140 | json_parser_abort(p); | |
1141 | ||
1142 | return json; | |
1143 | } | |
1144 | ||
1145 | void | |
1146 | json_parser_abort(struct json_parser *p) | |
1147 | { | |
1148 | if (p) { | |
1149 | ds_destroy(&p->buffer); | |
1150 | if (p->height) { | |
1151 | json_destroy(p->stack[0].json); | |
1152 | } | |
1153 | free(p->stack); | |
1154 | free(p->member_name); | |
1155 | free(p->error); | |
1156 | free(p); | |
1157 | } | |
1158 | } | |
1159 | ||
1160 | static struct json_parser_node * | |
1161 | json_parser_top(struct json_parser *p) | |
1162 | { | |
1163 | return &p->stack[p->height - 1]; | |
1164 | } | |
1165 | ||
1166 | static void | |
1167 | json_parser_put_value(struct json_parser *p, struct json *value) | |
1168 | { | |
1169 | struct json_parser_node *node = json_parser_top(p); | |
1170 | if (node->json->type == JSON_OBJECT) { | |
1171 | json_object_put(node->json, p->member_name, value); | |
1172 | free(p->member_name); | |
1173 | p->member_name = NULL; | |
1174 | } else if (node->json->type == JSON_ARRAY) { | |
1175 | json_array_add(node->json, value); | |
1176 | } else { | |
1177 | NOT_REACHED(); | |
1178 | } | |
1179 | } | |
1180 | ||
20063bd1 | 1181 | static void |
f38b84ea BP |
1182 | json_parser_push(struct json_parser *p, |
1183 | struct json *new_json, enum json_parse_state new_state) | |
1184 | { | |
1185 | if (p->height < JSON_MAX_HEIGHT) { | |
1186 | struct json_parser_node *node; | |
1187 | ||
1188 | if (p->height >= p->allocated_height) { | |
1189 | p->stack = x2nrealloc(p->stack, &p->allocated_height, | |
1190 | sizeof *p->stack); | |
1191 | } | |
1192 | ||
1193 | if (p->height > 0) { | |
1194 | json_parser_put_value(p, new_json); | |
1195 | } | |
1196 | ||
1197 | node = &p->stack[p->height++]; | |
1198 | node->json = new_json; | |
1199 | p->parse_state = new_state; | |
f38b84ea | 1200 | } else { |
d951f1c7 | 1201 | json_destroy(new_json); |
f38b84ea BP |
1202 | json_error(p, "input exceeds maximum nesting depth %d", |
1203 | JSON_MAX_HEIGHT); | |
f38b84ea BP |
1204 | } |
1205 | } | |
1206 | ||
1207 | static void | |
1208 | json_parser_push_object(struct json_parser *p) | |
1209 | { | |
1210 | json_parser_push(p, json_object_create(), JSON_PARSE_OBJECT_INIT); | |
1211 | } | |
1212 | ||
1213 | static void | |
1214 | json_parser_push_array(struct json_parser *p) | |
1215 | { | |
1216 | json_parser_push(p, json_array_create_empty(), JSON_PARSE_ARRAY_INIT); | |
1217 | } | |
1218 | ||
1219 | static void | |
1220 | json_parse_value(struct json_parser *p, struct json_token *token, | |
1221 | enum json_parse_state next_state) | |
1222 | { | |
1223 | struct json *value; | |
1224 | ||
1225 | switch (token->type) { | |
1226 | case T_FALSE: | |
1227 | value = json_boolean_create(false); | |
1228 | break; | |
1229 | ||
1230 | case T_NULL: | |
1231 | value = json_null_create(); | |
1232 | break; | |
1233 | ||
1234 | case T_TRUE: | |
1235 | value = json_boolean_create(true); | |
1236 | break; | |
1237 | ||
1238 | case '{': | |
1239 | json_parser_push_object(p); | |
1240 | return; | |
1241 | ||
1242 | case '[': | |
1243 | json_parser_push_array(p); | |
1244 | return; | |
1245 | ||
1246 | case T_INTEGER: | |
1247 | value = json_integer_create(token->u.integer); | |
1248 | break; | |
1249 | ||
1250 | case T_REAL: | |
1251 | value = json_real_create(token->u.real); | |
1252 | break; | |
1253 | ||
1254 | case T_STRING: | |
1255 | value = json_string_create(token->u.string); | |
1256 | break; | |
1257 | ||
1258 | case T_EOF: | |
1259 | case '}': | |
1260 | case ']': | |
1261 | case ':': | |
1262 | case ',': | |
1263 | default: | |
1264 | json_error(p, "syntax error expecting value"); | |
1265 | return; | |
1266 | } | |
1267 | ||
1268 | json_parser_put_value(p, value); | |
1269 | p->parse_state = next_state; | |
1270 | } | |
1271 | ||
1272 | static void | |
1273 | json_parser_pop(struct json_parser *p) | |
1274 | { | |
1275 | struct json_parser_node *node; | |
1276 | ||
1277 | /* Conserve memory. */ | |
1278 | node = json_parser_top(p); | |
1279 | if (node->json->type == JSON_ARRAY) { | |
1280 | json_array_trim(node->json); | |
1281 | } | |
1282 | ||
1283 | /* Pop off the top-of-stack. */ | |
1284 | if (p->height == 1) { | |
1285 | p->parse_state = JSON_PARSE_END; | |
1286 | if (!(p->flags & JSPF_TRAILER)) { | |
1287 | p->done = true; | |
1288 | } | |
1289 | } else { | |
1290 | p->height--; | |
1291 | node = json_parser_top(p); | |
1292 | if (node->json->type == JSON_ARRAY) { | |
1293 | p->parse_state = JSON_PARSE_ARRAY_NEXT; | |
1294 | } else if (node->json->type == JSON_OBJECT) { | |
1295 | p->parse_state = JSON_PARSE_OBJECT_NEXT; | |
1296 | } else { | |
1297 | NOT_REACHED(); | |
1298 | } | |
1299 | } | |
1300 | } | |
1301 | ||
1302 | static void | |
1303 | json_parser_input(struct json_parser *p, struct json_token *token) | |
1304 | { | |
1305 | switch (p->parse_state) { | |
1306 | case JSON_PARSE_START: | |
1307 | if (token->type == '{') { | |
1308 | json_parser_push_object(p); | |
1309 | } else if (token->type == '[') { | |
1310 | json_parser_push_array(p); | |
1311 | } else { | |
1312 | json_error(p, "syntax error at beginning of input"); | |
1313 | } | |
1314 | break; | |
1315 | ||
1316 | case JSON_PARSE_END: | |
1317 | json_error(p, "trailing garbage at end of input"); | |
1318 | break; | |
1319 | ||
1320 | case JSON_PARSE_OBJECT_INIT: | |
1321 | if (token->type == '}') { | |
1322 | json_parser_pop(p); | |
1323 | break; | |
1324 | } | |
1325 | /* Fall through. */ | |
1326 | case JSON_PARSE_OBJECT_NAME: | |
1327 | if (token->type == T_STRING) { | |
1328 | p->member_name = xstrdup(token->u.string); | |
1329 | p->parse_state = JSON_PARSE_OBJECT_COLON; | |
1330 | } else { | |
1331 | json_error(p, "syntax error parsing object expecting string"); | |
1332 | } | |
1333 | break; | |
1334 | ||
1335 | case JSON_PARSE_OBJECT_COLON: | |
1336 | if (token->type == ':') { | |
1337 | p->parse_state = JSON_PARSE_OBJECT_VALUE; | |
1338 | } else { | |
1339 | json_error(p, "syntax error parsing object expecting ':'"); | |
1340 | } | |
1341 | break; | |
1342 | ||
1343 | case JSON_PARSE_OBJECT_VALUE: | |
1344 | json_parse_value(p, token, JSON_PARSE_OBJECT_NEXT); | |
1345 | break; | |
1346 | ||
1347 | case JSON_PARSE_OBJECT_NEXT: | |
1348 | if (token->type == ',') { | |
1349 | p->parse_state = JSON_PARSE_OBJECT_NAME; | |
1350 | } else if (token->type == '}') { | |
1351 | json_parser_pop(p); | |
1352 | } else { | |
1353 | json_error(p, "syntax error expecting '}' or ','"); | |
1354 | } | |
1355 | break; | |
1356 | ||
1357 | case JSON_PARSE_ARRAY_INIT: | |
1358 | if (token->type == ']') { | |
1359 | json_parser_pop(p); | |
1360 | break; | |
1361 | } | |
1362 | /* Fall through. */ | |
1363 | case JSON_PARSE_ARRAY_VALUE: | |
1364 | json_parse_value(p, token, JSON_PARSE_ARRAY_NEXT); | |
1365 | break; | |
1366 | ||
1367 | case JSON_PARSE_ARRAY_NEXT: | |
1368 | if (token->type == ',') { | |
1369 | p->parse_state = JSON_PARSE_ARRAY_VALUE; | |
1370 | } else if (token->type == ']') { | |
1371 | json_parser_pop(p); | |
1372 | } else { | |
1373 | json_error(p, "syntax error expecting ']' or ','"); | |
1374 | } | |
1375 | break; | |
1376 | ||
1377 | default: | |
1378 | abort(); | |
1379 | } | |
1380 | ||
1381 | p->lex_state = JSON_LEX_START; | |
1382 | ds_clear(&p->buffer); | |
1383 | } | |
1384 | ||
1385 | static struct json * | |
1386 | json_create(enum json_type type) | |
1387 | { | |
1388 | struct json *json = xmalloc(sizeof *json); | |
1389 | json->type = type; | |
1390 | return json; | |
1391 | } | |
1392 | ||
1393 | static void | |
1394 | json_error(struct json_parser *p, const char *format, ...) | |
1395 | { | |
1396 | if (!p->error) { | |
0bdf342a | 1397 | struct ds msg; |
f38b84ea BP |
1398 | va_list args; |
1399 | ||
0bdf342a BP |
1400 | ds_init(&msg); |
1401 | ds_put_format(&msg, "line %d, column %d, byte %d: ", | |
1402 | p->line_number, p->column_number, p->byte_number); | |
f38b84ea | 1403 | va_start(args, format); |
0bdf342a | 1404 | ds_put_format_valist(&msg, format, args); |
f38b84ea BP |
1405 | va_end(args); |
1406 | ||
0bdf342a BP |
1407 | p->error = ds_steal_cstr(&msg); |
1408 | ||
f38b84ea BP |
1409 | p->done = true; |
1410 | } | |
1411 | } | |
1412 | \f | |
/* Number of spaces that indent_line() emits per nesting level. */
#define SPACES_PER_LEVEL 2

/* State shared by the json_serialize*() functions during one
 * serialization. */
struct json_serializer {
    struct ds *ds;              /* Output is appended here. */
    int depth;                  /* Current nesting level. */
    int flags;                  /* JSSF_* flags. */
};

static void json_serialize(const struct json *json,
                           struct json_serializer *s);
static void json_serialize_object(const struct shash *object,
                                  struct json_serializer *s);
static void json_serialize_array(const struct json_array *array,
                                 struct json_serializer *s);
static void json_serialize_string(const char *string, struct ds *ds);
f38b84ea BP |
1427 | |
1428 | /* Converts 'json' to a string in JSON format, encoded in UTF-8, and returns | |
1429 | * that string. The caller is responsible for freeing the returned string, | |
1430 | * with free(), when it is no longer needed. | |
1431 | * | |
1432 | * If 'flags' contains JSSF_PRETTY, the output is pretty-printed with each | |
1433 | * nesting level introducing an additional indentation. Otherwise, the | |
1434 | * returned string does not contain any new-line characters. | |
1435 | * | |
1436 | * If 'flags' contains JSSF_SORT, members of objects in the output are sorted | |
1437 | * in bytewise lexicographic order for reproducibility. Otherwise, members of | |
1438 | * objects are output in an indeterminate order. | |
1439 | * | |
1440 | * The returned string is valid JSON only if 'json' represents an array or an | |
1441 | * object, since a bare literal does not satisfy the JSON grammar. */ | |
1442 | char * | |
1443 | json_to_string(const struct json *json, int flags) | |
36d802ae BP |
1444 | { |
1445 | struct ds ds; | |
1446 | ||
1447 | ds_init(&ds); | |
1448 | json_to_ds(json, flags, &ds); | |
1449 | return ds_steal_cstr(&ds); | |
1450 | } | |
1451 | ||
1452 | /* Same as json_to_string(), but the output is appended to 'ds'. */ | |
1453 | void | |
1454 | json_to_ds(const struct json *json, int flags, struct ds *ds) | |
f38b84ea BP |
1455 | { |
1456 | struct json_serializer s; | |
36d802ae BP |
1457 | |
1458 | s.ds = ds; | |
f38b84ea BP |
1459 | s.depth = 0; |
1460 | s.flags = flags; | |
36d802ae | 1461 | json_serialize(json, &s); |
f38b84ea BP |
1462 | } |
1463 | ||
1464 | static void | |
36d802ae | 1465 | json_serialize(const struct json *json, struct json_serializer *s) |
f38b84ea | 1466 | { |
36d802ae | 1467 | struct ds *ds = s->ds; |
f38b84ea BP |
1468 | |
1469 | switch (json->type) { | |
1470 | case JSON_NULL: | |
1471 | ds_put_cstr(ds, "null"); | |
1472 | break; | |
1473 | ||
1474 | case JSON_FALSE: | |
1475 | ds_put_cstr(ds, "false"); | |
1476 | break; | |
1477 | ||
1478 | case JSON_TRUE: | |
1479 | ds_put_cstr(ds, "true"); | |
1480 | break; | |
1481 | ||
1482 | case JSON_OBJECT: | |
36d802ae | 1483 | json_serialize_object(json->u.object, s); |
f38b84ea BP |
1484 | break; |
1485 | ||
1486 | case JSON_ARRAY: | |
36d802ae | 1487 | json_serialize_array(&json->u.array, s); |
f38b84ea BP |
1488 | break; |
1489 | ||
1490 | case JSON_INTEGER: | |
1491 | ds_put_format(ds, "%lld", json->u.integer); | |
1492 | break; | |
1493 | ||
1494 | case JSON_REAL: | |
1495 | ds_put_format(ds, "%.*g", DBL_DIG, json->u.real); | |
1496 | break; | |
1497 | ||
1498 | case JSON_STRING: | |
36d802ae | 1499 | json_serialize_string(json->u.string, ds); |
f38b84ea BP |
1500 | break; |
1501 | ||
1502 | case JSON_N_TYPES: | |
1503 | default: | |
1504 | NOT_REACHED(); | |
1505 | } | |
1506 | } | |
1507 | ||
1508 | static void | |
1509 | indent_line(struct json_serializer *s) | |
1510 | { | |
1511 | if (s->flags & JSSF_PRETTY) { | |
36d802ae BP |
1512 | ds_put_char(s->ds, '\n'); |
1513 | ds_put_char_multiple(s->ds, ' ', SPACES_PER_LEVEL * s->depth); | |
f38b84ea BP |
1514 | } |
1515 | } | |
1516 | ||
1517 | static void | |
36d802ae BP |
1518 | json_serialize_object_member(size_t i, const struct shash_node *node, |
1519 | struct json_serializer *s) | |
f38b84ea | 1520 | { |
36d802ae | 1521 | struct ds *ds = s->ds; |
f38b84ea BP |
1522 | |
1523 | if (i) { | |
1524 | ds_put_char(ds, ','); | |
1525 | indent_line(s); | |
1526 | } | |
1527 | ||
36d802ae | 1528 | json_serialize_string(node->name, ds); |
f38b84ea BP |
1529 | ds_put_char(ds, ':'); |
1530 | if (s->flags & JSSF_PRETTY) { | |
1531 | ds_put_char(ds, ' '); | |
1532 | } | |
36d802ae | 1533 | json_serialize(node->data, s); |
f38b84ea BP |
1534 | } |
1535 | ||
1536 | static void | |
36d802ae | 1537 | json_serialize_object(const struct shash *object, struct json_serializer *s) |
f38b84ea | 1538 | { |
36d802ae | 1539 | struct ds *ds = s->ds; |
f38b84ea BP |
1540 | |
1541 | ds_put_char(ds, '{'); | |
1542 | ||
1543 | s->depth++; | |
1544 | indent_line(s); | |
1545 | ||
1546 | if (s->flags & JSSF_SORT) { | |
1547 | const struct shash_node **nodes; | |
1548 | size_t n, i; | |
1549 | ||
1550 | nodes = shash_sort(object); | |
1551 | n = shash_count(object); | |
1552 | for (i = 0; i < n; i++) { | |
36d802ae | 1553 | json_serialize_object_member(i, nodes[i], s); |
f38b84ea BP |
1554 | } |
1555 | free(nodes); | |
1556 | } else { | |
1557 | struct shash_node *node; | |
1558 | size_t i; | |
1559 | ||
1560 | i = 0; | |
1561 | SHASH_FOR_EACH (node, object) { | |
36d802ae | 1562 | json_serialize_object_member(i++, node, s); |
f38b84ea BP |
1563 | } |
1564 | } | |
1565 | ||
1566 | ds_put_char(ds, '}'); | |
1567 | s->depth--; | |
1568 | } | |
1569 | ||
1570 | static void | |
36d802ae | 1571 | json_serialize_array(const struct json_array *array, struct json_serializer *s) |
f38b84ea | 1572 | { |
36d802ae | 1573 | struct ds *ds = s->ds; |
f38b84ea BP |
1574 | size_t i; |
1575 | ||
1576 | ds_put_char(ds, '['); | |
1577 | s->depth++; | |
1578 | ||
1579 | if (array->n > 0) { | |
1580 | indent_line(s); | |
1581 | ||
1582 | for (i = 0; i < array->n; i++) { | |
1583 | if (i) { | |
1584 | ds_put_char(ds, ','); | |
1585 | indent_line(s); | |
1586 | } | |
36d802ae | 1587 | json_serialize(array->elems[i], s); |
f38b84ea BP |
1588 | } |
1589 | } | |
1590 | ||
1591 | s->depth--; | |
1592 | ds_put_char(ds, ']'); | |
1593 | } | |
1594 | ||
/* Appends 'string' to 'ds' as a double-quoted JSON string.
 *
 * Double quote, backslash, backspace, form feed, new-line, carriage return,
 * and tab are written as two-character backslash escapes.  Any other byte
 * below 32 is written as a \uXXXX escape.  All remaining bytes are copied
 * unchanged. */
static void
json_serialize_string(const char *string, struct ds *ds)
{
    const char *p;

    ds_put_char(ds, '"');
    for (p = string; *p != '\0'; p++) {
        const uint8_t c = *p;
        const char *escape;

        switch (c) {
        case '"':
            escape = "\\\"";
            break;

        case '\\':
            escape = "\\\\";
            break;

        case '\b':
            escape = "\\b";
            break;

        case '\f':
            escape = "\\f";
            break;

        case '\n':
            escape = "\\n";
            break;

        case '\r':
            escape = "\\r";
            break;

        case '\t':
            escape = "\\t";
            break;

        default:
            escape = NULL;
            break;
        }

        if (escape) {
            ds_put_cstr(ds, escape);
        } else if (c >= 32) {
            ds_put_char(ds, c);
        } else {
            ds_put_format(ds, "\\u%04x", c);
        }
    }
    ds_put_char(ds, '"');
}
1600fa68 BP |
1642 | \f |
/* Returns the number of bytes in the quoted JSON form of 'string': 2 for the
 * enclosing quotes, 2 for each of double quote, backslash, backspace, form
 * feed, new-line, carriage return, and tab, 6 for any other byte below 32,
 * and 1 for every remaining byte. */
static size_t
json_string_serialized_length(const char *string)
{
    size_t length = strlen("\"\"");
    const char *p;

    for (p = string; *p != '\0'; p++) {
        const uint8_t c = *p;

        /* 'c' is never '\0' here, so strchr() cannot match the terminator
         * of the set below. */
        if (strchr("\"\\\b\f\n\r\t", c)) {
            length += 2;
        } else if (c < 32) {
            /* \uXXXX */
            length += 6;
        } else {
            length++;
        }
    }

    return length;
}
1676 | ||
1677 | static size_t | |
1678 | json_object_serialized_length(const struct shash *object) | |
1679 | { | |
1680 | size_t length = strlen("{}"); | |
1681 | ||
1682 | if (!shash_is_empty(object)) { | |
1683 | struct shash_node *node; | |
1684 | ||
1685 | /* Commas and colons. */ | |
1686 | length += 2 * shash_count(object) - 1; | |
1687 | ||
1688 | SHASH_FOR_EACH (node, object) { | |
1689 | const struct json *value = node->data; | |
1690 | ||
1691 | length += json_string_serialized_length(node->name); | |
1692 | length += json_serialized_length(value); | |
1693 | } | |
1694 | } | |
1695 | ||
1696 | return length; | |
1697 | } | |
1698 | ||
1699 | static size_t | |
1700 | json_array_serialized_length(const struct json_array *array) | |
1701 | { | |
1702 | size_t length = strlen("[]"); | |
1703 | ||
1704 | if (array->n) { | |
1705 | size_t i; | |
1706 | ||
1707 | /* Commas. */ | |
1708 | length += array->n - 1; | |
1709 | ||
1710 | for (i = 0; i < array->n; i++) { | |
1711 | length += json_serialized_length(array->elems[i]); | |
1712 | } | |
1713 | } | |
1714 | ||
1715 | return length; | |
1716 | } | |
1717 | ||
1718 | /* Returns strlen(json_to_string(json, 0)), that is, the number of bytes in the | |
1719 | * JSON output by json_to_string() for 'json' when JSSF_PRETTY is not | |
1720 | * requested. (JSSF_SORT does not affect the length of json_to_string()'s | |
1721 | * output.) */ | |
1722 | size_t | |
1723 | json_serialized_length(const struct json *json) | |
1724 | { | |
1725 | switch (json->type) { | |
1726 | case JSON_NULL: | |
1727 | return strlen("null"); | |
1728 | ||
1729 | case JSON_FALSE: | |
1730 | return strlen("false"); | |
1731 | ||
1732 | case JSON_TRUE: | |
1733 | return strlen("true"); | |
1734 | ||
1735 | case JSON_OBJECT: | |
1736 | return json_object_serialized_length(json->u.object); | |
1737 | ||
1738 | case JSON_ARRAY: | |
1739 | return json_array_serialized_length(&json->u.array); | |
1740 | ||
1741 | case JSON_INTEGER: | |
1742 | return snprintf(NULL, 0, "%lld", json->u.integer); | |
1743 | ||
1744 | case JSON_REAL: | |
1745 | return snprintf(NULL, 0, "%.*g", DBL_DIG, json->u.real); | |
1746 | ||
1747 | case JSON_STRING: | |
1748 | return json_string_serialized_length(json->u.string); | |
1749 | ||
1750 | case JSON_N_TYPES: | |
1751 | default: | |
1752 | NOT_REACHED(); | |
1753 | } | |
1754 | } |