]> git.proxmox.com Git - mirror_ovs.git/blob - lib/json.c
json: Accurately parse very large real numbers.
[mirror_ovs.git] / lib / json.c
1 /*
2 * Copyright (c) 2009 Nicira Networks.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18
19 #include "json.h"
20
21 #include <assert.h>
22 #include <ctype.h>
23 #include <errno.h>
24 #include <float.h>
25 #include <limits.h>
26 #include <math.h>
27 #include <string.h>
28
29 #include "dynamic-string.h"
30 #include "hash.h"
31 #include "shash.h"
32 #include "unicode.h"
33 #include "util.h"
34
/* Kinds of tokens that the lexer hands to the parser.
 *
 * Each punctuation token uses its own character code as its value, which lets
 * the lexer store the input byte directly as the token type.  The remaining
 * kinds are numbered from UCHAR_MAX + 1 upward so that they cannot coincide
 * with any single-byte value. */
enum json_token_type {
    T_EOF = 0,

    /* Structural characters. */
    T_BEGIN_ARRAY = '[',
    T_END_ARRAY = ']',
    T_BEGIN_OBJECT = '{',
    T_END_OBJECT = '}',
    T_NAME_SEPARATOR = ':',
    T_VALUE_SEPARATOR = ',',

    /* Keywords and values. */
    T_FALSE = UCHAR_MAX + 1,
    T_NULL,
    T_TRUE,
    T_INTEGER,
    T_REAL,
    T_STRING
};
51
52 /* A JSON token.
53 *
54 * RFC 4627 doesn't define a lexical structure for JSON but I believe this to
55 * be compliant with the standard.
56 */
57 struct json_token {
58 enum json_token_type type;
59 union {
60 double real;
61 long long int integer;
62 const char *string;
63 } u;
64 };
65
/* Lexer states: which kind of token, if any, is currently being read. */
enum json_lex_state {
    JSON_LEX_START,             /* Between tokens. */
    JSON_LEX_NUMBER,            /* Accumulating the text of a number. */
    JSON_LEX_KEYWORD,           /* Accumulating the text of a keyword. */
    JSON_LEX_STRING,            /* Inside a quoted string. */
    JSON_LEX_ESCAPE             /* Inside a quoted string, right after "\". */
};
73
/* Parser states: what the parser expects the next token to be. */
enum json_parse_state {
    JSON_PARSE_START,           /* No token seen yet. */
    JSON_PARSE_END,             /* Top-level value is complete. */

    /* Inside an object. */
    JSON_PARSE_OBJECT_INIT,     /* Want '}' or a member name. */
    JSON_PARSE_OBJECT_NAME,     /* Want a member name. */
    JSON_PARSE_OBJECT_COLON,    /* Want ':'. */
    JSON_PARSE_OBJECT_VALUE,    /* Want a member value. */
    JSON_PARSE_OBJECT_NEXT,     /* Want ',' or '}'. */

    /* Inside an array. */
    JSON_PARSE_ARRAY_INIT,      /* Want ']' or a value. */
    JSON_PARSE_ARRAY_VALUE,     /* Want a value. */
    JSON_PARSE_ARRAY_NEXT       /* Want ',' or ']'. */
};
90
/* One entry on the parser's stack of containers under construction. */
struct json_parser_node {
    struct json *json;          /* The object or array being filled in. */
};
94
95 /* A JSON parser. */
96 struct json_parser {
97 int flags;
98
99 /* Lexical analysis. */
100 enum json_lex_state lex_state;
101 struct ds buffer; /* Buffer for accumulating token text. */
102 int line_number;
103 int column_number;
104 int byte_number;
105
106 /* Parsing. */
107 enum json_parse_state parse_state;
108 #define JSON_MAX_HEIGHT 1000
109 struct json_parser_node *stack;
110 size_t height, allocated_height;
111 char *member_name;
112
113 /* Parse status. */
114 bool done;
115 char *error; /* Error message, if any, null if none yet. */
116 };
117
118 static struct json *json_create(enum json_type type);
119 static void json_parser_input(struct json_parser *, struct json_token *);
120
121 static void json_error(struct json_parser *p, const char *format, ...)
122 PRINTF_FORMAT(2, 3);
123 \f
124 const char *
125 json_type_to_string(enum json_type type)
126 {
127 switch (type) {
128 case JSON_NULL:
129 return "null";
130
131 case JSON_FALSE:
132 return "false";
133
134 case JSON_TRUE:
135 return "true";
136
137 case JSON_OBJECT:
138 return "object";
139
140 case JSON_ARRAY:
141 return "array";
142
143 case JSON_INTEGER:
144 case JSON_REAL:
145 return "number";
146
147 case JSON_STRING:
148 return "string";
149
150 case JSON_N_TYPES:
151 default:
152 return "<invalid>";
153 }
154 }
155 \f
156 /* Functions for manipulating struct json. */
157
158 struct json *
159 json_null_create(void)
160 {
161 return json_create(JSON_NULL);
162 }
163
164 struct json *
165 json_boolean_create(bool b)
166 {
167 return json_create(b ? JSON_TRUE : JSON_FALSE);
168 }
169
170 struct json *
171 json_string_create_nocopy(char *s)
172 {
173 struct json *json = json_create(JSON_STRING);
174 json->u.string = s;
175 return json;
176 }
177
/* Returns a newly allocated JSON string value that holds a duplicate of 's'
 * made with xstrdup(); the caller keeps ownership of 's'. */
struct json *
json_string_create(const char *s)
{
    char *duplicate = xstrdup(s);

    return json_string_create_nocopy(duplicate);
}
183
184 struct json *
185 json_array_create_empty(void)
186 {
187 struct json *json = json_create(JSON_ARRAY);
188 json->u.array.elems = NULL;
189 json->u.array.n = 0;
190 json->u.array.n_allocated = 0;
191 return json;
192 }
193
194 void
195 json_array_add(struct json *array_, struct json *element)
196 {
197 struct json_array *array = json_array(array_);
198 if (array->n >= array->n_allocated) {
199 array->elems = x2nrealloc(array->elems, &array->n_allocated,
200 sizeof *array->elems);
201 }
202 array->elems[array->n++] = element;
203 }
204
205 void
206 json_array_trim(struct json *array_)
207 {
208 struct json_array *array = json_array(array_);
209 if (array->n < array->n_allocated){
210 array->n_allocated = array->n;
211 array->elems = xrealloc(array->elems, array->n * sizeof *array->elems);
212 }
213 }
214
215 struct json *
216 json_array_create(struct json **elements, size_t n)
217 {
218 struct json *json = json_create(JSON_ARRAY);
219 json->u.array.elems = elements;
220 json->u.array.n = n;
221 json->u.array.n_allocated = n;
222 return json;
223 }
224
/* Returns a newly allocated JSON array whose single element is 'elem0'.  The
 * array stores the 'elem0' pointer itself, not a copy. */
struct json *
json_array_create_1(struct json *elem0)
{
    enum { N_ELEMS = 1 };
    struct json **elems = xmalloc(N_ELEMS * sizeof *elems);

    *elems = elem0;
    return json_array_create(elems, N_ELEMS);
}
232
/* Returns a newly allocated JSON array whose elements are 'elem0' and
 * 'elem1', in that order.  The array stores the element pointers themselves,
 * not copies. */
struct json *
json_array_create_2(struct json *elem0, struct json *elem1)
{
    enum { N_ELEMS = 2 };
    struct json **elems = xmalloc(N_ELEMS * sizeof *elems);
    struct json **next = elems;

    *next++ = elem0;
    *next = elem1;
    return json_array_create(elems, N_ELEMS);
}
241
/* Returns a newly allocated JSON array whose elements are 'elem0', 'elem1',
 * and 'elem2', in that order.  The array stores the element pointers
 * themselves, not copies. */
struct json *
json_array_create_3(struct json *elem0, struct json *elem1, struct json *elem2)
{
    enum { N_ELEMS = 3 };
    struct json **elems = xmalloc(N_ELEMS * sizeof *elems);
    struct json **next = elems;

    *next++ = elem0;
    *next++ = elem1;
    *next = elem2;
    return json_array_create(elems, N_ELEMS);
}
251
252 struct json *
253 json_object_create(void)
254 {
255 struct json *json = json_create(JSON_OBJECT);
256 json->u.object = xmalloc(sizeof *json->u.object);
257 shash_init(json->u.object);
258 return json;
259 }
260
261 struct json *
262 json_integer_create(long long int integer)
263 {
264 struct json *json = json_create(JSON_INTEGER);
265 json->u.integer = integer;
266 return json;
267 }
268
269 struct json *
270 json_real_create(double real)
271 {
272 struct json *json = json_create(JSON_REAL);
273 json->u.real = real;
274 return json;
275 }
276
277 void
278 json_object_put(struct json *json, const char *name, struct json *value)
279 {
280 shash_add(json->u.object, name, value);
281 }
282
/* Adds a member named 'name' to the JSON object 'json' whose value is a new
 * JSON string holding a copy of 'value'. */
void
json_object_put_string(struct json *json, const char *name, const char *value)
{
    struct json *string_value = json_string_create(value);

    json_object_put(json, name, string_value);
}
288
289 const char *
290 json_string(const struct json *json)
291 {
292 assert(json->type == JSON_STRING);
293 return json->u.string;
294 }
295
296 struct json_array *
297 json_array(const struct json *json)
298 {
299 assert(json->type == JSON_ARRAY);
300 return (struct json_array *) &json->u.array;
301 }
302
303 struct shash *
304 json_object(const struct json *json)
305 {
306 assert(json->type == JSON_OBJECT);
307 return (struct shash *) json->u.object;
308 }
309
310 bool
311 json_boolean(const struct json *json)
312 {
313 assert(json->type == JSON_TRUE || json->type == JSON_FALSE);
314 return json->type == JSON_TRUE;
315 }
316
317 double
318 json_real(const struct json *json)
319 {
320 assert(json->type == JSON_REAL || json->type == JSON_INTEGER);
321 return json->type == JSON_REAL ? json->u.real : json->u.integer;
322 }
323
324 int64_t
325 json_integer(const struct json *json)
326 {
327 assert(json->type == JSON_INTEGER);
328 return json->u.integer;
329 }
330 \f
331 static void json_destroy_object(struct shash *object);
332 static void json_destroy_array(struct json_array *array);
333
334 /* Frees 'json' and everything it points to, recursively. */
335 void
336 json_destroy(struct json *json)
337 {
338 if (json) {
339 switch (json->type) {
340 case JSON_OBJECT:
341 json_destroy_object(json->u.object);
342 break;
343
344 case JSON_ARRAY:
345 json_destroy_array(&json->u.array);
346 break;
347
348 case JSON_STRING:
349 free(json->u.string);
350 break;
351
352 case JSON_NULL:
353 case JSON_FALSE:
354 case JSON_TRUE:
355 case JSON_INTEGER:
356 case JSON_REAL:
357 break;
358
359 case JSON_N_TYPES:
360 NOT_REACHED();
361 }
362 free(json);
363 }
364 }
365
366 static void
367 json_destroy_object(struct shash *object)
368 {
369 struct shash_node *node, *next;
370
371 SHASH_FOR_EACH_SAFE (node, next, object) {
372 struct json *value = node->data;
373
374 json_destroy(value);
375 shash_delete(object, node);
376 }
377 shash_destroy(object);
378 free(object);
379 }
380
381 static void
382 json_destroy_array(struct json_array *array)
383 {
384 size_t i;
385
386 for (i = 0; i < array->n; i++) {
387 json_destroy(array->elems[i]);
388 }
389 free(array->elems);
390 }
391 \f
392 static struct json *json_clone_object(const struct shash *object);
393 static struct json *json_clone_array(const struct json_array *array);
394
395 /* Returns a deep copy of 'json'. */
396 struct json *
397 json_clone(const struct json *json)
398 {
399 switch (json->type) {
400 case JSON_OBJECT:
401 return json_clone_object(json->u.object);
402
403 case JSON_ARRAY:
404 return json_clone_array(&json->u.array);
405
406 case JSON_STRING:
407 return json_string_create(json->u.string);
408
409 case JSON_NULL:
410 case JSON_FALSE:
411 case JSON_TRUE:
412 return json_create(json->type);
413
414 case JSON_INTEGER:
415 return json_integer_create(json->u.integer);
416
417 case JSON_REAL:
418 return json_real_create(json->u.real);
419
420 case JSON_N_TYPES:
421 default:
422 NOT_REACHED();
423 }
424 }
425
426 static struct json *
427 json_clone_object(const struct shash *object)
428 {
429 struct shash_node *node;
430 struct json *json;
431
432 json = json_object_create();
433 SHASH_FOR_EACH (node, object) {
434 struct json *value = node->data;
435 json_object_put(json, node->name, json_clone(value));
436 }
437 return json;
438 }
439
440 static struct json *
441 json_clone_array(const struct json_array *array)
442 {
443 struct json **elems;
444 size_t i;
445
446 elems = xmalloc(array->n * sizeof *elems);
447 for (i = 0; i < array->n; i++) {
448 elems[i] = json_clone(array->elems[i]);
449 }
450 return json_array_create(elems, array->n);
451 }
452 \f
453 static size_t
454 json_hash_object(const struct shash *object, size_t basis)
455 {
456 const struct shash_node **nodes;
457 size_t n, i;
458
459 nodes = shash_sort(object);
460 n = shash_count(object);
461 for (i = 0; i < n; i++) {
462 const struct shash_node *node = nodes[i];
463 basis = hash_string(node->name, basis);
464 basis = json_hash(node->data, basis);
465 }
466 return basis;
467 }
468
469 static size_t
470 json_hash_array(const struct json_array *array, size_t basis)
471 {
472 size_t i;
473
474 basis = hash_int(array->n, basis);
475 for (i = 0; i < array->n; i++) {
476 basis = json_hash(array->elems[i], basis);
477 }
478 return basis;
479 }
480
481 size_t
482 json_hash(const struct json *json, size_t basis)
483 {
484 switch (json->type) {
485 case JSON_OBJECT:
486 return json_hash_object(json->u.object, basis);
487
488 case JSON_ARRAY:
489 return json_hash_array(&json->u.array, basis);
490
491 case JSON_STRING:
492 return hash_string(json->u.string, basis);
493
494 case JSON_NULL:
495 case JSON_FALSE:
496 case JSON_TRUE:
497 return hash_int(json->type << 8, basis);
498
499 case JSON_INTEGER:
500 return hash_int(json->u.integer, basis);
501
502 case JSON_REAL:
503 return hash_double(json->u.real, basis);
504
505 case JSON_N_TYPES:
506 default:
507 NOT_REACHED();
508 }
509 }
510
511 static bool
512 json_equal_object(const struct shash *a, const struct shash *b)
513 {
514 struct shash_node *a_node;
515
516 if (shash_count(a) != shash_count(b)) {
517 return false;
518 }
519
520 SHASH_FOR_EACH (a_node, a) {
521 struct shash_node *b_node = shash_find(b, a_node->name);
522 if (!b_node || !json_equal(a_node->data, b_node->data)) {
523 return false;
524 }
525 }
526
527 return true;
528 }
529
530 static bool
531 json_equal_array(const struct json_array *a, const struct json_array *b)
532 {
533 size_t i;
534
535 if (a->n != b->n) {
536 return false;
537 }
538
539 for (i = 0; i < a->n; i++) {
540 if (!json_equal(a->elems[i], b->elems[i])) {
541 return false;
542 }
543 }
544
545 return true;
546 }
547
548 bool
549 json_equal(const struct json *a, const struct json *b)
550 {
551 if (a->type != b->type) {
552 return false;
553 }
554
555 switch (a->type) {
556 case JSON_OBJECT:
557 return json_equal_object(a->u.object, b->u.object);
558
559 case JSON_ARRAY:
560 return json_equal_array(&a->u.array, &b->u.array);
561
562 case JSON_STRING:
563 return !strcmp(a->u.string, b->u.string);
564
565 case JSON_NULL:
566 case JSON_FALSE:
567 case JSON_TRUE:
568 return true;
569
570 case JSON_INTEGER:
571 return a->u.integer == b->u.integer;
572
573 case JSON_REAL:
574 return a->u.real == b->u.real;
575
576 case JSON_N_TYPES:
577 default:
578 NOT_REACHED();
579 }
580 }
581 \f
582 /* Lexical analysis. */
583
584 static void
585 json_lex_keyword(struct json_parser *p)
586 {
587 struct json_token token;
588 const char *s;
589
590 s = ds_cstr(&p->buffer);
591 if (!strcmp(s, "false")) {
592 token.type = T_FALSE;
593 } else if (!strcmp(s, "true")) {
594 token.type = T_TRUE;
595 } else if (!strcmp(s, "null")) {
596 token.type = T_NULL;
597 } else {
598 json_error(p, "invalid keyword '%s'", s);
599 return;
600 }
601 json_parser_input(p, &token);
602 }
603
604 static void
605 json_lex_number(struct json_parser *p)
606 {
607 const char *cp = ds_cstr(&p->buffer);
608 unsigned long long int significand = 0;
609 struct json_token token;
610 int sig_digits = 0;
611 bool imprecise = false;
612 bool negative = false;
613 int pow10 = 0;
614
615 /* Leading minus sign. */
616 if (*cp == '-') {
617 negative = true;
618 cp++;
619 }
620
621 /* At least one integer digit, but 0 may not be used as a leading digit for
622 * a longer number. */
623 significand = 0;
624 sig_digits = 0;
625 if (*cp == '0') {
626 cp++;
627 if (isdigit(*cp)) {
628 json_error(p, "leading zeros not allowed");
629 return;
630 }
631 } else if (isdigit(*cp)) {
632 do {
633 if (significand <= ULLONG_MAX / 10) {
634 significand = significand * 10 + (*cp - '0');
635 sig_digits++;
636 } else {
637 pow10++;
638 if (*cp != '0') {
639 imprecise = true;
640 }
641 }
642 cp++;
643 } while (isdigit(*cp));
644 } else {
645 json_error(p, "'-' must be followed by digit");
646 return;
647 }
648
649 /* Optional fraction. */
650 if (*cp == '.') {
651 cp++;
652 if (!isdigit(*cp)) {
653 json_error(p, "decimal point must be followed by digit");
654 return;
655 }
656 do {
657 if (significand <= ULLONG_MAX / 10) {
658 significand = significand * 10 + (*cp - '0');
659 sig_digits++;
660 pow10--;
661 } else if (*cp != '0') {
662 imprecise = true;
663 }
664 cp++;
665 } while (isdigit(*cp));
666 }
667
668 /* Optional exponent. */
669 if (*cp == 'e' || *cp == 'E') {
670 bool negative_exponent = false;
671 int exponent;
672
673 cp++;
674 if (*cp == '+') {
675 cp++;
676 } else if (*cp == '-') {
677 negative_exponent = true;
678 cp++;
679 }
680
681 if (!isdigit(*cp)) {
682 json_error(p, "exponent must contain at least one digit");
683 return;
684 }
685
686 exponent = 0;
687 do {
688 if (exponent >= INT_MAX / 10) {
689 json_error(p, "exponent outside valid range");
690 return;
691 }
692 exponent = exponent * 10 + (*cp - '0');
693 cp++;
694 } while (isdigit(*cp));
695
696 if (negative_exponent) {
697 pow10 -= exponent;
698 } else {
699 pow10 += exponent;
700 }
701 }
702
703 if (*cp != '\0') {
704 json_error(p, "syntax error in number");
705 return;
706 }
707
708 /* Figure out number.
709 *
710 * We suppress negative zeros as a matter of policy. */
711 if (!significand) {
712 struct json_token token;
713 token.type = T_INTEGER;
714 token.u.integer = 0;
715 json_parser_input(p, &token);
716 return;
717 }
718
719 if (!imprecise) {
720 while (pow10 > 0 && significand < ULLONG_MAX / 10) {
721 significand *= 10;
722 sig_digits++;
723 pow10--;
724 }
725 while (pow10 < 0 && significand % 10 == 0) {
726 significand /= 10;
727 sig_digits--;
728 pow10++;
729 }
730 if (pow10 == 0
731 && significand <= (negative
732 ? (unsigned long long int) LLONG_MAX + 1
733 : LLONG_MAX)) {
734 token.type = T_INTEGER;
735 token.u.integer = negative ? -significand : significand;
736 json_parser_input(p, &token);
737 return;
738 }
739 }
740
741 token.type = T_REAL;
742 if (!str_to_double(ds_cstr(&p->buffer), &token.u.real)) {
743 json_error(p, "number outside valid range");
744 return;
745 }
746 /* Suppress negative zero. */
747 if (token.u.real == 0) {
748 token.u.real = 0;
749 }
750 json_parser_input(p, &token);
751 }
752
/* Reads exactly four hexadecimal digits starting at 'cp' and stores their
 * value in '*valuep'.  Returns true on success.  Reports a parse error and
 * returns false, leaving '*valuep' untouched, if any of the four characters
 * is not a hexadecimal digit or if the resulting value is zero. */
static bool
json_lex_4hex(struct json_parser *p, const char *cp, int *valuep)
{
    int remaining = 4;
    int code = 0;

    while (remaining > 0) {
        unsigned char digit = *cp;

        if (!isxdigit(digit)) {
            json_error(p, "malformed \\u escape");
            return false;
        }
        code = (code << 4) | hexit_value(digit);
        cp++;
        remaining--;
    }

    if (code == 0) {
        json_error(p, "null bytes not supported in quoted strings");
        return false;
    }

    *valuep = code;
    return true;
}
774
/* Decodes the body of a "\u" escape.  'cp' points just past the "\u".  The
 * decoded code point is appended to 's' in UTF-8.
 *
 * If the four hex digits name a leading surrogate, a second "\uXXXX" escape
 * naming a trailing surrogate must follow immediately, and the pair is
 * combined into one code point.
 *
 * Returns a pointer just past the text consumed, or NULL after reporting a
 * parse error. */
static const char *
json_lex_unicode(struct json_parser *p, const char *cp, struct ds *s)
{
    int lead, trail;

    if (!json_lex_4hex(p, cp, &lead)) {
        return NULL;
    }
    cp += 4;

    if (uc_is_leading_surrogate(lead)) {
        /* Short-circuit evaluation keeps us from reading cp[1] when cp[0]
         * is not a backslash (it might be the string terminator). */
        if (cp[0] != '\\' || cp[1] != 'u') {
            json_error(p, "malformed escaped surrogate pair");
            return NULL;
        }
        cp += 2;

        if (!json_lex_4hex(p, cp, &trail)) {
            return NULL;
        }
        cp += 4;

        if (!uc_is_trailing_surrogate(trail)) {
            json_error(p, "second half of escaped surrogate pair is not "
                       "trailing surrogate");
            return NULL;
        }

        ds_put_utf8(s, utf16_decode_surrogate_pair(lead, trail));
        return cp;
    }

    ds_put_utf8(s, lead);
    return cp;
}
807
808 static void
809 json_lex_string(struct json_parser *p)
810 {
811 struct json_token token;
812 const char *cp;
813 struct ds s;
814
815 cp = ds_cstr(&p->buffer);
816 if (!strchr(cp, '\\')) {
817 token.type = T_STRING;
818 token.u.string = cp;
819 json_parser_input(p, &token);
820 return;
821 }
822
823 ds_init(&s);
824 ds_reserve(&s, strlen(cp));
825 while (*cp != '\0') {
826 if (*cp != '\\') {
827 ds_put_char(&s, *cp++);
828 continue;
829 }
830
831 cp++;
832 switch (*cp++) {
833 case '"': case '\\': case '/':
834 ds_put_char(&s, cp[-1]);
835 break;
836
837 case 'b':
838 ds_put_char(&s, '\b');
839 break;
840
841 case 'f':
842 ds_put_char(&s, '\f');
843 break;
844
845 case 'n':
846 ds_put_char(&s, '\n');
847 break;
848
849 case 'r':
850 ds_put_char(&s, '\r');
851 break;
852
853 case 't':
854 ds_put_char(&s, '\t');
855 break;
856
857 case 'u':
858 cp = json_lex_unicode(p, cp, &s);
859 if (!cp) {
860 goto exit;
861 }
862 break;
863
864 default:
865 json_error(p, "bad escape \\%c", cp[-1]);
866 goto exit;
867 }
868 }
869
870 token.type = T_STRING;
871 token.u.string = ds_cstr(&s);
872 json_parser_input(p, &token);
873
874 exit:
875 ds_destroy(&s);
876 return;
877 }
878
879 static bool
880 json_lex_input(struct json_parser *p, unsigned char c)
881 {
882 struct json_token token;
883
884 p->byte_number++;
885 if (c == '\n') {
886 p->column_number = 0;
887 p->line_number++;
888 } else {
889 p->column_number++;
890 }
891
892 switch (p->lex_state) {
893 case JSON_LEX_START:
894 switch (c) {
895 case ' ': case '\t': case '\n': case '\r':
896 /* Nothing to do. */
897 return true;
898
899 case 'a': case 'b': case 'c': case 'd': case 'e':
900 case 'f': case 'g': case 'h': case 'i': case 'j':
901 case 'k': case 'l': case 'm': case 'n': case 'o':
902 case 'p': case 'q': case 'r': case 's': case 't':
903 case 'u': case 'v': case 'w': case 'x': case 'y':
904 case 'z':
905 p->lex_state = JSON_LEX_KEYWORD;
906 break;
907
908 case '[': case '{': case ']': case '}': case ':': case ',':
909 token.type = c;
910 json_parser_input(p, &token);
911 return true;
912
913 case '-':
914 case '0': case '1': case '2': case '3': case '4':
915 case '5': case '6': case '7': case '8': case '9':
916 p->lex_state = JSON_LEX_NUMBER;
917 break;
918
919 case '"':
920 p->lex_state = JSON_LEX_STRING;
921 return true;
922
923 default:
924 if (isprint(c)) {
925 json_error(p, "invalid character '%c'", c);
926 } else {
927 json_error(p, "invalid character U+%04x", c);
928 }
929 return true;
930 }
931 break;
932
933 case JSON_LEX_KEYWORD:
934 if (!isalpha((unsigned char) c)) {
935 json_lex_keyword(p);
936 return false;
937 }
938 break;
939
940 case JSON_LEX_NUMBER:
941 if (!strchr(".0123456789eE-+", c)) {
942 json_lex_number(p);
943 return false;
944 }
945 break;
946
947 case JSON_LEX_STRING:
948 if (c == '\\') {
949 p->lex_state = JSON_LEX_ESCAPE;
950 } else if (c == '"') {
951 json_lex_string(p);
952 return true;
953 } else if (c < 0x20) {
954 json_error(p, "U+%04X must be escaped in quoted string", c);
955 return true;
956 }
957 break;
958
959 case JSON_LEX_ESCAPE:
960 p->lex_state = JSON_LEX_STRING;
961 break;
962
963 default:
964 abort();
965 }
966 ds_put_char(&p->buffer, c);
967 return true;
968 }
969 \f
970 /* Parsing. */
971
972 /* Parses 'string' as a JSON object or array and returns a newly allocated
973 * 'struct json'. The caller must free the returned structure with
974 * json_destroy() when it is no longer needed.
975 *
976 * 'string' must be encoded in UTF-8.
977 *
978 * If 'string' is valid JSON, then the returned 'struct json' will be either an
979 * object (JSON_OBJECT) or an array (JSON_ARRAY).
980 *
981 * If 'string' is not valid JSON, then the returned 'struct json' will be a
982 * string (JSON_STRING) that describes the particular error encountered during
983 * parsing. (This is an acceptable means of error reporting because at its top
984 * level JSON must be either an object or an array; a bare string is not
985 * valid.) */
986 struct json *
987 json_from_string(const char *string)
988 {
989 struct json_parser *p = json_parser_create(JSPF_TRAILER);
990 json_parser_feed(p, string, strlen(string));
991 return json_parser_finish(p);
992 }
993
994 /* Reads the file named 'file_name', parses its contents as a JSON object or
995 * array, and returns a newly allocated 'struct json'. The caller must free
996 * the returned structure with json_destroy() when it is no longer needed.
997 *
998 * The file must be encoded in UTF-8.
999 *
1000 * See json_from_string() for return value semantics.
1001 */
1002 struct json *
1003 json_from_file(const char *file_name)
1004 {
1005 struct json_parser *p;
1006 struct json *json;
1007 FILE *stream;
1008
1009 /* Open file. */
1010 stream = fopen(file_name, "r");
1011 if (!stream) {
1012 return json_string_create_nocopy(
1013 xasprintf("error opening \"%s\": %s", file_name, strerror(errno)));
1014 }
1015
1016 /* Read and parse file. */
1017 p = json_parser_create(JSPF_TRAILER);
1018 for (;;) {
1019 char buffer[BUFSIZ];
1020 size_t n;
1021
1022 n = fread(buffer, 1, sizeof buffer, stream);
1023 if (!n || json_parser_feed(p, buffer, n) != n) {
1024 break;
1025 }
1026 }
1027 json = json_parser_finish(p);
1028
1029 /* Close file and check for I/O errors. */
1030 if (ferror(stream)) {
1031 json_destroy(json);
1032 json = json_string_create_nocopy(
1033 xasprintf("error reading \"%s\": %s", file_name, strerror(errno)));
1034 }
1035 fclose(stream);
1036
1037 return json;
1038 }
1039
1040 struct json_parser *
1041 json_parser_create(int flags)
1042 {
1043 struct json_parser *p = xzalloc(sizeof *p);
1044 p->flags = flags;
1045 return p;
1046 }
1047
1048 size_t
1049 json_parser_feed(struct json_parser *p, const char *input, size_t n)
1050 {
1051 size_t i;
1052 for (i = 0; !p->done && i < n; ) {
1053 if (json_lex_input(p, input[i])) {
1054 i++;
1055 }
1056 }
1057 return i;
1058 }
1059
1060 bool
1061 json_parser_is_done(const struct json_parser *p)
1062 {
1063 return p->done;
1064 }
1065
1066 struct json *
1067 json_parser_finish(struct json_parser *p)
1068 {
1069 struct json *json;
1070
1071 switch (p->lex_state) {
1072 case JSON_LEX_START:
1073 break;
1074
1075 case JSON_LEX_STRING:
1076 case JSON_LEX_ESCAPE:
1077 json_error(p, "unexpected end of input in quoted string");
1078 break;
1079
1080 case JSON_LEX_NUMBER:
1081 case JSON_LEX_KEYWORD:
1082 json_lex_input(p, ' ');
1083 break;
1084 }
1085
1086 if (p->parse_state == JSON_PARSE_START) {
1087 json_error(p, "empty input stream");
1088 } else if (p->parse_state != JSON_PARSE_END) {
1089 json_error(p, "unexpected end of input");
1090 }
1091
1092 if (!p->error) {
1093 assert(p->height == 1);
1094 assert(p->stack[0].json != NULL);
1095 json = p->stack[--p->height].json;
1096 } else {
1097 json = json_string_create_nocopy(p->error);
1098 p->error = NULL;
1099 }
1100
1101 json_parser_abort(p);
1102
1103 return json;
1104 }
1105
1106 void
1107 json_parser_abort(struct json_parser *p)
1108 {
1109 if (p) {
1110 ds_destroy(&p->buffer);
1111 if (p->height) {
1112 json_destroy(p->stack[0].json);
1113 }
1114 free(p->stack);
1115 free(p->member_name);
1116 free(p->error);
1117 free(p);
1118 }
1119 }
1120
1121 static struct json_parser_node *
1122 json_parser_top(struct json_parser *p)
1123 {
1124 return &p->stack[p->height - 1];
1125 }
1126
1127 static void
1128 json_parser_put_value(struct json_parser *p, struct json *value)
1129 {
1130 struct json_parser_node *node = json_parser_top(p);
1131 if (node->json->type == JSON_OBJECT) {
1132 json_object_put(node->json, p->member_name, value);
1133 free(p->member_name);
1134 p->member_name = NULL;
1135 } else if (node->json->type == JSON_ARRAY) {
1136 json_array_add(node->json, value);
1137 } else {
1138 NOT_REACHED();
1139 }
1140 }
1141
1142 static struct json_parser_node *
1143 json_parser_push(struct json_parser *p,
1144 struct json *new_json, enum json_parse_state new_state)
1145 {
1146 if (p->height < JSON_MAX_HEIGHT) {
1147 struct json_parser_node *node;
1148
1149 if (p->height >= p->allocated_height) {
1150 p->stack = x2nrealloc(p->stack, &p->allocated_height,
1151 sizeof *p->stack);
1152 }
1153
1154 if (p->height > 0) {
1155 json_parser_put_value(p, new_json);
1156 }
1157
1158 node = &p->stack[p->height++];
1159 node->json = new_json;
1160 p->parse_state = new_state;
1161 return node;
1162 } else {
1163 json_error(p, "input exceeds maximum nesting depth %d",
1164 JSON_MAX_HEIGHT);
1165 return NULL;
1166 }
1167 }
1168
1169 static void
1170 json_parser_push_object(struct json_parser *p)
1171 {
1172 json_parser_push(p, json_object_create(), JSON_PARSE_OBJECT_INIT);
1173 }
1174
1175 static void
1176 json_parser_push_array(struct json_parser *p)
1177 {
1178 json_parser_push(p, json_array_create_empty(), JSON_PARSE_ARRAY_INIT);
1179 }
1180
1181 static void
1182 json_parse_value(struct json_parser *p, struct json_token *token,
1183 enum json_parse_state next_state)
1184 {
1185 struct json *value;
1186
1187 switch (token->type) {
1188 case T_FALSE:
1189 value = json_boolean_create(false);
1190 break;
1191
1192 case T_NULL:
1193 value = json_null_create();
1194 break;
1195
1196 case T_TRUE:
1197 value = json_boolean_create(true);
1198 break;
1199
1200 case '{':
1201 json_parser_push_object(p);
1202 return;
1203
1204 case '[':
1205 json_parser_push_array(p);
1206 return;
1207
1208 case T_INTEGER:
1209 value = json_integer_create(token->u.integer);
1210 break;
1211
1212 case T_REAL:
1213 value = json_real_create(token->u.real);
1214 break;
1215
1216 case T_STRING:
1217 value = json_string_create(token->u.string);
1218 break;
1219
1220 case T_EOF:
1221 case '}':
1222 case ']':
1223 case ':':
1224 case ',':
1225 default:
1226 json_error(p, "syntax error expecting value");
1227 return;
1228 }
1229
1230 json_parser_put_value(p, value);
1231 p->parse_state = next_state;
1232 }
1233
1234 static void
1235 json_parser_pop(struct json_parser *p)
1236 {
1237 struct json_parser_node *node;
1238
1239 /* Conserve memory. */
1240 node = json_parser_top(p);
1241 if (node->json->type == JSON_ARRAY) {
1242 json_array_trim(node->json);
1243 }
1244
1245 /* Pop off the top-of-stack. */
1246 if (p->height == 1) {
1247 p->parse_state = JSON_PARSE_END;
1248 if (!(p->flags & JSPF_TRAILER)) {
1249 p->done = true;
1250 }
1251 } else {
1252 p->height--;
1253 node = json_parser_top(p);
1254 if (node->json->type == JSON_ARRAY) {
1255 p->parse_state = JSON_PARSE_ARRAY_NEXT;
1256 } else if (node->json->type == JSON_OBJECT) {
1257 p->parse_state = JSON_PARSE_OBJECT_NEXT;
1258 } else {
1259 NOT_REACHED();
1260 }
1261 }
1262 }
1263
1264 static void
1265 json_parser_input(struct json_parser *p, struct json_token *token)
1266 {
1267 switch (p->parse_state) {
1268 case JSON_PARSE_START:
1269 if (token->type == '{') {
1270 json_parser_push_object(p);
1271 } else if (token->type == '[') {
1272 json_parser_push_array(p);
1273 } else {
1274 json_error(p, "syntax error at beginning of input");
1275 }
1276 break;
1277
1278 case JSON_PARSE_END:
1279 json_error(p, "trailing garbage at end of input");
1280 break;
1281
1282 case JSON_PARSE_OBJECT_INIT:
1283 if (token->type == '}') {
1284 json_parser_pop(p);
1285 break;
1286 }
1287 /* Fall through. */
1288 case JSON_PARSE_OBJECT_NAME:
1289 if (token->type == T_STRING) {
1290 p->member_name = xstrdup(token->u.string);
1291 p->parse_state = JSON_PARSE_OBJECT_COLON;
1292 } else {
1293 json_error(p, "syntax error parsing object expecting string");
1294 }
1295 break;
1296
1297 case JSON_PARSE_OBJECT_COLON:
1298 if (token->type == ':') {
1299 p->parse_state = JSON_PARSE_OBJECT_VALUE;
1300 } else {
1301 json_error(p, "syntax error parsing object expecting ':'");
1302 }
1303 break;
1304
1305 case JSON_PARSE_OBJECT_VALUE:
1306 json_parse_value(p, token, JSON_PARSE_OBJECT_NEXT);
1307 break;
1308
1309 case JSON_PARSE_OBJECT_NEXT:
1310 if (token->type == ',') {
1311 p->parse_state = JSON_PARSE_OBJECT_NAME;
1312 } else if (token->type == '}') {
1313 json_parser_pop(p);
1314 } else {
1315 json_error(p, "syntax error expecting '}' or ','");
1316 }
1317 break;
1318
1319 case JSON_PARSE_ARRAY_INIT:
1320 if (token->type == ']') {
1321 json_parser_pop(p);
1322 break;
1323 }
1324 /* Fall through. */
1325 case JSON_PARSE_ARRAY_VALUE:
1326 json_parse_value(p, token, JSON_PARSE_ARRAY_NEXT);
1327 break;
1328
1329 case JSON_PARSE_ARRAY_NEXT:
1330 if (token->type == ',') {
1331 p->parse_state = JSON_PARSE_ARRAY_VALUE;
1332 } else if (token->type == ']') {
1333 json_parser_pop(p);
1334 } else {
1335 json_error(p, "syntax error expecting ']' or ','");
1336 }
1337 break;
1338
1339 default:
1340 abort();
1341 }
1342
1343 p->lex_state = JSON_LEX_START;
1344 ds_clear(&p->buffer);
1345 }
1346
1347 static struct json *
1348 json_create(enum json_type type)
1349 {
1350 struct json *json = xmalloc(sizeof *json);
1351 json->type = type;
1352 return json;
1353 }
1354
1355 static void
1356 json_error(struct json_parser *p, const char *format, ...)
1357 {
1358 if (!p->error) {
1359 struct ds msg;
1360 va_list args;
1361
1362 ds_init(&msg);
1363 ds_put_format(&msg, "line %d, column %d, byte %d: ",
1364 p->line_number, p->column_number, p->byte_number);
1365 va_start(args, format);
1366 ds_put_format_valist(&msg, format, args);
1367 va_end(args);
1368
1369 p->error = ds_steal_cstr(&msg);
1370
1371 p->done = true;
1372 }
1373 }
1374 \f
1375 #define SPACES_PER_LEVEL 2
1376
1377 struct json_serializer {
1378 struct ds ds;
1379 int depth;
1380 int flags;
1381 };
1382
/* Forward declarations for the serialization helpers defined below; the
 * value, object, and array serializers call one another recursively. */
static void json_to_ds(const struct json *, struct json_serializer *);
static void json_object_to_ds(const struct shash *object,
                              struct json_serializer *);
static void json_array_to_ds(const struct json_array *,
                             struct json_serializer *);
static void json_string_to_ds(const char *string, struct ds *);
1389
1390 /* Converts 'json' to a string in JSON format, encoded in UTF-8, and returns
1391 * that string. The caller is responsible for freeing the returned string,
1392 * with free(), when it is no longer needed.
1393 *
1394 * If 'flags' contains JSSF_PRETTY, the output is pretty-printed with each
1395 * nesting level introducing an additional indentation. Otherwise, the
1396 * returned string does not contain any new-line characters.
1397 *
1398 * If 'flags' contains JSSF_SORT, members of objects in the output are sorted
1399 * in bytewise lexicographic order for reproducibility. Otherwise, members of
1400 * objects are output in an indeterminate order.
1401 *
1402 * The returned string is valid JSON only if 'json' represents an array or an
1403 * object, since a bare literal does not satisfy the JSON grammar. */
1404 char *
1405 json_to_string(const struct json *json, int flags)
1406 {
1407 struct json_serializer s;
1408 ds_init(&s.ds);
1409 s.depth = 0;
1410 s.flags = flags;
1411 json_to_ds(json, &s);
1412 return ds_steal_cstr(&s.ds);
1413 }
1414
1415 static void
1416 json_to_ds(const struct json *json, struct json_serializer *s)
1417 {
1418 struct ds *ds = &s->ds;
1419
1420 switch (json->type) {
1421 case JSON_NULL:
1422 ds_put_cstr(ds, "null");
1423 break;
1424
1425 case JSON_FALSE:
1426 ds_put_cstr(ds, "false");
1427 break;
1428
1429 case JSON_TRUE:
1430 ds_put_cstr(ds, "true");
1431 break;
1432
1433 case JSON_OBJECT:
1434 json_object_to_ds(json->u.object, s);
1435 break;
1436
1437 case JSON_ARRAY:
1438 json_array_to_ds(&json->u.array, s);
1439 break;
1440
1441 case JSON_INTEGER:
1442 ds_put_format(ds, "%lld", json->u.integer);
1443 break;
1444
1445 case JSON_REAL:
1446 ds_put_format(ds, "%.*g", DBL_DIG, json->u.real);
1447 break;
1448
1449 case JSON_STRING:
1450 json_string_to_ds(json->u.string, ds);
1451 break;
1452
1453 case JSON_N_TYPES:
1454 default:
1455 NOT_REACHED();
1456 }
1457 }
1458
1459 static void
1460 indent_line(struct json_serializer *s)
1461 {
1462 if (s->flags & JSSF_PRETTY) {
1463 ds_put_char(&s->ds, '\n');
1464 ds_put_char_multiple(&s->ds, ' ', SPACES_PER_LEVEL * s->depth);
1465 }
1466 }
1467
1468 static void
1469 json_object_member_to_ds(size_t i, const struct shash_node *node,
1470 struct json_serializer *s)
1471 {
1472 struct ds *ds = &s->ds;
1473
1474 if (i) {
1475 ds_put_char(ds, ',');
1476 indent_line(s);
1477 }
1478
1479 json_string_to_ds(node->name, ds);
1480 ds_put_char(ds, ':');
1481 if (s->flags & JSSF_PRETTY) {
1482 ds_put_char(ds, ' ');
1483 }
1484 json_to_ds(node->data, s);
1485 }
1486
1487 static void
1488 json_object_to_ds(const struct shash *object, struct json_serializer *s)
1489 {
1490 struct ds *ds = &s->ds;
1491
1492 ds_put_char(ds, '{');
1493
1494 s->depth++;
1495 indent_line(s);
1496
1497 if (s->flags & JSSF_SORT) {
1498 const struct shash_node **nodes;
1499 size_t n, i;
1500
1501 nodes = shash_sort(object);
1502 n = shash_count(object);
1503 for (i = 0; i < n; i++) {
1504 json_object_member_to_ds(i, nodes[i], s);
1505 }
1506 free(nodes);
1507 } else {
1508 struct shash_node *node;
1509 size_t i;
1510
1511 i = 0;
1512 SHASH_FOR_EACH (node, object) {
1513 json_object_member_to_ds(i++, node, s);
1514 }
1515 }
1516
1517 ds_put_char(ds, '}');
1518 s->depth--;
1519 }
1520
1521 static void
1522 json_array_to_ds(const struct json_array *array, struct json_serializer *s)
1523 {
1524 struct ds *ds = &s->ds;
1525 size_t i;
1526
1527 ds_put_char(ds, '[');
1528 s->depth++;
1529
1530 if (array->n > 0) {
1531 indent_line(s);
1532
1533 for (i = 0; i < array->n; i++) {
1534 if (i) {
1535 ds_put_char(ds, ',');
1536 indent_line(s);
1537 }
1538 json_to_ds(array->elems[i], s);
1539 }
1540 }
1541
1542 s->depth--;
1543 ds_put_char(ds, ']');
1544 }
1545
/* Appends 'string' to 'ds' as a double-quoted JSON string.
 *
 * The double quote, backslash, and the control characters \b, \f, \n, \r and
 * \t are written as two-character escapes.  Any other byte below 32 is
 * written as a \u00XX escape.  Every remaining byte is copied through
 * unchanged. */
static void
json_string_to_ds(const char *string, struct ds *ds)
{
    const char *p;

    ds_put_char(ds, '"');
    for (p = string; *p != '\0'; p++) {
        uint8_t byte = (uint8_t) *p;
        const char *escape;

        /* Pick the short escape sequence for this byte, if it has one. */
        switch (byte) {
        case '"':  escape = "\\\""; break;
        case '\\': escape = "\\\\"; break;
        case '\b': escape = "\\b";  break;
        case '\f': escape = "\\f";  break;
        case '\n': escape = "\\n";  break;
        case '\r': escape = "\\r";  break;
        case '\t': escape = "\\t";  break;
        default:   escape = NULL;   break;
        }

        if (escape) {
            ds_put_cstr(ds, escape);
        } else if (byte < 32) {
            /* Remaining control characters need the generic form. */
            ds_put_format(ds, "\\u%04x", byte);
        } else {
            ds_put_char(ds, byte);
        }
    }
    ds_put_char(ds, '"');
}