]> git.proxmox.com Git - mirror_ovs.git/blob - lib/json.c
Implement JSON parsing and serialization.
[mirror_ovs.git] / lib / json.c
1 /*
2 * Copyright (c) 2009 Nicira Networks.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18
19 #include "json.h"
20
21 #include <assert.h>
22 #include <ctype.h>
23 #include <errno.h>
24 #include <float.h>
25 #include <limits.h>
26 #include <math.h>
27 #include <string.h>
28
29 #include "dynamic-string.h"
30 #include "hash.h"
31 #include "shash.h"
32 #include "unicode.h"
33 #include "util.h"
34
35 /* The type of a JSON token. */
36 enum json_token_type {
37 T_EOF = 0,
38 T_BEGIN_ARRAY = '[',
39 T_END_ARRAY = ']',
40 T_BEGIN_OBJECT = '{',
41 T_END_OBJECT = '}',
42 T_NAME_SEPARATOR = ':',
43 T_VALUE_SEPARATOR = ',',
44 T_FALSE = UCHAR_MAX + 1,
45 T_NULL,
46 T_TRUE,
47 T_INTEGER,
48 T_REAL,
49 T_STRING
50 };
51
52 /* A JSON token.
53 *
54 * RFC 4627 doesn't define a lexical structure for JSON but I believe this to
55 * be compliant with the standard.
56 */
57 struct json_token {
58 enum json_token_type type;
59 union {
60 double real;
61 long long int integer;
62 const char *string;
63 } u;
64 };
65
66 enum json_lex_state {
67 JSON_LEX_START, /* Not inside a token. */
68 JSON_LEX_NUMBER, /* Reading a number. */
69 JSON_LEX_KEYWORD, /* Reading a keyword. */
70 JSON_LEX_STRING, /* Reading a quoted string. */
71 JSON_LEX_ESCAPE /* In a quoted string just after a "\". */
72 };
73
74 enum json_parse_state {
75 JSON_PARSE_START, /* Beginning of input. */
76 JSON_PARSE_END, /* End of input. */
77
78 /* Objects. */
79 JSON_PARSE_OBJECT_INIT, /* Expecting '}' or an object name. */
80 JSON_PARSE_OBJECT_NAME, /* Expecting an object name. */
81 JSON_PARSE_OBJECT_COLON, /* Expecting ':'. */
82 JSON_PARSE_OBJECT_VALUE, /* Expecting an object value. */
83 JSON_PARSE_OBJECT_NEXT, /* Expecting ',' or '}'. */
84
85 /* Arrays. */
86 JSON_PARSE_ARRAY_INIT, /* Expecting ']' or a value. */
87 JSON_PARSE_ARRAY_VALUE, /* Expecting a value. */
88 JSON_PARSE_ARRAY_NEXT /* Expecting ',' or ']'. */
89 };
90
91 struct json_parser_node {
92 struct json *json;
93 };
94
95 /* A JSON parser. */
96 struct json_parser {
97 int flags;
98
99 /* Lexical analysis. */
100 enum json_lex_state lex_state;
101 struct ds buffer; /* Buffer for accumulating token text. */
102
103 /* Parsing. */
104 enum json_parse_state parse_state;
105 #define JSON_MAX_HEIGHT 1000
106 struct json_parser_node *stack;
107 size_t height, allocated_height;
108 char *member_name;
109
110 /* Parse status. */
111 bool done;
112 char *error; /* Error message, if any, null if none yet. */
113 };
114
115 static struct json *json_create(enum json_type type);
116 static void json_parser_input(struct json_parser *, struct json_token *);
117
118 static void json_error(struct json_parser *p, const char *format, ...)
119 PRINTF_FORMAT(2, 3);
120 \f
121 const char *
122 json_type_to_string(enum json_type type)
123 {
124 switch (type) {
125 case JSON_NULL:
126 return "null";
127
128 case JSON_FALSE:
129 return "false";
130
131 case JSON_TRUE:
132 return "true";
133
134 case JSON_OBJECT:
135 return "object";
136
137 case JSON_ARRAY:
138 return "array";
139
140 case JSON_INTEGER:
141 case JSON_REAL:
142 return "number";
143
144 case JSON_STRING:
145 return "string";
146
147 case JSON_N_TYPES:
148 default:
149 return "<invalid>";
150 }
151 }
152 \f
153 /* Functions for manipulating struct json. */
154
155 struct json *
156 json_null_create(void)
157 {
158 return json_create(JSON_NULL);
159 }
160
161 struct json *
162 json_boolean_create(bool b)
163 {
164 return json_create(b ? JSON_TRUE : JSON_FALSE);
165 }
166
167 struct json *
168 json_string_create_nocopy(char *s)
169 {
170 struct json *json = json_create(JSON_STRING);
171 json->u.string = s;
172 return json;
173 }
174
175 struct json *
176 json_string_create(const char *s)
177 {
178 return json_string_create_nocopy(xstrdup(s));
179 }
180
181 struct json *
182 json_array_create_empty(void)
183 {
184 struct json *json = json_create(JSON_ARRAY);
185 json->u.array.elems = NULL;
186 json->u.array.n = 0;
187 json->u.array.n_allocated = 0;
188 return json;
189 }
190
191 void
192 json_array_add(struct json *array_, struct json *element)
193 {
194 struct json_array *array = json_array(array_);
195 if (array->n >= array->n_allocated) {
196 array->elems = x2nrealloc(array->elems, &array->n_allocated,
197 sizeof *array->elems);
198 }
199 array->elems[array->n++] = element;
200 }
201
202 void
203 json_array_trim(struct json *array_)
204 {
205 struct json_array *array = json_array(array_);
206 if (array->n < array->n_allocated){
207 array->n_allocated = array->n;
208 array->elems = xrealloc(array->elems, array->n * sizeof *array->elems);
209 }
210 }
211
212 struct json *
213 json_array_create(struct json **elements, size_t n)
214 {
215 struct json *json = json_create(JSON_ARRAY);
216 json->u.array.elems = elements;
217 json->u.array.n = n;
218 json->u.array.n_allocated = n;
219 return json;
220 }
221
222 struct json *
223 json_array_create_2(struct json *elem0, struct json *elem1)
224 {
225 struct json **elems = xmalloc(2 * sizeof *elems);
226 elems[0] = elem0;
227 elems[1] = elem1;
228 return json_array_create(elems, 2);
229 }
230
231 struct json *
232 json_array_create_3(struct json *elem0, struct json *elem1, struct json *elem2)
233 {
234 struct json **elems = xmalloc(3 * sizeof *elems);
235 elems[0] = elem0;
236 elems[1] = elem1;
237 elems[2] = elem2;
238 return json_array_create(elems, 3);
239 }
240
241 struct json *
242 json_object_create(void)
243 {
244 struct json *json = json_create(JSON_OBJECT);
245 json->u.object = xmalloc(sizeof *json->u.object);
246 shash_init(json->u.object);
247 return json;
248 }
249
250 struct json *
251 json_integer_create(long long int integer)
252 {
253 struct json *json = json_create(JSON_INTEGER);
254 json->u.integer = integer;
255 return json;
256 }
257
258 struct json *
259 json_real_create(double real)
260 {
261 struct json *json = json_create(JSON_REAL);
262 json->u.real = real;
263 return json;
264 }
265
266 void
267 json_object_put(struct json *json, const char *name, struct json *value)
268 {
269 shash_add(json->u.object, name, value);
270 }
271
272 void
273 json_object_put_string(struct json *json, const char *name, const char *value)
274 {
275 json_object_put(json, name, json_string_create(value));
276 }
277
278 const char *
279 json_string(const struct json *json)
280 {
281 assert(json->type == JSON_STRING);
282 return json->u.string;
283 }
284
285 struct json_array *
286 json_array(const struct json *json)
287 {
288 assert(json->type == JSON_ARRAY);
289 return (struct json_array *) &json->u.array;
290 }
291
292 struct shash *
293 json_object(const struct json *json)
294 {
295 assert(json->type == JSON_OBJECT);
296 return (struct shash *) json->u.object;
297 }
298
299 bool
300 json_boolean(const struct json *json)
301 {
302 assert(json->type == JSON_TRUE || json->type == JSON_FALSE);
303 return json->type == JSON_TRUE;
304 }
305
306 double
307 json_real(const struct json *json)
308 {
309 assert(json->type == JSON_REAL || json->type == JSON_INTEGER);
310 return json->type == JSON_REAL ? json->u.real : json->u.integer;
311 }
312
313 int64_t
314 json_integer(const struct json *json)
315 {
316 assert(json->type == JSON_INTEGER);
317 return json->u.integer;
318 }
319 \f
320 static void json_destroy_object(struct shash *object);
321 static void json_destroy_array(struct json_array *array);
322
323 /* Frees 'json' and everything it points to, recursively. */
324 void
325 json_destroy(struct json *json)
326 {
327 if (json) {
328 switch (json->type) {
329 case JSON_OBJECT:
330 json_destroy_object(json->u.object);
331 break;
332
333 case JSON_ARRAY:
334 json_destroy_array(&json->u.array);
335 break;
336
337 case JSON_STRING:
338 free(json->u.string);
339 break;
340
341 case JSON_NULL:
342 case JSON_FALSE:
343 case JSON_TRUE:
344 case JSON_INTEGER:
345 case JSON_REAL:
346 break;
347
348 case JSON_N_TYPES:
349 NOT_REACHED();
350 }
351 free(json);
352 }
353 }
354
355 static void
356 json_destroy_object(struct shash *object)
357 {
358 struct shash_node *node, *next;
359
360 SHASH_FOR_EACH_SAFE (node, next, object) {
361 struct json *value = node->data;
362
363 json_destroy(value);
364 shash_delete(object, node);
365 }
366 shash_destroy(object);
367 free(object);
368 }
369
370 static void
371 json_destroy_array(struct json_array *array)
372 {
373 size_t i;
374
375 for (i = 0; i < array->n; i++) {
376 json_destroy(array->elems[i]);
377 }
378 free(array->elems);
379 }
380 \f
381 static struct json *json_clone_object(const struct shash *object);
382 static struct json *json_clone_array(const struct json_array *array);
383
384 /* Returns a deep copy of 'json'. */
385 struct json *
386 json_clone(const struct json *json)
387 {
388 switch (json->type) {
389 case JSON_OBJECT:
390 return json_clone_object(json->u.object);
391
392 case JSON_ARRAY:
393 return json_clone_array(&json->u.array);
394
395 case JSON_STRING:
396 return json_string_create(json->u.string);
397
398 case JSON_NULL:
399 case JSON_FALSE:
400 case JSON_TRUE:
401 return json_create(json->type);
402
403 case JSON_INTEGER:
404 return json_integer_create(json->u.integer);
405
406 case JSON_REAL:
407 return json_real_create(json->u.real);
408
409 case JSON_N_TYPES:
410 default:
411 NOT_REACHED();
412 }
413 }
414
415 static struct json *
416 json_clone_object(const struct shash *object)
417 {
418 struct shash_node *node;
419 struct json *json;
420
421 json = json_object_create();
422 SHASH_FOR_EACH (node, object) {
423 struct json *value = node->data;
424 json_object_put(json, node->name, json_clone(value));
425 }
426 return json;
427 }
428
429 static struct json *
430 json_clone_array(const struct json_array *array)
431 {
432 struct json **elems;
433 size_t i;
434
435 elems = xmalloc(array->n * sizeof *elems);
436 for (i = 0; i < array->n; i++) {
437 elems[i] = json_clone(array->elems[i]);
438 }
439 return json_array_create(elems, array->n);
440 }
441 \f
442 static size_t
443 json_hash_object(const struct shash *object, size_t basis)
444 {
445 const struct shash_node **nodes;
446 size_t n, i;
447
448 nodes = shash_sort(object);
449 n = shash_count(object);
450 for (i = 0; i < n; i++) {
451 const struct shash_node *node = nodes[i];
452 basis = hash_string(node->name, basis);
453 basis = json_hash(node->data, basis);
454 }
455 return basis;
456 }
457
458 static size_t
459 json_hash_array(const struct json_array *array, size_t basis)
460 {
461 size_t i;
462
463 basis = hash_int(array->n, basis);
464 for (i = 0; i < array->n; i++) {
465 basis = json_hash(array->elems[i], basis);
466 }
467 return basis;
468 }
469
470 size_t
471 json_hash(const struct json *json, size_t basis)
472 {
473 switch (json->type) {
474 case JSON_OBJECT:
475 return json_hash_object(json->u.object, basis);
476
477 case JSON_ARRAY:
478 return json_hash_array(&json->u.array, basis);
479
480 case JSON_STRING:
481 return hash_string(json->u.string, basis);
482
483 case JSON_NULL:
484 case JSON_FALSE:
485 case JSON_TRUE:
486 return hash_int(json->type << 8, basis);
487
488 case JSON_INTEGER:
489 return hash_int(json->u.integer, basis);
490
491 case JSON_REAL:
492 return hash_double(json->u.real, basis);
493
494 case JSON_N_TYPES:
495 default:
496 NOT_REACHED();
497 }
498 }
499
500 static bool
501 json_equal_object(const struct shash *a, const struct shash *b)
502 {
503 struct shash_node *node;
504
505 if (shash_count(a) != shash_count(b)) {
506 return false;
507 }
508
509 SHASH_FOR_EACH (node, a) {
510 if (!shash_find(b, node->name)) {
511 return false;
512 }
513 }
514
515 return true;
516 }
517
518 static bool
519 json_equal_array(const struct json_array *a, const struct json_array *b)
520 {
521 size_t i;
522
523 if (a->n != b->n) {
524 return false;
525 }
526
527 for (i = 0; i < a->n; i++) {
528 if (!json_equal(a->elems[i], b->elems[i])) {
529 return false;
530 }
531 }
532
533 return true;
534 }
535
536 bool
537 json_equal(const struct json *a, const struct json *b)
538 {
539 if (a->type != b->type) {
540 return false;
541 }
542
543 switch (a->type) {
544 case JSON_OBJECT:
545 return json_equal_object(a->u.object, b->u.object);
546
547 case JSON_ARRAY:
548 return json_equal_array(&a->u.array, &b->u.array);
549
550 case JSON_STRING:
551 return !strcmp(a->u.string, b->u.string);
552
553 case JSON_NULL:
554 case JSON_FALSE:
555 case JSON_TRUE:
556 return true;
557
558 case JSON_INTEGER:
559 return a->u.integer == b->u.integer;
560
561 case JSON_REAL:
562 return a->u.real == b->u.real;
563
564 case JSON_N_TYPES:
565 default:
566 NOT_REACHED();
567 }
568 }
569 \f
570 /* Lexical analysis. */
571
572 static void
573 json_lex_keyword(struct json_parser *p)
574 {
575 struct json_token token;
576 const char *s;
577
578 s = ds_cstr(&p->buffer);
579 if (!strcmp(s, "false")) {
580 token.type = T_FALSE;
581 } else if (!strcmp(s, "true")) {
582 token.type = T_TRUE;
583 } else if (!strcmp(s, "null")) {
584 token.type = T_NULL;
585 } else {
586 json_error(p, "invalid keyword '%s'", s);
587 return;
588 }
589 json_parser_input(p, &token);
590 }
591
592 static void
593 json_lex_number(struct json_parser *p)
594 {
595 const char *cp = ds_cstr(&p->buffer);
596 unsigned long long int significand = 0;
597 int sig_digits = 0;
598 bool imprecise = false;
599 bool negative = false;
600 int pow10 = 0;
601
602 /* Leading minus sign. */
603 if (*cp == '-') {
604 negative = true;
605 cp++;
606 }
607
608 /* At least one integer digit, but 0 may not be used as a leading digit for
609 * a longer number. */
610 significand = 0;
611 sig_digits = 0;
612 if (*cp == '0') {
613 cp++;
614 if (isdigit(*cp)) {
615 json_error(p, "leading zeros not allowed");
616 return;
617 }
618 } else if (isdigit(*cp)) {
619 do {
620 if (significand <= ULLONG_MAX / 10) {
621 significand = significand * 10 + (*cp - '0');
622 sig_digits++;
623 } else {
624 pow10++;
625 if (*cp != '0') {
626 imprecise = true;
627 }
628 }
629 cp++;
630 } while (isdigit(*cp));
631 } else {
632 json_error(p, "'-' must be followed by digit");
633 return;
634 }
635
636 /* Optional fraction. */
637 if (*cp == '.') {
638 cp++;
639 if (!isdigit(*cp)) {
640 json_error(p, "decimal point must be followed by digit");
641 return;
642 }
643 do {
644 if (significand <= ULLONG_MAX / 10) {
645 significand = significand * 10 + (*cp - '0');
646 sig_digits++;
647 pow10--;
648 } else if (*cp != '0') {
649 imprecise = true;
650 }
651 cp++;
652 } while (isdigit(*cp));
653 }
654
655 /* Optional exponent. */
656 if (*cp == 'e' || *cp == 'E') {
657 bool negative_exponent = false;
658 int exponent;
659
660 cp++;
661 if (*cp == '+') {
662 cp++;
663 } else if (*cp == '-') {
664 negative_exponent = true;
665 cp++;
666 }
667
668 if (!isdigit(*cp)) {
669 json_error(p, "exponent must contain at least one digit");
670 return;
671 }
672
673 exponent = 0;
674 do {
675 if (exponent >= INT_MAX / 10) {
676 json_error(p, "exponent outside valid range");
677 return;
678 }
679 exponent = exponent * 10 + (*cp - '0');
680 cp++;
681 } while (isdigit(*cp));
682
683 if (negative_exponent) {
684 pow10 -= exponent;
685 } else {
686 pow10 += exponent;
687 }
688 }
689
690 if (*cp != '\0') {
691 json_error(p, "syntax error in number");
692 return;
693 }
694
695 /* Figure out number.
696 *
697 * We suppress negative zeros as a matter of policy. */
698 if (!significand) {
699 struct json_token token;
700 token.type = T_INTEGER;
701 token.u.integer = 0;
702 json_parser_input(p, &token);
703 return;
704 }
705
706 if (!imprecise) {
707 while (pow10 > 0 && significand < ULLONG_MAX / 10) {
708 significand *= 10;
709 sig_digits++;
710 pow10--;
711 }
712 while (pow10 < 0 && significand % 10 == 0) {
713 significand /= 10;
714 sig_digits--;
715 pow10++;
716 }
717 if (pow10 == 0
718 && significand <= (negative
719 ? (unsigned long long int) LLONG_MAX + 1
720 : LLONG_MAX)) {
721 struct json_token token;
722 token.type = T_INTEGER;
723 token.u.integer = negative ? -significand : significand;
724 json_parser_input(p, &token);
725 return;
726 }
727 }
728
729 if (pow10 + sig_digits <= DBL_MAX_10_EXP) {
730 struct json_token token;
731 token.type = T_REAL;
732 token.u.real = significand * pow(10.0, pow10);
733 if (token.u.real <= DBL_MAX) {
734 if (negative && token.u.real) {
735 token.u.real = -token.u.real;
736 }
737 json_parser_input(p, &token);
738 return;
739 }
740 }
741 json_error(p, "number outside valid range");
742 }
743
744 static bool
745 json_lex_4hex(struct json_parser *p, const char *cp, int *valuep)
746 {
747 int value, i;
748
749 value = 0;
750 for (i = 0; i < 4; i++) {
751 unsigned char c = *cp++;
752 if (!isxdigit(c)) {
753 json_error(p, "malformed \\u escape");
754 return false;
755 }
756 value = (value << 4) | hexit_value(c);
757 }
758 if (!value) {
759 json_error(p, "null bytes not supported in quoted strings");
760 return false;
761 }
762 *valuep = value;
763 return true;
764 }
765
766 static const char *
767 json_lex_unicode(struct json_parser *p, const char *cp, struct ds *s)
768 {
769 int c0, c1;
770
771 if (!json_lex_4hex(p, cp, &c0)) {
772 return NULL;
773 }
774 cp += 4;
775 if (!uc_is_leading_surrogate(c0)) {
776 ds_put_utf8(s, c0);
777 return cp;
778 }
779
780 if (*cp++ != '\\' || *cp++ != 'u') {
781 json_error(p, "malformed escaped surrogate pair");
782 return NULL;
783 }
784
785 if (!json_lex_4hex(p, cp, &c1)) {
786 return NULL;
787 }
788 cp += 4;
789 if (!uc_is_trailing_surrogate(c1)) {
790 json_error(p, "second half of escaped surrogate pair is not "
791 "trailing surrogate");
792 return NULL;
793 }
794
795 ds_put_utf8(s, utf16_decode_surrogate_pair(c0, c1));
796 return cp;
797 }
798
799 static void
800 json_lex_string(struct json_parser *p)
801 {
802 struct json_token token;
803 const char *cp;
804 struct ds s;
805
806 cp = ds_cstr(&p->buffer);
807 if (!strchr(cp, '\\')) {
808 token.type = T_STRING;
809 token.u.string = cp;
810 json_parser_input(p, &token);
811 return;
812 }
813
814 ds_init(&s);
815 ds_reserve(&s, strlen(cp));
816 while (*cp != '\0') {
817 if (*cp != '\\') {
818 ds_put_char(&s, *cp++);
819 continue;
820 }
821
822 cp++;
823 switch (*cp++) {
824 case '"': case '\\': case '/':
825 ds_put_char(&s, cp[-1]);
826 break;
827
828 case 'b':
829 ds_put_char(&s, '\b');
830 break;
831
832 case 'f':
833 ds_put_char(&s, '\f');
834 break;
835
836 case 'n':
837 ds_put_char(&s, '\n');
838 break;
839
840 case 'r':
841 ds_put_char(&s, '\r');
842 break;
843
844 case 't':
845 ds_put_char(&s, '\t');
846 break;
847
848 case 'u':
849 cp = json_lex_unicode(p, cp, &s);
850 if (!cp) {
851 goto exit;
852 }
853 break;
854
855 default:
856 json_error(p, "bad escape \\%c", cp[-1]);
857 goto exit;
858 }
859 }
860
861 token.type = T_STRING;
862 token.u.string = ds_cstr(&s);
863 json_parser_input(p, &token);
864
865 exit:
866 ds_destroy(&s);
867 return;
868 }
869
870 static bool
871 json_lex_input(struct json_parser *p, int c)
872 {
873 struct json_token token;
874
875 switch (p->lex_state) {
876 case JSON_LEX_START:
877 switch (c) {
878 case ' ': case '\t': case '\n': case '\r':
879 /* Nothing to do. */
880 return true;
881
882 case 'a': case 'b': case 'c': case 'd': case 'e':
883 case 'f': case 'g': case 'h': case 'i': case 'j':
884 case 'k': case 'l': case 'm': case 'n': case 'o':
885 case 'p': case 'q': case 'r': case 's': case 't':
886 case 'u': case 'v': case 'w': case 'x': case 'y':
887 case 'z':
888 p->lex_state = JSON_LEX_KEYWORD;
889 break;
890
891 case '[': case '{': case ']': case '}': case ':': case ',':
892 token.type = c;
893 json_parser_input(p, &token);
894 return true;
895
896 case '-':
897 case '0': case '1': case '2': case '3': case '4':
898 case '5': case '6': case '7': case '8': case '9':
899 p->lex_state = JSON_LEX_NUMBER;
900 break;
901
902 case '"':
903 p->lex_state = JSON_LEX_STRING;
904 return true;
905
906 default:
907 if (isprint(c)) {
908 json_error(p, "invalid character '%c'", c);
909 } else {
910 json_error(p, "invalid character U+%04x", c);
911 }
912 return true;
913 }
914 break;
915
916 case JSON_LEX_KEYWORD:
917 if (!isalpha((unsigned char) c)) {
918 json_lex_keyword(p);
919 return false;
920 }
921 break;
922
923 case JSON_LEX_NUMBER:
924 if (!strchr(".0123456789eE-+", c)) {
925 json_lex_number(p);
926 return false;
927 }
928 break;
929
930 case JSON_LEX_STRING:
931 if (c == '\\') {
932 p->lex_state = JSON_LEX_ESCAPE;
933 } else if (c == '"') {
934 json_lex_string(p);
935 return true;
936 } else if (c < 0x20) {
937 json_error(p, "U+%04X must be escaped in quoted string", c);
938 return true;
939 }
940 break;
941
942 case JSON_LEX_ESCAPE:
943 p->lex_state = JSON_LEX_STRING;
944 break;
945
946 default:
947 abort();
948 }
949 ds_put_char(&p->buffer, c);
950 return true;
951 }
952 \f
953 /* Parsing. */
954
955 /* Parses 'string' as a JSON object or array and returns a newly allocated
956 * 'struct json'. The caller must free the returned structure with
957 * json_destroy() when it is no longer needed.
958 *
959 * 'string' must be encoded in UTF-8.
960 *
961 * If 'string' is valid JSON, then the returned 'struct json' will be either an
962 * object (JSON_OBJECT) or an array (JSON_ARRAY).
963 *
964 * If 'string' is not valid JSON, then the returned 'struct json' will be a
965 * string (JSON_STRING) that describes the particular error encountered during
966 * parsing. (This is an acceptable means of error reporting because at its top
967 * level JSON must be either an object or an array; a bare string is not
968 * valid.) */
969 struct json *
970 json_from_string(const char *string)
971 {
972 struct json_parser *p = json_parser_create(JSPF_TRAILER);
973 json_parser_feed(p, string, strlen(string));
974 return json_parser_finish(p);
975 }
976
977 /* Reads the file named 'file_name', parses its contents as a JSON object or
978 * array, and returns a newly allocated 'struct json'. The caller must free
979 * the returned structure with json_destroy() when it is no longer needed.
980 *
981 * The file must be encoded in UTF-8.
982 *
983 * See json_from_string() for return value semantics.
984 */
985 struct json *
986 json_from_file(const char *file_name)
987 {
988 struct json_parser *p;
989 struct json *json;
990 FILE *stream;
991
992 /* Open file. */
993 stream = fopen(file_name, "r");
994 if (!stream) {
995 return json_string_create_nocopy(
996 xasprintf("error opening \"%s\": %s", file_name, strerror(errno)));
997 }
998
999 /* Read and parse file. */
1000 p = json_parser_create(JSPF_TRAILER);
1001 for (;;) {
1002 char buffer[BUFSIZ];
1003 size_t n;
1004
1005 n = fread(buffer, 1, sizeof buffer, stream);
1006 if (!n || json_parser_feed(p, buffer, n) != n) {
1007 break;
1008 }
1009 }
1010 json = json_parser_finish(p);
1011
1012 /* Close file and check for I/O errors. */
1013 if (ferror(stream)) {
1014 json_destroy(json);
1015 json = json_string_create_nocopy(
1016 xasprintf("error reading \"%s\": %s", file_name, strerror(errno)));
1017 }
1018 fclose(stream);
1019
1020 return json;
1021 }
1022
1023 struct json_parser *
1024 json_parser_create(int flags)
1025 {
1026 struct json_parser *p = xzalloc(sizeof *p);
1027 p->flags = flags;
1028 return p;
1029 }
1030
1031 size_t
1032 json_parser_feed(struct json_parser *p, const char *input, size_t n)
1033 {
1034 size_t i;
1035 for (i = 0; !p->done && i < n; ) {
1036 if (json_lex_input(p, input[i])) {
1037 i++;
1038 }
1039 }
1040 return i;
1041 }
1042
1043 bool
1044 json_parser_is_done(const struct json_parser *p)
1045 {
1046 return p->done;
1047 }
1048
1049 struct json *
1050 json_parser_finish(struct json_parser *p)
1051 {
1052 struct json *json;
1053
1054 switch (p->lex_state) {
1055 case JSON_LEX_START:
1056 break;
1057
1058 case JSON_LEX_STRING:
1059 case JSON_LEX_ESCAPE:
1060 json_error(p, "unexpected end of input in quoted string");
1061 break;
1062
1063 case JSON_LEX_NUMBER:
1064 case JSON_LEX_KEYWORD:
1065 json_lex_input(p, ' ');
1066 break;
1067 }
1068
1069 if (p->parse_state == JSON_PARSE_START) {
1070 json_error(p, "empty input stream");
1071 } else if (p->parse_state != JSON_PARSE_END) {
1072 json_error(p, "unexpected end of input");
1073 }
1074
1075 if (!p->error) {
1076 assert(p->height == 1);
1077 assert(p->stack[0].json != NULL);
1078 json = p->stack[--p->height].json;
1079 } else {
1080 json = json_string_create_nocopy(p->error);
1081 p->error = NULL;
1082 }
1083
1084 json_parser_abort(p);
1085
1086 return json;
1087 }
1088
1089 void
1090 json_parser_abort(struct json_parser *p)
1091 {
1092 if (p) {
1093 ds_destroy(&p->buffer);
1094 if (p->height) {
1095 json_destroy(p->stack[0].json);
1096 }
1097 free(p->stack);
1098 free(p->member_name);
1099 free(p->error);
1100 free(p);
1101 }
1102 }
1103
1104 static struct json_parser_node *
1105 json_parser_top(struct json_parser *p)
1106 {
1107 return &p->stack[p->height - 1];
1108 }
1109
1110 static void
1111 json_parser_put_value(struct json_parser *p, struct json *value)
1112 {
1113 struct json_parser_node *node = json_parser_top(p);
1114 if (node->json->type == JSON_OBJECT) {
1115 json_object_put(node->json, p->member_name, value);
1116 free(p->member_name);
1117 p->member_name = NULL;
1118 } else if (node->json->type == JSON_ARRAY) {
1119 json_array_add(node->json, value);
1120 } else {
1121 NOT_REACHED();
1122 }
1123 }
1124
1125 static struct json_parser_node *
1126 json_parser_push(struct json_parser *p,
1127 struct json *new_json, enum json_parse_state new_state)
1128 {
1129 if (p->height < JSON_MAX_HEIGHT) {
1130 struct json_parser_node *node;
1131
1132 if (p->height >= p->allocated_height) {
1133 p->stack = x2nrealloc(p->stack, &p->allocated_height,
1134 sizeof *p->stack);
1135 }
1136
1137 if (p->height > 0) {
1138 json_parser_put_value(p, new_json);
1139 }
1140
1141 node = &p->stack[p->height++];
1142 node->json = new_json;
1143 p->parse_state = new_state;
1144 return node;
1145 } else {
1146 json_error(p, "input exceeds maximum nesting depth %d",
1147 JSON_MAX_HEIGHT);
1148 return NULL;
1149 }
1150 }
1151
1152 static void
1153 json_parser_push_object(struct json_parser *p)
1154 {
1155 json_parser_push(p, json_object_create(), JSON_PARSE_OBJECT_INIT);
1156 }
1157
1158 static void
1159 json_parser_push_array(struct json_parser *p)
1160 {
1161 json_parser_push(p, json_array_create_empty(), JSON_PARSE_ARRAY_INIT);
1162 }
1163
1164 static void
1165 json_parse_value(struct json_parser *p, struct json_token *token,
1166 enum json_parse_state next_state)
1167 {
1168 struct json *value;
1169
1170 switch (token->type) {
1171 case T_FALSE:
1172 value = json_boolean_create(false);
1173 break;
1174
1175 case T_NULL:
1176 value = json_null_create();
1177 break;
1178
1179 case T_TRUE:
1180 value = json_boolean_create(true);
1181 break;
1182
1183 case '{':
1184 json_parser_push_object(p);
1185 return;
1186
1187 case '[':
1188 json_parser_push_array(p);
1189 return;
1190
1191 case T_INTEGER:
1192 value = json_integer_create(token->u.integer);
1193 break;
1194
1195 case T_REAL:
1196 value = json_real_create(token->u.real);
1197 break;
1198
1199 case T_STRING:
1200 value = json_string_create(token->u.string);
1201 break;
1202
1203 case T_EOF:
1204 case '}':
1205 case ']':
1206 case ':':
1207 case ',':
1208 default:
1209 json_error(p, "syntax error expecting value");
1210 return;
1211 }
1212
1213 json_parser_put_value(p, value);
1214 p->parse_state = next_state;
1215 }
1216
1217 static void
1218 json_parser_pop(struct json_parser *p)
1219 {
1220 struct json_parser_node *node;
1221
1222 /* Conserve memory. */
1223 node = json_parser_top(p);
1224 if (node->json->type == JSON_ARRAY) {
1225 json_array_trim(node->json);
1226 }
1227
1228 /* Pop off the top-of-stack. */
1229 if (p->height == 1) {
1230 p->parse_state = JSON_PARSE_END;
1231 if (!(p->flags & JSPF_TRAILER)) {
1232 p->done = true;
1233 }
1234 } else {
1235 p->height--;
1236 node = json_parser_top(p);
1237 if (node->json->type == JSON_ARRAY) {
1238 p->parse_state = JSON_PARSE_ARRAY_NEXT;
1239 } else if (node->json->type == JSON_OBJECT) {
1240 p->parse_state = JSON_PARSE_OBJECT_NEXT;
1241 } else {
1242 NOT_REACHED();
1243 }
1244 }
1245 }
1246
1247 static void
1248 json_parser_input(struct json_parser *p, struct json_token *token)
1249 {
1250 switch (p->parse_state) {
1251 case JSON_PARSE_START:
1252 if (token->type == '{') {
1253 json_parser_push_object(p);
1254 } else if (token->type == '[') {
1255 json_parser_push_array(p);
1256 } else {
1257 json_error(p, "syntax error at beginning of input");
1258 }
1259 break;
1260
1261 case JSON_PARSE_END:
1262 json_error(p, "trailing garbage at end of input");
1263 break;
1264
1265 case JSON_PARSE_OBJECT_INIT:
1266 if (token->type == '}') {
1267 json_parser_pop(p);
1268 break;
1269 }
1270 /* Fall through. */
1271 case JSON_PARSE_OBJECT_NAME:
1272 if (token->type == T_STRING) {
1273 p->member_name = xstrdup(token->u.string);
1274 p->parse_state = JSON_PARSE_OBJECT_COLON;
1275 } else {
1276 json_error(p, "syntax error parsing object expecting string");
1277 }
1278 break;
1279
1280 case JSON_PARSE_OBJECT_COLON:
1281 if (token->type == ':') {
1282 p->parse_state = JSON_PARSE_OBJECT_VALUE;
1283 } else {
1284 json_error(p, "syntax error parsing object expecting ':'");
1285 }
1286 break;
1287
1288 case JSON_PARSE_OBJECT_VALUE:
1289 json_parse_value(p, token, JSON_PARSE_OBJECT_NEXT);
1290 break;
1291
1292 case JSON_PARSE_OBJECT_NEXT:
1293 if (token->type == ',') {
1294 p->parse_state = JSON_PARSE_OBJECT_NAME;
1295 } else if (token->type == '}') {
1296 json_parser_pop(p);
1297 } else {
1298 json_error(p, "syntax error expecting '}' or ','");
1299 }
1300 break;
1301
1302 case JSON_PARSE_ARRAY_INIT:
1303 if (token->type == ']') {
1304 json_parser_pop(p);
1305 break;
1306 }
1307 /* Fall through. */
1308 case JSON_PARSE_ARRAY_VALUE:
1309 json_parse_value(p, token, JSON_PARSE_ARRAY_NEXT);
1310 break;
1311
1312 case JSON_PARSE_ARRAY_NEXT:
1313 if (token->type == ',') {
1314 p->parse_state = JSON_PARSE_ARRAY_VALUE;
1315 } else if (token->type == ']') {
1316 json_parser_pop(p);
1317 } else {
1318 json_error(p, "syntax error expecting ']' or ','");
1319 }
1320 break;
1321
1322 default:
1323 abort();
1324 }
1325
1326 p->lex_state = JSON_LEX_START;
1327 ds_clear(&p->buffer);
1328 }
1329
1330 static struct json *
1331 json_create(enum json_type type)
1332 {
1333 struct json *json = xmalloc(sizeof *json);
1334 json->type = type;
1335 return json;
1336 }
1337
1338 static void
1339 json_error(struct json_parser *p, const char *format, ...)
1340 {
1341 if (!p->error) {
1342 va_list args;
1343
1344 va_start(args, format);
1345 p->error = xvasprintf(format, args);
1346 va_end(args);
1347
1348 p->done = true;
1349 }
1350 }
1351 \f
1352 #define SPACES_PER_LEVEL 2
1353
1354 struct json_serializer {
1355 struct ds ds;
1356 int depth;
1357 int flags;
1358 };
1359
1360 static void json_to_ds(const struct json *, struct json_serializer *);
1361 static void json_object_to_ds(const struct shash *object,
1362 struct json_serializer *);
1363 static void json_array_to_ds(const struct json_array *,
1364 struct json_serializer *);
1365 static void json_string_to_ds(const char *string, struct ds *);
1366
1367 /* Converts 'json' to a string in JSON format, encoded in UTF-8, and returns
1368 * that string. The caller is responsible for freeing the returned string,
1369 * with free(), when it is no longer needed.
1370 *
1371 * If 'flags' contains JSSF_PRETTY, the output is pretty-printed with each
1372 * nesting level introducing an additional indentation. Otherwise, the
1373 * returned string does not contain any new-line characters.
1374 *
1375 * If 'flags' contains JSSF_SORT, members of objects in the output are sorted
1376 * in bytewise lexicographic order for reproducibility. Otherwise, members of
1377 * objects are output in an indeterminate order.
1378 *
1379 * The returned string is valid JSON only if 'json' represents an array or an
1380 * object, since a bare literal does not satisfy the JSON grammar. */
1381 char *
1382 json_to_string(const struct json *json, int flags)
1383 {
1384 struct json_serializer s;
1385 ds_init(&s.ds);
1386 s.depth = 0;
1387 s.flags = flags;
1388 json_to_ds(json, &s);
1389 return ds_steal_cstr(&s.ds);
1390 }
1391
1392 static void
1393 json_to_ds(const struct json *json, struct json_serializer *s)
1394 {
1395 struct ds *ds = &s->ds;
1396
1397 switch (json->type) {
1398 case JSON_NULL:
1399 ds_put_cstr(ds, "null");
1400 break;
1401
1402 case JSON_FALSE:
1403 ds_put_cstr(ds, "false");
1404 break;
1405
1406 case JSON_TRUE:
1407 ds_put_cstr(ds, "true");
1408 break;
1409
1410 case JSON_OBJECT:
1411 json_object_to_ds(json->u.object, s);
1412 break;
1413
1414 case JSON_ARRAY:
1415 json_array_to_ds(&json->u.array, s);
1416 break;
1417
1418 case JSON_INTEGER:
1419 ds_put_format(ds, "%lld", json->u.integer);
1420 break;
1421
1422 case JSON_REAL:
1423 ds_put_format(ds, "%.*g", DBL_DIG, json->u.real);
1424 break;
1425
1426 case JSON_STRING:
1427 json_string_to_ds(json->u.string, ds);
1428 break;
1429
1430 case JSON_N_TYPES:
1431 default:
1432 NOT_REACHED();
1433 }
1434 }
1435
1436 static void
1437 indent_line(struct json_serializer *s)
1438 {
1439 if (s->flags & JSSF_PRETTY) {
1440 ds_put_char(&s->ds, '\n');
1441 ds_put_char_multiple(&s->ds, ' ', SPACES_PER_LEVEL * s->depth);
1442 }
1443 }
1444
1445 static void
1446 json_object_member_to_ds(size_t i, const struct shash_node *node,
1447 struct json_serializer *s)
1448 {
1449 struct ds *ds = &s->ds;
1450
1451 if (i) {
1452 ds_put_char(ds, ',');
1453 indent_line(s);
1454 }
1455
1456 json_string_to_ds(node->name, ds);
1457 ds_put_char(ds, ':');
1458 if (s->flags & JSSF_PRETTY) {
1459 ds_put_char(ds, ' ');
1460 }
1461 json_to_ds(node->data, s);
1462 }
1463
1464 static void
1465 json_object_to_ds(const struct shash *object, struct json_serializer *s)
1466 {
1467 struct ds *ds = &s->ds;
1468
1469 ds_put_char(ds, '{');
1470
1471 s->depth++;
1472 indent_line(s);
1473
1474 if (s->flags & JSSF_SORT) {
1475 const struct shash_node **nodes;
1476 size_t n, i;
1477
1478 nodes = shash_sort(object);
1479 n = shash_count(object);
1480 for (i = 0; i < n; i++) {
1481 json_object_member_to_ds(i, nodes[i], s);
1482 }
1483 free(nodes);
1484 } else {
1485 struct shash_node *node;
1486 size_t i;
1487
1488 i = 0;
1489 SHASH_FOR_EACH (node, object) {
1490 json_object_member_to_ds(i++, node, s);
1491 }
1492 }
1493
1494 ds_put_char(ds, '}');
1495 s->depth--;
1496 }
1497
1498 static void
1499 json_array_to_ds(const struct json_array *array, struct json_serializer *s)
1500 {
1501 struct ds *ds = &s->ds;
1502 size_t i;
1503
1504 ds_put_char(ds, '[');
1505 s->depth++;
1506
1507 if (array->n > 0) {
1508 indent_line(s);
1509
1510 for (i = 0; i < array->n; i++) {
1511 if (i) {
1512 ds_put_char(ds, ',');
1513 indent_line(s);
1514 }
1515 json_to_ds(array->elems[i], s);
1516 }
1517 }
1518
1519 s->depth--;
1520 ds_put_char(ds, ']');
1521 }
1522
1523 static void
1524 json_string_to_ds(const char *string, struct ds *ds)
1525 {
1526 uint8_t c;
1527
1528 ds_put_char(ds, '"');
1529 while ((c = *string++) != '\0') {
1530 switch (c) {
1531 case '"':
1532 ds_put_cstr(ds, "\\\"");
1533 break;
1534
1535 case '\\':
1536 ds_put_cstr(ds, "\\\\");
1537 break;
1538
1539 case '\b':
1540 ds_put_cstr(ds, "\\b");
1541 break;
1542
1543 case '\f':
1544 ds_put_cstr(ds, "\\f");
1545 break;
1546
1547 case '\n':
1548 ds_put_cstr(ds, "\\n");
1549 break;
1550
1551 case '\r':
1552 ds_put_cstr(ds, "\\r");
1553 break;
1554
1555 case '\t':
1556 ds_put_cstr(ds, "\\t");
1557 break;
1558
1559 default:
1560 if (c >= 32) {
1561 ds_put_char(ds, c);
1562 } else {
1563 ds_put_format(ds, "\\u%04x", c);
1564 }
1565 break;
1566 }
1567 }
1568 ds_put_char(ds, '"');
1569 }