]> git.proxmox.com Git - mirror_ovs.git/blob - lib/json.c
Merge "master" branch into "db".
[mirror_ovs.git] / lib / json.c
1 /*
2 * Copyright (c) 2009 Nicira Networks.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18
19 #include "json.h"
20
21 #include <assert.h>
22 #include <ctype.h>
23 #include <errno.h>
24 #include <float.h>
25 #include <limits.h>
26 #include <math.h>
27 #include <string.h>
28
29 #include "dynamic-string.h"
30 #include "hash.h"
31 #include "shash.h"
32 #include "unicode.h"
33 #include "util.h"
34
35 /* The type of a JSON token. */
36 enum json_token_type {
37 T_EOF = 0,
38 T_BEGIN_ARRAY = '[',
39 T_END_ARRAY = ']',
40 T_BEGIN_OBJECT = '{',
41 T_END_OBJECT = '}',
42 T_NAME_SEPARATOR = ':',
43 T_VALUE_SEPARATOR = ',',
44 T_FALSE = UCHAR_MAX + 1,
45 T_NULL,
46 T_TRUE,
47 T_INTEGER,
48 T_REAL,
49 T_STRING
50 };
51
52 /* A JSON token.
53 *
54 * RFC 4627 doesn't define a lexical structure for JSON but I believe this to
55 * be compliant with the standard.
56 */
57 struct json_token {
58 enum json_token_type type;
59 union {
60 double real;
61 long long int integer;
62 const char *string;
63 } u;
64 };
65
66 enum json_lex_state {
67 JSON_LEX_START, /* Not inside a token. */
68 JSON_LEX_NUMBER, /* Reading a number. */
69 JSON_LEX_KEYWORD, /* Reading a keyword. */
70 JSON_LEX_STRING, /* Reading a quoted string. */
71 JSON_LEX_ESCAPE /* In a quoted string just after a "\". */
72 };
73
74 enum json_parse_state {
75 JSON_PARSE_START, /* Beginning of input. */
76 JSON_PARSE_END, /* End of input. */
77
78 /* Objects. */
79 JSON_PARSE_OBJECT_INIT, /* Expecting '}' or an object name. */
80 JSON_PARSE_OBJECT_NAME, /* Expecting an object name. */
81 JSON_PARSE_OBJECT_COLON, /* Expecting ':'. */
82 JSON_PARSE_OBJECT_VALUE, /* Expecting an object value. */
83 JSON_PARSE_OBJECT_NEXT, /* Expecting ',' or '}'. */
84
85 /* Arrays. */
86 JSON_PARSE_ARRAY_INIT, /* Expecting ']' or a value. */
87 JSON_PARSE_ARRAY_VALUE, /* Expecting a value. */
88 JSON_PARSE_ARRAY_NEXT /* Expecting ',' or ']'. */
89 };
90
91 struct json_parser_node {
92 struct json *json;
93 };
94
95 /* A JSON parser. */
96 struct json_parser {
97 int flags;
98
99 /* Lexical analysis. */
100 enum json_lex_state lex_state;
101 struct ds buffer; /* Buffer for accumulating token text. */
102
103 /* Parsing. */
104 enum json_parse_state parse_state;
105 #define JSON_MAX_HEIGHT 1000
106 struct json_parser_node *stack;
107 size_t height, allocated_height;
108 char *member_name;
109
110 /* Parse status. */
111 bool done;
112 char *error; /* Error message, if any, null if none yet. */
113 };
114
115 static struct json *json_create(enum json_type type);
116 static void json_parser_input(struct json_parser *, struct json_token *);
117
118 static void json_error(struct json_parser *p, const char *format, ...)
119 PRINTF_FORMAT(2, 3);
120 \f
121 const char *
122 json_type_to_string(enum json_type type)
123 {
124 switch (type) {
125 case JSON_NULL:
126 return "null";
127
128 case JSON_FALSE:
129 return "false";
130
131 case JSON_TRUE:
132 return "true";
133
134 case JSON_OBJECT:
135 return "object";
136
137 case JSON_ARRAY:
138 return "array";
139
140 case JSON_INTEGER:
141 case JSON_REAL:
142 return "number";
143
144 case JSON_STRING:
145 return "string";
146
147 case JSON_N_TYPES:
148 default:
149 return "<invalid>";
150 }
151 }
152 \f
153 /* Functions for manipulating struct json. */
154
155 struct json *
156 json_null_create(void)
157 {
158 return json_create(JSON_NULL);
159 }
160
161 struct json *
162 json_boolean_create(bool b)
163 {
164 return json_create(b ? JSON_TRUE : JSON_FALSE);
165 }
166
167 struct json *
168 json_string_create_nocopy(char *s)
169 {
170 struct json *json = json_create(JSON_STRING);
171 json->u.string = s;
172 return json;
173 }
174
175 struct json *
176 json_string_create(const char *s)
177 {
178 return json_string_create_nocopy(xstrdup(s));
179 }
180
181 struct json *
182 json_array_create_empty(void)
183 {
184 struct json *json = json_create(JSON_ARRAY);
185 json->u.array.elems = NULL;
186 json->u.array.n = 0;
187 json->u.array.n_allocated = 0;
188 return json;
189 }
190
191 void
192 json_array_add(struct json *array_, struct json *element)
193 {
194 struct json_array *array = json_array(array_);
195 if (array->n >= array->n_allocated) {
196 array->elems = x2nrealloc(array->elems, &array->n_allocated,
197 sizeof *array->elems);
198 }
199 array->elems[array->n++] = element;
200 }
201
202 void
203 json_array_trim(struct json *array_)
204 {
205 struct json_array *array = json_array(array_);
206 if (array->n < array->n_allocated){
207 array->n_allocated = array->n;
208 array->elems = xrealloc(array->elems, array->n * sizeof *array->elems);
209 }
210 }
211
212 struct json *
213 json_array_create(struct json **elements, size_t n)
214 {
215 struct json *json = json_create(JSON_ARRAY);
216 json->u.array.elems = elements;
217 json->u.array.n = n;
218 json->u.array.n_allocated = n;
219 return json;
220 }
221
222 struct json *
223 json_array_create_2(struct json *elem0, struct json *elem1)
224 {
225 struct json **elems = xmalloc(2 * sizeof *elems);
226 elems[0] = elem0;
227 elems[1] = elem1;
228 return json_array_create(elems, 2);
229 }
230
231 struct json *
232 json_array_create_3(struct json *elem0, struct json *elem1, struct json *elem2)
233 {
234 struct json **elems = xmalloc(3 * sizeof *elems);
235 elems[0] = elem0;
236 elems[1] = elem1;
237 elems[2] = elem2;
238 return json_array_create(elems, 3);
239 }
240
241 struct json *
242 json_object_create(void)
243 {
244 struct json *json = json_create(JSON_OBJECT);
245 json->u.object = xmalloc(sizeof *json->u.object);
246 shash_init(json->u.object);
247 return json;
248 }
249
250 struct json *
251 json_integer_create(long long int integer)
252 {
253 struct json *json = json_create(JSON_INTEGER);
254 json->u.integer = integer;
255 return json;
256 }
257
258 struct json *
259 json_real_create(double real)
260 {
261 struct json *json = json_create(JSON_REAL);
262 json->u.real = real;
263 return json;
264 }
265
266 void
267 json_object_put(struct json *json, const char *name, struct json *value)
268 {
269 shash_add(json->u.object, name, value);
270 }
271
272 void
273 json_object_put_string(struct json *json, const char *name, const char *value)
274 {
275 json_object_put(json, name, json_string_create(value));
276 }
277
278 const char *
279 json_string(const struct json *json)
280 {
281 assert(json->type == JSON_STRING);
282 return json->u.string;
283 }
284
285 struct json_array *
286 json_array(const struct json *json)
287 {
288 assert(json->type == JSON_ARRAY);
289 return (struct json_array *) &json->u.array;
290 }
291
292 struct shash *
293 json_object(const struct json *json)
294 {
295 assert(json->type == JSON_OBJECT);
296 return (struct shash *) json->u.object;
297 }
298
299 bool
300 json_boolean(const struct json *json)
301 {
302 assert(json->type == JSON_TRUE || json->type == JSON_FALSE);
303 return json->type == JSON_TRUE;
304 }
305
306 double
307 json_real(const struct json *json)
308 {
309 assert(json->type == JSON_REAL || json->type == JSON_INTEGER);
310 return json->type == JSON_REAL ? json->u.real : json->u.integer;
311 }
312
313 int64_t
314 json_integer(const struct json *json)
315 {
316 assert(json->type == JSON_INTEGER);
317 return json->u.integer;
318 }
319 \f
320 static void json_destroy_object(struct shash *object);
321 static void json_destroy_array(struct json_array *array);
322
323 /* Frees 'json' and everything it points to, recursively. */
324 void
325 json_destroy(struct json *json)
326 {
327 if (json) {
328 switch (json->type) {
329 case JSON_OBJECT:
330 json_destroy_object(json->u.object);
331 break;
332
333 case JSON_ARRAY:
334 json_destroy_array(&json->u.array);
335 break;
336
337 case JSON_STRING:
338 free(json->u.string);
339 break;
340
341 case JSON_NULL:
342 case JSON_FALSE:
343 case JSON_TRUE:
344 case JSON_INTEGER:
345 case JSON_REAL:
346 break;
347
348 case JSON_N_TYPES:
349 NOT_REACHED();
350 }
351 free(json);
352 }
353 }
354
355 static void
356 json_destroy_object(struct shash *object)
357 {
358 struct shash_node *node, *next;
359
360 SHASH_FOR_EACH_SAFE (node, next, object) {
361 struct json *value = node->data;
362
363 json_destroy(value);
364 shash_delete(object, node);
365 }
366 shash_destroy(object);
367 free(object);
368 }
369
370 static void
371 json_destroy_array(struct json_array *array)
372 {
373 size_t i;
374
375 for (i = 0; i < array->n; i++) {
376 json_destroy(array->elems[i]);
377 }
378 free(array->elems);
379 }
380 \f
381 static struct json *json_clone_object(const struct shash *object);
382 static struct json *json_clone_array(const struct json_array *array);
383
384 /* Returns a deep copy of 'json'. */
385 struct json *
386 json_clone(const struct json *json)
387 {
388 switch (json->type) {
389 case JSON_OBJECT:
390 return json_clone_object(json->u.object);
391
392 case JSON_ARRAY:
393 return json_clone_array(&json->u.array);
394
395 case JSON_STRING:
396 return json_string_create(json->u.string);
397
398 case JSON_NULL:
399 case JSON_FALSE:
400 case JSON_TRUE:
401 return json_create(json->type);
402
403 case JSON_INTEGER:
404 return json_integer_create(json->u.integer);
405
406 case JSON_REAL:
407 return json_real_create(json->u.real);
408
409 case JSON_N_TYPES:
410 default:
411 NOT_REACHED();
412 }
413 }
414
415 static struct json *
416 json_clone_object(const struct shash *object)
417 {
418 struct shash_node *node;
419 struct json *json;
420
421 json = json_object_create();
422 SHASH_FOR_EACH (node, object) {
423 struct json *value = node->data;
424 json_object_put(json, node->name, json_clone(value));
425 }
426 return json;
427 }
428
429 static struct json *
430 json_clone_array(const struct json_array *array)
431 {
432 struct json **elems;
433 size_t i;
434
435 elems = xmalloc(array->n * sizeof *elems);
436 for (i = 0; i < array->n; i++) {
437 elems[i] = json_clone(array->elems[i]);
438 }
439 return json_array_create(elems, array->n);
440 }
441 \f
442 static size_t
443 json_hash_object(const struct shash *object, size_t basis)
444 {
445 const struct shash_node **nodes;
446 size_t n, i;
447
448 nodes = shash_sort(object);
449 n = shash_count(object);
450 for (i = 0; i < n; i++) {
451 const struct shash_node *node = nodes[i];
452 basis = hash_string(node->name, basis);
453 basis = json_hash(node->data, basis);
454 }
455 return basis;
456 }
457
458 static size_t
459 json_hash_array(const struct json_array *array, size_t basis)
460 {
461 size_t i;
462
463 basis = hash_int(array->n, basis);
464 for (i = 0; i < array->n; i++) {
465 basis = json_hash(array->elems[i], basis);
466 }
467 return basis;
468 }
469
470 size_t
471 json_hash(const struct json *json, size_t basis)
472 {
473 switch (json->type) {
474 case JSON_OBJECT:
475 return json_hash_object(json->u.object, basis);
476
477 case JSON_ARRAY:
478 return json_hash_array(&json->u.array, basis);
479
480 case JSON_STRING:
481 return hash_string(json->u.string, basis);
482
483 case JSON_NULL:
484 case JSON_FALSE:
485 case JSON_TRUE:
486 return hash_int(json->type << 8, basis);
487
488 case JSON_INTEGER:
489 return hash_int(json->u.integer, basis);
490
491 case JSON_REAL:
492 return hash_double(json->u.real, basis);
493
494 case JSON_N_TYPES:
495 default:
496 NOT_REACHED();
497 }
498 }
499
500 static bool
501 json_equal_object(const struct shash *a, const struct shash *b)
502 {
503 struct shash_node *a_node;
504
505 if (shash_count(a) != shash_count(b)) {
506 return false;
507 }
508
509 SHASH_FOR_EACH (a_node, a) {
510 struct shash_node *b_node = shash_find(b, a_node->name);
511 if (!b_node || !json_equal(a_node->data, b_node->data)) {
512 return false;
513 }
514 }
515
516 return true;
517 }
518
519 static bool
520 json_equal_array(const struct json_array *a, const struct json_array *b)
521 {
522 size_t i;
523
524 if (a->n != b->n) {
525 return false;
526 }
527
528 for (i = 0; i < a->n; i++) {
529 if (!json_equal(a->elems[i], b->elems[i])) {
530 return false;
531 }
532 }
533
534 return true;
535 }
536
537 bool
538 json_equal(const struct json *a, const struct json *b)
539 {
540 if (a->type != b->type) {
541 return false;
542 }
543
544 switch (a->type) {
545 case JSON_OBJECT:
546 return json_equal_object(a->u.object, b->u.object);
547
548 case JSON_ARRAY:
549 return json_equal_array(&a->u.array, &b->u.array);
550
551 case JSON_STRING:
552 return !strcmp(a->u.string, b->u.string);
553
554 case JSON_NULL:
555 case JSON_FALSE:
556 case JSON_TRUE:
557 return true;
558
559 case JSON_INTEGER:
560 return a->u.integer == b->u.integer;
561
562 case JSON_REAL:
563 return a->u.real == b->u.real;
564
565 case JSON_N_TYPES:
566 default:
567 NOT_REACHED();
568 }
569 }
570 \f
571 /* Lexical analysis. */
572
573 static void
574 json_lex_keyword(struct json_parser *p)
575 {
576 struct json_token token;
577 const char *s;
578
579 s = ds_cstr(&p->buffer);
580 if (!strcmp(s, "false")) {
581 token.type = T_FALSE;
582 } else if (!strcmp(s, "true")) {
583 token.type = T_TRUE;
584 } else if (!strcmp(s, "null")) {
585 token.type = T_NULL;
586 } else {
587 json_error(p, "invalid keyword '%s'", s);
588 return;
589 }
590 json_parser_input(p, &token);
591 }
592
593 static void
594 json_lex_number(struct json_parser *p)
595 {
596 const char *cp = ds_cstr(&p->buffer);
597 unsigned long long int significand = 0;
598 int sig_digits = 0;
599 bool imprecise = false;
600 bool negative = false;
601 int pow10 = 0;
602
603 /* Leading minus sign. */
604 if (*cp == '-') {
605 negative = true;
606 cp++;
607 }
608
609 /* At least one integer digit, but 0 may not be used as a leading digit for
610 * a longer number. */
611 significand = 0;
612 sig_digits = 0;
613 if (*cp == '0') {
614 cp++;
615 if (isdigit(*cp)) {
616 json_error(p, "leading zeros not allowed");
617 return;
618 }
619 } else if (isdigit(*cp)) {
620 do {
621 if (significand <= ULLONG_MAX / 10) {
622 significand = significand * 10 + (*cp - '0');
623 sig_digits++;
624 } else {
625 pow10++;
626 if (*cp != '0') {
627 imprecise = true;
628 }
629 }
630 cp++;
631 } while (isdigit(*cp));
632 } else {
633 json_error(p, "'-' must be followed by digit");
634 return;
635 }
636
637 /* Optional fraction. */
638 if (*cp == '.') {
639 cp++;
640 if (!isdigit(*cp)) {
641 json_error(p, "decimal point must be followed by digit");
642 return;
643 }
644 do {
645 if (significand <= ULLONG_MAX / 10) {
646 significand = significand * 10 + (*cp - '0');
647 sig_digits++;
648 pow10--;
649 } else if (*cp != '0') {
650 imprecise = true;
651 }
652 cp++;
653 } while (isdigit(*cp));
654 }
655
656 /* Optional exponent. */
657 if (*cp == 'e' || *cp == 'E') {
658 bool negative_exponent = false;
659 int exponent;
660
661 cp++;
662 if (*cp == '+') {
663 cp++;
664 } else if (*cp == '-') {
665 negative_exponent = true;
666 cp++;
667 }
668
669 if (!isdigit(*cp)) {
670 json_error(p, "exponent must contain at least one digit");
671 return;
672 }
673
674 exponent = 0;
675 do {
676 if (exponent >= INT_MAX / 10) {
677 json_error(p, "exponent outside valid range");
678 return;
679 }
680 exponent = exponent * 10 + (*cp - '0');
681 cp++;
682 } while (isdigit(*cp));
683
684 if (negative_exponent) {
685 pow10 -= exponent;
686 } else {
687 pow10 += exponent;
688 }
689 }
690
691 if (*cp != '\0') {
692 json_error(p, "syntax error in number");
693 return;
694 }
695
696 /* Figure out number.
697 *
698 * We suppress negative zeros as a matter of policy. */
699 if (!significand) {
700 struct json_token token;
701 token.type = T_INTEGER;
702 token.u.integer = 0;
703 json_parser_input(p, &token);
704 return;
705 }
706
707 if (!imprecise) {
708 while (pow10 > 0 && significand < ULLONG_MAX / 10) {
709 significand *= 10;
710 sig_digits++;
711 pow10--;
712 }
713 while (pow10 < 0 && significand % 10 == 0) {
714 significand /= 10;
715 sig_digits--;
716 pow10++;
717 }
718 if (pow10 == 0
719 && significand <= (negative
720 ? (unsigned long long int) LLONG_MAX + 1
721 : LLONG_MAX)) {
722 struct json_token token;
723 token.type = T_INTEGER;
724 token.u.integer = negative ? -significand : significand;
725 json_parser_input(p, &token);
726 return;
727 }
728 }
729
730 if (pow10 + sig_digits <= DBL_MAX_10_EXP) {
731 struct json_token token;
732 token.type = T_REAL;
733 token.u.real = significand * pow(10.0, pow10);
734 if (token.u.real <= DBL_MAX) {
735 if (negative && token.u.real) {
736 token.u.real = -token.u.real;
737 }
738 json_parser_input(p, &token);
739 return;
740 }
741 }
742 json_error(p, "number outside valid range");
743 }
744
745 static bool
746 json_lex_4hex(struct json_parser *p, const char *cp, int *valuep)
747 {
748 int value, i;
749
750 value = 0;
751 for (i = 0; i < 4; i++) {
752 unsigned char c = *cp++;
753 if (!isxdigit(c)) {
754 json_error(p, "malformed \\u escape");
755 return false;
756 }
757 value = (value << 4) | hexit_value(c);
758 }
759 if (!value) {
760 json_error(p, "null bytes not supported in quoted strings");
761 return false;
762 }
763 *valuep = value;
764 return true;
765 }
766
767 static const char *
768 json_lex_unicode(struct json_parser *p, const char *cp, struct ds *s)
769 {
770 int c0, c1;
771
772 if (!json_lex_4hex(p, cp, &c0)) {
773 return NULL;
774 }
775 cp += 4;
776 if (!uc_is_leading_surrogate(c0)) {
777 ds_put_utf8(s, c0);
778 return cp;
779 }
780
781 if (*cp++ != '\\' || *cp++ != 'u') {
782 json_error(p, "malformed escaped surrogate pair");
783 return NULL;
784 }
785
786 if (!json_lex_4hex(p, cp, &c1)) {
787 return NULL;
788 }
789 cp += 4;
790 if (!uc_is_trailing_surrogate(c1)) {
791 json_error(p, "second half of escaped surrogate pair is not "
792 "trailing surrogate");
793 return NULL;
794 }
795
796 ds_put_utf8(s, utf16_decode_surrogate_pair(c0, c1));
797 return cp;
798 }
799
800 static void
801 json_lex_string(struct json_parser *p)
802 {
803 struct json_token token;
804 const char *cp;
805 struct ds s;
806
807 cp = ds_cstr(&p->buffer);
808 if (!strchr(cp, '\\')) {
809 token.type = T_STRING;
810 token.u.string = cp;
811 json_parser_input(p, &token);
812 return;
813 }
814
815 ds_init(&s);
816 ds_reserve(&s, strlen(cp));
817 while (*cp != '\0') {
818 if (*cp != '\\') {
819 ds_put_char(&s, *cp++);
820 continue;
821 }
822
823 cp++;
824 switch (*cp++) {
825 case '"': case '\\': case '/':
826 ds_put_char(&s, cp[-1]);
827 break;
828
829 case 'b':
830 ds_put_char(&s, '\b');
831 break;
832
833 case 'f':
834 ds_put_char(&s, '\f');
835 break;
836
837 case 'n':
838 ds_put_char(&s, '\n');
839 break;
840
841 case 'r':
842 ds_put_char(&s, '\r');
843 break;
844
845 case 't':
846 ds_put_char(&s, '\t');
847 break;
848
849 case 'u':
850 cp = json_lex_unicode(p, cp, &s);
851 if (!cp) {
852 goto exit;
853 }
854 break;
855
856 default:
857 json_error(p, "bad escape \\%c", cp[-1]);
858 goto exit;
859 }
860 }
861
862 token.type = T_STRING;
863 token.u.string = ds_cstr(&s);
864 json_parser_input(p, &token);
865
866 exit:
867 ds_destroy(&s);
868 return;
869 }
870
871 static bool
872 json_lex_input(struct json_parser *p, int c)
873 {
874 struct json_token token;
875
876 switch (p->lex_state) {
877 case JSON_LEX_START:
878 switch (c) {
879 case ' ': case '\t': case '\n': case '\r':
880 /* Nothing to do. */
881 return true;
882
883 case 'a': case 'b': case 'c': case 'd': case 'e':
884 case 'f': case 'g': case 'h': case 'i': case 'j':
885 case 'k': case 'l': case 'm': case 'n': case 'o':
886 case 'p': case 'q': case 'r': case 's': case 't':
887 case 'u': case 'v': case 'w': case 'x': case 'y':
888 case 'z':
889 p->lex_state = JSON_LEX_KEYWORD;
890 break;
891
892 case '[': case '{': case ']': case '}': case ':': case ',':
893 token.type = c;
894 json_parser_input(p, &token);
895 return true;
896
897 case '-':
898 case '0': case '1': case '2': case '3': case '4':
899 case '5': case '6': case '7': case '8': case '9':
900 p->lex_state = JSON_LEX_NUMBER;
901 break;
902
903 case '"':
904 p->lex_state = JSON_LEX_STRING;
905 return true;
906
907 default:
908 if (isprint(c)) {
909 json_error(p, "invalid character '%c'", c);
910 } else {
911 json_error(p, "invalid character U+%04x", c);
912 }
913 return true;
914 }
915 break;
916
917 case JSON_LEX_KEYWORD:
918 if (!isalpha((unsigned char) c)) {
919 json_lex_keyword(p);
920 return false;
921 }
922 break;
923
924 case JSON_LEX_NUMBER:
925 if (!strchr(".0123456789eE-+", c)) {
926 json_lex_number(p);
927 return false;
928 }
929 break;
930
931 case JSON_LEX_STRING:
932 if (c == '\\') {
933 p->lex_state = JSON_LEX_ESCAPE;
934 } else if (c == '"') {
935 json_lex_string(p);
936 return true;
937 } else if (c < 0x20) {
938 json_error(p, "U+%04X must be escaped in quoted string", c);
939 return true;
940 }
941 break;
942
943 case JSON_LEX_ESCAPE:
944 p->lex_state = JSON_LEX_STRING;
945 break;
946
947 default:
948 abort();
949 }
950 ds_put_char(&p->buffer, c);
951 return true;
952 }
953 \f
954 /* Parsing. */
955
956 /* Parses 'string' as a JSON object or array and returns a newly allocated
957 * 'struct json'. The caller must free the returned structure with
958 * json_destroy() when it is no longer needed.
959 *
960 * 'string' must be encoded in UTF-8.
961 *
962 * If 'string' is valid JSON, then the returned 'struct json' will be either an
963 * object (JSON_OBJECT) or an array (JSON_ARRAY).
964 *
965 * If 'string' is not valid JSON, then the returned 'struct json' will be a
966 * string (JSON_STRING) that describes the particular error encountered during
967 * parsing. (This is an acceptable means of error reporting because at its top
968 * level JSON must be either an object or an array; a bare string is not
969 * valid.) */
970 struct json *
971 json_from_string(const char *string)
972 {
973 struct json_parser *p = json_parser_create(JSPF_TRAILER);
974 json_parser_feed(p, string, strlen(string));
975 return json_parser_finish(p);
976 }
977
978 /* Reads the file named 'file_name', parses its contents as a JSON object or
979 * array, and returns a newly allocated 'struct json'. The caller must free
980 * the returned structure with json_destroy() when it is no longer needed.
981 *
982 * The file must be encoded in UTF-8.
983 *
984 * See json_from_string() for return value semantics.
985 */
986 struct json *
987 json_from_file(const char *file_name)
988 {
989 struct json_parser *p;
990 struct json *json;
991 FILE *stream;
992
993 /* Open file. */
994 stream = fopen(file_name, "r");
995 if (!stream) {
996 return json_string_create_nocopy(
997 xasprintf("error opening \"%s\": %s", file_name, strerror(errno)));
998 }
999
1000 /* Read and parse file. */
1001 p = json_parser_create(JSPF_TRAILER);
1002 for (;;) {
1003 char buffer[BUFSIZ];
1004 size_t n;
1005
1006 n = fread(buffer, 1, sizeof buffer, stream);
1007 if (!n || json_parser_feed(p, buffer, n) != n) {
1008 break;
1009 }
1010 }
1011 json = json_parser_finish(p);
1012
1013 /* Close file and check for I/O errors. */
1014 if (ferror(stream)) {
1015 json_destroy(json);
1016 json = json_string_create_nocopy(
1017 xasprintf("error reading \"%s\": %s", file_name, strerror(errno)));
1018 }
1019 fclose(stream);
1020
1021 return json;
1022 }
1023
1024 struct json_parser *
1025 json_parser_create(int flags)
1026 {
1027 struct json_parser *p = xzalloc(sizeof *p);
1028 p->flags = flags;
1029 return p;
1030 }
1031
1032 size_t
1033 json_parser_feed(struct json_parser *p, const char *input, size_t n)
1034 {
1035 size_t i;
1036 for (i = 0; !p->done && i < n; ) {
1037 if (json_lex_input(p, input[i])) {
1038 i++;
1039 }
1040 }
1041 return i;
1042 }
1043
1044 bool
1045 json_parser_is_done(const struct json_parser *p)
1046 {
1047 return p->done;
1048 }
1049
1050 struct json *
1051 json_parser_finish(struct json_parser *p)
1052 {
1053 struct json *json;
1054
1055 switch (p->lex_state) {
1056 case JSON_LEX_START:
1057 break;
1058
1059 case JSON_LEX_STRING:
1060 case JSON_LEX_ESCAPE:
1061 json_error(p, "unexpected end of input in quoted string");
1062 break;
1063
1064 case JSON_LEX_NUMBER:
1065 case JSON_LEX_KEYWORD:
1066 json_lex_input(p, ' ');
1067 break;
1068 }
1069
1070 if (p->parse_state == JSON_PARSE_START) {
1071 json_error(p, "empty input stream");
1072 } else if (p->parse_state != JSON_PARSE_END) {
1073 json_error(p, "unexpected end of input");
1074 }
1075
1076 if (!p->error) {
1077 assert(p->height == 1);
1078 assert(p->stack[0].json != NULL);
1079 json = p->stack[--p->height].json;
1080 } else {
1081 json = json_string_create_nocopy(p->error);
1082 p->error = NULL;
1083 }
1084
1085 json_parser_abort(p);
1086
1087 return json;
1088 }
1089
1090 void
1091 json_parser_abort(struct json_parser *p)
1092 {
1093 if (p) {
1094 ds_destroy(&p->buffer);
1095 if (p->height) {
1096 json_destroy(p->stack[0].json);
1097 }
1098 free(p->stack);
1099 free(p->member_name);
1100 free(p->error);
1101 free(p);
1102 }
1103 }
1104
1105 static struct json_parser_node *
1106 json_parser_top(struct json_parser *p)
1107 {
1108 return &p->stack[p->height - 1];
1109 }
1110
1111 static void
1112 json_parser_put_value(struct json_parser *p, struct json *value)
1113 {
1114 struct json_parser_node *node = json_parser_top(p);
1115 if (node->json->type == JSON_OBJECT) {
1116 json_object_put(node->json, p->member_name, value);
1117 free(p->member_name);
1118 p->member_name = NULL;
1119 } else if (node->json->type == JSON_ARRAY) {
1120 json_array_add(node->json, value);
1121 } else {
1122 NOT_REACHED();
1123 }
1124 }
1125
1126 static struct json_parser_node *
1127 json_parser_push(struct json_parser *p,
1128 struct json *new_json, enum json_parse_state new_state)
1129 {
1130 if (p->height < JSON_MAX_HEIGHT) {
1131 struct json_parser_node *node;
1132
1133 if (p->height >= p->allocated_height) {
1134 p->stack = x2nrealloc(p->stack, &p->allocated_height,
1135 sizeof *p->stack);
1136 }
1137
1138 if (p->height > 0) {
1139 json_parser_put_value(p, new_json);
1140 }
1141
1142 node = &p->stack[p->height++];
1143 node->json = new_json;
1144 p->parse_state = new_state;
1145 return node;
1146 } else {
1147 json_error(p, "input exceeds maximum nesting depth %d",
1148 JSON_MAX_HEIGHT);
1149 return NULL;
1150 }
1151 }
1152
1153 static void
1154 json_parser_push_object(struct json_parser *p)
1155 {
1156 json_parser_push(p, json_object_create(), JSON_PARSE_OBJECT_INIT);
1157 }
1158
1159 static void
1160 json_parser_push_array(struct json_parser *p)
1161 {
1162 json_parser_push(p, json_array_create_empty(), JSON_PARSE_ARRAY_INIT);
1163 }
1164
1165 static void
1166 json_parse_value(struct json_parser *p, struct json_token *token,
1167 enum json_parse_state next_state)
1168 {
1169 struct json *value;
1170
1171 switch (token->type) {
1172 case T_FALSE:
1173 value = json_boolean_create(false);
1174 break;
1175
1176 case T_NULL:
1177 value = json_null_create();
1178 break;
1179
1180 case T_TRUE:
1181 value = json_boolean_create(true);
1182 break;
1183
1184 case '{':
1185 json_parser_push_object(p);
1186 return;
1187
1188 case '[':
1189 json_parser_push_array(p);
1190 return;
1191
1192 case T_INTEGER:
1193 value = json_integer_create(token->u.integer);
1194 break;
1195
1196 case T_REAL:
1197 value = json_real_create(token->u.real);
1198 break;
1199
1200 case T_STRING:
1201 value = json_string_create(token->u.string);
1202 break;
1203
1204 case T_EOF:
1205 case '}':
1206 case ']':
1207 case ':':
1208 case ',':
1209 default:
1210 json_error(p, "syntax error expecting value");
1211 return;
1212 }
1213
1214 json_parser_put_value(p, value);
1215 p->parse_state = next_state;
1216 }
1217
1218 static void
1219 json_parser_pop(struct json_parser *p)
1220 {
1221 struct json_parser_node *node;
1222
1223 /* Conserve memory. */
1224 node = json_parser_top(p);
1225 if (node->json->type == JSON_ARRAY) {
1226 json_array_trim(node->json);
1227 }
1228
1229 /* Pop off the top-of-stack. */
1230 if (p->height == 1) {
1231 p->parse_state = JSON_PARSE_END;
1232 if (!(p->flags & JSPF_TRAILER)) {
1233 p->done = true;
1234 }
1235 } else {
1236 p->height--;
1237 node = json_parser_top(p);
1238 if (node->json->type == JSON_ARRAY) {
1239 p->parse_state = JSON_PARSE_ARRAY_NEXT;
1240 } else if (node->json->type == JSON_OBJECT) {
1241 p->parse_state = JSON_PARSE_OBJECT_NEXT;
1242 } else {
1243 NOT_REACHED();
1244 }
1245 }
1246 }
1247
1248 static void
1249 json_parser_input(struct json_parser *p, struct json_token *token)
1250 {
1251 switch (p->parse_state) {
1252 case JSON_PARSE_START:
1253 if (token->type == '{') {
1254 json_parser_push_object(p);
1255 } else if (token->type == '[') {
1256 json_parser_push_array(p);
1257 } else {
1258 json_error(p, "syntax error at beginning of input");
1259 }
1260 break;
1261
1262 case JSON_PARSE_END:
1263 json_error(p, "trailing garbage at end of input");
1264 break;
1265
1266 case JSON_PARSE_OBJECT_INIT:
1267 if (token->type == '}') {
1268 json_parser_pop(p);
1269 break;
1270 }
1271 /* Fall through. */
1272 case JSON_PARSE_OBJECT_NAME:
1273 if (token->type == T_STRING) {
1274 p->member_name = xstrdup(token->u.string);
1275 p->parse_state = JSON_PARSE_OBJECT_COLON;
1276 } else {
1277 json_error(p, "syntax error parsing object expecting string");
1278 }
1279 break;
1280
1281 case JSON_PARSE_OBJECT_COLON:
1282 if (token->type == ':') {
1283 p->parse_state = JSON_PARSE_OBJECT_VALUE;
1284 } else {
1285 json_error(p, "syntax error parsing object expecting ':'");
1286 }
1287 break;
1288
1289 case JSON_PARSE_OBJECT_VALUE:
1290 json_parse_value(p, token, JSON_PARSE_OBJECT_NEXT);
1291 break;
1292
1293 case JSON_PARSE_OBJECT_NEXT:
1294 if (token->type == ',') {
1295 p->parse_state = JSON_PARSE_OBJECT_NAME;
1296 } else if (token->type == '}') {
1297 json_parser_pop(p);
1298 } else {
1299 json_error(p, "syntax error expecting '}' or ','");
1300 }
1301 break;
1302
1303 case JSON_PARSE_ARRAY_INIT:
1304 if (token->type == ']') {
1305 json_parser_pop(p);
1306 break;
1307 }
1308 /* Fall through. */
1309 case JSON_PARSE_ARRAY_VALUE:
1310 json_parse_value(p, token, JSON_PARSE_ARRAY_NEXT);
1311 break;
1312
1313 case JSON_PARSE_ARRAY_NEXT:
1314 if (token->type == ',') {
1315 p->parse_state = JSON_PARSE_ARRAY_VALUE;
1316 } else if (token->type == ']') {
1317 json_parser_pop(p);
1318 } else {
1319 json_error(p, "syntax error expecting ']' or ','");
1320 }
1321 break;
1322
1323 default:
1324 abort();
1325 }
1326
1327 p->lex_state = JSON_LEX_START;
1328 ds_clear(&p->buffer);
1329 }
1330
1331 static struct json *
1332 json_create(enum json_type type)
1333 {
1334 struct json *json = xmalloc(sizeof *json);
1335 json->type = type;
1336 return json;
1337 }
1338
1339 static void
1340 json_error(struct json_parser *p, const char *format, ...)
1341 {
1342 if (!p->error) {
1343 va_list args;
1344
1345 va_start(args, format);
1346 p->error = xvasprintf(format, args);
1347 va_end(args);
1348
1349 p->done = true;
1350 }
1351 }
1352 \f
1353 #define SPACES_PER_LEVEL 2
1354
1355 struct json_serializer {
1356 struct ds ds;
1357 int depth;
1358 int flags;
1359 };
1360
1361 static void json_to_ds(const struct json *, struct json_serializer *);
1362 static void json_object_to_ds(const struct shash *object,
1363 struct json_serializer *);
1364 static void json_array_to_ds(const struct json_array *,
1365 struct json_serializer *);
1366 static void json_string_to_ds(const char *string, struct ds *);
1367
1368 /* Converts 'json' to a string in JSON format, encoded in UTF-8, and returns
1369 * that string. The caller is responsible for freeing the returned string,
1370 * with free(), when it is no longer needed.
1371 *
1372 * If 'flags' contains JSSF_PRETTY, the output is pretty-printed with each
1373 * nesting level introducing an additional indentation. Otherwise, the
1374 * returned string does not contain any new-line characters.
1375 *
1376 * If 'flags' contains JSSF_SORT, members of objects in the output are sorted
1377 * in bytewise lexicographic order for reproducibility. Otherwise, members of
1378 * objects are output in an indeterminate order.
1379 *
1380 * The returned string is valid JSON only if 'json' represents an array or an
1381 * object, since a bare literal does not satisfy the JSON grammar. */
1382 char *
1383 json_to_string(const struct json *json, int flags)
1384 {
1385 struct json_serializer s;
1386 ds_init(&s.ds);
1387 s.depth = 0;
1388 s.flags = flags;
1389 json_to_ds(json, &s);
1390 return ds_steal_cstr(&s.ds);
1391 }
1392
1393 static void
1394 json_to_ds(const struct json *json, struct json_serializer *s)
1395 {
1396 struct ds *ds = &s->ds;
1397
1398 switch (json->type) {
1399 case JSON_NULL:
1400 ds_put_cstr(ds, "null");
1401 break;
1402
1403 case JSON_FALSE:
1404 ds_put_cstr(ds, "false");
1405 break;
1406
1407 case JSON_TRUE:
1408 ds_put_cstr(ds, "true");
1409 break;
1410
1411 case JSON_OBJECT:
1412 json_object_to_ds(json->u.object, s);
1413 break;
1414
1415 case JSON_ARRAY:
1416 json_array_to_ds(&json->u.array, s);
1417 break;
1418
1419 case JSON_INTEGER:
1420 ds_put_format(ds, "%lld", json->u.integer);
1421 break;
1422
1423 case JSON_REAL:
1424 ds_put_format(ds, "%.*g", DBL_DIG, json->u.real);
1425 break;
1426
1427 case JSON_STRING:
1428 json_string_to_ds(json->u.string, ds);
1429 break;
1430
1431 case JSON_N_TYPES:
1432 default:
1433 NOT_REACHED();
1434 }
1435 }
1436
1437 static void
1438 indent_line(struct json_serializer *s)
1439 {
1440 if (s->flags & JSSF_PRETTY) {
1441 ds_put_char(&s->ds, '\n');
1442 ds_put_char_multiple(&s->ds, ' ', SPACES_PER_LEVEL * s->depth);
1443 }
1444 }
1445
1446 static void
1447 json_object_member_to_ds(size_t i, const struct shash_node *node,
1448 struct json_serializer *s)
1449 {
1450 struct ds *ds = &s->ds;
1451
1452 if (i) {
1453 ds_put_char(ds, ',');
1454 indent_line(s);
1455 }
1456
1457 json_string_to_ds(node->name, ds);
1458 ds_put_char(ds, ':');
1459 if (s->flags & JSSF_PRETTY) {
1460 ds_put_char(ds, ' ');
1461 }
1462 json_to_ds(node->data, s);
1463 }
1464
1465 static void
1466 json_object_to_ds(const struct shash *object, struct json_serializer *s)
1467 {
1468 struct ds *ds = &s->ds;
1469
1470 ds_put_char(ds, '{');
1471
1472 s->depth++;
1473 indent_line(s);
1474
1475 if (s->flags & JSSF_SORT) {
1476 const struct shash_node **nodes;
1477 size_t n, i;
1478
1479 nodes = shash_sort(object);
1480 n = shash_count(object);
1481 for (i = 0; i < n; i++) {
1482 json_object_member_to_ds(i, nodes[i], s);
1483 }
1484 free(nodes);
1485 } else {
1486 struct shash_node *node;
1487 size_t i;
1488
1489 i = 0;
1490 SHASH_FOR_EACH (node, object) {
1491 json_object_member_to_ds(i++, node, s);
1492 }
1493 }
1494
1495 ds_put_char(ds, '}');
1496 s->depth--;
1497 }
1498
1499 static void
1500 json_array_to_ds(const struct json_array *array, struct json_serializer *s)
1501 {
1502 struct ds *ds = &s->ds;
1503 size_t i;
1504
1505 ds_put_char(ds, '[');
1506 s->depth++;
1507
1508 if (array->n > 0) {
1509 indent_line(s);
1510
1511 for (i = 0; i < array->n; i++) {
1512 if (i) {
1513 ds_put_char(ds, ',');
1514 indent_line(s);
1515 }
1516 json_to_ds(array->elems[i], s);
1517 }
1518 }
1519
1520 s->depth--;
1521 ds_put_char(ds, ']');
1522 }
1523
1524 static void
1525 json_string_to_ds(const char *string, struct ds *ds)
1526 {
1527 uint8_t c;
1528
1529 ds_put_char(ds, '"');
1530 while ((c = *string++) != '\0') {
1531 switch (c) {
1532 case '"':
1533 ds_put_cstr(ds, "\\\"");
1534 break;
1535
1536 case '\\':
1537 ds_put_cstr(ds, "\\\\");
1538 break;
1539
1540 case '\b':
1541 ds_put_cstr(ds, "\\b");
1542 break;
1543
1544 case '\f':
1545 ds_put_cstr(ds, "\\f");
1546 break;
1547
1548 case '\n':
1549 ds_put_cstr(ds, "\\n");
1550 break;
1551
1552 case '\r':
1553 ds_put_cstr(ds, "\\r");
1554 break;
1555
1556 case '\t':
1557 ds_put_cstr(ds, "\\t");
1558 break;
1559
1560 default:
1561 if (c >= 32) {
1562 ds_put_char(ds, c);
1563 } else {
1564 ds_put_format(ds, "\\u%04x", c);
1565 }
1566 break;
1567 }
1568 }
1569 ds_put_char(ds, '"');
1570 }