]> git.proxmox.com Git - mirror_ovs.git/blob - lib/json.c
json: New function json_serialized_length().
[mirror_ovs.git] / lib / json.c
1 /*
2 * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18
19 #include "json.h"
20
21 #include <ctype.h>
22 #include <errno.h>
23 #include <float.h>
24 #include <limits.h>
25 #include <string.h>
26
27 #include "dynamic-string.h"
28 #include "hash.h"
29 #include "shash.h"
30 #include "unicode.h"
31 #include "util.h"
32
/* Kinds of token that the lexer hands to the parser.
 *
 * Each punctuation token is numbered with its own character code.  The
 * remaining kinds start at UCHAR_MAX + 1, above every single-byte character
 * value. */
enum json_token_type {
    T_EOF = 0,
    T_BEGIN_ARRAY = '[',
    T_END_ARRAY = ']',
    T_BEGIN_OBJECT = '{',
    T_END_OBJECT = '}',
    T_NAME_SEPARATOR = ':',
    T_VALUE_SEPARATOR = ',',
    T_FALSE = UCHAR_MAX + 1,
    T_NULL,
    T_TRUE,
    T_INTEGER,
    T_REAL,
    T_STRING
};
49
50 /* A JSON token.
51 *
52 * RFC 4627 doesn't define a lexical structure for JSON but I believe this to
53 * be compliant with the standard.
54 */
55 struct json_token {
56 enum json_token_type type;
57 union {
58 double real;
59 long long int integer;
60 const char *string;
61 } u;
62 };
63
/* What the lexer is currently in the middle of. */
enum json_lex_state {
    JSON_LEX_START,             /* Between tokens. */
    JSON_LEX_NUMBER,            /* Accumulating a number. */
    JSON_LEX_KEYWORD,           /* Accumulating a keyword. */
    JSON_LEX_STRING,            /* Inside a quoted string. */
    JSON_LEX_ESCAPE             /* Inside a quoted string, right after "\". */
};
71
/* What the parser expects the next token to be. */
enum json_parse_state {
    JSON_PARSE_START,           /* Nothing parsed yet. */
    JSON_PARSE_END,             /* Top-level value is complete. */

    /* Inside an object. */
    JSON_PARSE_OBJECT_INIT,     /* Want '}' or a member name. */
    JSON_PARSE_OBJECT_NAME,     /* Want a member name. */
    JSON_PARSE_OBJECT_COLON,    /* Want ':'. */
    JSON_PARSE_OBJECT_VALUE,    /* Want a member value. */
    JSON_PARSE_OBJECT_NEXT,     /* Want ',' or '}'. */

    /* Inside an array. */
    JSON_PARSE_ARRAY_INIT,      /* Want ']' or a value. */
    JSON_PARSE_ARRAY_VALUE,     /* Want a value. */
    JSON_PARSE_ARRAY_NEXT       /* Want ',' or ']'. */
};
88
/* One level of the parser's nesting stack. */
struct json_parser_node {
    struct json *json;          /* Object or array open at this level. */
};
92
93 /* A JSON parser. */
94 struct json_parser {
95 int flags;
96
97 /* Lexical analysis. */
98 enum json_lex_state lex_state;
99 struct ds buffer; /* Buffer for accumulating token text. */
100 int line_number;
101 int column_number;
102 int byte_number;
103
104 /* Parsing. */
105 enum json_parse_state parse_state;
106 #define JSON_MAX_HEIGHT 1000
107 struct json_parser_node *stack;
108 size_t height, allocated_height;
109 char *member_name;
110
111 /* Parse status. */
112 bool done;
113 char *error; /* Error message, if any, null if none yet. */
114 };
115
116 static struct json *json_create(enum json_type type);
117 static void json_parser_input(struct json_parser *, struct json_token *);
118
119 static void json_error(struct json_parser *p, const char *format, ...)
120 PRINTF_FORMAT(2, 3);
121 \f
122 const char *
123 json_type_to_string(enum json_type type)
124 {
125 switch (type) {
126 case JSON_NULL:
127 return "null";
128
129 case JSON_FALSE:
130 return "false";
131
132 case JSON_TRUE:
133 return "true";
134
135 case JSON_OBJECT:
136 return "object";
137
138 case JSON_ARRAY:
139 return "array";
140
141 case JSON_INTEGER:
142 case JSON_REAL:
143 return "number";
144
145 case JSON_STRING:
146 return "string";
147
148 case JSON_N_TYPES:
149 default:
150 return "<invalid>";
151 }
152 }
153 \f
154 /* Functions for manipulating struct json. */
155
156 struct json *
157 json_null_create(void)
158 {
159 return json_create(JSON_NULL);
160 }
161
162 struct json *
163 json_boolean_create(bool b)
164 {
165 return json_create(b ? JSON_TRUE : JSON_FALSE);
166 }
167
168 struct json *
169 json_string_create_nocopy(char *s)
170 {
171 struct json *json = json_create(JSON_STRING);
172 json->u.string = s;
173 return json;
174 }
175
/* Returns a newly allocated JSON string holding a private copy of 's'. */
struct json *
json_string_create(const char *s)
{
    char *copy = xstrdup(s);

    return json_string_create_nocopy(copy);
}
181
/* Returns a newly allocated JSON array with no elements and no storage
 * reserved for any. */
struct json *
json_array_create_empty(void)
{
    return json_array_create(NULL, 0);
}
191
192 void
193 json_array_add(struct json *array_, struct json *element)
194 {
195 struct json_array *array = json_array(array_);
196 if (array->n >= array->n_allocated) {
197 array->elems = x2nrealloc(array->elems, &array->n_allocated,
198 sizeof *array->elems);
199 }
200 array->elems[array->n++] = element;
201 }
202
203 void
204 json_array_trim(struct json *array_)
205 {
206 struct json_array *array = json_array(array_);
207 if (array->n < array->n_allocated){
208 array->n_allocated = array->n;
209 array->elems = xrealloc(array->elems, array->n * sizeof *array->elems);
210 }
211 }
212
213 struct json *
214 json_array_create(struct json **elements, size_t n)
215 {
216 struct json *json = json_create(JSON_ARRAY);
217 json->u.array.elems = elements;
218 json->u.array.n = n;
219 json->u.array.n_allocated = n;
220 return json;
221 }
222
/* Returns a newly allocated JSON array whose single element is 'elem0'. */
struct json *
json_array_create_1(struct json *elem0)
{
    struct json **storage = xmalloc(sizeof *storage);

    *storage = elem0;
    return json_array_create(storage, 1);
}
230
/* Returns a newly allocated JSON array containing 'elem0' and 'elem1', in
 * that order. */
struct json *
json_array_create_2(struct json *elem0, struct json *elem1)
{
    struct json *members[2];
    struct json **storage;

    members[0] = elem0;
    members[1] = elem1;
    storage = xmalloc(sizeof members);
    memcpy(storage, members, sizeof members);
    return json_array_create(storage, 2);
}
239
/* Returns a newly allocated JSON array containing 'elem0', 'elem1', and
 * 'elem2', in that order. */
struct json *
json_array_create_3(struct json *elem0, struct json *elem1, struct json *elem2)
{
    struct json *members[3];
    struct json **storage;

    members[0] = elem0;
    members[1] = elem1;
    members[2] = elem2;
    storage = xmalloc(sizeof members);
    memcpy(storage, members, sizeof members);
    return json_array_create(storage, 3);
}
249
250 struct json *
251 json_object_create(void)
252 {
253 struct json *json = json_create(JSON_OBJECT);
254 json->u.object = xmalloc(sizeof *json->u.object);
255 shash_init(json->u.object);
256 return json;
257 }
258
259 struct json *
260 json_integer_create(long long int integer)
261 {
262 struct json *json = json_create(JSON_INTEGER);
263 json->u.integer = integer;
264 return json;
265 }
266
267 struct json *
268 json_real_create(double real)
269 {
270 struct json *json = json_create(JSON_REAL);
271 json->u.real = real;
272 return json;
273 }
274
275 void
276 json_object_put(struct json *json, const char *name, struct json *value)
277 {
278 json_destroy(shash_replace(json->u.object, name, value));
279 }
280
/* Sets member 'name' of JSON object 'json' to a new JSON string holding a
 * copy of 'value'. */
void
json_object_put_string(struct json *json, const char *name, const char *value)
{
    struct json *str = json_string_create(value);

    json_object_put(json, name, str);
}
286
287 const char *
288 json_string(const struct json *json)
289 {
290 ovs_assert(json->type == JSON_STRING);
291 return json->u.string;
292 }
293
294 struct json_array *
295 json_array(const struct json *json)
296 {
297 ovs_assert(json->type == JSON_ARRAY);
298 return CONST_CAST(struct json_array *, &json->u.array);
299 }
300
301 struct shash *
302 json_object(const struct json *json)
303 {
304 ovs_assert(json->type == JSON_OBJECT);
305 return CONST_CAST(struct shash *, json->u.object);
306 }
307
308 bool
309 json_boolean(const struct json *json)
310 {
311 ovs_assert(json->type == JSON_TRUE || json->type == JSON_FALSE);
312 return json->type == JSON_TRUE;
313 }
314
315 double
316 json_real(const struct json *json)
317 {
318 ovs_assert(json->type == JSON_REAL || json->type == JSON_INTEGER);
319 return json->type == JSON_REAL ? json->u.real : json->u.integer;
320 }
321
322 int64_t
323 json_integer(const struct json *json)
324 {
325 ovs_assert(json->type == JSON_INTEGER);
326 return json->u.integer;
327 }
328 \f
329 static void json_destroy_object(struct shash *object);
330 static void json_destroy_array(struct json_array *array);
331
332 /* Frees 'json' and everything it points to, recursively. */
333 void
334 json_destroy(struct json *json)
335 {
336 if (json) {
337 switch (json->type) {
338 case JSON_OBJECT:
339 json_destroy_object(json->u.object);
340 break;
341
342 case JSON_ARRAY:
343 json_destroy_array(&json->u.array);
344 break;
345
346 case JSON_STRING:
347 free(json->u.string);
348 break;
349
350 case JSON_NULL:
351 case JSON_FALSE:
352 case JSON_TRUE:
353 case JSON_INTEGER:
354 case JSON_REAL:
355 break;
356
357 case JSON_N_TYPES:
358 NOT_REACHED();
359 }
360 free(json);
361 }
362 }
363
364 static void
365 json_destroy_object(struct shash *object)
366 {
367 struct shash_node *node, *next;
368
369 SHASH_FOR_EACH_SAFE (node, next, object) {
370 struct json *value = node->data;
371
372 json_destroy(value);
373 shash_delete(object, node);
374 }
375 shash_destroy(object);
376 free(object);
377 }
378
379 static void
380 json_destroy_array(struct json_array *array)
381 {
382 size_t i;
383
384 for (i = 0; i < array->n; i++) {
385 json_destroy(array->elems[i]);
386 }
387 free(array->elems);
388 }
389 \f
390 static struct json *json_clone_object(const struct shash *object);
391 static struct json *json_clone_array(const struct json_array *array);
392
393 /* Returns a deep copy of 'json'. */
394 struct json *
395 json_clone(const struct json *json)
396 {
397 switch (json->type) {
398 case JSON_OBJECT:
399 return json_clone_object(json->u.object);
400
401 case JSON_ARRAY:
402 return json_clone_array(&json->u.array);
403
404 case JSON_STRING:
405 return json_string_create(json->u.string);
406
407 case JSON_NULL:
408 case JSON_FALSE:
409 case JSON_TRUE:
410 return json_create(json->type);
411
412 case JSON_INTEGER:
413 return json_integer_create(json->u.integer);
414
415 case JSON_REAL:
416 return json_real_create(json->u.real);
417
418 case JSON_N_TYPES:
419 default:
420 NOT_REACHED();
421 }
422 }
423
424 static struct json *
425 json_clone_object(const struct shash *object)
426 {
427 struct shash_node *node;
428 struct json *json;
429
430 json = json_object_create();
431 SHASH_FOR_EACH (node, object) {
432 struct json *value = node->data;
433 json_object_put(json, node->name, json_clone(value));
434 }
435 return json;
436 }
437
438 static struct json *
439 json_clone_array(const struct json_array *array)
440 {
441 struct json **elems;
442 size_t i;
443
444 elems = xmalloc(array->n * sizeof *elems);
445 for (i = 0; i < array->n; i++) {
446 elems[i] = json_clone(array->elems[i]);
447 }
448 return json_array_create(elems, array->n);
449 }
450 \f
451 static size_t
452 json_hash_object(const struct shash *object, size_t basis)
453 {
454 const struct shash_node **nodes;
455 size_t n, i;
456
457 nodes = shash_sort(object);
458 n = shash_count(object);
459 for (i = 0; i < n; i++) {
460 const struct shash_node *node = nodes[i];
461 basis = hash_string(node->name, basis);
462 basis = json_hash(node->data, basis);
463 }
464 return basis;
465 }
466
467 static size_t
468 json_hash_array(const struct json_array *array, size_t basis)
469 {
470 size_t i;
471
472 basis = hash_int(array->n, basis);
473 for (i = 0; i < array->n; i++) {
474 basis = json_hash(array->elems[i], basis);
475 }
476 return basis;
477 }
478
479 size_t
480 json_hash(const struct json *json, size_t basis)
481 {
482 switch (json->type) {
483 case JSON_OBJECT:
484 return json_hash_object(json->u.object, basis);
485
486 case JSON_ARRAY:
487 return json_hash_array(&json->u.array, basis);
488
489 case JSON_STRING:
490 return hash_string(json->u.string, basis);
491
492 case JSON_NULL:
493 case JSON_FALSE:
494 case JSON_TRUE:
495 return hash_int(json->type << 8, basis);
496
497 case JSON_INTEGER:
498 return hash_int(json->u.integer, basis);
499
500 case JSON_REAL:
501 return hash_double(json->u.real, basis);
502
503 case JSON_N_TYPES:
504 default:
505 NOT_REACHED();
506 }
507 }
508
509 static bool
510 json_equal_object(const struct shash *a, const struct shash *b)
511 {
512 struct shash_node *a_node;
513
514 if (shash_count(a) != shash_count(b)) {
515 return false;
516 }
517
518 SHASH_FOR_EACH (a_node, a) {
519 struct shash_node *b_node = shash_find(b, a_node->name);
520 if (!b_node || !json_equal(a_node->data, b_node->data)) {
521 return false;
522 }
523 }
524
525 return true;
526 }
527
528 static bool
529 json_equal_array(const struct json_array *a, const struct json_array *b)
530 {
531 size_t i;
532
533 if (a->n != b->n) {
534 return false;
535 }
536
537 for (i = 0; i < a->n; i++) {
538 if (!json_equal(a->elems[i], b->elems[i])) {
539 return false;
540 }
541 }
542
543 return true;
544 }
545
546 bool
547 json_equal(const struct json *a, const struct json *b)
548 {
549 if (a->type != b->type) {
550 return false;
551 }
552
553 switch (a->type) {
554 case JSON_OBJECT:
555 return json_equal_object(a->u.object, b->u.object);
556
557 case JSON_ARRAY:
558 return json_equal_array(&a->u.array, &b->u.array);
559
560 case JSON_STRING:
561 return !strcmp(a->u.string, b->u.string);
562
563 case JSON_NULL:
564 case JSON_FALSE:
565 case JSON_TRUE:
566 return true;
567
568 case JSON_INTEGER:
569 return a->u.integer == b->u.integer;
570
571 case JSON_REAL:
572 return a->u.real == b->u.real;
573
574 case JSON_N_TYPES:
575 default:
576 NOT_REACHED();
577 }
578 }
579 \f
580 /* Lexical analysis. */
581
582 static void
583 json_lex_keyword(struct json_parser *p)
584 {
585 struct json_token token;
586 const char *s;
587
588 s = ds_cstr(&p->buffer);
589 if (!strcmp(s, "false")) {
590 token.type = T_FALSE;
591 } else if (!strcmp(s, "true")) {
592 token.type = T_TRUE;
593 } else if (!strcmp(s, "null")) {
594 token.type = T_NULL;
595 } else {
596 json_error(p, "invalid keyword '%s'", s);
597 return;
598 }
599 json_parser_input(p, &token);
600 }
601
602 static void
603 json_lex_number(struct json_parser *p)
604 {
605 const char *cp = ds_cstr(&p->buffer);
606 unsigned long long int significand = 0;
607 struct json_token token;
608 bool imprecise = false;
609 bool negative = false;
610 int pow10 = 0;
611
612 /* Leading minus sign. */
613 if (*cp == '-') {
614 negative = true;
615 cp++;
616 }
617
618 /* At least one integer digit, but 0 may not be used as a leading digit for
619 * a longer number. */
620 significand = 0;
621 if (*cp == '0') {
622 cp++;
623 if (isdigit(*cp)) {
624 json_error(p, "leading zeros not allowed");
625 return;
626 }
627 } else if (isdigit(*cp)) {
628 do {
629 if (significand <= ULLONG_MAX / 10) {
630 significand = significand * 10 + (*cp - '0');
631 } else {
632 pow10++;
633 if (*cp != '0') {
634 imprecise = true;
635 }
636 }
637 cp++;
638 } while (isdigit(*cp));
639 } else {
640 json_error(p, "'-' must be followed by digit");
641 return;
642 }
643
644 /* Optional fraction. */
645 if (*cp == '.') {
646 cp++;
647 if (!isdigit(*cp)) {
648 json_error(p, "decimal point must be followed by digit");
649 return;
650 }
651 do {
652 if (significand <= ULLONG_MAX / 10) {
653 significand = significand * 10 + (*cp - '0');
654 pow10--;
655 } else if (*cp != '0') {
656 imprecise = true;
657 }
658 cp++;
659 } while (isdigit(*cp));
660 }
661
662 /* Optional exponent. */
663 if (*cp == 'e' || *cp == 'E') {
664 bool negative_exponent = false;
665 int exponent;
666
667 cp++;
668 if (*cp == '+') {
669 cp++;
670 } else if (*cp == '-') {
671 negative_exponent = true;
672 cp++;
673 }
674
675 if (!isdigit(*cp)) {
676 json_error(p, "exponent must contain at least one digit");
677 return;
678 }
679
680 exponent = 0;
681 do {
682 if (exponent >= INT_MAX / 10) {
683 json_error(p, "exponent outside valid range");
684 return;
685 }
686 exponent = exponent * 10 + (*cp - '0');
687 cp++;
688 } while (isdigit(*cp));
689
690 if (negative_exponent) {
691 pow10 -= exponent;
692 } else {
693 pow10 += exponent;
694 }
695 }
696
697 if (*cp != '\0') {
698 json_error(p, "syntax error in number");
699 return;
700 }
701
702 /* Figure out number.
703 *
704 * We suppress negative zeros as a matter of policy. */
705 if (!significand) {
706 token.type = T_INTEGER;
707 token.u.integer = 0;
708 json_parser_input(p, &token);
709 return;
710 }
711
712 if (!imprecise) {
713 while (pow10 > 0 && significand < ULLONG_MAX / 10) {
714 significand *= 10;
715 pow10--;
716 }
717 while (pow10 < 0 && significand % 10 == 0) {
718 significand /= 10;
719 pow10++;
720 }
721 if (pow10 == 0
722 && significand <= (negative
723 ? (unsigned long long int) LLONG_MAX + 1
724 : LLONG_MAX)) {
725 token.type = T_INTEGER;
726 token.u.integer = negative ? -significand : significand;
727 json_parser_input(p, &token);
728 return;
729 }
730 }
731
732 token.type = T_REAL;
733 if (!str_to_double(ds_cstr(&p->buffer), &token.u.real)) {
734 json_error(p, "number outside valid range");
735 return;
736 }
737 /* Suppress negative zero. */
738 if (token.u.real == 0) {
739 token.u.real = 0;
740 }
741 json_parser_input(p, &token);
742 }
743
/* Decodes the four hex digits at 'cp' (the part of a \u escape after the
 * "u") into '*valuep'.  'end' marks the end of the string being unescaped.
 *
 * Returns null on success, otherwise a static error message; '*valuep' is
 * only written on success.  A value of zero is rejected. */
static const char *
json_lex_4hex(const char *cp, const char *end, int *valuep)
{
    unsigned int code;

    if (end - cp < 4) {
        return "quoted string ends within \\u escape";
    }

    code = hexits_value(cp, 4, NULL);
    if (code == UINT_MAX) {
        return "malformed \\u escape";
    } else if (code == 0) {
        return "null bytes not supported in quoted strings";
    }

    *valuep = code;
    return NULL;
}
763
/* Decodes the \u escape whose hex digits start at 'cp' and appends the
 * resulting code point to 'out' as UTF-8.  If the escape is a leading
 * surrogate, a second \u escape holding a trailing surrogate must follow,
 * and the pair is combined into one code point.
 *
 * Returns the position just past the consumed input on success.  On failure,
 * replaces the contents of 'out' with an error message and returns null. */
static const char *
json_lex_unicode(const char *cp, const char *end, struct ds *out)
{
    const char *error;
    int lead, trail;

    error = json_lex_4hex(cp, end, &lead);
    if (error) {
        goto fail;
    }
    cp += 4;
    if (!uc_is_leading_surrogate(lead)) {
        ds_put_utf8(out, lead);
        return cp;
    }

    if (cp + 2 > end || cp[0] != '\\' || cp[1] != 'u') {
        error = "malformed escaped surrogate pair";
        goto fail;
    }
    cp += 2;

    error = json_lex_4hex(cp, end, &trail);
    if (error) {
        goto fail;
    }
    cp += 4;
    if (!uc_is_trailing_surrogate(trail)) {
        error = "second half of escaped surrogate pair is not "
                "trailing surrogate";
        goto fail;
    }

    ds_put_utf8(out, utf16_decode_surrogate_pair(lead, trail));
    return cp;

fail:
    ds_clear(out);
    ds_put_cstr(out, error);
    return NULL;
}
805
806 bool
807 json_string_unescape(const char *in, size_t in_len, char **outp)
808 {
809 const char *end = in + in_len;
810 bool ok = false;
811 struct ds out;
812
813 ds_init(&out);
814 ds_reserve(&out, in_len);
815 if (in_len > 0 && in[in_len - 1] == '\\') {
816 ds_put_cstr(&out, "quoted string may not end with backslash");
817 goto exit;
818 }
819 while (in < end) {
820 if (*in == '"') {
821 ds_clear(&out);
822 ds_put_cstr(&out, "quoted string may not include unescaped \"");
823 goto exit;
824 }
825 if (*in != '\\') {
826 ds_put_char(&out, *in++);
827 continue;
828 }
829
830 in++;
831 switch (*in++) {
832 case '"': case '\\': case '/':
833 ds_put_char(&out, in[-1]);
834 break;
835
836 case 'b':
837 ds_put_char(&out, '\b');
838 break;
839
840 case 'f':
841 ds_put_char(&out, '\f');
842 break;
843
844 case 'n':
845 ds_put_char(&out, '\n');
846 break;
847
848 case 'r':
849 ds_put_char(&out, '\r');
850 break;
851
852 case 't':
853 ds_put_char(&out, '\t');
854 break;
855
856 case 'u':
857 in = json_lex_unicode(in, end, &out);
858 if (!in) {
859 goto exit;
860 }
861 break;
862
863 default:
864 ds_clear(&out);
865 ds_put_format(&out, "bad escape \\%c", in[-1]);
866 goto exit;
867 }
868 }
869 ok = true;
870
871 exit:
872 *outp = ds_cstr(&out);
873 return ok;
874 }
875
876 static void
877 json_parser_input_string(struct json_parser *p, const char *s)
878 {
879 struct json_token token;
880
881 token.type = T_STRING;
882 token.u.string = s;
883 json_parser_input(p, &token);
884 }
885
886 static void
887 json_lex_string(struct json_parser *p)
888 {
889 const char *raw = ds_cstr(&p->buffer);
890 if (!strchr(raw, '\\')) {
891 json_parser_input_string(p, raw);
892 } else {
893 char *cooked;
894
895 if (json_string_unescape(raw, strlen(raw), &cooked)) {
896 json_parser_input_string(p, cooked);
897 } else {
898 json_error(p, "%s", cooked);
899 }
900
901 free(cooked);
902 }
903 }
904
905 static bool
906 json_lex_input(struct json_parser *p, unsigned char c)
907 {
908 struct json_token token;
909
910 switch (p->lex_state) {
911 case JSON_LEX_START:
912 switch (c) {
913 case ' ': case '\t': case '\n': case '\r':
914 /* Nothing to do. */
915 return true;
916
917 case 'a': case 'b': case 'c': case 'd': case 'e':
918 case 'f': case 'g': case 'h': case 'i': case 'j':
919 case 'k': case 'l': case 'm': case 'n': case 'o':
920 case 'p': case 'q': case 'r': case 's': case 't':
921 case 'u': case 'v': case 'w': case 'x': case 'y':
922 case 'z':
923 p->lex_state = JSON_LEX_KEYWORD;
924 break;
925
926 case '[': case '{': case ']': case '}': case ':': case ',':
927 token.type = c;
928 json_parser_input(p, &token);
929 return true;
930
931 case '-':
932 case '0': case '1': case '2': case '3': case '4':
933 case '5': case '6': case '7': case '8': case '9':
934 p->lex_state = JSON_LEX_NUMBER;
935 break;
936
937 case '"':
938 p->lex_state = JSON_LEX_STRING;
939 return true;
940
941 default:
942 if (isprint(c)) {
943 json_error(p, "invalid character '%c'", c);
944 } else {
945 json_error(p, "invalid character U+%04x", c);
946 }
947 return true;
948 }
949 break;
950
951 case JSON_LEX_KEYWORD:
952 if (!isalpha((unsigned char) c)) {
953 json_lex_keyword(p);
954 return false;
955 }
956 break;
957
958 case JSON_LEX_NUMBER:
959 if (!strchr(".0123456789eE-+", c)) {
960 json_lex_number(p);
961 return false;
962 }
963 break;
964
965 case JSON_LEX_STRING:
966 if (c == '\\') {
967 p->lex_state = JSON_LEX_ESCAPE;
968 } else if (c == '"') {
969 json_lex_string(p);
970 return true;
971 } else if (c < 0x20) {
972 json_error(p, "U+%04X must be escaped in quoted string", c);
973 return true;
974 }
975 break;
976
977 case JSON_LEX_ESCAPE:
978 p->lex_state = JSON_LEX_STRING;
979 break;
980
981 default:
982 abort();
983 }
984 ds_put_char(&p->buffer, c);
985 return true;
986 }
987 \f
988 /* Parsing. */
989
990 /* Parses 'string' as a JSON object or array and returns a newly allocated
991 * 'struct json'. The caller must free the returned structure with
992 * json_destroy() when it is no longer needed.
993 *
994 * 'string' must be encoded in UTF-8.
995 *
996 * If 'string' is valid JSON, then the returned 'struct json' will be either an
997 * object (JSON_OBJECT) or an array (JSON_ARRAY).
998 *
999 * If 'string' is not valid JSON, then the returned 'struct json' will be a
1000 * string (JSON_STRING) that describes the particular error encountered during
1001 * parsing. (This is an acceptable means of error reporting because at its top
1002 * level JSON must be either an object or an array; a bare string is not
1003 * valid.) */
1004 struct json *
1005 json_from_string(const char *string)
1006 {
1007 struct json_parser *p = json_parser_create(JSPF_TRAILER);
1008 json_parser_feed(p, string, strlen(string));
1009 return json_parser_finish(p);
1010 }
1011
/* Opens the file named 'file_name', parses its UTF-8 encoded contents as a
 * JSON object or array, and returns the result as a newly allocated
 * 'struct json', which the caller must eventually release with
 * json_destroy().
 *
 * If the file cannot be opened, the result is a JSON string describing the
 * failure.  Otherwise the result is whatever json_from_stream() returns; see
 * json_from_string() for how parse errors are reported. */
struct json *
json_from_file(const char *file_name)
{
    FILE *stream = fopen(file_name, "r");
    struct json *json;

    if (stream) {
        json = json_from_stream(stream);
        fclose(stream);
    } else {
        json = json_string_create_nocopy(
            xasprintf("error opening \"%s\": %s", file_name, strerror(errno)));
    }
    return json;
}
1036
1037 /* Parses the contents of 'stream' as a JSON object or array, and returns a
1038 * newly allocated 'struct json'. The caller must free the returned structure
1039 * with json_destroy() when it is no longer needed.
1040 *
1041 * The file must be encoded in UTF-8.
1042 *
1043 * See json_from_string() for return value semantics.
1044 */
1045 struct json *
1046 json_from_stream(FILE *stream)
1047 {
1048 struct json_parser *p;
1049 struct json *json;
1050
1051 p = json_parser_create(JSPF_TRAILER);
1052 for (;;) {
1053 char buffer[BUFSIZ];
1054 size_t n;
1055
1056 n = fread(buffer, 1, sizeof buffer, stream);
1057 if (!n || json_parser_feed(p, buffer, n) != n) {
1058 break;
1059 }
1060 }
1061 json = json_parser_finish(p);
1062
1063 if (ferror(stream)) {
1064 json_destroy(json);
1065 json = json_string_create_nocopy(
1066 xasprintf("error reading JSON stream: %s", strerror(errno)));
1067 }
1068
1069 return json;
1070 }
1071
1072 struct json_parser *
1073 json_parser_create(int flags)
1074 {
1075 struct json_parser *p = xzalloc(sizeof *p);
1076 p->flags = flags;
1077 return p;
1078 }
1079
1080 size_t
1081 json_parser_feed(struct json_parser *p, const char *input, size_t n)
1082 {
1083 size_t i;
1084 for (i = 0; !p->done && i < n; ) {
1085 if (json_lex_input(p, input[i])) {
1086 p->byte_number++;
1087 if (input[i] == '\n') {
1088 p->column_number = 0;
1089 p->line_number++;
1090 } else {
1091 p->column_number++;
1092 }
1093 i++;
1094 }
1095 }
1096 return i;
1097 }
1098
1099 bool
1100 json_parser_is_done(const struct json_parser *p)
1101 {
1102 return p->done;
1103 }
1104
1105 struct json *
1106 json_parser_finish(struct json_parser *p)
1107 {
1108 struct json *json;
1109
1110 switch (p->lex_state) {
1111 case JSON_LEX_START:
1112 break;
1113
1114 case JSON_LEX_STRING:
1115 case JSON_LEX_ESCAPE:
1116 json_error(p, "unexpected end of input in quoted string");
1117 break;
1118
1119 case JSON_LEX_NUMBER:
1120 case JSON_LEX_KEYWORD:
1121 json_lex_input(p, ' ');
1122 break;
1123 }
1124
1125 if (p->parse_state == JSON_PARSE_START) {
1126 json_error(p, "empty input stream");
1127 } else if (p->parse_state != JSON_PARSE_END) {
1128 json_error(p, "unexpected end of input");
1129 }
1130
1131 if (!p->error) {
1132 ovs_assert(p->height == 1);
1133 ovs_assert(p->stack[0].json != NULL);
1134 json = p->stack[--p->height].json;
1135 } else {
1136 json = json_string_create_nocopy(p->error);
1137 p->error = NULL;
1138 }
1139
1140 json_parser_abort(p);
1141
1142 return json;
1143 }
1144
1145 void
1146 json_parser_abort(struct json_parser *p)
1147 {
1148 if (p) {
1149 ds_destroy(&p->buffer);
1150 if (p->height) {
1151 json_destroy(p->stack[0].json);
1152 }
1153 free(p->stack);
1154 free(p->member_name);
1155 free(p->error);
1156 free(p);
1157 }
1158 }
1159
1160 static struct json_parser_node *
1161 json_parser_top(struct json_parser *p)
1162 {
1163 return &p->stack[p->height - 1];
1164 }
1165
1166 static void
1167 json_parser_put_value(struct json_parser *p, struct json *value)
1168 {
1169 struct json_parser_node *node = json_parser_top(p);
1170 if (node->json->type == JSON_OBJECT) {
1171 json_object_put(node->json, p->member_name, value);
1172 free(p->member_name);
1173 p->member_name = NULL;
1174 } else if (node->json->type == JSON_ARRAY) {
1175 json_array_add(node->json, value);
1176 } else {
1177 NOT_REACHED();
1178 }
1179 }
1180
1181 static void
1182 json_parser_push(struct json_parser *p,
1183 struct json *new_json, enum json_parse_state new_state)
1184 {
1185 if (p->height < JSON_MAX_HEIGHT) {
1186 struct json_parser_node *node;
1187
1188 if (p->height >= p->allocated_height) {
1189 p->stack = x2nrealloc(p->stack, &p->allocated_height,
1190 sizeof *p->stack);
1191 }
1192
1193 if (p->height > 0) {
1194 json_parser_put_value(p, new_json);
1195 }
1196
1197 node = &p->stack[p->height++];
1198 node->json = new_json;
1199 p->parse_state = new_state;
1200 } else {
1201 json_destroy(new_json);
1202 json_error(p, "input exceeds maximum nesting depth %d",
1203 JSON_MAX_HEIGHT);
1204 }
1205 }
1206
1207 static void
1208 json_parser_push_object(struct json_parser *p)
1209 {
1210 json_parser_push(p, json_object_create(), JSON_PARSE_OBJECT_INIT);
1211 }
1212
1213 static void
1214 json_parser_push_array(struct json_parser *p)
1215 {
1216 json_parser_push(p, json_array_create_empty(), JSON_PARSE_ARRAY_INIT);
1217 }
1218
1219 static void
1220 json_parse_value(struct json_parser *p, struct json_token *token,
1221 enum json_parse_state next_state)
1222 {
1223 struct json *value;
1224
1225 switch (token->type) {
1226 case T_FALSE:
1227 value = json_boolean_create(false);
1228 break;
1229
1230 case T_NULL:
1231 value = json_null_create();
1232 break;
1233
1234 case T_TRUE:
1235 value = json_boolean_create(true);
1236 break;
1237
1238 case '{':
1239 json_parser_push_object(p);
1240 return;
1241
1242 case '[':
1243 json_parser_push_array(p);
1244 return;
1245
1246 case T_INTEGER:
1247 value = json_integer_create(token->u.integer);
1248 break;
1249
1250 case T_REAL:
1251 value = json_real_create(token->u.real);
1252 break;
1253
1254 case T_STRING:
1255 value = json_string_create(token->u.string);
1256 break;
1257
1258 case T_EOF:
1259 case '}':
1260 case ']':
1261 case ':':
1262 case ',':
1263 default:
1264 json_error(p, "syntax error expecting value");
1265 return;
1266 }
1267
1268 json_parser_put_value(p, value);
1269 p->parse_state = next_state;
1270 }
1271
1272 static void
1273 json_parser_pop(struct json_parser *p)
1274 {
1275 struct json_parser_node *node;
1276
1277 /* Conserve memory. */
1278 node = json_parser_top(p);
1279 if (node->json->type == JSON_ARRAY) {
1280 json_array_trim(node->json);
1281 }
1282
1283 /* Pop off the top-of-stack. */
1284 if (p->height == 1) {
1285 p->parse_state = JSON_PARSE_END;
1286 if (!(p->flags & JSPF_TRAILER)) {
1287 p->done = true;
1288 }
1289 } else {
1290 p->height--;
1291 node = json_parser_top(p);
1292 if (node->json->type == JSON_ARRAY) {
1293 p->parse_state = JSON_PARSE_ARRAY_NEXT;
1294 } else if (node->json->type == JSON_OBJECT) {
1295 p->parse_state = JSON_PARSE_OBJECT_NEXT;
1296 } else {
1297 NOT_REACHED();
1298 }
1299 }
1300 }
1301
1302 static void
1303 json_parser_input(struct json_parser *p, struct json_token *token)
1304 {
1305 switch (p->parse_state) {
1306 case JSON_PARSE_START:
1307 if (token->type == '{') {
1308 json_parser_push_object(p);
1309 } else if (token->type == '[') {
1310 json_parser_push_array(p);
1311 } else {
1312 json_error(p, "syntax error at beginning of input");
1313 }
1314 break;
1315
1316 case JSON_PARSE_END:
1317 json_error(p, "trailing garbage at end of input");
1318 break;
1319
1320 case JSON_PARSE_OBJECT_INIT:
1321 if (token->type == '}') {
1322 json_parser_pop(p);
1323 break;
1324 }
1325 /* Fall through. */
1326 case JSON_PARSE_OBJECT_NAME:
1327 if (token->type == T_STRING) {
1328 p->member_name = xstrdup(token->u.string);
1329 p->parse_state = JSON_PARSE_OBJECT_COLON;
1330 } else {
1331 json_error(p, "syntax error parsing object expecting string");
1332 }
1333 break;
1334
1335 case JSON_PARSE_OBJECT_COLON:
1336 if (token->type == ':') {
1337 p->parse_state = JSON_PARSE_OBJECT_VALUE;
1338 } else {
1339 json_error(p, "syntax error parsing object expecting ':'");
1340 }
1341 break;
1342
1343 case JSON_PARSE_OBJECT_VALUE:
1344 json_parse_value(p, token, JSON_PARSE_OBJECT_NEXT);
1345 break;
1346
1347 case JSON_PARSE_OBJECT_NEXT:
1348 if (token->type == ',') {
1349 p->parse_state = JSON_PARSE_OBJECT_NAME;
1350 } else if (token->type == '}') {
1351 json_parser_pop(p);
1352 } else {
1353 json_error(p, "syntax error expecting '}' or ','");
1354 }
1355 break;
1356
1357 case JSON_PARSE_ARRAY_INIT:
1358 if (token->type == ']') {
1359 json_parser_pop(p);
1360 break;
1361 }
1362 /* Fall through. */
1363 case JSON_PARSE_ARRAY_VALUE:
1364 json_parse_value(p, token, JSON_PARSE_ARRAY_NEXT);
1365 break;
1366
1367 case JSON_PARSE_ARRAY_NEXT:
1368 if (token->type == ',') {
1369 p->parse_state = JSON_PARSE_ARRAY_VALUE;
1370 } else if (token->type == ']') {
1371 json_parser_pop(p);
1372 } else {
1373 json_error(p, "syntax error expecting ']' or ','");
1374 }
1375 break;
1376
1377 default:
1378 abort();
1379 }
1380
1381 p->lex_state = JSON_LEX_START;
1382 ds_clear(&p->buffer);
1383 }
1384
1385 static struct json *
1386 json_create(enum json_type type)
1387 {
1388 struct json *json = xmalloc(sizeof *json);
1389 json->type = type;
1390 return json;
1391 }
1392
1393 static void
1394 json_error(struct json_parser *p, const char *format, ...)
1395 {
1396 if (!p->error) {
1397 struct ds msg;
1398 va_list args;
1399
1400 ds_init(&msg);
1401 ds_put_format(&msg, "line %d, column %d, byte %d: ",
1402 p->line_number, p->column_number, p->byte_number);
1403 va_start(args, format);
1404 ds_put_format_valist(&msg, format, args);
1405 va_end(args);
1406
1407 p->error = ds_steal_cstr(&msg);
1408
1409 p->done = true;
1410 }
1411 }
1412 \f
/* Number of spaces of indentation added per nesting level when
 * pretty-printing. */
#define SPACES_PER_LEVEL 2

/* State carried through a single serialization pass. */
struct json_serializer {
    struct ds *ds;              /* Serialized output is appended here. */
    int depth;                  /* Current nesting level, used for indenting. */
    int flags;                  /* Flags checked for JSSF_PRETTY, JSSF_SORT. */
};

/* Forward declarations for the mutually recursive serialization helpers. */
static void json_serialize(const struct json *json,
                           struct json_serializer *s);
static void json_serialize_object(const struct shash *object,
                                  struct json_serializer *s);
static void json_serialize_array(const struct json_array *array,
                                 struct json_serializer *s);
static void json_serialize_string(const char *string, struct ds *ds);
1427
1428 /* Converts 'json' to a string in JSON format, encoded in UTF-8, and returns
1429 * that string. The caller is responsible for freeing the returned string,
1430 * with free(), when it is no longer needed.
1431 *
1432 * If 'flags' contains JSSF_PRETTY, the output is pretty-printed with each
1433 * nesting level introducing an additional indentation. Otherwise, the
1434 * returned string does not contain any new-line characters.
1435 *
1436 * If 'flags' contains JSSF_SORT, members of objects in the output are sorted
1437 * in bytewise lexicographic order for reproducibility. Otherwise, members of
1438 * objects are output in an indeterminate order.
1439 *
1440 * The returned string is valid JSON only if 'json' represents an array or an
1441 * object, since a bare literal does not satisfy the JSON grammar. */
1442 char *
1443 json_to_string(const struct json *json, int flags)
1444 {
1445 struct ds ds;
1446
1447 ds_init(&ds);
1448 json_to_ds(json, flags, &ds);
1449 return ds_steal_cstr(&ds);
1450 }
1451
1452 /* Same as json_to_string(), but the output is appended to 'ds'. */
1453 void
1454 json_to_ds(const struct json *json, int flags, struct ds *ds)
1455 {
1456 struct json_serializer s;
1457
1458 s.ds = ds;
1459 s.depth = 0;
1460 s.flags = flags;
1461 json_serialize(json, &s);
1462 }
1463
1464 static void
1465 json_serialize(const struct json *json, struct json_serializer *s)
1466 {
1467 struct ds *ds = s->ds;
1468
1469 switch (json->type) {
1470 case JSON_NULL:
1471 ds_put_cstr(ds, "null");
1472 break;
1473
1474 case JSON_FALSE:
1475 ds_put_cstr(ds, "false");
1476 break;
1477
1478 case JSON_TRUE:
1479 ds_put_cstr(ds, "true");
1480 break;
1481
1482 case JSON_OBJECT:
1483 json_serialize_object(json->u.object, s);
1484 break;
1485
1486 case JSON_ARRAY:
1487 json_serialize_array(&json->u.array, s);
1488 break;
1489
1490 case JSON_INTEGER:
1491 ds_put_format(ds, "%lld", json->u.integer);
1492 break;
1493
1494 case JSON_REAL:
1495 ds_put_format(ds, "%.*g", DBL_DIG, json->u.real);
1496 break;
1497
1498 case JSON_STRING:
1499 json_serialize_string(json->u.string, ds);
1500 break;
1501
1502 case JSON_N_TYPES:
1503 default:
1504 NOT_REACHED();
1505 }
1506 }
1507
1508 static void
1509 indent_line(struct json_serializer *s)
1510 {
1511 if (s->flags & JSSF_PRETTY) {
1512 ds_put_char(s->ds, '\n');
1513 ds_put_char_multiple(s->ds, ' ', SPACES_PER_LEVEL * s->depth);
1514 }
1515 }
1516
1517 static void
1518 json_serialize_object_member(size_t i, const struct shash_node *node,
1519 struct json_serializer *s)
1520 {
1521 struct ds *ds = s->ds;
1522
1523 if (i) {
1524 ds_put_char(ds, ',');
1525 indent_line(s);
1526 }
1527
1528 json_serialize_string(node->name, ds);
1529 ds_put_char(ds, ':');
1530 if (s->flags & JSSF_PRETTY) {
1531 ds_put_char(ds, ' ');
1532 }
1533 json_serialize(node->data, s);
1534 }
1535
1536 static void
1537 json_serialize_object(const struct shash *object, struct json_serializer *s)
1538 {
1539 struct ds *ds = s->ds;
1540
1541 ds_put_char(ds, '{');
1542
1543 s->depth++;
1544 indent_line(s);
1545
1546 if (s->flags & JSSF_SORT) {
1547 const struct shash_node **nodes;
1548 size_t n, i;
1549
1550 nodes = shash_sort(object);
1551 n = shash_count(object);
1552 for (i = 0; i < n; i++) {
1553 json_serialize_object_member(i, nodes[i], s);
1554 }
1555 free(nodes);
1556 } else {
1557 struct shash_node *node;
1558 size_t i;
1559
1560 i = 0;
1561 SHASH_FOR_EACH (node, object) {
1562 json_serialize_object_member(i++, node, s);
1563 }
1564 }
1565
1566 ds_put_char(ds, '}');
1567 s->depth--;
1568 }
1569
1570 static void
1571 json_serialize_array(const struct json_array *array, struct json_serializer *s)
1572 {
1573 struct ds *ds = s->ds;
1574 size_t i;
1575
1576 ds_put_char(ds, '[');
1577 s->depth++;
1578
1579 if (array->n > 0) {
1580 indent_line(s);
1581
1582 for (i = 0; i < array->n; i++) {
1583 if (i) {
1584 ds_put_char(ds, ',');
1585 indent_line(s);
1586 }
1587 json_serialize(array->elems[i], s);
1588 }
1589 }
1590
1591 s->depth--;
1592 ds_put_char(ds, ']');
1593 }
1594
/* Appends 'string' to 'ds' as a double-quoted JSON string.
 *
 * The double quote, the backslash, and the control characters \b, \f, \n, \r
 * and \t are written as two-character escapes.  Any other byte with a value
 * below 32 is written as a six-character \u00XX escape.  All remaining bytes
 * are copied through unchanged. */
static void
json_serialize_string(const char *string, struct ds *ds)
{
    const char *p;

    ds_put_char(ds, '"');

    for (p = string; *p != '\0'; p++) {
        /* Treat the byte as unsigned so that the ">= 32" test below behaves
         * the same regardless of the signedness of plain char. */
        const uint8_t c = *p;
        const char *escape = NULL;

        switch (c) {
        case '"':
            escape = "\\\"";
            break;

        case '\\':
            escape = "\\\\";
            break;

        case '\b':
            escape = "\\b";
            break;

        case '\f':
            escape = "\\f";
            break;

        case '\n':
            escape = "\\n";
            break;

        case '\r':
            escape = "\\r";
            break;

        case '\t':
            escape = "\\t";
            break;

        default:
            break;
        }

        if (escape) {
            ds_put_cstr(ds, escape);
        } else if (c >= 32) {
            ds_put_char(ds, c);
        } else {
            /* Remaining control characters have no short escape. */
            ds_put_format(ds, "\\u%04x", c);
        }
    }

    ds_put_char(ds, '"');
}
1642 \f
/* Returns the number of bytes that 'string' occupies when written as a
 * double-quoted JSON string: two bytes for the surrounding quotes, two bytes
 * for each of '"', '\\', '\b', '\f', '\n', '\r' and '\t', six bytes (\uXXXX)
 * for any other byte with a value below 32, and one byte for everything
 * else. */
static size_t
json_string_serialized_length(const char *string)
{
    /* Bytes that are written as a backslash followed by one character. */
    static const char two_byte_escapes[] = "\"\\\b\f\n\r\t";

    size_t total = strlen("\"\"");
    const char *p;

    for (p = string; *p != '\0'; p++) {
        const uint8_t c = *p;

        /* 'c' is never 0 here, so strchr() cannot match the terminator. */
        if (strchr(two_byte_escapes, c)) {
            total += 2;
        } else if (c >= 32) {
            total += 1;
        } else {
            /* \uXXXX */
            total += 6;
        }
    }

    return total;
}
1676
1677 static size_t
1678 json_object_serialized_length(const struct shash *object)
1679 {
1680 size_t length = strlen("{}");
1681
1682 if (!shash_is_empty(object)) {
1683 struct shash_node *node;
1684
1685 /* Commas and colons. */
1686 length += 2 * shash_count(object) - 1;
1687
1688 SHASH_FOR_EACH (node, object) {
1689 const struct json *value = node->data;
1690
1691 length += json_string_serialized_length(node->name);
1692 length += json_serialized_length(value);
1693 }
1694 }
1695
1696 return length;
1697 }
1698
1699 static size_t
1700 json_array_serialized_length(const struct json_array *array)
1701 {
1702 size_t length = strlen("[]");
1703
1704 if (array->n) {
1705 size_t i;
1706
1707 /* Commas. */
1708 length += array->n - 1;
1709
1710 for (i = 0; i < array->n; i++) {
1711 length += json_serialized_length(array->elems[i]);
1712 }
1713 }
1714
1715 return length;
1716 }
1717
1718 /* Returns strlen(json_to_string(json, 0)), that is, the number of bytes in the
1719 * JSON output by json_to_string() for 'json' when JSSF_PRETTY is not
1720 * requested. (JSSF_SORT does not affect the length of json_to_string()'s
1721 * output.) */
1722 size_t
1723 json_serialized_length(const struct json *json)
1724 {
1725 switch (json->type) {
1726 case JSON_NULL:
1727 return strlen("null");
1728
1729 case JSON_FALSE:
1730 return strlen("false");
1731
1732 case JSON_TRUE:
1733 return strlen("true");
1734
1735 case JSON_OBJECT:
1736 return json_object_serialized_length(json->u.object);
1737
1738 case JSON_ARRAY:
1739 return json_array_serialized_length(&json->u.array);
1740
1741 case JSON_INTEGER:
1742 return snprintf(NULL, 0, "%lld", json->u.integer);
1743
1744 case JSON_REAL:
1745 return snprintf(NULL, 0, "%.*g", DBL_DIG, json->u.real);
1746
1747 case JSON_STRING:
1748 return json_string_serialized_length(json->u.string);
1749
1750 case JSON_N_TYPES:
1751 default:
1752 NOT_REACHED();
1753 }
1754 }