]> git.proxmox.com Git - mirror_ovs.git/blob - lib/json.c
util: New macro CONST_CAST.
[mirror_ovs.git] / lib / json.c
1 /*
2 * Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18
19 #include "json.h"
20
21 #include <assert.h>
22 #include <ctype.h>
23 #include <errno.h>
24 #include <float.h>
25 #include <limits.h>
26 #include <string.h>
27
28 #include "dynamic-string.h"
29 #include "hash.h"
30 #include "shash.h"
31 #include "unicode.h"
32 #include "util.h"
33
/* Kinds of token that the lexer hands to the parser.
 *
 * Punctuation tokens use their own character code as the enumerator value (the
 * lexer stores the input character directly into the token type).  The other
 * kinds are numbered from UCHAR_MAX + 1 upward so that they cannot collide
 * with any character code. */
enum json_token_type {
    T_EOF = 0,
    T_BEGIN_ARRAY = '[',
    T_END_ARRAY = ']',
    T_BEGIN_OBJECT = '{',
    T_END_OBJECT = '}',
    T_NAME_SEPARATOR = ':',
    T_VALUE_SEPARATOR = ',',
    T_FALSE = UCHAR_MAX + 1,
    T_NULL,
    T_TRUE,
    T_INTEGER,
    T_REAL,
    T_STRING
};
50
51 /* A JSON token.
52 *
53 * RFC 4627 doesn't define a lexical structure for JSON but I believe this to
54 * be compliant with the standard.
55 */
56 struct json_token {
57 enum json_token_type type;
58 union {
59 double real;
60 long long int integer;
61 const char *string;
62 } u;
63 };
64
/* States of the lexer, which consumes its input one byte at a time. */
enum json_lex_state {
    JSON_LEX_START,             /* Between tokens. */
    JSON_LEX_NUMBER,            /* Accumulating the text of a number. */
    JSON_LEX_KEYWORD,           /* Accumulating the text of a keyword. */
    JSON_LEX_STRING,            /* Inside a quoted string. */
    JSON_LEX_ESCAPE             /* Inside a quoted string, right after "\". */
};
72
/* States of the parser, named after what it expects to see next. */
enum json_parse_state {
    JSON_PARSE_START,           /* Nothing has been parsed yet. */
    JSON_PARSE_END,             /* The top-level value is complete. */

    /* Inside an object. */
    JSON_PARSE_OBJECT_INIT,     /* Just after '{': want '}' or a name. */
    JSON_PARSE_OBJECT_NAME,     /* Just after ',': want a name. */
    JSON_PARSE_OBJECT_COLON,    /* Just after a name: want ':'. */
    JSON_PARSE_OBJECT_VALUE,    /* Just after ':': want a value. */
    JSON_PARSE_OBJECT_NEXT,     /* Just after a value: want ',' or '}'. */

    /* Inside an array. */
    JSON_PARSE_ARRAY_INIT,      /* Just after '[': want ']' or a value. */
    JSON_PARSE_ARRAY_VALUE,     /* Just after ',': want a value. */
    JSON_PARSE_ARRAY_NEXT       /* Just after a value: want ',' or ']'. */
};
89
/* One entry on the parser's stack of containers that are still open. */
struct json_parser_node {
    struct json *json;          /* The object or array being filled in. */
};
93
94 /* A JSON parser. */
95 struct json_parser {
96 int flags;
97
98 /* Lexical analysis. */
99 enum json_lex_state lex_state;
100 struct ds buffer; /* Buffer for accumulating token text. */
101 int line_number;
102 int column_number;
103 int byte_number;
104
105 /* Parsing. */
106 enum json_parse_state parse_state;
107 #define JSON_MAX_HEIGHT 1000
108 struct json_parser_node *stack;
109 size_t height, allocated_height;
110 char *member_name;
111
112 /* Parse status. */
113 bool done;
114 char *error; /* Error message, if any, null if none yet. */
115 };
116
117 static struct json *json_create(enum json_type type);
118 static void json_parser_input(struct json_parser *, struct json_token *);
119
120 static void json_error(struct json_parser *p, const char *format, ...)
121 PRINTF_FORMAT(2, 3);
122 \f
123 const char *
124 json_type_to_string(enum json_type type)
125 {
126 switch (type) {
127 case JSON_NULL:
128 return "null";
129
130 case JSON_FALSE:
131 return "false";
132
133 case JSON_TRUE:
134 return "true";
135
136 case JSON_OBJECT:
137 return "object";
138
139 case JSON_ARRAY:
140 return "array";
141
142 case JSON_INTEGER:
143 case JSON_REAL:
144 return "number";
145
146 case JSON_STRING:
147 return "string";
148
149 case JSON_N_TYPES:
150 default:
151 return "<invalid>";
152 }
153 }
154 \f
155 /* Functions for manipulating struct json. */
156
157 struct json *
158 json_null_create(void)
159 {
160 return json_create(JSON_NULL);
161 }
162
163 struct json *
164 json_boolean_create(bool b)
165 {
166 return json_create(b ? JSON_TRUE : JSON_FALSE);
167 }
168
169 struct json *
170 json_string_create_nocopy(char *s)
171 {
172 struct json *json = json_create(JSON_STRING);
173 json->u.string = s;
174 return json;
175 }
176
/* Returns a new JSON string that holds its own copy of 's'. */
struct json *
json_string_create(const char *s)
{
    char *copy = xstrdup(s);

    return json_string_create_nocopy(copy);
}
182
/* Returns a new JSON array that has no elements and no storage allocated for
 * elements yet. */
struct json *
json_array_create_empty(void)
{
    /* A null element vector of length 0 is exactly the empty array. */
    return json_array_create(NULL, 0);
}
192
193 void
194 json_array_add(struct json *array_, struct json *element)
195 {
196 struct json_array *array = json_array(array_);
197 if (array->n >= array->n_allocated) {
198 array->elems = x2nrealloc(array->elems, &array->n_allocated,
199 sizeof *array->elems);
200 }
201 array->elems[array->n++] = element;
202 }
203
204 void
205 json_array_trim(struct json *array_)
206 {
207 struct json_array *array = json_array(array_);
208 if (array->n < array->n_allocated){
209 array->n_allocated = array->n;
210 array->elems = xrealloc(array->elems, array->n * sizeof *array->elems);
211 }
212 }
213
214 struct json *
215 json_array_create(struct json **elements, size_t n)
216 {
217 struct json *json = json_create(JSON_ARRAY);
218 json->u.array.elems = elements;
219 json->u.array.n = n;
220 json->u.array.n_allocated = n;
221 return json;
222 }
223
/* Returns a new one-element JSON array that contains 'elem0'. */
struct json *
json_array_create_1(struct json *elem0)
{
    struct json **vector = xmalloc(sizeof *vector);

    *vector = elem0;
    return json_array_create(vector, 1);
}
231
/* Returns a new two-element JSON array that contains 'elem0' and 'elem1', in
 * that order. */
struct json *
json_array_create_2(struct json *elem0, struct json *elem1)
{
    struct json **vector = xmalloc(2 * sizeof *vector);
    struct json **slot = vector;

    *slot++ = elem0;
    *slot = elem1;
    return json_array_create(vector, 2);
}
240
/* Returns a new three-element JSON array that contains 'elem0', 'elem1', and
 * 'elem2', in that order. */
struct json *
json_array_create_3(struct json *elem0, struct json *elem1, struct json *elem2)
{
    struct json **vector = xmalloc(3 * sizeof *vector);
    struct json **slot = vector;

    *slot++ = elem0;
    *slot++ = elem1;
    *slot = elem2;
    return json_array_create(vector, 3);
}
250
251 struct json *
252 json_object_create(void)
253 {
254 struct json *json = json_create(JSON_OBJECT);
255 json->u.object = xmalloc(sizeof *json->u.object);
256 shash_init(json->u.object);
257 return json;
258 }
259
260 struct json *
261 json_integer_create(long long int integer)
262 {
263 struct json *json = json_create(JSON_INTEGER);
264 json->u.integer = integer;
265 return json;
266 }
267
268 struct json *
269 json_real_create(double real)
270 {
271 struct json *json = json_create(JSON_REAL);
272 json->u.real = real;
273 return json;
274 }
275
276 void
277 json_object_put(struct json *json, const char *name, struct json *value)
278 {
279 json_destroy(shash_replace(json->u.object, name, value));
280 }
281
/* Sets member 'name' of JSON object 'json' to a new JSON string that holds a
 * copy of 'value'. */
void
json_object_put_string(struct json *json, const char *name, const char *value)
{
    struct json *string = json_string_create(value);

    json_object_put(json, name, string);
}
287
288 const char *
289 json_string(const struct json *json)
290 {
291 assert(json->type == JSON_STRING);
292 return json->u.string;
293 }
294
295 struct json_array *
296 json_array(const struct json *json)
297 {
298 assert(json->type == JSON_ARRAY);
299 return CONST_CAST(struct json_array *, &json->u.array);
300 }
301
302 struct shash *
303 json_object(const struct json *json)
304 {
305 assert(json->type == JSON_OBJECT);
306 return CONST_CAST(struct shash *, json->u.object);
307 }
308
309 bool
310 json_boolean(const struct json *json)
311 {
312 assert(json->type == JSON_TRUE || json->type == JSON_FALSE);
313 return json->type == JSON_TRUE;
314 }
315
316 double
317 json_real(const struct json *json)
318 {
319 assert(json->type == JSON_REAL || json->type == JSON_INTEGER);
320 return json->type == JSON_REAL ? json->u.real : json->u.integer;
321 }
322
323 int64_t
324 json_integer(const struct json *json)
325 {
326 assert(json->type == JSON_INTEGER);
327 return json->u.integer;
328 }
329 \f
/* Per-type helpers for json_destroy(). */
static void json_destroy_array(struct json_array *array);
static void json_destroy_object(struct shash *object);
332
333 /* Frees 'json' and everything it points to, recursively. */
334 void
335 json_destroy(struct json *json)
336 {
337 if (json) {
338 switch (json->type) {
339 case JSON_OBJECT:
340 json_destroy_object(json->u.object);
341 break;
342
343 case JSON_ARRAY:
344 json_destroy_array(&json->u.array);
345 break;
346
347 case JSON_STRING:
348 free(json->u.string);
349 break;
350
351 case JSON_NULL:
352 case JSON_FALSE:
353 case JSON_TRUE:
354 case JSON_INTEGER:
355 case JSON_REAL:
356 break;
357
358 case JSON_N_TYPES:
359 NOT_REACHED();
360 }
361 free(json);
362 }
363 }
364
365 static void
366 json_destroy_object(struct shash *object)
367 {
368 struct shash_node *node, *next;
369
370 SHASH_FOR_EACH_SAFE (node, next, object) {
371 struct json *value = node->data;
372
373 json_destroy(value);
374 shash_delete(object, node);
375 }
376 shash_destroy(object);
377 free(object);
378 }
379
380 static void
381 json_destroy_array(struct json_array *array)
382 {
383 size_t i;
384
385 for (i = 0; i < array->n; i++) {
386 json_destroy(array->elems[i]);
387 }
388 free(array->elems);
389 }
390 \f
/* Per-type helpers for json_clone(). */
static struct json *json_clone_array(const struct json_array *array);
static struct json *json_clone_object(const struct shash *object);
393
394 /* Returns a deep copy of 'json'. */
395 struct json *
396 json_clone(const struct json *json)
397 {
398 switch (json->type) {
399 case JSON_OBJECT:
400 return json_clone_object(json->u.object);
401
402 case JSON_ARRAY:
403 return json_clone_array(&json->u.array);
404
405 case JSON_STRING:
406 return json_string_create(json->u.string);
407
408 case JSON_NULL:
409 case JSON_FALSE:
410 case JSON_TRUE:
411 return json_create(json->type);
412
413 case JSON_INTEGER:
414 return json_integer_create(json->u.integer);
415
416 case JSON_REAL:
417 return json_real_create(json->u.real);
418
419 case JSON_N_TYPES:
420 default:
421 NOT_REACHED();
422 }
423 }
424
425 static struct json *
426 json_clone_object(const struct shash *object)
427 {
428 struct shash_node *node;
429 struct json *json;
430
431 json = json_object_create();
432 SHASH_FOR_EACH (node, object) {
433 struct json *value = node->data;
434 json_object_put(json, node->name, json_clone(value));
435 }
436 return json;
437 }
438
439 static struct json *
440 json_clone_array(const struct json_array *array)
441 {
442 struct json **elems;
443 size_t i;
444
445 elems = xmalloc(array->n * sizeof *elems);
446 for (i = 0; i < array->n; i++) {
447 elems[i] = json_clone(array->elems[i]);
448 }
449 return json_array_create(elems, array->n);
450 }
451 \f
452 static size_t
453 json_hash_object(const struct shash *object, size_t basis)
454 {
455 const struct shash_node **nodes;
456 size_t n, i;
457
458 nodes = shash_sort(object);
459 n = shash_count(object);
460 for (i = 0; i < n; i++) {
461 const struct shash_node *node = nodes[i];
462 basis = hash_string(node->name, basis);
463 basis = json_hash(node->data, basis);
464 }
465 return basis;
466 }
467
468 static size_t
469 json_hash_array(const struct json_array *array, size_t basis)
470 {
471 size_t i;
472
473 basis = hash_int(array->n, basis);
474 for (i = 0; i < array->n; i++) {
475 basis = json_hash(array->elems[i], basis);
476 }
477 return basis;
478 }
479
480 size_t
481 json_hash(const struct json *json, size_t basis)
482 {
483 switch (json->type) {
484 case JSON_OBJECT:
485 return json_hash_object(json->u.object, basis);
486
487 case JSON_ARRAY:
488 return json_hash_array(&json->u.array, basis);
489
490 case JSON_STRING:
491 return hash_string(json->u.string, basis);
492
493 case JSON_NULL:
494 case JSON_FALSE:
495 case JSON_TRUE:
496 return hash_int(json->type << 8, basis);
497
498 case JSON_INTEGER:
499 return hash_int(json->u.integer, basis);
500
501 case JSON_REAL:
502 return hash_double(json->u.real, basis);
503
504 case JSON_N_TYPES:
505 default:
506 NOT_REACHED();
507 }
508 }
509
510 static bool
511 json_equal_object(const struct shash *a, const struct shash *b)
512 {
513 struct shash_node *a_node;
514
515 if (shash_count(a) != shash_count(b)) {
516 return false;
517 }
518
519 SHASH_FOR_EACH (a_node, a) {
520 struct shash_node *b_node = shash_find(b, a_node->name);
521 if (!b_node || !json_equal(a_node->data, b_node->data)) {
522 return false;
523 }
524 }
525
526 return true;
527 }
528
529 static bool
530 json_equal_array(const struct json_array *a, const struct json_array *b)
531 {
532 size_t i;
533
534 if (a->n != b->n) {
535 return false;
536 }
537
538 for (i = 0; i < a->n; i++) {
539 if (!json_equal(a->elems[i], b->elems[i])) {
540 return false;
541 }
542 }
543
544 return true;
545 }
546
547 bool
548 json_equal(const struct json *a, const struct json *b)
549 {
550 if (a->type != b->type) {
551 return false;
552 }
553
554 switch (a->type) {
555 case JSON_OBJECT:
556 return json_equal_object(a->u.object, b->u.object);
557
558 case JSON_ARRAY:
559 return json_equal_array(&a->u.array, &b->u.array);
560
561 case JSON_STRING:
562 return !strcmp(a->u.string, b->u.string);
563
564 case JSON_NULL:
565 case JSON_FALSE:
566 case JSON_TRUE:
567 return true;
568
569 case JSON_INTEGER:
570 return a->u.integer == b->u.integer;
571
572 case JSON_REAL:
573 return a->u.real == b->u.real;
574
575 case JSON_N_TYPES:
576 default:
577 NOT_REACHED();
578 }
579 }
580 \f
581 /* Lexical analysis. */
582
583 static void
584 json_lex_keyword(struct json_parser *p)
585 {
586 struct json_token token;
587 const char *s;
588
589 s = ds_cstr(&p->buffer);
590 if (!strcmp(s, "false")) {
591 token.type = T_FALSE;
592 } else if (!strcmp(s, "true")) {
593 token.type = T_TRUE;
594 } else if (!strcmp(s, "null")) {
595 token.type = T_NULL;
596 } else {
597 json_error(p, "invalid keyword '%s'", s);
598 return;
599 }
600 json_parser_input(p, &token);
601 }
602
603 static void
604 json_lex_number(struct json_parser *p)
605 {
606 const char *cp = ds_cstr(&p->buffer);
607 unsigned long long int significand = 0;
608 struct json_token token;
609 bool imprecise = false;
610 bool negative = false;
611 int pow10 = 0;
612
613 /* Leading minus sign. */
614 if (*cp == '-') {
615 negative = true;
616 cp++;
617 }
618
619 /* At least one integer digit, but 0 may not be used as a leading digit for
620 * a longer number. */
621 significand = 0;
622 if (*cp == '0') {
623 cp++;
624 if (isdigit(*cp)) {
625 json_error(p, "leading zeros not allowed");
626 return;
627 }
628 } else if (isdigit(*cp)) {
629 do {
630 if (significand <= ULLONG_MAX / 10) {
631 significand = significand * 10 + (*cp - '0');
632 } else {
633 pow10++;
634 if (*cp != '0') {
635 imprecise = true;
636 }
637 }
638 cp++;
639 } while (isdigit(*cp));
640 } else {
641 json_error(p, "'-' must be followed by digit");
642 return;
643 }
644
645 /* Optional fraction. */
646 if (*cp == '.') {
647 cp++;
648 if (!isdigit(*cp)) {
649 json_error(p, "decimal point must be followed by digit");
650 return;
651 }
652 do {
653 if (significand <= ULLONG_MAX / 10) {
654 significand = significand * 10 + (*cp - '0');
655 pow10--;
656 } else if (*cp != '0') {
657 imprecise = true;
658 }
659 cp++;
660 } while (isdigit(*cp));
661 }
662
663 /* Optional exponent. */
664 if (*cp == 'e' || *cp == 'E') {
665 bool negative_exponent = false;
666 int exponent;
667
668 cp++;
669 if (*cp == '+') {
670 cp++;
671 } else if (*cp == '-') {
672 negative_exponent = true;
673 cp++;
674 }
675
676 if (!isdigit(*cp)) {
677 json_error(p, "exponent must contain at least one digit");
678 return;
679 }
680
681 exponent = 0;
682 do {
683 if (exponent >= INT_MAX / 10) {
684 json_error(p, "exponent outside valid range");
685 return;
686 }
687 exponent = exponent * 10 + (*cp - '0');
688 cp++;
689 } while (isdigit(*cp));
690
691 if (negative_exponent) {
692 pow10 -= exponent;
693 } else {
694 pow10 += exponent;
695 }
696 }
697
698 if (*cp != '\0') {
699 json_error(p, "syntax error in number");
700 return;
701 }
702
703 /* Figure out number.
704 *
705 * We suppress negative zeros as a matter of policy. */
706 if (!significand) {
707 token.type = T_INTEGER;
708 token.u.integer = 0;
709 json_parser_input(p, &token);
710 return;
711 }
712
713 if (!imprecise) {
714 while (pow10 > 0 && significand < ULLONG_MAX / 10) {
715 significand *= 10;
716 pow10--;
717 }
718 while (pow10 < 0 && significand % 10 == 0) {
719 significand /= 10;
720 pow10++;
721 }
722 if (pow10 == 0
723 && significand <= (negative
724 ? (unsigned long long int) LLONG_MAX + 1
725 : LLONG_MAX)) {
726 token.type = T_INTEGER;
727 token.u.integer = negative ? -significand : significand;
728 json_parser_input(p, &token);
729 return;
730 }
731 }
732
733 token.type = T_REAL;
734 if (!str_to_double(ds_cstr(&p->buffer), &token.u.real)) {
735 json_error(p, "number outside valid range");
736 return;
737 }
738 /* Suppress negative zero. */
739 if (token.u.real == 0) {
740 token.u.real = 0;
741 }
742 json_parser_input(p, &token);
743 }
744
/* Parses the 4 hexadecimal digits of a \u escape that start at 'cp'.  'end'
 * points just past the last byte of the enclosing string.
 *
 * On success, stores the value in '*valuep' and returns NULL.  On failure,
 * returns a constant error message and leaves '*valuep' untouched.  A value
 * of 0 is treated as a failure.
 *
 * The remaining length is computed as 'end - cp' rather than by comparing
 * 'cp + 4' against 'end', because 'cp + 4' may point more than one byte past
 * the end of the buffer, which C does not allow even if the pointer is never
 * dereferenced. */
static const char *
json_lex_4hex(const char *cp, const char *end, int *valuep)
{
    unsigned int value;

    if (end - cp < 4) {
        return "quoted string ends within \\u escape";
    }

    value = hexits_value(cp, 4, NULL);
    if (value == UINT_MAX) {
        return "malformed \\u escape";
    }
    if (!value) {
        return "null bytes not supported in quoted strings";
    }
    *valuep = value;
    return NULL;
}
764
/* Decodes the \u escape whose 4 hex digits start at 'cp' (that is, 'cp'
 * points just past the "\u").  'end' points just past the last byte of the
 * enclosing string.  If the escape is a leading surrogate, a second \u
 * escape holding the trailing surrogate must follow immediately, and the
 * pair is decoded as a single code point.
 *
 * On success, appends the code point to 'out' in UTF-8 and returns a pointer
 * to the first byte after the escape(s).  On failure, replaces the contents
 * of 'out' by an error message and returns NULL.
 *
 * The remaining length is computed as 'end - cp' rather than by comparing
 * 'cp + 2' against 'end', because 'cp + 2' may point more than one byte past
 * the end of the buffer, which C does not allow. */
static const char *
json_lex_unicode(const char *cp, const char *end, struct ds *out)
{
    const char *error;
    int c0, c1;

    error = json_lex_4hex(cp, end, &c0);
    if (error) {
        goto fail;
    }
    cp += 4;
    if (!uc_is_leading_surrogate(c0)) {
        ds_put_utf8(out, c0);
        return cp;
    }

    if (end - cp < 2 || cp[0] != '\\' || cp[1] != 'u') {
        error = "malformed escaped surrogate pair";
        goto fail;
    }
    cp += 2;

    error = json_lex_4hex(cp, end, &c1);
    if (error) {
        goto fail;
    }
    cp += 4;
    if (!uc_is_trailing_surrogate(c1)) {
        error = "second half of escaped surrogate pair is not "
                "trailing surrogate";
        goto fail;
    }

    ds_put_utf8(out, utf16_decode_surrogate_pair(c0, c1));
    return cp;

fail:
    ds_clear(out);
    ds_put_cstr(out, error);
    return NULL;
}
806
807 bool
808 json_string_unescape(const char *in, size_t in_len, char **outp)
809 {
810 const char *end = in + in_len;
811 bool ok = false;
812 struct ds out;
813
814 ds_init(&out);
815 ds_reserve(&out, in_len);
816 if (in_len > 0 && in[in_len - 1] == '\\') {
817 ds_put_cstr(&out, "quoted string may not end with backslash");
818 goto exit;
819 }
820 while (in < end) {
821 if (*in == '"') {
822 ds_clear(&out);
823 ds_put_cstr(&out, "quoted string may not include unescaped \"");
824 goto exit;
825 }
826 if (*in != '\\') {
827 ds_put_char(&out, *in++);
828 continue;
829 }
830
831 in++;
832 switch (*in++) {
833 case '"': case '\\': case '/':
834 ds_put_char(&out, in[-1]);
835 break;
836
837 case 'b':
838 ds_put_char(&out, '\b');
839 break;
840
841 case 'f':
842 ds_put_char(&out, '\f');
843 break;
844
845 case 'n':
846 ds_put_char(&out, '\n');
847 break;
848
849 case 'r':
850 ds_put_char(&out, '\r');
851 break;
852
853 case 't':
854 ds_put_char(&out, '\t');
855 break;
856
857 case 'u':
858 in = json_lex_unicode(in, end, &out);
859 if (!in) {
860 goto exit;
861 }
862 break;
863
864 default:
865 ds_clear(&out);
866 ds_put_format(&out, "bad escape \\%c", in[-1]);
867 goto exit;
868 }
869 }
870 ok = true;
871
872 exit:
873 *outp = ds_cstr(&out);
874 return ok;
875 }
876
877 static void
878 json_parser_input_string(struct json_parser *p, const char *s)
879 {
880 struct json_token token;
881
882 token.type = T_STRING;
883 token.u.string = s;
884 json_parser_input(p, &token);
885 }
886
887 static void
888 json_lex_string(struct json_parser *p)
889 {
890 const char *raw = ds_cstr(&p->buffer);
891 if (!strchr(raw, '\\')) {
892 json_parser_input_string(p, raw);
893 } else {
894 char *cooked;
895
896 if (json_string_unescape(raw, strlen(raw), &cooked)) {
897 json_parser_input_string(p, cooked);
898 } else {
899 json_error(p, "%s", cooked);
900 }
901
902 free(cooked);
903 }
904 }
905
906 static bool
907 json_lex_input(struct json_parser *p, unsigned char c)
908 {
909 struct json_token token;
910
911 switch (p->lex_state) {
912 case JSON_LEX_START:
913 switch (c) {
914 case ' ': case '\t': case '\n': case '\r':
915 /* Nothing to do. */
916 return true;
917
918 case 'a': case 'b': case 'c': case 'd': case 'e':
919 case 'f': case 'g': case 'h': case 'i': case 'j':
920 case 'k': case 'l': case 'm': case 'n': case 'o':
921 case 'p': case 'q': case 'r': case 's': case 't':
922 case 'u': case 'v': case 'w': case 'x': case 'y':
923 case 'z':
924 p->lex_state = JSON_LEX_KEYWORD;
925 break;
926
927 case '[': case '{': case ']': case '}': case ':': case ',':
928 token.type = c;
929 json_parser_input(p, &token);
930 return true;
931
932 case '-':
933 case '0': case '1': case '2': case '3': case '4':
934 case '5': case '6': case '7': case '8': case '9':
935 p->lex_state = JSON_LEX_NUMBER;
936 break;
937
938 case '"':
939 p->lex_state = JSON_LEX_STRING;
940 return true;
941
942 default:
943 if (isprint(c)) {
944 json_error(p, "invalid character '%c'", c);
945 } else {
946 json_error(p, "invalid character U+%04x", c);
947 }
948 return true;
949 }
950 break;
951
952 case JSON_LEX_KEYWORD:
953 if (!isalpha((unsigned char) c)) {
954 json_lex_keyword(p);
955 return false;
956 }
957 break;
958
959 case JSON_LEX_NUMBER:
960 if (!strchr(".0123456789eE-+", c)) {
961 json_lex_number(p);
962 return false;
963 }
964 break;
965
966 case JSON_LEX_STRING:
967 if (c == '\\') {
968 p->lex_state = JSON_LEX_ESCAPE;
969 } else if (c == '"') {
970 json_lex_string(p);
971 return true;
972 } else if (c < 0x20) {
973 json_error(p, "U+%04X must be escaped in quoted string", c);
974 return true;
975 }
976 break;
977
978 case JSON_LEX_ESCAPE:
979 p->lex_state = JSON_LEX_STRING;
980 break;
981
982 default:
983 abort();
984 }
985 ds_put_char(&p->buffer, c);
986 return true;
987 }
988 \f
989 /* Parsing. */
990
991 /* Parses 'string' as a JSON object or array and returns a newly allocated
992 * 'struct json'. The caller must free the returned structure with
993 * json_destroy() when it is no longer needed.
994 *
995 * 'string' must be encoded in UTF-8.
996 *
997 * If 'string' is valid JSON, then the returned 'struct json' will be either an
998 * object (JSON_OBJECT) or an array (JSON_ARRAY).
999 *
1000 * If 'string' is not valid JSON, then the returned 'struct json' will be a
1001 * string (JSON_STRING) that describes the particular error encountered during
1002 * parsing. (This is an acceptable means of error reporting because at its top
1003 * level JSON must be either an object or an array; a bare string is not
1004 * valid.) */
1005 struct json *
1006 json_from_string(const char *string)
1007 {
1008 struct json_parser *p = json_parser_create(JSPF_TRAILER);
1009 json_parser_feed(p, string, strlen(string));
1010 return json_parser_finish(p);
1011 }
1012
/* Parses the contents of the UTF-8 encoded file named 'file_name' as JSON and
 * returns the result as a newly allocated 'struct json', which the caller
 * must eventually pass to json_destroy().
 *
 * The return value follows the conventions of json_from_string().  Failure
 * to open the file is likewise reported as a JSON string that describes the
 * error. */
struct json *
json_from_file(const char *file_name)
{
    FILE *stream = fopen(file_name, "r");
    struct json *json;

    if (stream == NULL) {
        return json_string_create_nocopy(
            xasprintf("error opening \"%s\": %s", file_name, strerror(errno)));
    }

    json = json_from_stream(stream);
    fclose(stream);
    return json;
}
1037
1038 /* Parses the contents of 'stream' as a JSON object or array, and returns a
1039 * newly allocated 'struct json'. The caller must free the returned structure
1040 * with json_destroy() when it is no longer needed.
1041 *
1042 * The file must be encoded in UTF-8.
1043 *
1044 * See json_from_string() for return value semantics.
1045 */
1046 struct json *
1047 json_from_stream(FILE *stream)
1048 {
1049 struct json_parser *p;
1050 struct json *json;
1051
1052 p = json_parser_create(JSPF_TRAILER);
1053 for (;;) {
1054 char buffer[BUFSIZ];
1055 size_t n;
1056
1057 n = fread(buffer, 1, sizeof buffer, stream);
1058 if (!n || json_parser_feed(p, buffer, n) != n) {
1059 break;
1060 }
1061 }
1062 json = json_parser_finish(p);
1063
1064 if (ferror(stream)) {
1065 json_destroy(json);
1066 json = json_string_create_nocopy(
1067 xasprintf("error reading JSON stream: %s", strerror(errno)));
1068 }
1069
1070 return json;
1071 }
1072
1073 struct json_parser *
1074 json_parser_create(int flags)
1075 {
1076 struct json_parser *p = xzalloc(sizeof *p);
1077 p->flags = flags;
1078 return p;
1079 }
1080
1081 size_t
1082 json_parser_feed(struct json_parser *p, const char *input, size_t n)
1083 {
1084 size_t i;
1085 for (i = 0; !p->done && i < n; ) {
1086 if (json_lex_input(p, input[i])) {
1087 p->byte_number++;
1088 if (input[i] == '\n') {
1089 p->column_number = 0;
1090 p->line_number++;
1091 } else {
1092 p->column_number++;
1093 }
1094 i++;
1095 }
1096 }
1097 return i;
1098 }
1099
1100 bool
1101 json_parser_is_done(const struct json_parser *p)
1102 {
1103 return p->done;
1104 }
1105
1106 struct json *
1107 json_parser_finish(struct json_parser *p)
1108 {
1109 struct json *json;
1110
1111 switch (p->lex_state) {
1112 case JSON_LEX_START:
1113 break;
1114
1115 case JSON_LEX_STRING:
1116 case JSON_LEX_ESCAPE:
1117 json_error(p, "unexpected end of input in quoted string");
1118 break;
1119
1120 case JSON_LEX_NUMBER:
1121 case JSON_LEX_KEYWORD:
1122 json_lex_input(p, ' ');
1123 break;
1124 }
1125
1126 if (p->parse_state == JSON_PARSE_START) {
1127 json_error(p, "empty input stream");
1128 } else if (p->parse_state != JSON_PARSE_END) {
1129 json_error(p, "unexpected end of input");
1130 }
1131
1132 if (!p->error) {
1133 assert(p->height == 1);
1134 assert(p->stack[0].json != NULL);
1135 json = p->stack[--p->height].json;
1136 } else {
1137 json = json_string_create_nocopy(p->error);
1138 p->error = NULL;
1139 }
1140
1141 json_parser_abort(p);
1142
1143 return json;
1144 }
1145
1146 void
1147 json_parser_abort(struct json_parser *p)
1148 {
1149 if (p) {
1150 ds_destroy(&p->buffer);
1151 if (p->height) {
1152 json_destroy(p->stack[0].json);
1153 }
1154 free(p->stack);
1155 free(p->member_name);
1156 free(p->error);
1157 free(p);
1158 }
1159 }
1160
1161 static struct json_parser_node *
1162 json_parser_top(struct json_parser *p)
1163 {
1164 return &p->stack[p->height - 1];
1165 }
1166
1167 static void
1168 json_parser_put_value(struct json_parser *p, struct json *value)
1169 {
1170 struct json_parser_node *node = json_parser_top(p);
1171 if (node->json->type == JSON_OBJECT) {
1172 json_object_put(node->json, p->member_name, value);
1173 free(p->member_name);
1174 p->member_name = NULL;
1175 } else if (node->json->type == JSON_ARRAY) {
1176 json_array_add(node->json, value);
1177 } else {
1178 NOT_REACHED();
1179 }
1180 }
1181
1182 static void
1183 json_parser_push(struct json_parser *p,
1184 struct json *new_json, enum json_parse_state new_state)
1185 {
1186 if (p->height < JSON_MAX_HEIGHT) {
1187 struct json_parser_node *node;
1188
1189 if (p->height >= p->allocated_height) {
1190 p->stack = x2nrealloc(p->stack, &p->allocated_height,
1191 sizeof *p->stack);
1192 }
1193
1194 if (p->height > 0) {
1195 json_parser_put_value(p, new_json);
1196 }
1197
1198 node = &p->stack[p->height++];
1199 node->json = new_json;
1200 p->parse_state = new_state;
1201 } else {
1202 json_destroy(new_json);
1203 json_error(p, "input exceeds maximum nesting depth %d",
1204 JSON_MAX_HEIGHT);
1205 }
1206 }
1207
1208 static void
1209 json_parser_push_object(struct json_parser *p)
1210 {
1211 json_parser_push(p, json_object_create(), JSON_PARSE_OBJECT_INIT);
1212 }
1213
1214 static void
1215 json_parser_push_array(struct json_parser *p)
1216 {
1217 json_parser_push(p, json_array_create_empty(), JSON_PARSE_ARRAY_INIT);
1218 }
1219
1220 static void
1221 json_parse_value(struct json_parser *p, struct json_token *token,
1222 enum json_parse_state next_state)
1223 {
1224 struct json *value;
1225
1226 switch (token->type) {
1227 case T_FALSE:
1228 value = json_boolean_create(false);
1229 break;
1230
1231 case T_NULL:
1232 value = json_null_create();
1233 break;
1234
1235 case T_TRUE:
1236 value = json_boolean_create(true);
1237 break;
1238
1239 case '{':
1240 json_parser_push_object(p);
1241 return;
1242
1243 case '[':
1244 json_parser_push_array(p);
1245 return;
1246
1247 case T_INTEGER:
1248 value = json_integer_create(token->u.integer);
1249 break;
1250
1251 case T_REAL:
1252 value = json_real_create(token->u.real);
1253 break;
1254
1255 case T_STRING:
1256 value = json_string_create(token->u.string);
1257 break;
1258
1259 case T_EOF:
1260 case '}':
1261 case ']':
1262 case ':':
1263 case ',':
1264 default:
1265 json_error(p, "syntax error expecting value");
1266 return;
1267 }
1268
1269 json_parser_put_value(p, value);
1270 p->parse_state = next_state;
1271 }
1272
1273 static void
1274 json_parser_pop(struct json_parser *p)
1275 {
1276 struct json_parser_node *node;
1277
1278 /* Conserve memory. */
1279 node = json_parser_top(p);
1280 if (node->json->type == JSON_ARRAY) {
1281 json_array_trim(node->json);
1282 }
1283
1284 /* Pop off the top-of-stack. */
1285 if (p->height == 1) {
1286 p->parse_state = JSON_PARSE_END;
1287 if (!(p->flags & JSPF_TRAILER)) {
1288 p->done = true;
1289 }
1290 } else {
1291 p->height--;
1292 node = json_parser_top(p);
1293 if (node->json->type == JSON_ARRAY) {
1294 p->parse_state = JSON_PARSE_ARRAY_NEXT;
1295 } else if (node->json->type == JSON_OBJECT) {
1296 p->parse_state = JSON_PARSE_OBJECT_NEXT;
1297 } else {
1298 NOT_REACHED();
1299 }
1300 }
1301 }
1302
1303 static void
1304 json_parser_input(struct json_parser *p, struct json_token *token)
1305 {
1306 switch (p->parse_state) {
1307 case JSON_PARSE_START:
1308 if (token->type == '{') {
1309 json_parser_push_object(p);
1310 } else if (token->type == '[') {
1311 json_parser_push_array(p);
1312 } else {
1313 json_error(p, "syntax error at beginning of input");
1314 }
1315 break;
1316
1317 case JSON_PARSE_END:
1318 json_error(p, "trailing garbage at end of input");
1319 break;
1320
1321 case JSON_PARSE_OBJECT_INIT:
1322 if (token->type == '}') {
1323 json_parser_pop(p);
1324 break;
1325 }
1326 /* Fall through. */
1327 case JSON_PARSE_OBJECT_NAME:
1328 if (token->type == T_STRING) {
1329 p->member_name = xstrdup(token->u.string);
1330 p->parse_state = JSON_PARSE_OBJECT_COLON;
1331 } else {
1332 json_error(p, "syntax error parsing object expecting string");
1333 }
1334 break;
1335
1336 case JSON_PARSE_OBJECT_COLON:
1337 if (token->type == ':') {
1338 p->parse_state = JSON_PARSE_OBJECT_VALUE;
1339 } else {
1340 json_error(p, "syntax error parsing object expecting ':'");
1341 }
1342 break;
1343
1344 case JSON_PARSE_OBJECT_VALUE:
1345 json_parse_value(p, token, JSON_PARSE_OBJECT_NEXT);
1346 break;
1347
1348 case JSON_PARSE_OBJECT_NEXT:
1349 if (token->type == ',') {
1350 p->parse_state = JSON_PARSE_OBJECT_NAME;
1351 } else if (token->type == '}') {
1352 json_parser_pop(p);
1353 } else {
1354 json_error(p, "syntax error expecting '}' or ','");
1355 }
1356 break;
1357
1358 case JSON_PARSE_ARRAY_INIT:
1359 if (token->type == ']') {
1360 json_parser_pop(p);
1361 break;
1362 }
1363 /* Fall through. */
1364 case JSON_PARSE_ARRAY_VALUE:
1365 json_parse_value(p, token, JSON_PARSE_ARRAY_NEXT);
1366 break;
1367
1368 case JSON_PARSE_ARRAY_NEXT:
1369 if (token->type == ',') {
1370 p->parse_state = JSON_PARSE_ARRAY_VALUE;
1371 } else if (token->type == ']') {
1372 json_parser_pop(p);
1373 } else {
1374 json_error(p, "syntax error expecting ']' or ','");
1375 }
1376 break;
1377
1378 default:
1379 abort();
1380 }
1381
1382 p->lex_state = JSON_LEX_START;
1383 ds_clear(&p->buffer);
1384 }
1385
1386 static struct json *
1387 json_create(enum json_type type)
1388 {
1389 struct json *json = xmalloc(sizeof *json);
1390 json->type = type;
1391 return json;
1392 }
1393
1394 static void
1395 json_error(struct json_parser *p, const char *format, ...)
1396 {
1397 if (!p->error) {
1398 struct ds msg;
1399 va_list args;
1400
1401 ds_init(&msg);
1402 ds_put_format(&msg, "line %d, column %d, byte %d: ",
1403 p->line_number, p->column_number, p->byte_number);
1404 va_start(args, format);
1405 ds_put_format_valist(&msg, format, args);
1406 va_end(args);
1407
1408 p->error = ds_steal_cstr(&msg);
1409
1410 p->done = true;
1411 }
1412 }
1413 \f
/* Number of spaces that each nesting level adds to the indentation of
 * pretty-printed output; indent_line() multiplies it by the current depth. */
1414 #define SPACES_PER_LEVEL 2
1415
/* State shared by the json_serialize*() functions while producing output. */
1416 struct json_serializer {
1417 struct ds *ds; /* Output is appended here. */
1418 int depth; /* Current nesting level; scales the indentation. */
1419 int flags; /* JSSF_* flags, as passed to json_to_ds(). */
1420 };
1421
/* Forward declarations: json_serialize() and the object/array serializers
 * call one another recursively. */
1422 static void json_serialize(const struct json *, struct json_serializer *);
1423 static void json_serialize_object(const struct shash *object,
1424 struct json_serializer *);
1425 static void json_serialize_array(const struct json_array *,
1426 struct json_serializer *);
1427 static void json_serialize_string(const char *, struct ds *);
1428
1429 /* Converts 'json' to a string in JSON format, encoded in UTF-8, and returns
1430 * that string. The caller is responsible for freeing the returned string,
1431 * with free(), when it is no longer needed.
1432 *
1433 * If 'flags' contains JSSF_PRETTY, the output is pretty-printed with each
1434 * nesting level introducing an additional indentation. Otherwise, the
1435 * returned string does not contain any new-line characters.
1436 *
1437 * If 'flags' contains JSSF_SORT, members of objects in the output are sorted
1438 * in bytewise lexicographic order for reproducibility. Otherwise, members of
1439 * objects are output in an indeterminate order.
1440 *
1441 * The returned string is valid JSON only if 'json' represents an array or an
1442 * object, since a bare literal does not satisfy the JSON grammar. */
1443 char *
1444 json_to_string(const struct json *json, int flags)
1445 {
1446 struct ds ds;
1447
1448 ds_init(&ds);
1449 json_to_ds(json, flags, &ds);
1450 return ds_steal_cstr(&ds);
1451 }
1452
1453 /* Same as json_to_string(), but the output is appended to 'ds'. */
1454 void
1455 json_to_ds(const struct json *json, int flags, struct ds *ds)
1456 {
1457 struct json_serializer s;
1458
1459 s.ds = ds;
1460 s.depth = 0;
1461 s.flags = flags;
1462 json_serialize(json, &s);
1463 }
1464
1465 static void
1466 json_serialize(const struct json *json, struct json_serializer *s)
1467 {
1468 struct ds *ds = s->ds;
1469
1470 switch (json->type) {
1471 case JSON_NULL:
1472 ds_put_cstr(ds, "null");
1473 break;
1474
1475 case JSON_FALSE:
1476 ds_put_cstr(ds, "false");
1477 break;
1478
1479 case JSON_TRUE:
1480 ds_put_cstr(ds, "true");
1481 break;
1482
1483 case JSON_OBJECT:
1484 json_serialize_object(json->u.object, s);
1485 break;
1486
1487 case JSON_ARRAY:
1488 json_serialize_array(&json->u.array, s);
1489 break;
1490
1491 case JSON_INTEGER:
1492 ds_put_format(ds, "%lld", json->u.integer);
1493 break;
1494
1495 case JSON_REAL:
1496 ds_put_format(ds, "%.*g", DBL_DIG, json->u.real);
1497 break;
1498
1499 case JSON_STRING:
1500 json_serialize_string(json->u.string, ds);
1501 break;
1502
1503 case JSON_N_TYPES:
1504 default:
1505 NOT_REACHED();
1506 }
1507 }
1508
1509 static void
1510 indent_line(struct json_serializer *s)
1511 {
1512 if (s->flags & JSSF_PRETTY) {
1513 ds_put_char(s->ds, '\n');
1514 ds_put_char_multiple(s->ds, ' ', SPACES_PER_LEVEL * s->depth);
1515 }
1516 }
1517
1518 static void
1519 json_serialize_object_member(size_t i, const struct shash_node *node,
1520 struct json_serializer *s)
1521 {
1522 struct ds *ds = s->ds;
1523
1524 if (i) {
1525 ds_put_char(ds, ',');
1526 indent_line(s);
1527 }
1528
1529 json_serialize_string(node->name, ds);
1530 ds_put_char(ds, ':');
1531 if (s->flags & JSSF_PRETTY) {
1532 ds_put_char(ds, ' ');
1533 }
1534 json_serialize(node->data, s);
1535 }
1536
1537 static void
1538 json_serialize_object(const struct shash *object, struct json_serializer *s)
1539 {
1540 struct ds *ds = s->ds;
1541
1542 ds_put_char(ds, '{');
1543
1544 s->depth++;
1545 indent_line(s);
1546
1547 if (s->flags & JSSF_SORT) {
1548 const struct shash_node **nodes;
1549 size_t n, i;
1550
1551 nodes = shash_sort(object);
1552 n = shash_count(object);
1553 for (i = 0; i < n; i++) {
1554 json_serialize_object_member(i, nodes[i], s);
1555 }
1556 free(nodes);
1557 } else {
1558 struct shash_node *node;
1559 size_t i;
1560
1561 i = 0;
1562 SHASH_FOR_EACH (node, object) {
1563 json_serialize_object_member(i++, node, s);
1564 }
1565 }
1566
1567 ds_put_char(ds, '}');
1568 s->depth--;
1569 }
1570
1571 static void
1572 json_serialize_array(const struct json_array *array, struct json_serializer *s)
1573 {
1574 struct ds *ds = s->ds;
1575 size_t i;
1576
1577 ds_put_char(ds, '[');
1578 s->depth++;
1579
1580 if (array->n > 0) {
1581 indent_line(s);
1582
1583 for (i = 0; i < array->n; i++) {
1584 if (i) {
1585 ds_put_char(ds, ',');
1586 indent_line(s);
1587 }
1588 json_serialize(array->elems[i], s);
1589 }
1590 }
1591
1592 s->depth--;
1593 ds_put_char(ds, ']');
1594 }
1595
/* Appends 'string' to 'ds' as a double-quoted JSON string.
 *
 * The input is processed one byte at a time up to its null terminator:
 *
 *   - '"' and '\' are escaped with a backslash.
 *
 *   - Backspace, form feed, new-line, carriage return and tab use their
 *     two-character escapes (\b, \f, \n, \r, \t).
 *
 *   - Any other byte below 32 is written as a \uXXXX escape with four
 *     lowercase hexadecimal digits.
 *
 *   - Every remaining byte (32 and above, including bytes with the high bit
 *     set) is copied through unchanged. */
static void
json_serialize_string(const char *string, struct ds *ds)
{
    const char *p;

    ds_put_char(ds, '"');
    for (p = string; *p != '\0'; p++) {
        /* Work on an unsigned byte so that the range test below does not
         * depend on the signedness of plain 'char'. */
        uint8_t byte = *p;
        const char *escape = NULL;

        switch (byte) {
        case '"':
            escape = "\\\"";
            break;
        case '\\':
            escape = "\\\\";
            break;
        case '\b':
            escape = "\\b";
            break;
        case '\f':
            escape = "\\f";
            break;
        case '\n':
            escape = "\\n";
            break;
        case '\r':
            escape = "\\r";
            break;
        case '\t':
            escape = "\\t";
            break;
        default:
            break;
        }

        if (escape) {
            ds_put_cstr(ds, escape);
        } else if (byte >= 32) {
            ds_put_char(ds, byte);
        } else {
            ds_put_format(ds, "\\u%04x", byte);
        }
    }
    ds_put_char(ds, '"');
}