]> git.proxmox.com Git - ovs.git/blame - lib/json.c
json: New function json_to_ds().
[ovs.git] / lib / json.c
CommitLineData
f38b84ea 1/*
7d23a63a 2 * Copyright (c) 2009, 2010 Nicira Networks.
f38b84ea
BP
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
18
19#include "json.h"
20
21#include <assert.h>
22#include <ctype.h>
23#include <errno.h>
24#include <float.h>
25#include <limits.h>
26#include <math.h>
27#include <string.h>
28
29#include "dynamic-string.h"
30#include "hash.h"
31#include "shash.h"
32#include "unicode.h"
33#include "util.h"
34
/* Kinds of token that the lexer hands to the parser.
 *
 * Punctuation tokens use their own character code as the enumerator value, so
 * the lexer can store the input byte directly as the token type.  The
 * remaining kinds are numbered from UCHAR_MAX + 1 upward so that they cannot
 * collide with any single-byte punctuation code. */
enum json_token_type {
    T_EOF = 0,
    T_BEGIN_ARRAY = '[',
    T_END_ARRAY = ']',
    T_BEGIN_OBJECT = '{',
    T_END_OBJECT = '}',
    T_NAME_SEPARATOR = ':',
    T_VALUE_SEPARATOR = ',',
    T_FALSE = UCHAR_MAX + 1,
    T_NULL,
    T_TRUE,
    T_INTEGER,
    T_REAL,
    T_STRING
};
51
52/* A JSON token.
53 *
54 * RFC 4627 doesn't define a lexical structure for JSON but I believe this to
55 * be compliant with the standard.
56 */
57struct json_token {
58 enum json_token_type type;
59 union {
60 double real;
61 long long int integer;
62 const char *string;
63 } u;
64};
65
/* Lexer states: which kind of token, if any, is being accumulated. */
enum json_lex_state {
    JSON_LEX_START,             /* Not inside a token. */
    JSON_LEX_NUMBER,            /* Reading a number. */
    JSON_LEX_KEYWORD,           /* Reading a keyword. */
    JSON_LEX_STRING,            /* Reading a quoted string. */
    JSON_LEX_ESCAPE             /* In a quoted string just after a "\". */
};
73
/* Parser states: what the parser expects the next token to be. */
enum json_parse_state {
    JSON_PARSE_START,           /* Beginning of input. */
    JSON_PARSE_END,             /* End of input. */

    /* Objects. */
    JSON_PARSE_OBJECT_INIT,     /* Expecting '}' or an object name. */
    JSON_PARSE_OBJECT_NAME,     /* Expecting an object name. */
    JSON_PARSE_OBJECT_COLON,    /* Expecting ':'. */
    JSON_PARSE_OBJECT_VALUE,    /* Expecting an object value. */
    JSON_PARSE_OBJECT_NEXT,     /* Expecting ',' or '}'. */

    /* Arrays. */
    JSON_PARSE_ARRAY_INIT,      /* Expecting ']' or a value. */
    JSON_PARSE_ARRAY_VALUE,     /* Expecting a value. */
    JSON_PARSE_ARRAY_NEXT       /* Expecting ',' or ']'. */
};
90
/* One level of the parser's nesting stack: the object or array that is
 * currently being filled in at that depth. */
struct json_parser_node {
    struct json *json;
};
94
95/* A JSON parser. */
96struct json_parser {
97 int flags;
98
99 /* Lexical analysis. */
100 enum json_lex_state lex_state;
101 struct ds buffer; /* Buffer for accumulating token text. */
0bdf342a
BP
102 int line_number;
103 int column_number;
104 int byte_number;
f38b84ea
BP
105
106 /* Parsing. */
107 enum json_parse_state parse_state;
108#define JSON_MAX_HEIGHT 1000
109 struct json_parser_node *stack;
110 size_t height, allocated_height;
111 char *member_name;
112
113 /* Parse status. */
114 bool done;
115 char *error; /* Error message, if any, null if none yet. */
116};
117
118static struct json *json_create(enum json_type type);
119static void json_parser_input(struct json_parser *, struct json_token *);
120
121static void json_error(struct json_parser *p, const char *format, ...)
122 PRINTF_FORMAT(2, 3);
123\f
124const char *
125json_type_to_string(enum json_type type)
126{
127 switch (type) {
128 case JSON_NULL:
129 return "null";
130
131 case JSON_FALSE:
132 return "false";
133
134 case JSON_TRUE:
135 return "true";
136
137 case JSON_OBJECT:
138 return "object";
139
140 case JSON_ARRAY:
141 return "array";
142
143 case JSON_INTEGER:
144 case JSON_REAL:
145 return "number";
146
147 case JSON_STRING:
148 return "string";
149
150 case JSON_N_TYPES:
151 default:
152 return "<invalid>";
153 }
154}
155\f
156/* Functions for manipulating struct json. */
157
158struct json *
159json_null_create(void)
160{
161 return json_create(JSON_NULL);
162}
163
164struct json *
165json_boolean_create(bool b)
166{
167 return json_create(b ? JSON_TRUE : JSON_FALSE);
168}
169
170struct json *
171json_string_create_nocopy(char *s)
172{
173 struct json *json = json_create(JSON_STRING);
174 json->u.string = s;
175 return json;
176}
177
/* Returns a new JSON string that holds a private copy of 's'. */
struct json *
json_string_create(const char *s)
{
    char *copy = xstrdup(s);

    return json_string_create_nocopy(copy);
}
183
/* Returns a new JSON array that has no elements and no element storage. */
struct json *
json_array_create_empty(void)
{
    /* A null element vector with zero length and zero capacity is exactly the
     * empty array. */
    return json_array_create(NULL, 0);
}
193
194void
195json_array_add(struct json *array_, struct json *element)
196{
197 struct json_array *array = json_array(array_);
198 if (array->n >= array->n_allocated) {
199 array->elems = x2nrealloc(array->elems, &array->n_allocated,
200 sizeof *array->elems);
201 }
202 array->elems[array->n++] = element;
203}
204
205void
206json_array_trim(struct json *array_)
207{
208 struct json_array *array = json_array(array_);
209 if (array->n < array->n_allocated){
210 array->n_allocated = array->n;
211 array->elems = xrealloc(array->elems, array->n * sizeof *array->elems);
212 }
213}
214
215struct json *
216json_array_create(struct json **elements, size_t n)
217{
218 struct json *json = json_create(JSON_ARRAY);
219 json->u.array.elems = elements;
220 json->u.array.n = n;
221 json->u.array.n_allocated = n;
222 return json;
223}
224
f6f8c3ba
BP
/* Returns a new one-element JSON array that takes ownership of 'elem0'. */
struct json *
json_array_create_1(struct json *elem0)
{
    struct json **vec = xmalloc(1 * sizeof *vec);

    vec[0] = elem0;
    return json_array_create(vec, 1);
}
232
f38b84ea
BP
/* Returns a new two-element JSON array that takes ownership of 'elem0' and
 * 'elem1', in that order. */
struct json *
json_array_create_2(struct json *elem0, struct json *elem1)
{
    const size_t n = 2;
    struct json **vec = xmalloc(n * sizeof *vec);

    vec[0] = elem0;
    vec[1] = elem1;
    return json_array_create(vec, n);
}
241
/* Returns a new three-element JSON array that takes ownership of 'elem0',
 * 'elem1' and 'elem2', in that order. */
struct json *
json_array_create_3(struct json *elem0, struct json *elem1, struct json *elem2)
{
    const size_t n = 3;
    struct json **vec = xmalloc(n * sizeof *vec);

    vec[0] = elem0;
    vec[1] = elem1;
    vec[2] = elem2;
    return json_array_create(vec, n);
}
251
252struct json *
253json_object_create(void)
254{
255 struct json *json = json_create(JSON_OBJECT);
256 json->u.object = xmalloc(sizeof *json->u.object);
257 shash_init(json->u.object);
258 return json;
259}
260
261struct json *
262json_integer_create(long long int integer)
263{
264 struct json *json = json_create(JSON_INTEGER);
265 json->u.integer = integer;
266 return json;
267}
268
269struct json *
270json_real_create(double real)
271{
272 struct json *json = json_create(JSON_REAL);
273 json->u.real = real;
274 return json;
275}
276
277void
278json_object_put(struct json *json, const char *name, struct json *value)
279{
280 shash_add(json->u.object, name, value);
281}
282
/* Adds a member called 'name' to JSON object 'json' whose value is a new JSON
 * string holding a copy of 'value'. */
void
json_object_put_string(struct json *json, const char *name, const char *value)
{
    struct json *string = json_string_create(value);

    json_object_put(json, name, string);
}
288
289const char *
290json_string(const struct json *json)
291{
292 assert(json->type == JSON_STRING);
293 return json->u.string;
294}
295
296struct json_array *
297json_array(const struct json *json)
298{
299 assert(json->type == JSON_ARRAY);
300 return (struct json_array *) &json->u.array;
301}
302
303struct shash *
304json_object(const struct json *json)
305{
306 assert(json->type == JSON_OBJECT);
307 return (struct shash *) json->u.object;
308}
309
310bool
311json_boolean(const struct json *json)
312{
313 assert(json->type == JSON_TRUE || json->type == JSON_FALSE);
314 return json->type == JSON_TRUE;
315}
316
317double
318json_real(const struct json *json)
319{
320 assert(json->type == JSON_REAL || json->type == JSON_INTEGER);
321 return json->type == JSON_REAL ? json->u.real : json->u.integer;
322}
323
324int64_t
325json_integer(const struct json *json)
326{
327 assert(json->type == JSON_INTEGER);
328 return json->u.integer;
329}
330\f
331static void json_destroy_object(struct shash *object);
332static void json_destroy_array(struct json_array *array);
333
334/* Frees 'json' and everything it points to, recursively. */
335void
336json_destroy(struct json *json)
337{
338 if (json) {
339 switch (json->type) {
340 case JSON_OBJECT:
341 json_destroy_object(json->u.object);
342 break;
343
344 case JSON_ARRAY:
345 json_destroy_array(&json->u.array);
346 break;
347
348 case JSON_STRING:
349 free(json->u.string);
350 break;
351
352 case JSON_NULL:
353 case JSON_FALSE:
354 case JSON_TRUE:
355 case JSON_INTEGER:
356 case JSON_REAL:
357 break;
358
359 case JSON_N_TYPES:
360 NOT_REACHED();
361 }
362 free(json);
363 }
364}
365
366static void
367json_destroy_object(struct shash *object)
368{
369 struct shash_node *node, *next;
370
371 SHASH_FOR_EACH_SAFE (node, next, object) {
372 struct json *value = node->data;
373
374 json_destroy(value);
375 shash_delete(object, node);
376 }
377 shash_destroy(object);
378 free(object);
379}
380
381static void
382json_destroy_array(struct json_array *array)
383{
384 size_t i;
385
386 for (i = 0; i < array->n; i++) {
387 json_destroy(array->elems[i]);
388 }
389 free(array->elems);
390}
391\f
392static struct json *json_clone_object(const struct shash *object);
393static struct json *json_clone_array(const struct json_array *array);
394
395/* Returns a deep copy of 'json'. */
396struct json *
397json_clone(const struct json *json)
398{
399 switch (json->type) {
400 case JSON_OBJECT:
401 return json_clone_object(json->u.object);
402
403 case JSON_ARRAY:
404 return json_clone_array(&json->u.array);
405
406 case JSON_STRING:
407 return json_string_create(json->u.string);
408
409 case JSON_NULL:
410 case JSON_FALSE:
411 case JSON_TRUE:
412 return json_create(json->type);
413
414 case JSON_INTEGER:
415 return json_integer_create(json->u.integer);
416
417 case JSON_REAL:
418 return json_real_create(json->u.real);
419
420 case JSON_N_TYPES:
421 default:
422 NOT_REACHED();
423 }
424}
425
426static struct json *
427json_clone_object(const struct shash *object)
428{
429 struct shash_node *node;
430 struct json *json;
431
432 json = json_object_create();
433 SHASH_FOR_EACH (node, object) {
434 struct json *value = node->data;
435 json_object_put(json, node->name, json_clone(value));
436 }
437 return json;
438}
439
440static struct json *
441json_clone_array(const struct json_array *array)
442{
443 struct json **elems;
444 size_t i;
445
446 elems = xmalloc(array->n * sizeof *elems);
447 for (i = 0; i < array->n; i++) {
448 elems[i] = json_clone(array->elems[i]);
449 }
450 return json_array_create(elems, array->n);
451}
452\f
453static size_t
454json_hash_object(const struct shash *object, size_t basis)
455{
456 const struct shash_node **nodes;
457 size_t n, i;
458
459 nodes = shash_sort(object);
460 n = shash_count(object);
461 for (i = 0; i < n; i++) {
462 const struct shash_node *node = nodes[i];
463 basis = hash_string(node->name, basis);
464 basis = json_hash(node->data, basis);
465 }
466 return basis;
467}
468
469static size_t
470json_hash_array(const struct json_array *array, size_t basis)
471{
472 size_t i;
473
474 basis = hash_int(array->n, basis);
475 for (i = 0; i < array->n; i++) {
476 basis = json_hash(array->elems[i], basis);
477 }
478 return basis;
479}
480
481size_t
482json_hash(const struct json *json, size_t basis)
483{
484 switch (json->type) {
485 case JSON_OBJECT:
486 return json_hash_object(json->u.object, basis);
487
488 case JSON_ARRAY:
489 return json_hash_array(&json->u.array, basis);
490
491 case JSON_STRING:
492 return hash_string(json->u.string, basis);
493
494 case JSON_NULL:
495 case JSON_FALSE:
496 case JSON_TRUE:
497 return hash_int(json->type << 8, basis);
498
499 case JSON_INTEGER:
500 return hash_int(json->u.integer, basis);
501
502 case JSON_REAL:
503 return hash_double(json->u.real, basis);
504
505 case JSON_N_TYPES:
506 default:
507 NOT_REACHED();
508 }
509}
510
511static bool
512json_equal_object(const struct shash *a, const struct shash *b)
513{
6e57173f 514 struct shash_node *a_node;
f38b84ea
BP
515
516 if (shash_count(a) != shash_count(b)) {
517 return false;
518 }
519
6e57173f
BP
520 SHASH_FOR_EACH (a_node, a) {
521 struct shash_node *b_node = shash_find(b, a_node->name);
522 if (!b_node || !json_equal(a_node->data, b_node->data)) {
f38b84ea
BP
523 return false;
524 }
525 }
526
527 return true;
528}
529
530static bool
531json_equal_array(const struct json_array *a, const struct json_array *b)
532{
533 size_t i;
534
535 if (a->n != b->n) {
536 return false;
537 }
538
539 for (i = 0; i < a->n; i++) {
540 if (!json_equal(a->elems[i], b->elems[i])) {
541 return false;
542 }
543 }
544
545 return true;
546}
547
548bool
549json_equal(const struct json *a, const struct json *b)
550{
551 if (a->type != b->type) {
552 return false;
553 }
554
555 switch (a->type) {
556 case JSON_OBJECT:
557 return json_equal_object(a->u.object, b->u.object);
558
559 case JSON_ARRAY:
560 return json_equal_array(&a->u.array, &b->u.array);
561
562 case JSON_STRING:
563 return !strcmp(a->u.string, b->u.string);
564
565 case JSON_NULL:
566 case JSON_FALSE:
567 case JSON_TRUE:
568 return true;
569
570 case JSON_INTEGER:
571 return a->u.integer == b->u.integer;
572
573 case JSON_REAL:
574 return a->u.real == b->u.real;
575
576 case JSON_N_TYPES:
577 default:
578 NOT_REACHED();
579 }
580}
581\f
582/* Lexical analysis. */
583
584static void
585json_lex_keyword(struct json_parser *p)
586{
587 struct json_token token;
588 const char *s;
589
590 s = ds_cstr(&p->buffer);
591 if (!strcmp(s, "false")) {
592 token.type = T_FALSE;
593 } else if (!strcmp(s, "true")) {
594 token.type = T_TRUE;
595 } else if (!strcmp(s, "null")) {
596 token.type = T_NULL;
597 } else {
598 json_error(p, "invalid keyword '%s'", s);
599 return;
600 }
601 json_parser_input(p, &token);
602}
603
604static void
605json_lex_number(struct json_parser *p)
606{
607 const char *cp = ds_cstr(&p->buffer);
608 unsigned long long int significand = 0;
a105c27b 609 struct json_token token;
f38b84ea
BP
610 int sig_digits = 0;
611 bool imprecise = false;
612 bool negative = false;
613 int pow10 = 0;
614
615 /* Leading minus sign. */
616 if (*cp == '-') {
617 negative = true;
618 cp++;
619 }
620
621 /* At least one integer digit, but 0 may not be used as a leading digit for
622 * a longer number. */
623 significand = 0;
624 sig_digits = 0;
625 if (*cp == '0') {
626 cp++;
627 if (isdigit(*cp)) {
628 json_error(p, "leading zeros not allowed");
629 return;
630 }
631 } else if (isdigit(*cp)) {
632 do {
633 if (significand <= ULLONG_MAX / 10) {
634 significand = significand * 10 + (*cp - '0');
635 sig_digits++;
636 } else {
637 pow10++;
638 if (*cp != '0') {
639 imprecise = true;
640 }
641 }
642 cp++;
643 } while (isdigit(*cp));
644 } else {
645 json_error(p, "'-' must be followed by digit");
646 return;
647 }
648
649 /* Optional fraction. */
650 if (*cp == '.') {
651 cp++;
652 if (!isdigit(*cp)) {
653 json_error(p, "decimal point must be followed by digit");
654 return;
655 }
656 do {
657 if (significand <= ULLONG_MAX / 10) {
658 significand = significand * 10 + (*cp - '0');
659 sig_digits++;
660 pow10--;
661 } else if (*cp != '0') {
662 imprecise = true;
663 }
664 cp++;
665 } while (isdigit(*cp));
666 }
667
668 /* Optional exponent. */
669 if (*cp == 'e' || *cp == 'E') {
670 bool negative_exponent = false;
671 int exponent;
672
673 cp++;
674 if (*cp == '+') {
675 cp++;
676 } else if (*cp == '-') {
677 negative_exponent = true;
678 cp++;
679 }
680
681 if (!isdigit(*cp)) {
682 json_error(p, "exponent must contain at least one digit");
683 return;
684 }
685
686 exponent = 0;
687 do {
688 if (exponent >= INT_MAX / 10) {
689 json_error(p, "exponent outside valid range");
690 return;
691 }
692 exponent = exponent * 10 + (*cp - '0');
693 cp++;
694 } while (isdigit(*cp));
695
696 if (negative_exponent) {
697 pow10 -= exponent;
698 } else {
699 pow10 += exponent;
700 }
701 }
702
703 if (*cp != '\0') {
704 json_error(p, "syntax error in number");
705 return;
706 }
707
708 /* Figure out number.
709 *
710 * We suppress negative zeros as a matter of policy. */
711 if (!significand) {
712 struct json_token token;
713 token.type = T_INTEGER;
714 token.u.integer = 0;
715 json_parser_input(p, &token);
716 return;
717 }
718
719 if (!imprecise) {
720 while (pow10 > 0 && significand < ULLONG_MAX / 10) {
721 significand *= 10;
722 sig_digits++;
723 pow10--;
724 }
725 while (pow10 < 0 && significand % 10 == 0) {
726 significand /= 10;
727 sig_digits--;
728 pow10++;
729 }
730 if (pow10 == 0
731 && significand <= (negative
732 ? (unsigned long long int) LLONG_MAX + 1
733 : LLONG_MAX)) {
f38b84ea
BP
734 token.type = T_INTEGER;
735 token.u.integer = negative ? -significand : significand;
736 json_parser_input(p, &token);
737 return;
738 }
739 }
740
a105c27b
BP
741 token.type = T_REAL;
742 if (!str_to_double(ds_cstr(&p->buffer), &token.u.real)) {
743 json_error(p, "number outside valid range");
744 return;
f38b84ea 745 }
a105c27b
BP
746 /* Suppress negative zero. */
747 if (token.u.real == 0) {
748 token.u.real = 0;
749 }
750 json_parser_input(p, &token);
f38b84ea
BP
751}
752
7d23a63a
BP
/* Parses the four hexadecimal digits at 'cp' (as found after "\u" in a quoted
 * string), where 'end' points just past the last byte available.
 *
 * On success, stores the value in '*valuep' and returns a null pointer.  On
 * failure, returns a static error message and leaves '*valuep' untouched.
 * The value 0 is rejected because null bytes are not supported in strings. */
static const char *
json_lex_4hex(const char *cp, const char *end, int *valuep)
{
    int value, i;

    /* Compare lengths rather than computing 'cp + 4', which could point
     * further than one past the end of the buffer. */
    if (end - cp < 4) {
        return "quoted string ends within \\u escape";
    }

    value = 0;
    for (i = 0; i < 4; i++) {
        unsigned char c = *cp++;
        if (!isxdigit(c)) {
            return "malformed \\u escape";
        }
        value = (value << 4) | hexit_value(c);
    }
    if (!value) {
        return "null bytes not supported in quoted strings";
    }
    *valuep = value;
    return NULL;
}
776
/* Decodes the body of a "\u" escape that begins at 'cp' (just past the "u"),
 * where 'end' points just past the last byte available, and appends the
 * character to 'out' as UTF-8.  If the first code unit is a leading
 * surrogate, a second "\uXXXX" escape holding the trailing surrogate must
 * follow and the pair is decoded as one character.
 *
 * Returns a pointer just past the consumed input on success.  On failure,
 * replaces the contents of 'out' by an error message and returns a null
 * pointer. */
static const char *
json_lex_unicode(const char *cp, const char *end, struct ds *out)
{
    const char *error;
    int c0, c1;

    error = json_lex_4hex(cp, end, &c0);
    if (error) {
        ds_clear(out);
        ds_put_cstr(out, error);
        return NULL;
    }
    cp += 4;
    if (!uc_is_leading_surrogate(c0)) {
        ds_put_utf8(out, c0);
        return cp;
    }

    /* Compare lengths rather than computing 'cp + 2', which could point
     * further than one past the end of the buffer. */
    if (end - cp < 2 || *cp++ != '\\' || *cp++ != 'u') {
        ds_clear(out);
        ds_put_cstr(out, "malformed escaped surrogate pair");
        return NULL;
    }

    error = json_lex_4hex(cp, end, &c1);
    if (error) {
        ds_clear(out);
        ds_put_cstr(out, error);
        return NULL;
    }
    cp += 4;
    if (!uc_is_trailing_surrogate(c1)) {
        ds_clear(out);
        ds_put_cstr(out, "second half of escaped surrogate pair is not "
                    "trailing surrogate");
        return NULL;
    }

    ds_put_utf8(out, utf16_decode_surrogate_pair(c0, c1));
    return cp;
}
818
7d23a63a
BP
819bool
820json_string_unescape(const char *in, size_t in_len, char **outp)
821{
822 const char *end = in + in_len;
823 bool ok = false;
824 struct ds out;
825
826 ds_init(&out);
827 ds_reserve(&out, in_len);
828 if (in_len > 0 && in[in_len - 1] == '\\') {
829 ds_put_cstr(&out, "quoted string may not end with backslash");
830 goto exit;
831 }
832 while (in < end) {
833 if (*in == '"') {
834 ds_clear(&out);
835 ds_put_cstr(&out, "quoted string may not include unescape \"");
836 goto exit;
837 }
838 if (*in != '\\') {
839 ds_put_char(&out, *in++);
f38b84ea
BP
840 continue;
841 }
842
7d23a63a
BP
843 in++;
844 switch (*in++) {
f38b84ea 845 case '"': case '\\': case '/':
7d23a63a 846 ds_put_char(&out, in[-1]);
f38b84ea
BP
847 break;
848
849 case 'b':
7d23a63a 850 ds_put_char(&out, '\b');
f38b84ea
BP
851 break;
852
853 case 'f':
7d23a63a 854 ds_put_char(&out, '\f');
f38b84ea
BP
855 break;
856
857 case 'n':
7d23a63a 858 ds_put_char(&out, '\n');
f38b84ea
BP
859 break;
860
861 case 'r':
7d23a63a 862 ds_put_char(&out, '\r');
f38b84ea
BP
863 break;
864
865 case 't':
7d23a63a 866 ds_put_char(&out, '\t');
f38b84ea
BP
867 break;
868
869 case 'u':
7d23a63a
BP
870 in = json_lex_unicode(in, end, &out);
871 if (!in) {
f38b84ea
BP
872 goto exit;
873 }
874 break;
875
876 default:
7d23a63a
BP
877 ds_clear(&out);
878 ds_put_format(&out, "bad escape \\%c", in[-1]);
f38b84ea
BP
879 goto exit;
880 }
881 }
7d23a63a
BP
882 ok = true;
883
884exit:
885 *outp = ds_cstr(&out);
886 return ok;
887}
888
889static void
890json_parser_input_string(struct json_parser *p, const char *s)
891{
892 struct json_token token;
f38b84ea
BP
893
894 token.type = T_STRING;
7d23a63a 895 token.u.string = s;
f38b84ea 896 json_parser_input(p, &token);
7d23a63a 897}
f38b84ea 898
7d23a63a
BP
899static void
900json_lex_string(struct json_parser *p)
901{
902 const char *raw = ds_cstr(&p->buffer);
903 if (!strchr(raw, '\\')) {
904 json_parser_input_string(p, raw);
905 } else {
906 char *cooked;
907
908 if (json_string_unescape(raw, strlen(raw), &cooked)) {
909 json_parser_input_string(p, cooked);
910 } else {
911 json_error(p, "%s", cooked);
912 }
913
914 free(cooked);
915 }
f38b84ea
BP
916}
917
918static bool
0bdf342a 919json_lex_input(struct json_parser *p, unsigned char c)
f38b84ea
BP
920{
921 struct json_token token;
922
0bdf342a
BP
923 p->byte_number++;
924 if (c == '\n') {
925 p->column_number = 0;
926 p->line_number++;
927 } else {
928 p->column_number++;
929 }
930
f38b84ea
BP
931 switch (p->lex_state) {
932 case JSON_LEX_START:
933 switch (c) {
934 case ' ': case '\t': case '\n': case '\r':
935 /* Nothing to do. */
936 return true;
937
938 case 'a': case 'b': case 'c': case 'd': case 'e':
939 case 'f': case 'g': case 'h': case 'i': case 'j':
940 case 'k': case 'l': case 'm': case 'n': case 'o':
941 case 'p': case 'q': case 'r': case 's': case 't':
942 case 'u': case 'v': case 'w': case 'x': case 'y':
943 case 'z':
944 p->lex_state = JSON_LEX_KEYWORD;
945 break;
946
947 case '[': case '{': case ']': case '}': case ':': case ',':
948 token.type = c;
949 json_parser_input(p, &token);
950 return true;
951
952 case '-':
953 case '0': case '1': case '2': case '3': case '4':
954 case '5': case '6': case '7': case '8': case '9':
955 p->lex_state = JSON_LEX_NUMBER;
956 break;
957
958 case '"':
959 p->lex_state = JSON_LEX_STRING;
960 return true;
961
962 default:
963 if (isprint(c)) {
964 json_error(p, "invalid character '%c'", c);
965 } else {
966 json_error(p, "invalid character U+%04x", c);
967 }
968 return true;
969 }
970 break;
971
972 case JSON_LEX_KEYWORD:
973 if (!isalpha((unsigned char) c)) {
974 json_lex_keyword(p);
975 return false;
976 }
977 break;
978
979 case JSON_LEX_NUMBER:
980 if (!strchr(".0123456789eE-+", c)) {
981 json_lex_number(p);
982 return false;
983 }
984 break;
985
986 case JSON_LEX_STRING:
987 if (c == '\\') {
988 p->lex_state = JSON_LEX_ESCAPE;
989 } else if (c == '"') {
990 json_lex_string(p);
991 return true;
992 } else if (c < 0x20) {
993 json_error(p, "U+%04X must be escaped in quoted string", c);
994 return true;
995 }
996 break;
997
998 case JSON_LEX_ESCAPE:
999 p->lex_state = JSON_LEX_STRING;
1000 break;
1001
1002 default:
1003 abort();
1004 }
1005 ds_put_char(&p->buffer, c);
1006 return true;
1007}
1008\f
1009/* Parsing. */
1010
1011/* Parses 'string' as a JSON object or array and returns a newly allocated
1012 * 'struct json'. The caller must free the returned structure with
1013 * json_destroy() when it is no longer needed.
1014 *
1015 * 'string' must be encoded in UTF-8.
1016 *
1017 * If 'string' is valid JSON, then the returned 'struct json' will be either an
1018 * object (JSON_OBJECT) or an array (JSON_ARRAY).
1019 *
1020 * If 'string' is not valid JSON, then the returned 'struct json' will be a
1021 * string (JSON_STRING) that describes the particular error encountered during
1022 * parsing. (This is an acceptable means of error reporting because at its top
1023 * level JSON must be either an object or an array; a bare string is not
1024 * valid.) */
1025struct json *
1026json_from_string(const char *string)
1027{
1028 struct json_parser *p = json_parser_create(JSPF_TRAILER);
1029 json_parser_feed(p, string, strlen(string));
1030 return json_parser_finish(p);
1031}
1032
/* Reads the file named 'file_name', parses its contents as a JSON object or
 * array, and returns a newly allocated 'struct json'.  The caller must free
 * the returned structure with json_destroy() when it is no longer needed.
 *
 * The file must be encoded in UTF-8.
 *
 * If the file cannot be opened, the result is a JSON string that describes the
 * failure.  Otherwise see json_from_string() for return value semantics. */
struct json *
json_from_file(const char *file_name)
{
    FILE *stream = fopen(file_name, "r");
    struct json *result;

    if (stream) {
        result = json_from_stream(stream);
        fclose(stream);
    } else {
        result = json_string_create_nocopy(
            xasprintf("error opening \"%s\": %s", file_name, strerror(errno)));
    }
    return result;
}
1057
1058/* Parses the contents of 'stream' as a JSON object or array, and returns a
1059 * newly allocated 'struct json'. The caller must free the returned structure
1060 * with json_destroy() when it is no longer needed.
1061 *
1062 * The file must be encoded in UTF-8.
1063 *
1064 * See json_from_string() for return value semantics.
1065 */
1066struct json *
1067json_from_stream(FILE *stream)
1068{
1069 struct json_parser *p;
1070 struct json *json;
f38b84ea 1071
f38b84ea
BP
1072 p = json_parser_create(JSPF_TRAILER);
1073 for (;;) {
1074 char buffer[BUFSIZ];
1075 size_t n;
1076
1077 n = fread(buffer, 1, sizeof buffer, stream);
1078 if (!n || json_parser_feed(p, buffer, n) != n) {
1079 break;
1080 }
1081 }
1082 json = json_parser_finish(p);
1083
f38b84ea
BP
1084 if (ferror(stream)) {
1085 json_destroy(json);
1086 json = json_string_create_nocopy(
5562d6f5 1087 xasprintf("error reading JSON stream: %s", strerror(errno)));
f38b84ea 1088 }
f38b84ea
BP
1089
1090 return json;
1091}
1092
1093struct json_parser *
1094json_parser_create(int flags)
1095{
1096 struct json_parser *p = xzalloc(sizeof *p);
1097 p->flags = flags;
1098 return p;
1099}
1100
1101size_t
1102json_parser_feed(struct json_parser *p, const char *input, size_t n)
1103{
1104 size_t i;
1105 for (i = 0; !p->done && i < n; ) {
1106 if (json_lex_input(p, input[i])) {
1107 i++;
1108 }
1109 }
1110 return i;
1111}
1112
1113bool
1114json_parser_is_done(const struct json_parser *p)
1115{
1116 return p->done;
1117}
1118
1119struct json *
1120json_parser_finish(struct json_parser *p)
1121{
1122 struct json *json;
1123
1124 switch (p->lex_state) {
1125 case JSON_LEX_START:
1126 break;
1127
1128 case JSON_LEX_STRING:
1129 case JSON_LEX_ESCAPE:
1130 json_error(p, "unexpected end of input in quoted string");
1131 break;
1132
1133 case JSON_LEX_NUMBER:
1134 case JSON_LEX_KEYWORD:
1135 json_lex_input(p, ' ');
1136 break;
1137 }
1138
1139 if (p->parse_state == JSON_PARSE_START) {
1140 json_error(p, "empty input stream");
1141 } else if (p->parse_state != JSON_PARSE_END) {
1142 json_error(p, "unexpected end of input");
1143 }
1144
1145 if (!p->error) {
1146 assert(p->height == 1);
1147 assert(p->stack[0].json != NULL);
1148 json = p->stack[--p->height].json;
1149 } else {
1150 json = json_string_create_nocopy(p->error);
1151 p->error = NULL;
1152 }
1153
1154 json_parser_abort(p);
1155
1156 return json;
1157}
1158
1159void
1160json_parser_abort(struct json_parser *p)
1161{
1162 if (p) {
1163 ds_destroy(&p->buffer);
1164 if (p->height) {
1165 json_destroy(p->stack[0].json);
1166 }
1167 free(p->stack);
1168 free(p->member_name);
1169 free(p->error);
1170 free(p);
1171 }
1172}
1173
1174static struct json_parser_node *
1175json_parser_top(struct json_parser *p)
1176{
1177 return &p->stack[p->height - 1];
1178}
1179
1180static void
1181json_parser_put_value(struct json_parser *p, struct json *value)
1182{
1183 struct json_parser_node *node = json_parser_top(p);
1184 if (node->json->type == JSON_OBJECT) {
1185 json_object_put(node->json, p->member_name, value);
1186 free(p->member_name);
1187 p->member_name = NULL;
1188 } else if (node->json->type == JSON_ARRAY) {
1189 json_array_add(node->json, value);
1190 } else {
1191 NOT_REACHED();
1192 }
1193}
1194
1195static struct json_parser_node *
1196json_parser_push(struct json_parser *p,
1197 struct json *new_json, enum json_parse_state new_state)
1198{
1199 if (p->height < JSON_MAX_HEIGHT) {
1200 struct json_parser_node *node;
1201
1202 if (p->height >= p->allocated_height) {
1203 p->stack = x2nrealloc(p->stack, &p->allocated_height,
1204 sizeof *p->stack);
1205 }
1206
1207 if (p->height > 0) {
1208 json_parser_put_value(p, new_json);
1209 }
1210
1211 node = &p->stack[p->height++];
1212 node->json = new_json;
1213 p->parse_state = new_state;
1214 return node;
1215 } else {
1216 json_error(p, "input exceeds maximum nesting depth %d",
1217 JSON_MAX_HEIGHT);
1218 return NULL;
1219 }
1220}
1221
1222static void
1223json_parser_push_object(struct json_parser *p)
1224{
1225 json_parser_push(p, json_object_create(), JSON_PARSE_OBJECT_INIT);
1226}
1227
1228static void
1229json_parser_push_array(struct json_parser *p)
1230{
1231 json_parser_push(p, json_array_create_empty(), JSON_PARSE_ARRAY_INIT);
1232}
1233
1234static void
1235json_parse_value(struct json_parser *p, struct json_token *token,
1236 enum json_parse_state next_state)
1237{
1238 struct json *value;
1239
1240 switch (token->type) {
1241 case T_FALSE:
1242 value = json_boolean_create(false);
1243 break;
1244
1245 case T_NULL:
1246 value = json_null_create();
1247 break;
1248
1249 case T_TRUE:
1250 value = json_boolean_create(true);
1251 break;
1252
1253 case '{':
1254 json_parser_push_object(p);
1255 return;
1256
1257 case '[':
1258 json_parser_push_array(p);
1259 return;
1260
1261 case T_INTEGER:
1262 value = json_integer_create(token->u.integer);
1263 break;
1264
1265 case T_REAL:
1266 value = json_real_create(token->u.real);
1267 break;
1268
1269 case T_STRING:
1270 value = json_string_create(token->u.string);
1271 break;
1272
1273 case T_EOF:
1274 case '}':
1275 case ']':
1276 case ':':
1277 case ',':
1278 default:
1279 json_error(p, "syntax error expecting value");
1280 return;
1281 }
1282
1283 json_parser_put_value(p, value);
1284 p->parse_state = next_state;
1285}
1286
1287static void
1288json_parser_pop(struct json_parser *p)
1289{
1290 struct json_parser_node *node;
1291
1292 /* Conserve memory. */
1293 node = json_parser_top(p);
1294 if (node->json->type == JSON_ARRAY) {
1295 json_array_trim(node->json);
1296 }
1297
1298 /* Pop off the top-of-stack. */
1299 if (p->height == 1) {
1300 p->parse_state = JSON_PARSE_END;
1301 if (!(p->flags & JSPF_TRAILER)) {
1302 p->done = true;
1303 }
1304 } else {
1305 p->height--;
1306 node = json_parser_top(p);
1307 if (node->json->type == JSON_ARRAY) {
1308 p->parse_state = JSON_PARSE_ARRAY_NEXT;
1309 } else if (node->json->type == JSON_OBJECT) {
1310 p->parse_state = JSON_PARSE_OBJECT_NEXT;
1311 } else {
1312 NOT_REACHED();
1313 }
1314 }
1315}
1316
1317static void
1318json_parser_input(struct json_parser *p, struct json_token *token)
1319{
1320 switch (p->parse_state) {
1321 case JSON_PARSE_START:
1322 if (token->type == '{') {
1323 json_parser_push_object(p);
1324 } else if (token->type == '[') {
1325 json_parser_push_array(p);
1326 } else {
1327 json_error(p, "syntax error at beginning of input");
1328 }
1329 break;
1330
1331 case JSON_PARSE_END:
1332 json_error(p, "trailing garbage at end of input");
1333 break;
1334
1335 case JSON_PARSE_OBJECT_INIT:
1336 if (token->type == '}') {
1337 json_parser_pop(p);
1338 break;
1339 }
1340 /* Fall through. */
1341 case JSON_PARSE_OBJECT_NAME:
1342 if (token->type == T_STRING) {
1343 p->member_name = xstrdup(token->u.string);
1344 p->parse_state = JSON_PARSE_OBJECT_COLON;
1345 } else {
1346 json_error(p, "syntax error parsing object expecting string");
1347 }
1348 break;
1349
1350 case JSON_PARSE_OBJECT_COLON:
1351 if (token->type == ':') {
1352 p->parse_state = JSON_PARSE_OBJECT_VALUE;
1353 } else {
1354 json_error(p, "syntax error parsing object expecting ':'");
1355 }
1356 break;
1357
1358 case JSON_PARSE_OBJECT_VALUE:
1359 json_parse_value(p, token, JSON_PARSE_OBJECT_NEXT);
1360 break;
1361
1362 case JSON_PARSE_OBJECT_NEXT:
1363 if (token->type == ',') {
1364 p->parse_state = JSON_PARSE_OBJECT_NAME;
1365 } else if (token->type == '}') {
1366 json_parser_pop(p);
1367 } else {
1368 json_error(p, "syntax error expecting '}' or ','");
1369 }
1370 break;
1371
1372 case JSON_PARSE_ARRAY_INIT:
1373 if (token->type == ']') {
1374 json_parser_pop(p);
1375 break;
1376 }
1377 /* Fall through. */
1378 case JSON_PARSE_ARRAY_VALUE:
1379 json_parse_value(p, token, JSON_PARSE_ARRAY_NEXT);
1380 break;
1381
1382 case JSON_PARSE_ARRAY_NEXT:
1383 if (token->type == ',') {
1384 p->parse_state = JSON_PARSE_ARRAY_VALUE;
1385 } else if (token->type == ']') {
1386 json_parser_pop(p);
1387 } else {
1388 json_error(p, "syntax error expecting ']' or ','");
1389 }
1390 break;
1391
1392 default:
1393 abort();
1394 }
1395
1396 p->lex_state = JSON_LEX_START;
1397 ds_clear(&p->buffer);
1398}
1399
1400static struct json *
1401json_create(enum json_type type)
1402{
1403 struct json *json = xmalloc(sizeof *json);
1404 json->type = type;
1405 return json;
1406}
1407
1408static void
1409json_error(struct json_parser *p, const char *format, ...)
1410{
1411 if (!p->error) {
0bdf342a 1412 struct ds msg;
f38b84ea
BP
1413 va_list args;
1414
0bdf342a
BP
1415 ds_init(&msg);
1416 ds_put_format(&msg, "line %d, column %d, byte %d: ",
1417 p->line_number, p->column_number, p->byte_number);
f38b84ea 1418 va_start(args, format);
0bdf342a 1419 ds_put_format_valist(&msg, format, args);
f38b84ea
BP
1420 va_end(args);
1421
0bdf342a
BP
1422 p->error = ds_steal_cstr(&msg);
1423
f38b84ea
BP
1424 p->done = true;
1425 }
1426}
1427\f
/* Number of spaces added per nesting level when pretty-printing. */
#define SPACES_PER_LEVEL 2

/* State carried through a single serialization pass. */
struct json_serializer {
    struct ds *ds;              /* Serialized output is appended here. */
    int depth;                  /* Current nesting depth, scales indentation. */
    int flags;                  /* JSSF_* flags (JSSF_PRETTY, JSSF_SORT). */
};
1435
/* Serialization helpers, declared ahead of their definitions because they
 * call one another. */
static void json_serialize(const struct json *json,
                           struct json_serializer *s);
static void json_serialize_object(const struct shash *object,
                                  struct json_serializer *s);
static void json_serialize_array(const struct json_array *array,
                                 struct json_serializer *s);
static void json_serialize_string(const char *string, struct ds *ds);
f38b84ea
BP
1442
1443/* Converts 'json' to a string in JSON format, encoded in UTF-8, and returns
1444 * that string. The caller is responsible for freeing the returned string,
1445 * with free(), when it is no longer needed.
1446 *
1447 * If 'flags' contains JSSF_PRETTY, the output is pretty-printed with each
1448 * nesting level introducing an additional indentation. Otherwise, the
1449 * returned string does not contain any new-line characters.
1450 *
1451 * If 'flags' contains JSSF_SORT, members of objects in the output are sorted
1452 * in bytewise lexicographic order for reproducibility. Otherwise, members of
1453 * objects are output in an indeterminate order.
1454 *
1455 * The returned string is valid JSON only if 'json' represents an array or an
1456 * object, since a bare literal does not satisfy the JSON grammar. */
1457char *
1458json_to_string(const struct json *json, int flags)
36d802ae
BP
1459{
1460 struct ds ds;
1461
1462 ds_init(&ds);
1463 json_to_ds(json, flags, &ds);
1464 return ds_steal_cstr(&ds);
1465}
1466
1467/* Same as json_to_string(), but the output is appended to 'ds'. */
1468void
1469json_to_ds(const struct json *json, int flags, struct ds *ds)
f38b84ea
BP
1470{
1471 struct json_serializer s;
36d802ae
BP
1472
1473 s.ds = ds;
f38b84ea
BP
1474 s.depth = 0;
1475 s.flags = flags;
36d802ae 1476 json_serialize(json, &s);
f38b84ea
BP
1477}
1478
1479static void
36d802ae 1480json_serialize(const struct json *json, struct json_serializer *s)
f38b84ea 1481{
36d802ae 1482 struct ds *ds = s->ds;
f38b84ea
BP
1483
1484 switch (json->type) {
1485 case JSON_NULL:
1486 ds_put_cstr(ds, "null");
1487 break;
1488
1489 case JSON_FALSE:
1490 ds_put_cstr(ds, "false");
1491 break;
1492
1493 case JSON_TRUE:
1494 ds_put_cstr(ds, "true");
1495 break;
1496
1497 case JSON_OBJECT:
36d802ae 1498 json_serialize_object(json->u.object, s);
f38b84ea
BP
1499 break;
1500
1501 case JSON_ARRAY:
36d802ae 1502 json_serialize_array(&json->u.array, s);
f38b84ea
BP
1503 break;
1504
1505 case JSON_INTEGER:
1506 ds_put_format(ds, "%lld", json->u.integer);
1507 break;
1508
1509 case JSON_REAL:
1510 ds_put_format(ds, "%.*g", DBL_DIG, json->u.real);
1511 break;
1512
1513 case JSON_STRING:
36d802ae 1514 json_serialize_string(json->u.string, ds);
f38b84ea
BP
1515 break;
1516
1517 case JSON_N_TYPES:
1518 default:
1519 NOT_REACHED();
1520 }
1521}
1522
1523static void
1524indent_line(struct json_serializer *s)
1525{
1526 if (s->flags & JSSF_PRETTY) {
36d802ae
BP
1527 ds_put_char(s->ds, '\n');
1528 ds_put_char_multiple(s->ds, ' ', SPACES_PER_LEVEL * s->depth);
f38b84ea
BP
1529 }
1530}
1531
1532static void
36d802ae
BP
1533json_serialize_object_member(size_t i, const struct shash_node *node,
1534 struct json_serializer *s)
f38b84ea 1535{
36d802ae 1536 struct ds *ds = s->ds;
f38b84ea
BP
1537
1538 if (i) {
1539 ds_put_char(ds, ',');
1540 indent_line(s);
1541 }
1542
36d802ae 1543 json_serialize_string(node->name, ds);
f38b84ea
BP
1544 ds_put_char(ds, ':');
1545 if (s->flags & JSSF_PRETTY) {
1546 ds_put_char(ds, ' ');
1547 }
36d802ae 1548 json_serialize(node->data, s);
f38b84ea
BP
1549}
1550
1551static void
36d802ae 1552json_serialize_object(const struct shash *object, struct json_serializer *s)
f38b84ea 1553{
36d802ae 1554 struct ds *ds = s->ds;
f38b84ea
BP
1555
1556 ds_put_char(ds, '{');
1557
1558 s->depth++;
1559 indent_line(s);
1560
1561 if (s->flags & JSSF_SORT) {
1562 const struct shash_node **nodes;
1563 size_t n, i;
1564
1565 nodes = shash_sort(object);
1566 n = shash_count(object);
1567 for (i = 0; i < n; i++) {
36d802ae 1568 json_serialize_object_member(i, nodes[i], s);
f38b84ea
BP
1569 }
1570 free(nodes);
1571 } else {
1572 struct shash_node *node;
1573 size_t i;
1574
1575 i = 0;
1576 SHASH_FOR_EACH (node, object) {
36d802ae 1577 json_serialize_object_member(i++, node, s);
f38b84ea
BP
1578 }
1579 }
1580
1581 ds_put_char(ds, '}');
1582 s->depth--;
1583}
1584
1585static void
36d802ae 1586json_serialize_array(const struct json_array *array, struct json_serializer *s)
f38b84ea 1587{
36d802ae 1588 struct ds *ds = s->ds;
f38b84ea
BP
1589 size_t i;
1590
1591 ds_put_char(ds, '[');
1592 s->depth++;
1593
1594 if (array->n > 0) {
1595 indent_line(s);
1596
1597 for (i = 0; i < array->n; i++) {
1598 if (i) {
1599 ds_put_char(ds, ',');
1600 indent_line(s);
1601 }
36d802ae 1602 json_serialize(array->elems[i], s);
f38b84ea
BP
1603 }
1604 }
1605
1606 s->depth--;
1607 ds_put_char(ds, ']');
1608}
1609
/* Appends 'string' to 'ds' as a double-quoted JSON string.
 *
 * The double quote, the backslash, and the control characters \b, \f, \n,
 * \r, and \t are written as two-character escapes.  Any other byte below 32
 * is written as a \u escape with four hex digits.  All remaining bytes,
 * including those with the high bit set, are copied through unchanged. */
static void
json_serialize_string(const char *string, struct ds *ds)
{
    const char *cp;

    ds_put_char(ds, '"');
    for (cp = string; *cp != '\0'; cp++) {
        /* Treat the byte as unsigned so that bytes >= 0x80 compare as
         * greater than 32 instead of as negative values. */
        uint8_t byte = *cp;
        const char *escape;

        switch (byte) {
        case '"':
            escape = "\\\"";
            break;
        case '\\':
            escape = "\\\\";
            break;
        case '\b':
            escape = "\\b";
            break;
        case '\f':
            escape = "\\f";
            break;
        case '\n':
            escape = "\\n";
            break;
        case '\r':
            escape = "\\r";
            break;
        case '\t':
            escape = "\\t";
            break;
        default:
            escape = NULL;
            break;
        }

        if (escape) {
            ds_put_cstr(ds, escape);
        } else if (byte >= 32) {
            ds_put_char(ds, byte);
        } else {
            ds_put_format(ds, "\\u%04x", byte);
        }
    }
    ds_put_char(ds, '"');
}