]> git.proxmox.com Git - mirror_qemu.git/blob - qobject/json-parser.c
qjson: Give each of the six structural chars its own token type
[mirror_qemu.git] / qobject / json-parser.c
1 /*
2 * JSON Parser
3 *
4 * Copyright IBM, Corp. 2009
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10 * See the COPYING.LIB file in the top-level directory.
11 *
12 */
13
14 #include <stdarg.h>
15
16 #include "qemu-common.h"
17 #include "qapi/qmp/qstring.h"
18 #include "qapi/qmp/qint.h"
19 #include "qapi/qmp/qdict.h"
20 #include "qapi/qmp/qlist.h"
21 #include "qapi/qmp/qfloat.h"
22 #include "qapi/qmp/qbool.h"
23 #include "qapi/qmp/json-parser.h"
24 #include "qapi/qmp/json-lexer.h"
25
/*
 * Parser state shared by all parsing rules: the most recent error and
 * the array of tokens being consumed.
 */
typedef struct JSONParserContext
{
    Error *err;         /* last error recorded by parse_error(), or NULL */
    struct {
        QObject **buf;  /* token objects, one reference held per entry */
        size_t pos;     /* index of the next token to hand out */
        size_t count;   /* number of entries in buf */
    } tokens;
} JSONParserContext;
35
/* Assert that cond does NOT hold; the assertion fires when cond is true. */
#define BUG_ON(cond) assert(!(cond))
37
38 /**
39 * TODO
40 *
41 * 0) make errors meaningful again
42 * 1) add geometry information to tokens
43 * 3) should we return a parsed size?
44 * 4) deal with premature EOI
45 */
46
/* Forward declaration: the pair/object/array rules recurse into parse_value(). */
static QObject *parse_value(JSONParserContext *ctxt, va_list *ap);
48
49 /**
50 * Token manipulators
51 *
52 * tokens are dictionaries that contain a type, a string value, and geometry information
53 * about a token identified by the lexer. These are routines that make working with
54 * these objects a bit easier.
55 */
56 static const char *token_get_value(QObject *obj)
57 {
58 return qdict_get_str(qobject_to_qdict(obj), "token");
59 }
60
61 static JSONTokenType token_get_type(QObject *obj)
62 {
63 return qdict_get_int(qobject_to_qdict(obj), "type");
64 }
65
66 static int token_is_keyword(QObject *obj, const char *value)
67 {
68 if (token_get_type(obj) != JSON_KEYWORD) {
69 return 0;
70 }
71
72 return strcmp(token_get_value(obj), value) == 0;
73 }
74
75 static int token_is_escape(QObject *obj, const char *value)
76 {
77 if (token_get_type(obj) != JSON_ESCAPE) {
78 return 0;
79 }
80
81 return (strcmp(token_get_value(obj), value) == 0);
82 }
83
84 /**
85 * Error handler
86 */
87 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
88 QObject *token, const char *msg, ...)
89 {
90 va_list ap;
91 char message[1024];
92 va_start(ap, msg);
93 vsnprintf(message, sizeof(message), msg, ap);
94 va_end(ap);
95 if (ctxt->err) {
96 error_free(ctxt->err);
97 ctxt->err = NULL;
98 }
99 error_setg(&ctxt->err, "JSON parse error, %s", message);
100 }
101
102 /**
103 * String helpers
104 *
105 * These helpers are used to unescape strings.
106 */
/*
 * Encode the 16-bit value wchar as a NUL-terminated UTF-8 sequence in
 * buffer.  The encoding takes one, two or three bytes plus the
 * terminator; buffer_length is asserted to be large enough for the
 * form that is needed.
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    size_t len = 0;

    if (wchar <= 0x007F) {
        /* one byte: 0xxxxxxx */
        assert(buffer_length >= 2);

        buffer[len++] = wchar & 0x7F;
    } else if (wchar <= 0x07FF) {
        /* two bytes: 110xxxxx 10xxxxxx */
        assert(buffer_length >= 3);

        buffer[len++] = 0xC0 | ((wchar >> 6) & 0x1F);
        buffer[len++] = 0x80 | (wchar & 0x3F);
    } else {
        /* three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
        assert(buffer_length >= 4);

        buffer[len++] = 0xE0 | ((wchar >> 12) & 0x0F);
        buffer[len++] = 0x80 | ((wchar >> 6) & 0x3F);
        buffer[len++] = 0x80 | (wchar & 0x3F);
    }

    buffer[len] = 0;
}
129
/*
 * Return the numeric value (0..15) of the hexadecimal digit ch, accepting
 * both lower- and upper-case letters, or -1 if ch is not a hex digit.
 */
static int hex2decimal(char ch)
{
    int value = -1;

    if (ch >= '0' && ch <= '9') {
        value = ch - '0';
    } else if (ch >= 'a' && ch <= 'f') {
        value = (ch - 'a') + 10;
    } else if (ch >= 'A' && ch <= 'F') {
        value = (ch - 'A') + 10;
    }

    return value;
}
142
143 /**
144 * parse_string(): Parse a json string and return a QObject
145 *
146 * string
147 * ""
148 * " chars "
149 * chars
150 * char
151 * char chars
152 * char
153 * any-Unicode-character-
154 * except-"-or-\-or-
155 * control-character
156 * \"
157 * \\
158 * \/
159 * \b
160 * \f
161 * \n
162 * \r
163 * \t
164 * \u four-hex-digits
165 */
166 static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
167 {
168 const char *ptr = token_get_value(token);
169 QString *str;
170 int double_quote = 1;
171
172 if (*ptr == '"') {
173 double_quote = 1;
174 } else {
175 double_quote = 0;
176 }
177 ptr++;
178
179 str = qstring_new();
180 while (*ptr &&
181 ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
182 if (*ptr == '\\') {
183 ptr++;
184
185 switch (*ptr) {
186 case '"':
187 qstring_append(str, "\"");
188 ptr++;
189 break;
190 case '\'':
191 qstring_append(str, "'");
192 ptr++;
193 break;
194 case '\\':
195 qstring_append(str, "\\");
196 ptr++;
197 break;
198 case '/':
199 qstring_append(str, "/");
200 ptr++;
201 break;
202 case 'b':
203 qstring_append(str, "\b");
204 ptr++;
205 break;
206 case 'f':
207 qstring_append(str, "\f");
208 ptr++;
209 break;
210 case 'n':
211 qstring_append(str, "\n");
212 ptr++;
213 break;
214 case 'r':
215 qstring_append(str, "\r");
216 ptr++;
217 break;
218 case 't':
219 qstring_append(str, "\t");
220 ptr++;
221 break;
222 case 'u': {
223 uint16_t unicode_char = 0;
224 char utf8_char[4];
225 int i = 0;
226
227 ptr++;
228
229 for (i = 0; i < 4; i++) {
230 if (qemu_isxdigit(*ptr)) {
231 unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
232 } else {
233 parse_error(ctxt, token,
234 "invalid hex escape sequence in string");
235 goto out;
236 }
237 ptr++;
238 }
239
240 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
241 qstring_append(str, utf8_char);
242 } break;
243 default:
244 parse_error(ctxt, token, "invalid escape sequence in string");
245 goto out;
246 }
247 } else {
248 char dummy[2];
249
250 dummy[0] = *ptr++;
251 dummy[1] = 0;
252
253 qstring_append(str, dummy);
254 }
255 }
256
257 return str;
258
259 out:
260 QDECREF(str);
261 return NULL;
262 }
263
264 static QObject *parser_context_pop_token(JSONParserContext *ctxt)
265 {
266 QObject *token;
267 g_assert(ctxt->tokens.pos < ctxt->tokens.count);
268 token = ctxt->tokens.buf[ctxt->tokens.pos];
269 ctxt->tokens.pos++;
270 return token;
271 }
272
273 /* Note: parser_context_{peek|pop}_token do not increment the
274 * token object's refcount. In both cases the references will continue
275 * to be tracked and cleaned up in parser_context_free(), so do not
276 * attempt to free the token object.
277 */
278 static QObject *parser_context_peek_token(JSONParserContext *ctxt)
279 {
280 QObject *token;
281 g_assert(ctxt->tokens.pos < ctxt->tokens.count);
282 token = ctxt->tokens.buf[ctxt->tokens.pos];
283 return token;
284 }
285
286 static JSONParserContext parser_context_save(JSONParserContext *ctxt)
287 {
288 JSONParserContext saved_ctxt = {0};
289 saved_ctxt.tokens.pos = ctxt->tokens.pos;
290 saved_ctxt.tokens.count = ctxt->tokens.count;
291 saved_ctxt.tokens.buf = ctxt->tokens.buf;
292 return saved_ctxt;
293 }
294
295 static void parser_context_restore(JSONParserContext *ctxt,
296 JSONParserContext saved_ctxt)
297 {
298 ctxt->tokens.pos = saved_ctxt.tokens.pos;
299 ctxt->tokens.count = saved_ctxt.tokens.count;
300 ctxt->tokens.buf = saved_ctxt.tokens.buf;
301 }
302
303 static void tokens_append_from_iter(QObject *obj, void *opaque)
304 {
305 JSONParserContext *ctxt = opaque;
306 g_assert(ctxt->tokens.pos < ctxt->tokens.count);
307 ctxt->tokens.buf[ctxt->tokens.pos++] = obj;
308 qobject_incref(obj);
309 }
310
311 static JSONParserContext *parser_context_new(QList *tokens)
312 {
313 JSONParserContext *ctxt;
314 size_t count;
315
316 if (!tokens) {
317 return NULL;
318 }
319
320 count = qlist_size(tokens);
321 if (count == 0) {
322 return NULL;
323 }
324
325 ctxt = g_malloc0(sizeof(JSONParserContext));
326 ctxt->tokens.pos = 0;
327 ctxt->tokens.count = count;
328 ctxt->tokens.buf = g_malloc(count * sizeof(QObject *));
329 qlist_iter(tokens, tokens_append_from_iter, ctxt);
330 ctxt->tokens.pos = 0;
331
332 return ctxt;
333 }
334
335 /* to support error propagation, ctxt->err must be freed separately */
336 static void parser_context_free(JSONParserContext *ctxt)
337 {
338 int i;
339 if (ctxt) {
340 for (i = 0; i < ctxt->tokens.count; i++) {
341 qobject_decref(ctxt->tokens.buf[i]);
342 }
343 g_free(ctxt->tokens.buf);
344 g_free(ctxt);
345 }
346 }
347
348 /**
349 * Parsing rules
350 */
351 static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap)
352 {
353 QObject *key = NULL, *token = NULL, *value, *peek;
354 JSONParserContext saved_ctxt = parser_context_save(ctxt);
355
356 peek = parser_context_peek_token(ctxt);
357 if (peek == NULL) {
358 parse_error(ctxt, NULL, "premature EOI");
359 goto out;
360 }
361
362 key = parse_value(ctxt, ap);
363 if (!key || qobject_type(key) != QTYPE_QSTRING) {
364 parse_error(ctxt, peek, "key is not a string in object");
365 goto out;
366 }
367
368 token = parser_context_pop_token(ctxt);
369 if (token == NULL) {
370 parse_error(ctxt, NULL, "premature EOI");
371 goto out;
372 }
373
374 if (token_get_type(token) != JSON_COLON) {
375 parse_error(ctxt, token, "missing : in object pair");
376 goto out;
377 }
378
379 value = parse_value(ctxt, ap);
380 if (value == NULL) {
381 parse_error(ctxt, token, "Missing value in dict");
382 goto out;
383 }
384
385 qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);
386
387 qobject_decref(key);
388
389 return 0;
390
391 out:
392 parser_context_restore(ctxt, saved_ctxt);
393 qobject_decref(key);
394
395 return -1;
396 }
397
398 static QObject *parse_object(JSONParserContext *ctxt, va_list *ap)
399 {
400 QDict *dict = NULL;
401 QObject *token, *peek;
402 JSONParserContext saved_ctxt = parser_context_save(ctxt);
403
404 token = parser_context_pop_token(ctxt);
405 if (token == NULL) {
406 goto out;
407 }
408
409 if (token_get_type(token) != JSON_LCURLY) {
410 goto out;
411 }
412
413 dict = qdict_new();
414
415 peek = parser_context_peek_token(ctxt);
416 if (peek == NULL) {
417 parse_error(ctxt, NULL, "premature EOI");
418 goto out;
419 }
420
421 if (token_get_type(peek) != JSON_RCURLY) {
422 if (parse_pair(ctxt, dict, ap) == -1) {
423 goto out;
424 }
425
426 token = parser_context_pop_token(ctxt);
427 if (token == NULL) {
428 parse_error(ctxt, NULL, "premature EOI");
429 goto out;
430 }
431
432 while (token_get_type(token) != JSON_RCURLY) {
433 if (token_get_type(token) != JSON_COMMA) {
434 parse_error(ctxt, token, "expected separator in dict");
435 goto out;
436 }
437
438 if (parse_pair(ctxt, dict, ap) == -1) {
439 goto out;
440 }
441
442 token = parser_context_pop_token(ctxt);
443 if (token == NULL) {
444 parse_error(ctxt, NULL, "premature EOI");
445 goto out;
446 }
447 }
448 } else {
449 (void)parser_context_pop_token(ctxt);
450 }
451
452 return QOBJECT(dict);
453
454 out:
455 parser_context_restore(ctxt, saved_ctxt);
456 QDECREF(dict);
457 return NULL;
458 }
459
460 static QObject *parse_array(JSONParserContext *ctxt, va_list *ap)
461 {
462 QList *list = NULL;
463 QObject *token, *peek;
464 JSONParserContext saved_ctxt = parser_context_save(ctxt);
465
466 token = parser_context_pop_token(ctxt);
467 if (token == NULL) {
468 goto out;
469 }
470
471 if (token_get_type(token) != JSON_LSQUARE) {
472 goto out;
473 }
474
475 list = qlist_new();
476
477 peek = parser_context_peek_token(ctxt);
478 if (peek == NULL) {
479 parse_error(ctxt, NULL, "premature EOI");
480 goto out;
481 }
482
483 if (token_get_type(peek) != JSON_RSQUARE) {
484 QObject *obj;
485
486 obj = parse_value(ctxt, ap);
487 if (obj == NULL) {
488 parse_error(ctxt, token, "expecting value");
489 goto out;
490 }
491
492 qlist_append_obj(list, obj);
493
494 token = parser_context_pop_token(ctxt);
495 if (token == NULL) {
496 parse_error(ctxt, NULL, "premature EOI");
497 goto out;
498 }
499
500 while (token_get_type(token) != JSON_RSQUARE) {
501 if (token_get_type(token) != JSON_COMMA) {
502 parse_error(ctxt, token, "expected separator in list");
503 goto out;
504 }
505
506 obj = parse_value(ctxt, ap);
507 if (obj == NULL) {
508 parse_error(ctxt, token, "expecting value");
509 goto out;
510 }
511
512 qlist_append_obj(list, obj);
513
514 token = parser_context_pop_token(ctxt);
515 if (token == NULL) {
516 parse_error(ctxt, NULL, "premature EOI");
517 goto out;
518 }
519 }
520 } else {
521 (void)parser_context_pop_token(ctxt);
522 }
523
524 return QOBJECT(list);
525
526 out:
527 parser_context_restore(ctxt, saved_ctxt);
528 QDECREF(list);
529 return NULL;
530 }
531
532 static QObject *parse_keyword(JSONParserContext *ctxt)
533 {
534 QObject *token, *ret;
535 JSONParserContext saved_ctxt = parser_context_save(ctxt);
536
537 token = parser_context_pop_token(ctxt);
538 if (token == NULL) {
539 goto out;
540 }
541
542 if (token_get_type(token) != JSON_KEYWORD) {
543 goto out;
544 }
545
546 if (token_is_keyword(token, "true")) {
547 ret = QOBJECT(qbool_from_bool(true));
548 } else if (token_is_keyword(token, "false")) {
549 ret = QOBJECT(qbool_from_bool(false));
550 } else if (token_is_keyword(token, "null")) {
551 ret = qnull();
552 } else {
553 parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
554 goto out;
555 }
556
557 return ret;
558
559 out:
560 parser_context_restore(ctxt, saved_ctxt);
561
562 return NULL;
563 }
564
565 static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap)
566 {
567 QObject *token = NULL, *obj;
568 JSONParserContext saved_ctxt = parser_context_save(ctxt);
569
570 if (ap == NULL) {
571 goto out;
572 }
573
574 token = parser_context_pop_token(ctxt);
575 if (token == NULL) {
576 goto out;
577 }
578
579 if (token_is_escape(token, "%p")) {
580 obj = va_arg(*ap, QObject *);
581 } else if (token_is_escape(token, "%i")) {
582 obj = QOBJECT(qbool_from_bool(va_arg(*ap, int)));
583 } else if (token_is_escape(token, "%d")) {
584 obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
585 } else if (token_is_escape(token, "%ld")) {
586 obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
587 } else if (token_is_escape(token, "%lld") ||
588 token_is_escape(token, "%I64d")) {
589 obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
590 } else if (token_is_escape(token, "%s")) {
591 obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
592 } else if (token_is_escape(token, "%f")) {
593 obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
594 } else {
595 goto out;
596 }
597
598 return obj;
599
600 out:
601 parser_context_restore(ctxt, saved_ctxt);
602
603 return NULL;
604 }
605
606 static QObject *parse_literal(JSONParserContext *ctxt)
607 {
608 QObject *token, *obj;
609 JSONParserContext saved_ctxt = parser_context_save(ctxt);
610
611 token = parser_context_pop_token(ctxt);
612 if (token == NULL) {
613 goto out;
614 }
615
616 switch (token_get_type(token)) {
617 case JSON_STRING:
618 obj = QOBJECT(qstring_from_escaped_str(ctxt, token));
619 break;
620 case JSON_INTEGER: {
621 /* A possibility exists that this is a whole-valued float where the
622 * fractional part was left out due to being 0 (.0). It's not a big
623 * deal to treat these as ints in the parser, so long as users of the
624 * resulting QObject know to expect a QInt in place of a QFloat in
625 * cases like these.
626 *
627 * However, in some cases these values will overflow/underflow a
628 * QInt/int64 container, thus we should assume these are to be handled
629 * as QFloats/doubles rather than silently changing their values.
630 *
631 * strtoll() indicates these instances by setting errno to ERANGE
632 */
633 int64_t value;
634
635 errno = 0; /* strtoll doesn't set errno on success */
636 value = strtoll(token_get_value(token), NULL, 10);
637 if (errno != ERANGE) {
638 obj = QOBJECT(qint_from_int(value));
639 break;
640 }
641 /* fall through to JSON_FLOAT */
642 }
643 case JSON_FLOAT:
644 /* FIXME dependent on locale */
645 obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
646 break;
647 default:
648 goto out;
649 }
650
651 return obj;
652
653 out:
654 parser_context_restore(ctxt, saved_ctxt);
655
656 return NULL;
657 }
658
659 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap)
660 {
661 QObject *obj;
662
663 obj = parse_object(ctxt, ap);
664 if (obj == NULL) {
665 obj = parse_array(ctxt, ap);
666 }
667 if (obj == NULL) {
668 obj = parse_escape(ctxt, ap);
669 }
670 if (obj == NULL) {
671 obj = parse_keyword(ctxt);
672 }
673 if (obj == NULL) {
674 obj = parse_literal(ctxt);
675 }
676
677 return obj;
678 }
679
680 QObject *json_parser_parse(QList *tokens, va_list *ap)
681 {
682 return json_parser_parse_err(tokens, ap, NULL);
683 }
684
685 QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp)
686 {
687 JSONParserContext *ctxt = parser_context_new(tokens);
688 QObject *result;
689
690 if (!ctxt) {
691 return NULL;
692 }
693
694 result = parse_value(ctxt, ap);
695
696 error_propagate(errp, ctxt->err);
697
698 parser_context_free(ctxt);
699
700 return result;
701 }