]> git.proxmox.com Git - mirror_qemu.git/blame - qobject/json-parser.c
qjson: Convert to parser to recursive descent
[mirror_qemu.git] / qobject / json-parser.c
CommitLineData
4a5fcab7
AL
1/*
2 * JSON Parser
3 *
4 * Copyright IBM, Corp. 2009
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10 * See the COPYING.LIB file in the top-level directory.
11 *
12 */
13
c96c84a9 14#include <stdarg.h>
4a5fcab7
AL
15
16#include "qemu-common.h"
7b1b5d19
PB
17#include "qapi/qmp/qstring.h"
18#include "qapi/qmp/qint.h"
19#include "qapi/qmp/qdict.h"
20#include "qapi/qmp/qlist.h"
21#include "qapi/qmp/qfloat.h"
22#include "qapi/qmp/qbool.h"
23#include "qapi/qmp/json-parser.h"
24#include "qapi/qmp/json-lexer.h"
4a5fcab7
AL
25
26typedef struct JSONParserContext
27{
ef749d07 28 Error *err;
65c0f1e9
MR
29 struct {
30 QObject **buf;
31 size_t pos;
32 size_t count;
33 } tokens;
4a5fcab7
AL
34} JSONParserContext;
35
/* Assert that @cond does NOT hold; BUG_ON(x) is assert(!(x)). */
#define BUG_ON(cond) assert(!(cond))
37
38/**
39 * TODO
40 *
41 * 0) make errors meaningful again
42 * 1) add geometry information to tokens
43 * 3) should we return a parsed size?
44 * 4) deal with premature EOI
45 */
46
65c0f1e9 47static QObject *parse_value(JSONParserContext *ctxt, va_list *ap);
4a5fcab7
AL
48
49/**
50 * Token manipulators
51 *
52 * tokens are dictionaries that contain a type, a string value, and geometry information
53 * about a token identified by the lexer. These are routines that make working with
54 * these objects a bit easier.
55 */
56static const char *token_get_value(QObject *obj)
57{
58 return qdict_get_str(qobject_to_qdict(obj), "token");
59}
60
61static JSONTokenType token_get_type(QObject *obj)
62{
63 return qdict_get_int(qobject_to_qdict(obj), "type");
64}
65
4a5fcab7
AL
66/**
67 * Error handler
68 */
8b7968f7
SW
69static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
70 QObject *token, const char *msg, ...)
4a5fcab7 71{
c96c84a9 72 va_list ap;
ef749d07 73 char message[1024];
c96c84a9 74 va_start(ap, msg);
ef749d07 75 vsnprintf(message, sizeof(message), msg, ap);
c96c84a9 76 va_end(ap);
ef749d07
AL
77 if (ctxt->err) {
78 error_free(ctxt->err);
79 ctxt->err = NULL;
80 }
f231b88d 81 error_setg(&ctxt->err, "JSON parse error, %s", message);
4a5fcab7
AL
82}
83
84/**
85 * String helpers
86 *
87 * These helpers are used to unescape strings.
88 */
/*
 * Encode the 16-bit code unit @wchar as a NUL-terminated UTF-8 sequence
 * in @buffer.
 *
 * One, two or three bytes are written before the terminator, depending
 * on the value.  @buffer_length must cover the encoded bytes plus the
 * terminator; this is asserted.
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    size_t len = 0;

    if (wchar < 0x0080) {
        /* Single byte: 0xxxxxxx */
        assert(buffer_length >= 2);
        buffer[len++] = wchar & 0x7F;
    } else if (wchar < 0x0800) {
        /* Two bytes: 110xxxxx 10xxxxxx */
        assert(buffer_length >= 3);
        buffer[len++] = 0xC0 | ((wchar >> 6) & 0x1F);
        buffer[len++] = 0x80 | (wchar & 0x3F);
    } else {
        /* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
        assert(buffer_length >= 4);
        buffer[len++] = 0xE0 | ((wchar >> 12) & 0x0F);
        buffer[len++] = 0x80 | ((wchar >> 6) & 0x3F);
        buffer[len++] = 0x80 | (wchar & 0x3F);
    }

    buffer[len] = 0;
}
111
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', or -1 for any other
 * character.
 */
static int hex2decimal(char ch)
{
    if ('0' <= ch && ch <= '9') {
        return ch - '0';
    }

    switch (ch) {
    case 'a': case 'A':
        return 10;
    case 'b': case 'B':
        return 11;
    case 'c': case 'C':
        return 12;
    case 'd': case 'D':
        return 13;
    case 'e': case 'E':
        return 14;
    case 'f': case 'F':
        return 15;
    default:
        return -1;
    }
}
124
125/**
126 * qstring_from_escaped_str(): Unescape a JSON string token and return a QString
127 *
128 * string
129 * ""
130 * " chars "
131 * chars
132 * char
133 * char chars
134 * char
135 * any-Unicode-character-
136 * except-"-or-\-or-
137 * control-character
138 * \"
139 * \\
140 * \/
141 * \b
142 * \f
143 * \n
144 * \r
145 * \t
146 * \u four-hex-digits
147 */
148static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
149{
150 const char *ptr = token_get_value(token);
151 QString *str;
152 int double_quote = 1;
153
154 if (*ptr == '"') {
155 double_quote = 1;
156 } else {
157 double_quote = 0;
158 }
159 ptr++;
160
161 str = qstring_new();
162 while (*ptr &&
163 ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
164 if (*ptr == '\\') {
165 ptr++;
166
167 switch (*ptr) {
168 case '"':
169 qstring_append(str, "\"");
170 ptr++;
171 break;
172 case '\'':
173 qstring_append(str, "'");
174 ptr++;
175 break;
176 case '\\':
177 qstring_append(str, "\\");
178 ptr++;
179 break;
180 case '/':
181 qstring_append(str, "/");
182 ptr++;
183 break;
184 case 'b':
185 qstring_append(str, "\b");
186 ptr++;
187 break;
bd032695
LC
188 case 'f':
189 qstring_append(str, "\f");
190 ptr++;
191 break;
4a5fcab7
AL
192 case 'n':
193 qstring_append(str, "\n");
194 ptr++;
195 break;
196 case 'r':
197 qstring_append(str, "\r");
198 ptr++;
199 break;
200 case 't':
201 qstring_append(str, "\t");
202 ptr++;
203 break;
204 case 'u': {
205 uint16_t unicode_char = 0;
206 char utf8_char[4];
207 int i = 0;
208
209 ptr++;
210
211 for (i = 0; i < 4; i++) {
212 if (qemu_isxdigit(*ptr)) {
213 unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
214 } else {
215 parse_error(ctxt, token,
216 "invalid hex escape sequence in string");
217 goto out;
218 }
219 ptr++;
220 }
221
222 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
223 qstring_append(str, utf8_char);
224 } break;
225 default:
226 parse_error(ctxt, token, "invalid escape sequence in string");
227 goto out;
228 }
229 } else {
230 char dummy[2];
231
232 dummy[0] = *ptr++;
233 dummy[1] = 0;
234
235 qstring_append(str, dummy);
236 }
237 }
238
4a5fcab7
AL
239 return str;
240
241out:
242 QDECREF(str);
243 return NULL;
244}
245
65c0f1e9
MR
246static QObject *parser_context_pop_token(JSONParserContext *ctxt)
247{
248 QObject *token;
249 g_assert(ctxt->tokens.pos < ctxt->tokens.count);
250 token = ctxt->tokens.buf[ctxt->tokens.pos];
251 ctxt->tokens.pos++;
252 return token;
253}
254
255/* Note: parser_context_{peek|pop}_token do not increment the
256 * token object's refcount. In both cases the references will continue
257 * to be tracked and cleaned up in parser_context_free(), so do not
258 * attempt to free the token object.
259 */
260static QObject *parser_context_peek_token(JSONParserContext *ctxt)
261{
262 QObject *token;
263 g_assert(ctxt->tokens.pos < ctxt->tokens.count);
264 token = ctxt->tokens.buf[ctxt->tokens.pos];
265 return token;
266}
267
65c0f1e9
MR
268static void tokens_append_from_iter(QObject *obj, void *opaque)
269{
270 JSONParserContext *ctxt = opaque;
271 g_assert(ctxt->tokens.pos < ctxt->tokens.count);
272 ctxt->tokens.buf[ctxt->tokens.pos++] = obj;
273 qobject_incref(obj);
274}
275
276static JSONParserContext *parser_context_new(QList *tokens)
277{
278 JSONParserContext *ctxt;
279 size_t count;
280
281 if (!tokens) {
282 return NULL;
283 }
284
285 count = qlist_size(tokens);
286 if (count == 0) {
287 return NULL;
288 }
289
290 ctxt = g_malloc0(sizeof(JSONParserContext));
291 ctxt->tokens.pos = 0;
292 ctxt->tokens.count = count;
293 ctxt->tokens.buf = g_malloc(count * sizeof(QObject *));
294 qlist_iter(tokens, tokens_append_from_iter, ctxt);
295 ctxt->tokens.pos = 0;
296
297 return ctxt;
298}
299
300/* to support error propagation, ctxt->err must be freed separately */
301static void parser_context_free(JSONParserContext *ctxt)
302{
303 int i;
304 if (ctxt) {
305 for (i = 0; i < ctxt->tokens.count; i++) {
306 qobject_decref(ctxt->tokens.buf[i]);
307 }
308 g_free(ctxt->tokens.buf);
309 g_free(ctxt);
310 }
311}
312
4a5fcab7
AL
313/**
314 * Parsing rules
315 */
65c0f1e9 316static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap)
4a5fcab7 317{
11e8a46c 318 QObject *key = NULL, *token = NULL, *value, *peek;
4a5fcab7 319
65c0f1e9 320 peek = parser_context_peek_token(ctxt);
11e8a46c
AL
321 if (peek == NULL) {
322 parse_error(ctxt, NULL, "premature EOI");
323 goto out;
324 }
325
65c0f1e9 326 key = parse_value(ctxt, ap);
d758d90f 327 if (!key || qobject_type(key) != QTYPE_QSTRING) {
4a5fcab7
AL
328 parse_error(ctxt, peek, "key is not a string in object");
329 goto out;
330 }
331
65c0f1e9 332 token = parser_context_pop_token(ctxt);
11e8a46c
AL
333 if (token == NULL) {
334 parse_error(ctxt, NULL, "premature EOI");
335 goto out;
336 }
337
c5461660 338 if (token_get_type(token) != JSON_COLON) {
4a5fcab7
AL
339 parse_error(ctxt, token, "missing : in object pair");
340 goto out;
341 }
342
65c0f1e9 343 value = parse_value(ctxt, ap);
4a5fcab7
AL
344 if (value == NULL) {
345 parse_error(ctxt, token, "Missing value in dict");
346 goto out;
347 }
348
349 qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);
350
4a5fcab7 351 qobject_decref(key);
4a5fcab7
AL
352
353 return 0;
354
355out:
4a5fcab7 356 qobject_decref(key);
4a5fcab7
AL
357
358 return -1;
359}
360
65c0f1e9 361static QObject *parse_object(JSONParserContext *ctxt, va_list *ap)
4a5fcab7
AL
362{
363 QDict *dict = NULL;
364 QObject *token, *peek;
4a5fcab7 365
65c0f1e9 366 token = parser_context_pop_token(ctxt);
d538b255 367 assert(token && token_get_type(token) == JSON_LCURLY);
4a5fcab7
AL
368
369 dict = qdict_new();
370
65c0f1e9 371 peek = parser_context_peek_token(ctxt);
11e8a46c
AL
372 if (peek == NULL) {
373 parse_error(ctxt, NULL, "premature EOI");
374 goto out;
375 }
376
c5461660 377 if (token_get_type(peek) != JSON_RCURLY) {
65c0f1e9 378 if (parse_pair(ctxt, dict, ap) == -1) {
4a5fcab7
AL
379 goto out;
380 }
381
65c0f1e9 382 token = parser_context_pop_token(ctxt);
11e8a46c
AL
383 if (token == NULL) {
384 parse_error(ctxt, NULL, "premature EOI");
385 goto out;
386 }
387
c5461660
MA
388 while (token_get_type(token) != JSON_RCURLY) {
389 if (token_get_type(token) != JSON_COMMA) {
4a5fcab7
AL
390 parse_error(ctxt, token, "expected separator in dict");
391 goto out;
392 }
4a5fcab7 393
65c0f1e9 394 if (parse_pair(ctxt, dict, ap) == -1) {
4a5fcab7
AL
395 goto out;
396 }
397
65c0f1e9 398 token = parser_context_pop_token(ctxt);
11e8a46c
AL
399 if (token == NULL) {
400 parse_error(ctxt, NULL, "premature EOI");
401 goto out;
402 }
4a5fcab7 403 }
4a5fcab7 404 } else {
a491af47 405 (void)parser_context_pop_token(ctxt);
4a5fcab7
AL
406 }
407
4a5fcab7
AL
408 return QOBJECT(dict);
409
410out:
4a5fcab7
AL
411 QDECREF(dict);
412 return NULL;
413}
414
65c0f1e9 415static QObject *parse_array(JSONParserContext *ctxt, va_list *ap)
4a5fcab7
AL
416{
417 QList *list = NULL;
418 QObject *token, *peek;
4a5fcab7 419
65c0f1e9 420 token = parser_context_pop_token(ctxt);
d538b255 421 assert(token && token_get_type(token) == JSON_LSQUARE);
4a5fcab7
AL
422
423 list = qlist_new();
424
65c0f1e9 425 peek = parser_context_peek_token(ctxt);
11e8a46c
AL
426 if (peek == NULL) {
427 parse_error(ctxt, NULL, "premature EOI");
428 goto out;
429 }
430
c5461660 431 if (token_get_type(peek) != JSON_RSQUARE) {
4a5fcab7
AL
432 QObject *obj;
433
65c0f1e9 434 obj = parse_value(ctxt, ap);
4a5fcab7
AL
435 if (obj == NULL) {
436 parse_error(ctxt, token, "expecting value");
437 goto out;
438 }
439
440 qlist_append_obj(list, obj);
441
65c0f1e9 442 token = parser_context_pop_token(ctxt);
11e8a46c
AL
443 if (token == NULL) {
444 parse_error(ctxt, NULL, "premature EOI");
445 goto out;
446 }
447
c5461660
MA
448 while (token_get_type(token) != JSON_RSQUARE) {
449 if (token_get_type(token) != JSON_COMMA) {
4a5fcab7
AL
450 parse_error(ctxt, token, "expected separator in list");
451 goto out;
452 }
453
65c0f1e9 454 obj = parse_value(ctxt, ap);
4a5fcab7
AL
455 if (obj == NULL) {
456 parse_error(ctxt, token, "expecting value");
457 goto out;
458 }
459
460 qlist_append_obj(list, obj);
461
65c0f1e9 462 token = parser_context_pop_token(ctxt);
11e8a46c
AL
463 if (token == NULL) {
464 parse_error(ctxt, NULL, "premature EOI");
465 goto out;
466 }
4a5fcab7 467 }
4a5fcab7 468 } else {
a491af47 469 (void)parser_context_pop_token(ctxt);
4a5fcab7
AL
470 }
471
4a5fcab7
AL
472 return QOBJECT(list);
473
474out:
4a5fcab7
AL
475 QDECREF(list);
476 return NULL;
477}
478
65c0f1e9 479static QObject *parse_keyword(JSONParserContext *ctxt)
4a5fcab7 480{
d538b255 481 QObject *token;
50e2a467 482 const char *val;
4a5fcab7 483
65c0f1e9 484 token = parser_context_pop_token(ctxt);
d538b255 485 assert(token && token_get_type(token) == JSON_KEYWORD);
50e2a467
MA
486 val = token_get_value(token);
487
488 if (!strcmp(val, "true")) {
d538b255 489 return QOBJECT(qbool_from_bool(true));
50e2a467 490 } else if (!strcmp(val, "false")) {
d538b255 491 return QOBJECT(qbool_from_bool(false));
50e2a467 492 } else if (!strcmp(val, "null")) {
d538b255 493 return qnull();
4a5fcab7 494 }
d538b255 495 parse_error(ctxt, token, "invalid keyword '%s'", val);
4a5fcab7
AL
496 return NULL;
497}
498
65c0f1e9 499static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap)
4a5fcab7 500{
d538b255 501 QObject *token;
6b9606f6 502 const char *val;
4a5fcab7
AL
503
504 if (ap == NULL) {
d538b255 505 return NULL;
4a5fcab7
AL
506 }
507
65c0f1e9 508 token = parser_context_pop_token(ctxt);
d538b255 509 assert(token && token_get_type(token) == JSON_ESCAPE);
6b9606f6
MA
510 val = token_get_value(token);
511
512 if (!strcmp(val, "%p")) {
d538b255 513 return va_arg(*ap, QObject *);
6b9606f6 514 } else if (!strcmp(val, "%i")) {
d538b255 515 return QOBJECT(qbool_from_bool(va_arg(*ap, int)));
6b9606f6 516 } else if (!strcmp(val, "%d")) {
d538b255 517 return QOBJECT(qint_from_int(va_arg(*ap, int)));
6b9606f6 518 } else if (!strcmp(val, "%ld")) {
d538b255 519 return QOBJECT(qint_from_int(va_arg(*ap, long)));
6b9606f6
MA
520 } else if (!strcmp(val, "%lld") ||
521 !strcmp(val, "%I64d")) {
d538b255 522 return QOBJECT(qint_from_int(va_arg(*ap, long long)));
6b9606f6 523 } else if (!strcmp(val, "%s")) {
d538b255 524 return QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
6b9606f6 525 } else if (!strcmp(val, "%f")) {
d538b255 526 return QOBJECT(qfloat_from_double(va_arg(*ap, double)));
4a5fcab7 527 }
4a5fcab7
AL
528 return NULL;
529}
530
65c0f1e9 531static QObject *parse_literal(JSONParserContext *ctxt)
4a5fcab7 532{
d538b255 533 QObject *token;
4a5fcab7 534
65c0f1e9 535 token = parser_context_pop_token(ctxt);
d538b255 536 assert(token);
11e8a46c 537
4a5fcab7
AL
538 switch (token_get_type(token)) {
539 case JSON_STRING:
d538b255 540 return QOBJECT(qstring_from_escaped_str(ctxt, token));
3d5b3ec6
MR
541 case JSON_INTEGER: {
542 /* A possibility exists that this is a whole-valued float where the
543 * fractional part was left out due to being 0 (.0). It's not a big
544 * deal to treat these as ints in the parser, so long as users of the
545 * resulting QObject know to expect a QInt in place of a QFloat in
546 * cases like these.
547 *
548 * However, in some cases these values will overflow/underflow a
549 * QInt/int64 container, thus we should assume these are to be handled
550 * as QFloats/doubles rather than silently changing their values.
551 *
552 * strtoll() indicates these instances by setting errno to ERANGE
553 */
554 int64_t value;
555
556 errno = 0; /* strtoll doesn't set errno on success */
557 value = strtoll(token_get_value(token), NULL, 10);
558 if (errno != ERANGE) {
d538b255 559 return QOBJECT(qint_from_int(value));
3d5b3ec6
MR
560 }
561 /* fall through to JSON_FLOAT */
562 }
4a5fcab7
AL
563 case JSON_FLOAT:
564 /* FIXME dependent on locale */
d538b255
MA
565 return QOBJECT(qfloat_from_double(strtod(token_get_value(token),
566 NULL)));
4a5fcab7 567 default:
d538b255 568 abort();
4a5fcab7 569 }
4a5fcab7
AL
570}
571
65c0f1e9 572static QObject *parse_value(JSONParserContext *ctxt, va_list *ap)
4a5fcab7 573{
d538b255 574 QObject *token;
4a5fcab7 575
d538b255
MA
576 token = parser_context_peek_token(ctxt);
577 if (token == NULL) {
578 parse_error(ctxt, NULL, "premature EOI");
579 return NULL;
4a5fcab7
AL
580 }
581
d538b255
MA
582 switch (token_get_type(token)) {
583 case JSON_LCURLY:
584 return parse_object(ctxt, ap);
585 case JSON_LSQUARE:
586 return parse_array(ctxt, ap);
587 case JSON_ESCAPE:
588 return parse_escape(ctxt, ap);
589 case JSON_INTEGER:
590 case JSON_FLOAT:
591 case JSON_STRING:
592 return parse_literal(ctxt);
593 case JSON_KEYWORD:
594 return parse_keyword(ctxt);
595 default:
596 parse_error(ctxt, token, "expecting value");
597 return NULL;
598 }
4a5fcab7
AL
599}
600
601QObject *json_parser_parse(QList *tokens, va_list *ap)
ef749d07
AL
602{
603 return json_parser_parse_err(tokens, ap, NULL);
604}
605
606QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp)
4a5fcab7 607{
65c0f1e9 608 JSONParserContext *ctxt = parser_context_new(tokens);
4a5fcab7
AL
609 QObject *result;
610
65c0f1e9 611 if (!ctxt) {
c1990ebf
MR
612 return NULL;
613 }
4a5fcab7 614
65c0f1e9
MR
615 result = parse_value(ctxt, ap);
616
617 error_propagate(errp, ctxt->err);
4a5fcab7 618
65c0f1e9 619 parser_context_free(ctxt);
ef749d07 620
4a5fcab7
AL
621 return result;
622}