]> git.proxmox.com Git - mirror_qemu.git/blame - qobject/json-parser.c
Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
[mirror_qemu.git] / qobject / json-parser.c
CommitLineData
4a5fcab7 1/*
6e8e5cb9 2 * JSON Parser
4a5fcab7
AL
3 *
4 * Copyright IBM, Corp. 2009
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10 * See the COPYING.LIB file in the top-level directory.
11 *
12 */
13
f2ad72b3 14#include "qemu/osdep.h"
856dfd8a 15#include "qemu/ctype.h"
2bc7cfea 16#include "qemu/cutils.h"
e59f39d4 17#include "qemu/unicode.h"
da34e65c 18#include "qapi/error.h"
6b673957 19#include "qapi/qmp/qbool.h"
452fcdbc 20#include "qapi/qmp/qdict.h"
47e6b297 21#include "qapi/qmp/qlist.h"
15280c36
MA
22#include "qapi/qmp/qnull.h"
23#include "qapi/qmp/qnum.h"
6b673957 24#include "qapi/qmp/qstring.h"
86cdf9ec 25#include "json-parser-int.h"
4a5fcab7 26
abe7c206
MA
27struct JSONToken {
28 JSONTokenType type;
29 int x;
30 int y;
31 char str[];
32};
33
4a5fcab7
AL
34typedef struct JSONParserContext
35{
ef749d07 36 Error *err;
9bada897 37 JSONToken *current;
95385fe9 38 GQueue *buf;
ada74c3b 39 va_list *ap;
4a5fcab7
AL
40} JSONParserContext;
41
42#define BUG_ON(cond) assert(!(cond))
43
44/**
45 * TODO
46 *
47 * 0) make errors meaningful again
48 * 1) add geometry information to tokens
49 * 3) should we return a parsed size?
50 * 4) deal with premature EOI
51 */
52
ada74c3b 53static QObject *parse_value(JSONParserContext *ctxt);
4a5fcab7 54
4a5fcab7
AL
55/**
56 * Error handler
57 */
8b7968f7 58static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
9bada897 59 JSONToken *token, const char *msg, ...)
4a5fcab7 60{
c96c84a9 61 va_list ap;
ef749d07 62 char message[1024];
574bf16f
MA
63
64 if (ctxt->err) {
65 return;
66 }
c96c84a9 67 va_start(ap, msg);
ef749d07 68 vsnprintf(message, sizeof(message), msg, ap);
c96c84a9 69 va_end(ap);
f231b88d 70 error_setg(&ctxt->err, "JSON parse error, %s", message);
4a5fcab7
AL
71}
72
/*
 * Convert the four hexadecimal digits at @s to their integer value.
 *
 * Upper- and lower-case digits are accepted.  Characters are examined
 * left to right and scanning stops at the first one that is not a hex
 * digit, so a string shorter than four characters is rejected at its
 * terminating NUL without reading beyond it.
 *
 * Returns the value (0 .. 0xFFFF) on success, -1 if any of the four
 * characters is not a hexadecimal digit.
 */
static int cvt4hex(const char *s)
{
    int cp = 0;
    int i;

    for (i = 0; i < 4; i++) {
        char c = s[i];
        int digit;

        /*
         * The range tests below are the only validation needed; a
         * separate isxdigit()-style pre-check would be redundant.
         */
        if (c >= '0' && c <= '9') {
            digit = c - '0';
        } else if (c >= 'a' && c <= 'f') {
            digit = 10 + c - 'a';
        } else if (c >= 'A' && c <= 'F') {
            digit = 10 + c - 'A';
        } else {
            return -1;
        }
        cp = (cp << 4) | digit;
    }
    return cp;
}
95
96/**
b2da4a4d 97 * parse_string(): Parse a JSON string
4a5fcab7 98 *
b2da4a4d
MA
99 * From RFC 8259 "The JavaScript Object Notation (JSON) Data
100 * Interchange Format":
101 *
102 * char = unescaped /
103 * escape (
104 * %x22 / ; " quotation mark U+0022
105 * %x5C / ; \ reverse solidus U+005C
106 * %x2F / ; / solidus U+002F
107 * %x62 / ; b backspace U+0008
108 * %x66 / ; f form feed U+000C
109 * %x6E / ; n line feed U+000A
110 * %x72 / ; r carriage return U+000D
111 * %x74 / ; t tab U+0009
112 * %x75 4HEXDIG ) ; uXXXX U+XXXX
113 * escape = %x5C ; \
114 * quotation-mark = %x22 ; "
115 * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
116 *
117 * Extensions over RFC 8259:
118 * - Extra escape sequence in strings:
119 * 0x27 (apostrophe) is recognized after escape, too
120 * - Single-quoted strings:
121 * Like double-quoted strings, except they're delimited by %x27
122 * (apostrophe) instead of %x22 (quotation mark), and can't contain
123 * unescaped apostrophe, but can contain unescaped quotation mark.
124 *
125 * Note:
126 * - Encoding is modified UTF-8.
127 * - Invalid Unicode characters are rejected.
128 * - Control characters \x00..\x1F are rejected by the lexer.
4a5fcab7 129 */
b2da4a4d 130static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
4a5fcab7 131{
9bada897 132 const char *ptr = token->str;
4a5fcab7 133 QString *str;
00ea57fa 134 char quote;
dc45a07c
MA
135 const char *beg;
136 int cp, trailing;
e59f39d4
MA
137 char *end;
138 ssize_t len;
139 char utf8_buf[5];
4a5fcab7 140
00ea57fa
MA
141 assert(*ptr == '"' || *ptr == '\'');
142 quote = *ptr++;
4a5fcab7 143 str = qstring_new();
00ea57fa
MA
144
145 while (*ptr != quote) {
146 assert(*ptr);
16a48599
MA
147 switch (*ptr) {
148 case '\\':
dc45a07c 149 beg = ptr++;
00ea57fa 150 switch (*ptr++) {
4a5fcab7 151 case '"':
de6decfe 152 qstring_append_chr(str, '"');
4a5fcab7
AL
153 break;
154 case '\'':
de6decfe 155 qstring_append_chr(str, '\'');
4a5fcab7
AL
156 break;
157 case '\\':
de6decfe 158 qstring_append_chr(str, '\\');
4a5fcab7
AL
159 break;
160 case '/':
de6decfe 161 qstring_append_chr(str, '/');
4a5fcab7
AL
162 break;
163 case 'b':
de6decfe 164 qstring_append_chr(str, '\b');
4a5fcab7 165 break;
bd032695 166 case 'f':
de6decfe 167 qstring_append_chr(str, '\f');
bd032695 168 break;
4a5fcab7 169 case 'n':
de6decfe 170 qstring_append_chr(str, '\n');
4a5fcab7
AL
171 break;
172 case 'r':
de6decfe 173 qstring_append_chr(str, '\r');
4a5fcab7
AL
174 break;
175 case 't':
de6decfe 176 qstring_append_chr(str, '\t');
4a5fcab7 177 break;
de6decfe 178 case 'u':
dc45a07c
MA
179 cp = cvt4hex(ptr);
180 ptr += 4;
181
182 /* handle surrogate pairs */
183 if (cp >= 0xD800 && cp <= 0xDBFF
184 && ptr[0] == '\\' && ptr[1] == 'u') {
185 /* leading surrogate followed by \u */
186 cp = 0x10000 + ((cp & 0x3FF) << 10);
187 trailing = cvt4hex(ptr + 2);
188 if (trailing >= 0xDC00 && trailing <= 0xDFFF) {
189 /* followed by trailing surrogate */
190 cp |= trailing & 0x3FF;
191 ptr += 6;
192 } else {
193 cp = -1; /* invalid */
4a5fcab7 194 }
4a5fcab7
AL
195 }
196
46a628b1
MA
197 if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) {
198 parse_error(ctxt, token,
dc45a07c
MA
199 "%.*s is not a valid Unicode character",
200 (int)(ptr - beg), beg);
46a628b1
MA
201 goto out;
202 }
de6decfe
MA
203 qstring_append(str, utf8_buf);
204 break;
4a5fcab7
AL
205 default:
206 parse_error(ctxt, token, "invalid escape sequence in string");
207 goto out;
208 }
16a48599
MA
209 break;
210 case '%':
bbc0586c
CF
211 if (ctxt->ap) {
212 if (ptr[1] != '%') {
213 parse_error(ctxt, token, "can't interpolate into string");
214 goto out;
215 }
216 ptr++;
16a48599
MA
217 }
218 /* fall through */
219 default:
e59f39d4 220 cp = mod_utf8_codepoint(ptr, 6, &end);
4b1c0cd7 221 if (cp < 0) {
e59f39d4
MA
222 parse_error(ctxt, token, "invalid UTF-8 sequence in string");
223 goto out;
224 }
225 ptr = end;
226 len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp);
227 assert(len >= 0);
228 qstring_append(str, utf8_buf);
4a5fcab7
AL
229 }
230 }
231
4a5fcab7
AL
232 return str;
233
234out:
cb3e7f08 235 qobject_unref(str);
4a5fcab7
AL
236 return NULL;
237}
238
9bada897
PB
239/* Note: the token object returned by parser_context_peek_token or
240 * parser_context_pop_token is deleted as soon as parser_context_pop_token
241 * is called again.
95385fe9 242 */
9bada897 243static JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
65c0f1e9 244{
9bada897 245 g_free(ctxt->current);
95385fe9
PB
246 ctxt->current = g_queue_pop_head(ctxt->buf);
247 return ctxt->current;
65c0f1e9
MR
248}
249
9bada897 250static JSONToken *parser_context_peek_token(JSONParserContext *ctxt)
65c0f1e9 251{
95385fe9 252 return g_queue_peek_head(ctxt->buf);
65c0f1e9
MR
253}
254
4a5fcab7
AL
255/**
256 * Parsing rules
257 */
ada74c3b 258static int parse_pair(JSONParserContext *ctxt, QDict *dict)
4a5fcab7 259{
532fb532
HR
260 QObject *value;
261 QString *key = NULL;
9bada897 262 JSONToken *peek, *token;
4a5fcab7 263
65c0f1e9 264 peek = parser_context_peek_token(ctxt);
11e8a46c
AL
265 if (peek == NULL) {
266 parse_error(ctxt, NULL, "premature EOI");
267 goto out;
268 }
269
ada74c3b 270 key = qobject_to(QString, parse_value(ctxt));
532fb532 271 if (!key) {
4a5fcab7
AL
272 parse_error(ctxt, peek, "key is not a string in object");
273 goto out;
274 }
275
65c0f1e9 276 token = parser_context_pop_token(ctxt);
11e8a46c
AL
277 if (token == NULL) {
278 parse_error(ctxt, NULL, "premature EOI");
279 goto out;
280 }
281
9bada897 282 if (token->type != JSON_COLON) {
4a5fcab7
AL
283 parse_error(ctxt, token, "missing : in object pair");
284 goto out;
285 }
286
ada74c3b 287 value = parse_value(ctxt);
4a5fcab7
AL
288 if (value == NULL) {
289 parse_error(ctxt, token, "Missing value in dict");
290 goto out;
291 }
00382fa8
MA
292
293 if (qdict_haskey(dict, qstring_get_str(key))) {
294 parse_error(ctxt, token, "duplicate key");
295 goto out;
296 }
4a5fcab7 297
532fb532 298 qdict_put_obj(dict, qstring_get_str(key), value);
4a5fcab7 299
cb3e7f08 300 qobject_unref(key);
4a5fcab7
AL
301
302 return 0;
303
304out:
cb3e7f08 305 qobject_unref(key);
4a5fcab7
AL
306
307 return -1;
308}
309
ada74c3b 310static QObject *parse_object(JSONParserContext *ctxt)
4a5fcab7
AL
311{
312 QDict *dict = NULL;
9bada897 313 JSONToken *token, *peek;
4a5fcab7 314
65c0f1e9 315 token = parser_context_pop_token(ctxt);
9bada897 316 assert(token && token->type == JSON_LCURLY);
4a5fcab7
AL
317
318 dict = qdict_new();
319
65c0f1e9 320 peek = parser_context_peek_token(ctxt);
11e8a46c
AL
321 if (peek == NULL) {
322 parse_error(ctxt, NULL, "premature EOI");
323 goto out;
324 }
325
9bada897 326 if (peek->type != JSON_RCURLY) {
ada74c3b 327 if (parse_pair(ctxt, dict) == -1) {
4a5fcab7
AL
328 goto out;
329 }
330
65c0f1e9 331 token = parser_context_pop_token(ctxt);
11e8a46c
AL
332 if (token == NULL) {
333 parse_error(ctxt, NULL, "premature EOI");
334 goto out;
335 }
336
9bada897
PB
337 while (token->type != JSON_RCURLY) {
338 if (token->type != JSON_COMMA) {
4a5fcab7
AL
339 parse_error(ctxt, token, "expected separator in dict");
340 goto out;
341 }
4a5fcab7 342
ada74c3b 343 if (parse_pair(ctxt, dict) == -1) {
4a5fcab7
AL
344 goto out;
345 }
346
65c0f1e9 347 token = parser_context_pop_token(ctxt);
11e8a46c
AL
348 if (token == NULL) {
349 parse_error(ctxt, NULL, "premature EOI");
350 goto out;
351 }
4a5fcab7 352 }
4a5fcab7 353 } else {
a491af47 354 (void)parser_context_pop_token(ctxt);
4a5fcab7
AL
355 }
356
4a5fcab7
AL
357 return QOBJECT(dict);
358
359out:
cb3e7f08 360 qobject_unref(dict);
4a5fcab7
AL
361 return NULL;
362}
363
ada74c3b 364static QObject *parse_array(JSONParserContext *ctxt)
4a5fcab7
AL
365{
366 QList *list = NULL;
9bada897 367 JSONToken *token, *peek;
4a5fcab7 368
65c0f1e9 369 token = parser_context_pop_token(ctxt);
9bada897 370 assert(token && token->type == JSON_LSQUARE);
4a5fcab7
AL
371
372 list = qlist_new();
373
65c0f1e9 374 peek = parser_context_peek_token(ctxt);
11e8a46c
AL
375 if (peek == NULL) {
376 parse_error(ctxt, NULL, "premature EOI");
377 goto out;
378 }
379
9bada897 380 if (peek->type != JSON_RSQUARE) {
4a5fcab7
AL
381 QObject *obj;
382
ada74c3b 383 obj = parse_value(ctxt);
4a5fcab7
AL
384 if (obj == NULL) {
385 parse_error(ctxt, token, "expecting value");
386 goto out;
387 }
388
389 qlist_append_obj(list, obj);
390
65c0f1e9 391 token = parser_context_pop_token(ctxt);
11e8a46c
AL
392 if (token == NULL) {
393 parse_error(ctxt, NULL, "premature EOI");
394 goto out;
395 }
396
9bada897
PB
397 while (token->type != JSON_RSQUARE) {
398 if (token->type != JSON_COMMA) {
4a5fcab7
AL
399 parse_error(ctxt, token, "expected separator in list");
400 goto out;
401 }
402
ada74c3b 403 obj = parse_value(ctxt);
4a5fcab7
AL
404 if (obj == NULL) {
405 parse_error(ctxt, token, "expecting value");
406 goto out;
407 }
408
409 qlist_append_obj(list, obj);
410
65c0f1e9 411 token = parser_context_pop_token(ctxt);
11e8a46c
AL
412 if (token == NULL) {
413 parse_error(ctxt, NULL, "premature EOI");
414 goto out;
415 }
4a5fcab7 416 }
4a5fcab7 417 } else {
a491af47 418 (void)parser_context_pop_token(ctxt);
4a5fcab7
AL
419 }
420
4a5fcab7
AL
421 return QOBJECT(list);
422
423out:
cb3e7f08 424 qobject_unref(list);
4a5fcab7
AL
425 return NULL;
426}
427
65c0f1e9 428static QObject *parse_keyword(JSONParserContext *ctxt)
4a5fcab7 429{
9bada897 430 JSONToken *token;
4a5fcab7 431
65c0f1e9 432 token = parser_context_pop_token(ctxt);
9bada897 433 assert(token && token->type == JSON_KEYWORD);
50e2a467 434
9bada897 435 if (!strcmp(token->str, "true")) {
d538b255 436 return QOBJECT(qbool_from_bool(true));
9bada897 437 } else if (!strcmp(token->str, "false")) {
d538b255 438 return QOBJECT(qbool_from_bool(false));
9bada897 439 } else if (!strcmp(token->str, "null")) {
006ca09f 440 return QOBJECT(qnull());
4a5fcab7 441 }
9bada897 442 parse_error(ctxt, token, "invalid keyword '%s'", token->str);
4a5fcab7
AL
443 return NULL;
444}
445
ada74c3b 446static QObject *parse_interpolation(JSONParserContext *ctxt)
4a5fcab7 447{
9bada897 448 JSONToken *token;
4a5fcab7 449
65c0f1e9 450 token = parser_context_pop_token(ctxt);
61030280 451 assert(token && token->type == JSON_INTERP);
6b9606f6 452
9bada897 453 if (!strcmp(token->str, "%p")) {
ada74c3b 454 return va_arg(*ctxt->ap, QObject *);
9bada897 455 } else if (!strcmp(token->str, "%i")) {
ada74c3b 456 return QOBJECT(qbool_from_bool(va_arg(*ctxt->ap, int)));
9bada897 457 } else if (!strcmp(token->str, "%d")) {
ada74c3b 458 return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int)));
9bada897 459 } else if (!strcmp(token->str, "%ld")) {
ada74c3b 460 return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long)));
53a0d616 461 } else if (!strcmp(token->str, "%lld")) {
ada74c3b 462 return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long long)));
53a0d616 463 } else if (!strcmp(token->str, "%" PRId64)) {
ada74c3b 464 return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int64_t)));
2bc7cfea 465 } else if (!strcmp(token->str, "%u")) {
ada74c3b 466 return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned int)));
2bc7cfea 467 } else if (!strcmp(token->str, "%lu")) {
ada74c3b 468 return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long)));
53a0d616 469 } else if (!strcmp(token->str, "%llu")) {
ada74c3b 470 return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long long)));
53a0d616 471 } else if (!strcmp(token->str, "%" PRIu64)) {
ada74c3b 472 return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, uint64_t)));
9bada897 473 } else if (!strcmp(token->str, "%s")) {
ada74c3b 474 return QOBJECT(qstring_from_str(va_arg(*ctxt->ap, const char *)));
9bada897 475 } else if (!strcmp(token->str, "%f")) {
ada74c3b 476 return QOBJECT(qnum_from_double(va_arg(*ctxt->ap, double)));
4a5fcab7 477 }
f7617d45 478 parse_error(ctxt, token, "invalid interpolation '%s'", token->str);
4a5fcab7
AL
479 return NULL;
480}
481
65c0f1e9 482static QObject *parse_literal(JSONParserContext *ctxt)
4a5fcab7 483{
9bada897 484 JSONToken *token;
4a5fcab7 485
65c0f1e9 486 token = parser_context_pop_token(ctxt);
d538b255 487 assert(token);
11e8a46c 488
9bada897 489 switch (token->type) {
4a5fcab7 490 case JSON_STRING:
b2da4a4d 491 return QOBJECT(parse_string(ctxt, token));
3d5b3ec6 492 case JSON_INTEGER: {
01b2ffce
MAL
493 /*
494 * Represent JSON_INTEGER as QNUM_I64 if possible, else as
2bc7cfea
MAL
495 * QNUM_U64, else as QNUM_DOUBLE. Note that qemu_strtoi64()
496 * and qemu_strtou64() fail with ERANGE when it's not
497 * possible.
3d5b3ec6 498 *
01b2ffce 499 * qnum_get_int() will then work for any signed 64-bit
2bc7cfea
MAL
500 * JSON_INTEGER, qnum_get_uint() for any unsigned 64-bit
501 * integer, and qnum_get_double() both for any JSON_INTEGER
502 * and any JSON_FLOAT (with precision loss for integers beyond
503 * 53 bits)
3d5b3ec6 504 */
2bc7cfea 505 int ret;
3d5b3ec6 506 int64_t value;
2bc7cfea 507 uint64_t uvalue;
3d5b3ec6 508
2bc7cfea
MAL
509 ret = qemu_strtoi64(token->str, NULL, 10, &value);
510 if (!ret) {
01b2ffce 511 return QOBJECT(qnum_from_int(value));
3d5b3ec6 512 }
2bc7cfea
MAL
513 assert(ret == -ERANGE);
514
515 if (token->str[0] != '-') {
516 ret = qemu_strtou64(token->str, NULL, 10, &uvalue);
517 if (!ret) {
518 return QOBJECT(qnum_from_uint(uvalue));
519 }
520 assert(ret == -ERANGE);
521 }
3d5b3ec6
MR
522 /* fall through to JSON_FLOAT */
523 }
4a5fcab7 524 case JSON_FLOAT:
6e8e5cb9 525 /* FIXME dependent on locale; a pervasive issue in QEMU */
37aded92 526 /* FIXME our lexer matches RFC 8259 in forbidding Inf or NaN,
6e8e5cb9 527 * but those might be useful extensions beyond JSON */
01b2ffce 528 return QOBJECT(qnum_from_double(strtod(token->str, NULL)));
4a5fcab7 529 default:
d538b255 530 abort();
4a5fcab7 531 }
4a5fcab7
AL
532}
533
ada74c3b 534static QObject *parse_value(JSONParserContext *ctxt)
4a5fcab7 535{
9bada897 536 JSONToken *token;
4a5fcab7 537
d538b255
MA
538 token = parser_context_peek_token(ctxt);
539 if (token == NULL) {
540 parse_error(ctxt, NULL, "premature EOI");
541 return NULL;
4a5fcab7
AL
542 }
543
9bada897 544 switch (token->type) {
d538b255 545 case JSON_LCURLY:
ada74c3b 546 return parse_object(ctxt);
d538b255 547 case JSON_LSQUARE:
ada74c3b 548 return parse_array(ctxt);
61030280 549 case JSON_INTERP:
ada74c3b 550 return parse_interpolation(ctxt);
d538b255
MA
551 case JSON_INTEGER:
552 case JSON_FLOAT:
553 case JSON_STRING:
554 return parse_literal(ctxt);
555 case JSON_KEYWORD:
556 return parse_keyword(ctxt);
557 default:
558 parse_error(ctxt, token, "expecting value");
559 return NULL;
560 }
4a5fcab7
AL
561}
562
abe7c206
MA
563JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr)
564{
565 JSONToken *token = g_malloc(sizeof(JSONToken) + tokstr->len + 1);
566
567 token->type = type;
568 memcpy(token->str, tokstr->str, tokstr->len);
569 token->str[tokstr->len] = 0;
570 token->x = x;
571 token->y = y;
572 return token;
573}
574
62815d85 575QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp)
4a5fcab7 576{
ada74c3b 577 JSONParserContext ctxt = { .buf = tokens, .ap = ap };
4a5fcab7
AL
578 QObject *result;
579
ada74c3b 580 result = parse_value(&ctxt);
5d50113c 581 assert(ctxt.err || g_queue_is_empty(ctxt.buf));
65c0f1e9 582
e8b19d7d 583 error_propagate(errp, ctxt.err);
4a5fcab7 584
e8b19d7d
MAL
585 while (!g_queue_is_empty(ctxt.buf)) {
586 parser_context_pop_token(&ctxt);
587 }
588 g_free(ctxt.current);
ef749d07 589
4a5fcab7
AL
590 return result;
591}