]> git.proxmox.com Git - mirror_qemu.git/blame - qobject/json-parser.c
check-qjson: Document we expect invalid UTF-8 to be rejected
[mirror_qemu.git] / qobject / json-parser.c
CommitLineData
4a5fcab7 1/*
6e8e5cb9 2 * JSON Parser
4a5fcab7
AL
3 *
4 * Copyright IBM, Corp. 2009
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10 * See the COPYING.LIB file in the top-level directory.
11 *
12 */
13
f2ad72b3 14#include "qemu/osdep.h"
2bc7cfea 15#include "qemu/cutils.h"
da34e65c 16#include "qapi/error.h"
4a5fcab7 17#include "qemu-common.h"
6b673957 18#include "qapi/qmp/qbool.h"
452fcdbc 19#include "qapi/qmp/qdict.h"
47e6b297 20#include "qapi/qmp/qlist.h"
15280c36
MA
21#include "qapi/qmp/qnull.h"
22#include "qapi/qmp/qnum.h"
6b673957 23#include "qapi/qmp/qstring.h"
7b1b5d19
PB
24#include "qapi/qmp/json-parser.h"
25#include "qapi/qmp/json-lexer.h"
9bada897 26#include "qapi/qmp/json-streamer.h"
4a5fcab7
AL
27
28typedef struct JSONParserContext
29{
ef749d07 30 Error *err;
9bada897 31 JSONToken *current;
95385fe9 32 GQueue *buf;
4a5fcab7
AL
33} JSONParserContext;
34
/* Assert that @cond is false. */
#define BUG_ON(cond) assert(!(cond))
36
37/**
38 * TODO
39 *
40 * 0) make errors meaningful again
41 * 1) add geometry information to tokens
42 * 3) should we return a parsed size?
43 * 4) deal with premature EOI
44 */
45
65c0f1e9 46static QObject *parse_value(JSONParserContext *ctxt, va_list *ap);
4a5fcab7 47
4a5fcab7
AL
48/**
49 * Error handler
50 */
8b7968f7 51static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
9bada897 52 JSONToken *token, const char *msg, ...)
4a5fcab7 53{
c96c84a9 54 va_list ap;
ef749d07 55 char message[1024];
c96c84a9 56 va_start(ap, msg);
ef749d07 57 vsnprintf(message, sizeof(message), msg, ap);
c96c84a9 58 va_end(ap);
ef749d07
AL
59 if (ctxt->err) {
60 error_free(ctxt->err);
61 ctxt->err = NULL;
62 }
f231b88d 63 error_setg(&ctxt->err, "JSON parse error, %s", message);
4a5fcab7
AL
64}
65
66/**
67 * String helpers
68 *
69 * These helpers are used to unescape strings.
70 */
/**
 * wchar_to_utf8(): Encode one 16-bit code unit as a NUL-terminated string
 *
 * @wchar: the code unit to encode
 * @buffer: destination; receives one to three bytes plus a terminating NUL
 * @buffer_length: size of @buffer in bytes; asserted to be large enough
 *
 * U+0000 is written in modified UTF-8 as "\xC0\x80".  Encoding it as a
 * plain NUL byte would yield an empty C string, and the character would
 * be silently lost when the result is appended to a string.
 *
 * Values above 0x07FF always take the three-byte form; surrogate code
 * units receive no special treatment.
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    if (wchar == 0) {
        /* modified UTF-8: overlong two-byte form keeps the string non-empty */
        assert(buffer_length >= 3);

        buffer[0] = (char)0xC0;
        buffer[1] = (char)0x80;
        buffer[2] = 0;
    } else if (wchar <= 0x007F) {
        assert(buffer_length >= 2);

        buffer[0] = wchar & 0x7F;
        buffer[1] = 0;
    } else if (wchar <= 0x07FF) {
        assert(buffer_length >= 3);

        buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F);
        buffer[1] = 0x80 | (wchar & 0x3F);
        buffer[2] = 0;
    } else {
        assert(buffer_length >= 4);

        buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F);
        buffer[1] = 0x80 | ((wchar >> 6) & 0x3F);
        buffer[2] = 0x80 | (wchar & 0x3F);
        buffer[3] = 0;
    }
}
93
/**
 * hex2decimal(): Numeric value of a single hexadecimal digit
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for any other
 * character.
 */
static int hex2decimal(char ch)
{
    int value = -1;

    if (ch >= '0' && ch <= '9') {
        value = ch - '0';
    } else if (ch >= 'a' && ch <= 'f') {
        value = ch - 'a' + 10;
    } else if (ch >= 'A' && ch <= 'F') {
        value = ch - 'A' + 10;
    }

    return value;
}
106
107/**
108 * parse_string(): Parse a json string and return a QObject
109 *
110 * string
111 * ""
112 * " chars "
113 * chars
114 * char
115 * char chars
116 * char
117 * any-Unicode-character-
118 * except-"-or-\-or-
119 * control-character
120 * \"
121 * \\
122 * \/
123 * \b
124 * \f
125 * \n
126 * \r
127 * \t
128 * \u four-hex-digits
129 */
9bada897
PB
130static QString *qstring_from_escaped_str(JSONParserContext *ctxt,
131 JSONToken *token)
4a5fcab7 132{
9bada897 133 const char *ptr = token->str;
4a5fcab7 134 QString *str;
00ea57fa 135 char quote;
4a5fcab7 136
00ea57fa
MA
137 assert(*ptr == '"' || *ptr == '\'');
138 quote = *ptr++;
4a5fcab7 139 str = qstring_new();
00ea57fa
MA
140
141 while (*ptr != quote) {
142 assert(*ptr);
4a5fcab7
AL
143 if (*ptr == '\\') {
144 ptr++;
00ea57fa 145 switch (*ptr++) {
4a5fcab7
AL
146 case '"':
147 qstring_append(str, "\"");
4a5fcab7
AL
148 break;
149 case '\'':
150 qstring_append(str, "'");
4a5fcab7
AL
151 break;
152 case '\\':
153 qstring_append(str, "\\");
4a5fcab7
AL
154 break;
155 case '/':
156 qstring_append(str, "/");
4a5fcab7
AL
157 break;
158 case 'b':
159 qstring_append(str, "\b");
4a5fcab7 160 break;
bd032695
LC
161 case 'f':
162 qstring_append(str, "\f");
bd032695 163 break;
4a5fcab7
AL
164 case 'n':
165 qstring_append(str, "\n");
4a5fcab7
AL
166 break;
167 case 'r':
168 qstring_append(str, "\r");
4a5fcab7
AL
169 break;
170 case 't':
171 qstring_append(str, "\t");
4a5fcab7
AL
172 break;
173 case 'u': {
174 uint16_t unicode_char = 0;
175 char utf8_char[4];
176 int i = 0;
177
4a5fcab7
AL
178 for (i = 0; i < 4; i++) {
179 if (qemu_isxdigit(*ptr)) {
180 unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
181 } else {
182 parse_error(ctxt, token,
183 "invalid hex escape sequence in string");
184 goto out;
185 }
186 ptr++;
187 }
188
189 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
190 qstring_append(str, utf8_char);
191 } break;
192 default:
193 parse_error(ctxt, token, "invalid escape sequence in string");
194 goto out;
195 }
196 } else {
197 char dummy[2];
198
199 dummy[0] = *ptr++;
200 dummy[1] = 0;
201
202 qstring_append(str, dummy);
203 }
204 }
205
4a5fcab7
AL
206 return str;
207
208out:
cb3e7f08 209 qobject_unref(str);
4a5fcab7
AL
210 return NULL;
211}
212
9bada897
PB
213/* Note: the token object returned by parser_context_peek_token or
214 * parser_context_pop_token is deleted as soon as parser_context_pop_token
215 * is called again.
95385fe9 216 */
9bada897 217static JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
65c0f1e9 218{
9bada897 219 g_free(ctxt->current);
95385fe9
PB
220 assert(!g_queue_is_empty(ctxt->buf));
221 ctxt->current = g_queue_pop_head(ctxt->buf);
222 return ctxt->current;
65c0f1e9
MR
223}
224
9bada897 225static JSONToken *parser_context_peek_token(JSONParserContext *ctxt)
65c0f1e9 226{
95385fe9
PB
227 assert(!g_queue_is_empty(ctxt->buf));
228 return g_queue_peek_head(ctxt->buf);
65c0f1e9
MR
229}
230
95385fe9 231static JSONParserContext *parser_context_new(GQueue *tokens)
65c0f1e9
MR
232{
233 JSONParserContext *ctxt;
65c0f1e9
MR
234
235 if (!tokens) {
236 return NULL;
237 }
238
65c0f1e9 239 ctxt = g_malloc0(sizeof(JSONParserContext));
95385fe9 240 ctxt->buf = tokens;
65c0f1e9
MR
241
242 return ctxt;
243}
244
245/* to support error propagation, ctxt->err must be freed separately */
246static void parser_context_free(JSONParserContext *ctxt)
247{
65c0f1e9 248 if (ctxt) {
95385fe9
PB
249 while (!g_queue_is_empty(ctxt->buf)) {
250 parser_context_pop_token(ctxt);
65c0f1e9 251 }
9bada897 252 g_free(ctxt->current);
95385fe9 253 g_queue_free(ctxt->buf);
65c0f1e9
MR
254 g_free(ctxt);
255 }
256}
257
4a5fcab7
AL
258/**
259 * Parsing rules
260 */
65c0f1e9 261static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap)
4a5fcab7 262{
532fb532
HR
263 QObject *value;
264 QString *key = NULL;
9bada897 265 JSONToken *peek, *token;
4a5fcab7 266
65c0f1e9 267 peek = parser_context_peek_token(ctxt);
11e8a46c
AL
268 if (peek == NULL) {
269 parse_error(ctxt, NULL, "premature EOI");
270 goto out;
271 }
272
532fb532
HR
273 key = qobject_to(QString, parse_value(ctxt, ap));
274 if (!key) {
4a5fcab7
AL
275 parse_error(ctxt, peek, "key is not a string in object");
276 goto out;
277 }
278
65c0f1e9 279 token = parser_context_pop_token(ctxt);
11e8a46c
AL
280 if (token == NULL) {
281 parse_error(ctxt, NULL, "premature EOI");
282 goto out;
283 }
284
9bada897 285 if (token->type != JSON_COLON) {
4a5fcab7
AL
286 parse_error(ctxt, token, "missing : in object pair");
287 goto out;
288 }
289
65c0f1e9 290 value = parse_value(ctxt, ap);
4a5fcab7
AL
291 if (value == NULL) {
292 parse_error(ctxt, token, "Missing value in dict");
293 goto out;
294 }
295
532fb532 296 qdict_put_obj(dict, qstring_get_str(key), value);
4a5fcab7 297
cb3e7f08 298 qobject_unref(key);
4a5fcab7
AL
299
300 return 0;
301
302out:
cb3e7f08 303 qobject_unref(key);
4a5fcab7
AL
304
305 return -1;
306}
307
65c0f1e9 308static QObject *parse_object(JSONParserContext *ctxt, va_list *ap)
4a5fcab7
AL
309{
310 QDict *dict = NULL;
9bada897 311 JSONToken *token, *peek;
4a5fcab7 312
65c0f1e9 313 token = parser_context_pop_token(ctxt);
9bada897 314 assert(token && token->type == JSON_LCURLY);
4a5fcab7
AL
315
316 dict = qdict_new();
317
65c0f1e9 318 peek = parser_context_peek_token(ctxt);
11e8a46c
AL
319 if (peek == NULL) {
320 parse_error(ctxt, NULL, "premature EOI");
321 goto out;
322 }
323
9bada897 324 if (peek->type != JSON_RCURLY) {
65c0f1e9 325 if (parse_pair(ctxt, dict, ap) == -1) {
4a5fcab7
AL
326 goto out;
327 }
328
65c0f1e9 329 token = parser_context_pop_token(ctxt);
11e8a46c
AL
330 if (token == NULL) {
331 parse_error(ctxt, NULL, "premature EOI");
332 goto out;
333 }
334
9bada897
PB
335 while (token->type != JSON_RCURLY) {
336 if (token->type != JSON_COMMA) {
4a5fcab7
AL
337 parse_error(ctxt, token, "expected separator in dict");
338 goto out;
339 }
4a5fcab7 340
65c0f1e9 341 if (parse_pair(ctxt, dict, ap) == -1) {
4a5fcab7
AL
342 goto out;
343 }
344
65c0f1e9 345 token = parser_context_pop_token(ctxt);
11e8a46c
AL
346 if (token == NULL) {
347 parse_error(ctxt, NULL, "premature EOI");
348 goto out;
349 }
4a5fcab7 350 }
4a5fcab7 351 } else {
a491af47 352 (void)parser_context_pop_token(ctxt);
4a5fcab7
AL
353 }
354
4a5fcab7
AL
355 return QOBJECT(dict);
356
357out:
cb3e7f08 358 qobject_unref(dict);
4a5fcab7
AL
359 return NULL;
360}
361
65c0f1e9 362static QObject *parse_array(JSONParserContext *ctxt, va_list *ap)
4a5fcab7
AL
363{
364 QList *list = NULL;
9bada897 365 JSONToken *token, *peek;
4a5fcab7 366
65c0f1e9 367 token = parser_context_pop_token(ctxt);
9bada897 368 assert(token && token->type == JSON_LSQUARE);
4a5fcab7
AL
369
370 list = qlist_new();
371
65c0f1e9 372 peek = parser_context_peek_token(ctxt);
11e8a46c
AL
373 if (peek == NULL) {
374 parse_error(ctxt, NULL, "premature EOI");
375 goto out;
376 }
377
9bada897 378 if (peek->type != JSON_RSQUARE) {
4a5fcab7
AL
379 QObject *obj;
380
65c0f1e9 381 obj = parse_value(ctxt, ap);
4a5fcab7
AL
382 if (obj == NULL) {
383 parse_error(ctxt, token, "expecting value");
384 goto out;
385 }
386
387 qlist_append_obj(list, obj);
388
65c0f1e9 389 token = parser_context_pop_token(ctxt);
11e8a46c
AL
390 if (token == NULL) {
391 parse_error(ctxt, NULL, "premature EOI");
392 goto out;
393 }
394
9bada897
PB
395 while (token->type != JSON_RSQUARE) {
396 if (token->type != JSON_COMMA) {
4a5fcab7
AL
397 parse_error(ctxt, token, "expected separator in list");
398 goto out;
399 }
400
65c0f1e9 401 obj = parse_value(ctxt, ap);
4a5fcab7
AL
402 if (obj == NULL) {
403 parse_error(ctxt, token, "expecting value");
404 goto out;
405 }
406
407 qlist_append_obj(list, obj);
408
65c0f1e9 409 token = parser_context_pop_token(ctxt);
11e8a46c
AL
410 if (token == NULL) {
411 parse_error(ctxt, NULL, "premature EOI");
412 goto out;
413 }
4a5fcab7 414 }
4a5fcab7 415 } else {
a491af47 416 (void)parser_context_pop_token(ctxt);
4a5fcab7
AL
417 }
418
4a5fcab7
AL
419 return QOBJECT(list);
420
421out:
cb3e7f08 422 qobject_unref(list);
4a5fcab7
AL
423 return NULL;
424}
425
65c0f1e9 426static QObject *parse_keyword(JSONParserContext *ctxt)
4a5fcab7 427{
9bada897 428 JSONToken *token;
4a5fcab7 429
65c0f1e9 430 token = parser_context_pop_token(ctxt);
9bada897 431 assert(token && token->type == JSON_KEYWORD);
50e2a467 432
9bada897 433 if (!strcmp(token->str, "true")) {
d538b255 434 return QOBJECT(qbool_from_bool(true));
9bada897 435 } else if (!strcmp(token->str, "false")) {
d538b255 436 return QOBJECT(qbool_from_bool(false));
9bada897 437 } else if (!strcmp(token->str, "null")) {
006ca09f 438 return QOBJECT(qnull());
4a5fcab7 439 }
9bada897 440 parse_error(ctxt, token, "invalid keyword '%s'", token->str);
4a5fcab7
AL
441 return NULL;
442}
443
65c0f1e9 444static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap)
4a5fcab7 445{
9bada897 446 JSONToken *token;
4a5fcab7
AL
447
448 if (ap == NULL) {
d538b255 449 return NULL;
4a5fcab7
AL
450 }
451
65c0f1e9 452 token = parser_context_pop_token(ctxt);
9bada897 453 assert(token && token->type == JSON_ESCAPE);
6b9606f6 454
9bada897 455 if (!strcmp(token->str, "%p")) {
d538b255 456 return va_arg(*ap, QObject *);
9bada897 457 } else if (!strcmp(token->str, "%i")) {
d538b255 458 return QOBJECT(qbool_from_bool(va_arg(*ap, int)));
9bada897 459 } else if (!strcmp(token->str, "%d")) {
01b2ffce 460 return QOBJECT(qnum_from_int(va_arg(*ap, int)));
9bada897 461 } else if (!strcmp(token->str, "%ld")) {
01b2ffce 462 return QOBJECT(qnum_from_int(va_arg(*ap, long)));
9bada897
PB
463 } else if (!strcmp(token->str, "%lld") ||
464 !strcmp(token->str, "%I64d")) {
01b2ffce 465 return QOBJECT(qnum_from_int(va_arg(*ap, long long)));
2bc7cfea
MAL
466 } else if (!strcmp(token->str, "%u")) {
467 return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned int)));
468 } else if (!strcmp(token->str, "%lu")) {
469 return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long)));
470 } else if (!strcmp(token->str, "%llu") ||
471 !strcmp(token->str, "%I64u")) {
472 return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long long)));
9bada897 473 } else if (!strcmp(token->str, "%s")) {
d538b255 474 return QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
9bada897 475 } else if (!strcmp(token->str, "%f")) {
01b2ffce 476 return QOBJECT(qnum_from_double(va_arg(*ap, double)));
4a5fcab7 477 }
4a5fcab7
AL
478 return NULL;
479}
480
65c0f1e9 481static QObject *parse_literal(JSONParserContext *ctxt)
4a5fcab7 482{
9bada897 483 JSONToken *token;
4a5fcab7 484
65c0f1e9 485 token = parser_context_pop_token(ctxt);
d538b255 486 assert(token);
11e8a46c 487
9bada897 488 switch (token->type) {
4a5fcab7 489 case JSON_STRING:
d538b255 490 return QOBJECT(qstring_from_escaped_str(ctxt, token));
3d5b3ec6 491 case JSON_INTEGER: {
01b2ffce
MAL
492 /*
493 * Represent JSON_INTEGER as QNUM_I64 if possible, else as
2bc7cfea
MAL
494 * QNUM_U64, else as QNUM_DOUBLE. Note that qemu_strtoi64()
495 * and qemu_strtou64() fail with ERANGE when it's not
496 * possible.
3d5b3ec6 497 *
01b2ffce 498 * qnum_get_int() will then work for any signed 64-bit
2bc7cfea
MAL
499 * JSON_INTEGER, qnum_get_uint() for any unsigned 64-bit
500 * integer, and qnum_get_double() both for any JSON_INTEGER
501 * and any JSON_FLOAT (with precision loss for integers beyond
502 * 53 bits)
3d5b3ec6 503 */
2bc7cfea 504 int ret;
3d5b3ec6 505 int64_t value;
2bc7cfea 506 uint64_t uvalue;
3d5b3ec6 507
2bc7cfea
MAL
508 ret = qemu_strtoi64(token->str, NULL, 10, &value);
509 if (!ret) {
01b2ffce 510 return QOBJECT(qnum_from_int(value));
3d5b3ec6 511 }
2bc7cfea
MAL
512 assert(ret == -ERANGE);
513
514 if (token->str[0] != '-') {
515 ret = qemu_strtou64(token->str, NULL, 10, &uvalue);
516 if (!ret) {
517 return QOBJECT(qnum_from_uint(uvalue));
518 }
519 assert(ret == -ERANGE);
520 }
3d5b3ec6
MR
521 /* fall through to JSON_FLOAT */
522 }
4a5fcab7 523 case JSON_FLOAT:
6e8e5cb9
EB
524 /* FIXME dependent on locale; a pervasive issue in QEMU */
525 /* FIXME our lexer matches RFC 7159 in forbidding Inf or NaN,
526 * but those might be useful extensions beyond JSON */
01b2ffce 527 return QOBJECT(qnum_from_double(strtod(token->str, NULL)));
4a5fcab7 528 default:
d538b255 529 abort();
4a5fcab7 530 }
4a5fcab7
AL
531}
532
65c0f1e9 533static QObject *parse_value(JSONParserContext *ctxt, va_list *ap)
4a5fcab7 534{
9bada897 535 JSONToken *token;
4a5fcab7 536
d538b255
MA
537 token = parser_context_peek_token(ctxt);
538 if (token == NULL) {
539 parse_error(ctxt, NULL, "premature EOI");
540 return NULL;
4a5fcab7
AL
541 }
542
9bada897 543 switch (token->type) {
d538b255
MA
544 case JSON_LCURLY:
545 return parse_object(ctxt, ap);
546 case JSON_LSQUARE:
547 return parse_array(ctxt, ap);
548 case JSON_ESCAPE:
549 return parse_escape(ctxt, ap);
550 case JSON_INTEGER:
551 case JSON_FLOAT:
552 case JSON_STRING:
553 return parse_literal(ctxt);
554 case JSON_KEYWORD:
555 return parse_keyword(ctxt);
556 default:
557 parse_error(ctxt, token, "expecting value");
558 return NULL;
559 }
4a5fcab7
AL
560}
561
95385fe9 562QObject *json_parser_parse(GQueue *tokens, va_list *ap)
ef749d07
AL
563{
564 return json_parser_parse_err(tokens, ap, NULL);
565}
566
95385fe9 567QObject *json_parser_parse_err(GQueue *tokens, va_list *ap, Error **errp)
4a5fcab7 568{
65c0f1e9 569 JSONParserContext *ctxt = parser_context_new(tokens);
4a5fcab7
AL
570 QObject *result;
571
65c0f1e9 572 if (!ctxt) {
c1990ebf
MR
573 return NULL;
574 }
4a5fcab7 575
65c0f1e9
MR
576 result = parse_value(ctxt, ap);
577
578 error_propagate(errp, ctxt->err);
4a5fcab7 579
65c0f1e9 580 parser_context_free(ctxt);
ef749d07 581
4a5fcab7
AL
582 return result;
583}