]> git.proxmox.com Git - mirror_qemu.git/blob - qobject/json-parser.c
json: Reject invalid UTF-8 sequences
[mirror_qemu.git] / qobject / json-parser.c
1 /*
2 * JSON Parser
3 *
4 * Copyright IBM, Corp. 2009
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10 * See the COPYING.LIB file in the top-level directory.
11 *
12 */
13
14 #include "qemu/osdep.h"
15 #include "qemu/cutils.h"
16 #include "qemu/unicode.h"
17 #include "qapi/error.h"
18 #include "qemu-common.h"
19 #include "qapi/qmp/qbool.h"
20 #include "qapi/qmp/qdict.h"
21 #include "qapi/qmp/qlist.h"
22 #include "qapi/qmp/qnull.h"
23 #include "qapi/qmp/qnum.h"
24 #include "qapi/qmp/qstring.h"
25 #include "qapi/qmp/json-parser.h"
26 #include "qapi/qmp/json-lexer.h"
27 #include "qapi/qmp/json-streamer.h"
28
29 typedef struct JSONParserContext
30 {
31 Error *err;
32 JSONToken *current;
33 GQueue *buf;
34 } JSONParserContext;
35
/* Assert that @condition does NOT hold; it indicates a programming error. */
#define BUG_ON(condition) assert(!(condition))
37
38 /**
39 * TODO
40 *
41 * 0) make errors meaningful again
42 * 1) add geometry information to tokens
43 * 3) should we return a parsed size?
44 * 4) deal with premature EOI
45 */
46
47 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap);
48
49 /**
50 * Error handler
51 */
52 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
53 JSONToken *token, const char *msg, ...)
54 {
55 va_list ap;
56 char message[1024];
57 va_start(ap, msg);
58 vsnprintf(message, sizeof(message), msg, ap);
59 va_end(ap);
60 if (ctxt->err) {
61 error_free(ctxt->err);
62 ctxt->err = NULL;
63 }
64 error_setg(&ctxt->err, "JSON parse error, %s", message);
65 }
66
/**
 * String helpers
 *
 * These helpers are used to unescape strings.
 */

/**
 * Encode the 16-bit code unit @wchar as a NUL-terminated byte sequence
 * in @buffer, using one, two or three bytes plus the terminator.
 *
 * @buffer_length is the size of @buffer; it is asserted to be large
 * enough for the bytes written (2, 3 or 4 including the terminator).
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    size_t n = 0;

    if (wchar <= 0x007F) {
        /* one byte: 0xxxxxxx */
        assert(buffer_length >= 2);

        buffer[n++] = wchar & 0x7F;
    } else if (wchar <= 0x07FF) {
        /* two bytes: 110xxxxx 10xxxxxx */
        assert(buffer_length >= 3);

        buffer[n++] = 0xC0 | ((wchar >> 6) & 0x1F);
        buffer[n++] = 0x80 | (wchar & 0x3F);
    } else {
        /* three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
        assert(buffer_length >= 4);

        buffer[n++] = 0xE0 | ((wchar >> 12) & 0x0F);
        buffer[n++] = 0x80 | ((wchar >> 6) & 0x3F);
        buffer[n++] = 0x80 | (wchar & 0x3F);
    }

    buffer[n] = 0;
}
94
/**
 * Convert a single hexadecimal digit character to its value.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for any
 * other character.
 */
static int hex2decimal(char ch)
{
    int value = -1;

    if ('0' <= ch && ch <= '9') {
        value = ch - '0';
    } else if ('a' <= ch && ch <= 'f') {
        value = ch - 'a' + 10;
    } else if ('A' <= ch && ch <= 'F') {
        value = ch - 'A' + 10;
    }

    return value;
}
107
108 /**
109 * parse_string(): Parse a json string and return a QObject
110 *
111 * string
112 * ""
113 * " chars "
114 * chars
115 * char
116 * char chars
117 * char
118 * any-Unicode-character-
119 * except-"-or-\-or-
120 * control-character
121 * \"
122 * \\
123 * \/
124 * \b
125 * \f
126 * \n
127 * \r
128 * \t
129 * \u four-hex-digits
130 */
131 static QString *qstring_from_escaped_str(JSONParserContext *ctxt,
132 JSONToken *token)
133 {
134 const char *ptr = token->str;
135 QString *str;
136 char quote;
137 int cp;
138 char *end;
139 ssize_t len;
140 char utf8_buf[5];
141
142 assert(*ptr == '"' || *ptr == '\'');
143 quote = *ptr++;
144 str = qstring_new();
145
146 while (*ptr != quote) {
147 assert(*ptr);
148 if (*ptr == '\\') {
149 ptr++;
150 switch (*ptr++) {
151 case '"':
152 qstring_append(str, "\"");
153 break;
154 case '\'':
155 qstring_append(str, "'");
156 break;
157 case '\\':
158 qstring_append(str, "\\");
159 break;
160 case '/':
161 qstring_append(str, "/");
162 break;
163 case 'b':
164 qstring_append(str, "\b");
165 break;
166 case 'f':
167 qstring_append(str, "\f");
168 break;
169 case 'n':
170 qstring_append(str, "\n");
171 break;
172 case 'r':
173 qstring_append(str, "\r");
174 break;
175 case 't':
176 qstring_append(str, "\t");
177 break;
178 case 'u': {
179 uint16_t unicode_char = 0;
180 char utf8_char[4];
181 int i = 0;
182
183 for (i = 0; i < 4; i++) {
184 if (qemu_isxdigit(*ptr)) {
185 unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
186 } else {
187 parse_error(ctxt, token,
188 "invalid hex escape sequence in string");
189 goto out;
190 }
191 ptr++;
192 }
193
194 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
195 qstring_append(str, utf8_char);
196 } break;
197 default:
198 parse_error(ctxt, token, "invalid escape sequence in string");
199 goto out;
200 }
201 } else {
202 cp = mod_utf8_codepoint(ptr, 6, &end);
203 if (cp <= 0) {
204 parse_error(ctxt, token, "invalid UTF-8 sequence in string");
205 goto out;
206 }
207 ptr = end;
208 len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp);
209 assert(len >= 0);
210 qstring_append(str, utf8_buf);
211 }
212 }
213
214 return str;
215
216 out:
217 qobject_unref(str);
218 return NULL;
219 }
220
221 /* Note: the token object returned by parser_context_peek_token or
222 * parser_context_pop_token is deleted as soon as parser_context_pop_token
223 * is called again.
224 */
225 static JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
226 {
227 g_free(ctxt->current);
228 assert(!g_queue_is_empty(ctxt->buf));
229 ctxt->current = g_queue_pop_head(ctxt->buf);
230 return ctxt->current;
231 }
232
233 static JSONToken *parser_context_peek_token(JSONParserContext *ctxt)
234 {
235 assert(!g_queue_is_empty(ctxt->buf));
236 return g_queue_peek_head(ctxt->buf);
237 }
238
239 static JSONParserContext *parser_context_new(GQueue *tokens)
240 {
241 JSONParserContext *ctxt;
242
243 if (!tokens) {
244 return NULL;
245 }
246
247 ctxt = g_malloc0(sizeof(JSONParserContext));
248 ctxt->buf = tokens;
249
250 return ctxt;
251 }
252
253 /* to support error propagation, ctxt->err must be freed separately */
254 static void parser_context_free(JSONParserContext *ctxt)
255 {
256 if (ctxt) {
257 while (!g_queue_is_empty(ctxt->buf)) {
258 parser_context_pop_token(ctxt);
259 }
260 g_free(ctxt->current);
261 g_queue_free(ctxt->buf);
262 g_free(ctxt);
263 }
264 }
265
266 /**
267 * Parsing rules
268 */
269 static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap)
270 {
271 QObject *value;
272 QString *key = NULL;
273 JSONToken *peek, *token;
274
275 peek = parser_context_peek_token(ctxt);
276 if (peek == NULL) {
277 parse_error(ctxt, NULL, "premature EOI");
278 goto out;
279 }
280
281 key = qobject_to(QString, parse_value(ctxt, ap));
282 if (!key) {
283 parse_error(ctxt, peek, "key is not a string in object");
284 goto out;
285 }
286
287 token = parser_context_pop_token(ctxt);
288 if (token == NULL) {
289 parse_error(ctxt, NULL, "premature EOI");
290 goto out;
291 }
292
293 if (token->type != JSON_COLON) {
294 parse_error(ctxt, token, "missing : in object pair");
295 goto out;
296 }
297
298 value = parse_value(ctxt, ap);
299 if (value == NULL) {
300 parse_error(ctxt, token, "Missing value in dict");
301 goto out;
302 }
303
304 qdict_put_obj(dict, qstring_get_str(key), value);
305
306 qobject_unref(key);
307
308 return 0;
309
310 out:
311 qobject_unref(key);
312
313 return -1;
314 }
315
316 static QObject *parse_object(JSONParserContext *ctxt, va_list *ap)
317 {
318 QDict *dict = NULL;
319 JSONToken *token, *peek;
320
321 token = parser_context_pop_token(ctxt);
322 assert(token && token->type == JSON_LCURLY);
323
324 dict = qdict_new();
325
326 peek = parser_context_peek_token(ctxt);
327 if (peek == NULL) {
328 parse_error(ctxt, NULL, "premature EOI");
329 goto out;
330 }
331
332 if (peek->type != JSON_RCURLY) {
333 if (parse_pair(ctxt, dict, ap) == -1) {
334 goto out;
335 }
336
337 token = parser_context_pop_token(ctxt);
338 if (token == NULL) {
339 parse_error(ctxt, NULL, "premature EOI");
340 goto out;
341 }
342
343 while (token->type != JSON_RCURLY) {
344 if (token->type != JSON_COMMA) {
345 parse_error(ctxt, token, "expected separator in dict");
346 goto out;
347 }
348
349 if (parse_pair(ctxt, dict, ap) == -1) {
350 goto out;
351 }
352
353 token = parser_context_pop_token(ctxt);
354 if (token == NULL) {
355 parse_error(ctxt, NULL, "premature EOI");
356 goto out;
357 }
358 }
359 } else {
360 (void)parser_context_pop_token(ctxt);
361 }
362
363 return QOBJECT(dict);
364
365 out:
366 qobject_unref(dict);
367 return NULL;
368 }
369
370 static QObject *parse_array(JSONParserContext *ctxt, va_list *ap)
371 {
372 QList *list = NULL;
373 JSONToken *token, *peek;
374
375 token = parser_context_pop_token(ctxt);
376 assert(token && token->type == JSON_LSQUARE);
377
378 list = qlist_new();
379
380 peek = parser_context_peek_token(ctxt);
381 if (peek == NULL) {
382 parse_error(ctxt, NULL, "premature EOI");
383 goto out;
384 }
385
386 if (peek->type != JSON_RSQUARE) {
387 QObject *obj;
388
389 obj = parse_value(ctxt, ap);
390 if (obj == NULL) {
391 parse_error(ctxt, token, "expecting value");
392 goto out;
393 }
394
395 qlist_append_obj(list, obj);
396
397 token = parser_context_pop_token(ctxt);
398 if (token == NULL) {
399 parse_error(ctxt, NULL, "premature EOI");
400 goto out;
401 }
402
403 while (token->type != JSON_RSQUARE) {
404 if (token->type != JSON_COMMA) {
405 parse_error(ctxt, token, "expected separator in list");
406 goto out;
407 }
408
409 obj = parse_value(ctxt, ap);
410 if (obj == NULL) {
411 parse_error(ctxt, token, "expecting value");
412 goto out;
413 }
414
415 qlist_append_obj(list, obj);
416
417 token = parser_context_pop_token(ctxt);
418 if (token == NULL) {
419 parse_error(ctxt, NULL, "premature EOI");
420 goto out;
421 }
422 }
423 } else {
424 (void)parser_context_pop_token(ctxt);
425 }
426
427 return QOBJECT(list);
428
429 out:
430 qobject_unref(list);
431 return NULL;
432 }
433
434 static QObject *parse_keyword(JSONParserContext *ctxt)
435 {
436 JSONToken *token;
437
438 token = parser_context_pop_token(ctxt);
439 assert(token && token->type == JSON_KEYWORD);
440
441 if (!strcmp(token->str, "true")) {
442 return QOBJECT(qbool_from_bool(true));
443 } else if (!strcmp(token->str, "false")) {
444 return QOBJECT(qbool_from_bool(false));
445 } else if (!strcmp(token->str, "null")) {
446 return QOBJECT(qnull());
447 }
448 parse_error(ctxt, token, "invalid keyword '%s'", token->str);
449 return NULL;
450 }
451
452 static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap)
453 {
454 JSONToken *token;
455
456 if (ap == NULL) {
457 return NULL;
458 }
459
460 token = parser_context_pop_token(ctxt);
461 assert(token && token->type == JSON_ESCAPE);
462
463 if (!strcmp(token->str, "%p")) {
464 return va_arg(*ap, QObject *);
465 } else if (!strcmp(token->str, "%i")) {
466 return QOBJECT(qbool_from_bool(va_arg(*ap, int)));
467 } else if (!strcmp(token->str, "%d")) {
468 return QOBJECT(qnum_from_int(va_arg(*ap, int)));
469 } else if (!strcmp(token->str, "%ld")) {
470 return QOBJECT(qnum_from_int(va_arg(*ap, long)));
471 } else if (!strcmp(token->str, "%lld") ||
472 !strcmp(token->str, "%I64d")) {
473 return QOBJECT(qnum_from_int(va_arg(*ap, long long)));
474 } else if (!strcmp(token->str, "%u")) {
475 return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned int)));
476 } else if (!strcmp(token->str, "%lu")) {
477 return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long)));
478 } else if (!strcmp(token->str, "%llu") ||
479 !strcmp(token->str, "%I64u")) {
480 return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long long)));
481 } else if (!strcmp(token->str, "%s")) {
482 return QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
483 } else if (!strcmp(token->str, "%f")) {
484 return QOBJECT(qnum_from_double(va_arg(*ap, double)));
485 }
486 return NULL;
487 }
488
489 static QObject *parse_literal(JSONParserContext *ctxt)
490 {
491 JSONToken *token;
492
493 token = parser_context_pop_token(ctxt);
494 assert(token);
495
496 switch (token->type) {
497 case JSON_STRING:
498 return QOBJECT(qstring_from_escaped_str(ctxt, token));
499 case JSON_INTEGER: {
500 /*
501 * Represent JSON_INTEGER as QNUM_I64 if possible, else as
502 * QNUM_U64, else as QNUM_DOUBLE. Note that qemu_strtoi64()
503 * and qemu_strtou64() fail with ERANGE when it's not
504 * possible.
505 *
506 * qnum_get_int() will then work for any signed 64-bit
507 * JSON_INTEGER, qnum_get_uint() for any unsigned 64-bit
508 * integer, and qnum_get_double() both for any JSON_INTEGER
509 * and any JSON_FLOAT (with precision loss for integers beyond
510 * 53 bits)
511 */
512 int ret;
513 int64_t value;
514 uint64_t uvalue;
515
516 ret = qemu_strtoi64(token->str, NULL, 10, &value);
517 if (!ret) {
518 return QOBJECT(qnum_from_int(value));
519 }
520 assert(ret == -ERANGE);
521
522 if (token->str[0] != '-') {
523 ret = qemu_strtou64(token->str, NULL, 10, &uvalue);
524 if (!ret) {
525 return QOBJECT(qnum_from_uint(uvalue));
526 }
527 assert(ret == -ERANGE);
528 }
529 /* fall through to JSON_FLOAT */
530 }
531 case JSON_FLOAT:
532 /* FIXME dependent on locale; a pervasive issue in QEMU */
533 /* FIXME our lexer matches RFC 7159 in forbidding Inf or NaN,
534 * but those might be useful extensions beyond JSON */
535 return QOBJECT(qnum_from_double(strtod(token->str, NULL)));
536 default:
537 abort();
538 }
539 }
540
541 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap)
542 {
543 JSONToken *token;
544
545 token = parser_context_peek_token(ctxt);
546 if (token == NULL) {
547 parse_error(ctxt, NULL, "premature EOI");
548 return NULL;
549 }
550
551 switch (token->type) {
552 case JSON_LCURLY:
553 return parse_object(ctxt, ap);
554 case JSON_LSQUARE:
555 return parse_array(ctxt, ap);
556 case JSON_ESCAPE:
557 return parse_escape(ctxt, ap);
558 case JSON_INTEGER:
559 case JSON_FLOAT:
560 case JSON_STRING:
561 return parse_literal(ctxt);
562 case JSON_KEYWORD:
563 return parse_keyword(ctxt);
564 default:
565 parse_error(ctxt, token, "expecting value");
566 return NULL;
567 }
568 }
569
570 QObject *json_parser_parse(GQueue *tokens, va_list *ap)
571 {
572 return json_parser_parse_err(tokens, ap, NULL);
573 }
574
575 QObject *json_parser_parse_err(GQueue *tokens, va_list *ap, Error **errp)
576 {
577 JSONParserContext *ctxt = parser_context_new(tokens);
578 QObject *result;
579
580 if (!ctxt) {
581 return NULL;
582 }
583
584 result = parse_value(ctxt, ap);
585
586 error_propagate(errp, ctxt->err);
587
588 parser_context_free(ctxt);
589
590 return result;
591 }