]> git.proxmox.com Git - mirror_qemu.git/blob - qobject/json-parser.c
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-misc-20181214' into staging
[mirror_qemu.git] / qobject / json-parser.c
1 /*
2 * JSON Parser
3 *
4 * Copyright IBM, Corp. 2009
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10 * See the COPYING.LIB file in the top-level directory.
11 *
12 */
13
14 #include "qemu/osdep.h"
15 #include "qemu/cutils.h"
16 #include "qemu/unicode.h"
17 #include "qapi/error.h"
18 #include "qemu-common.h"
19 #include "qapi/qmp/qbool.h"
20 #include "qapi/qmp/qdict.h"
21 #include "qapi/qmp/qlist.h"
22 #include "qapi/qmp/qnull.h"
23 #include "qapi/qmp/qnum.h"
24 #include "qapi/qmp/qstring.h"
25 #include "json-parser-int.h"
26
27 struct JSONToken {
28 JSONTokenType type;
29 int x;
30 int y;
31 char str[];
32 };
33
34 typedef struct JSONParserContext
35 {
36 Error *err;
37 JSONToken *current;
38 GQueue *buf;
39 va_list *ap;
40 } JSONParserContext;
41
42 #define BUG_ON(cond) assert(!(cond))
43
44 /**
45 * TODO
46 *
47 * 0) make errors meaningful again
48 * 1) add geometry information to tokens
49 * 3) should we return a parsed size?
50 * 4) deal with premature EOI
51 */
52
53 static QObject *parse_value(JSONParserContext *ctxt);
54
55 /**
56 * Error handler
57 */
58 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
59 JSONToken *token, const char *msg, ...)
60 {
61 va_list ap;
62 char message[1024];
63
64 if (ctxt->err) {
65 return;
66 }
67 va_start(ap, msg);
68 vsnprintf(message, sizeof(message), msg, ap);
69 va_end(ap);
70 error_setg(&ctxt->err, "JSON parse error, %s", message);
71 }
72
/**
 * cvt4hex(): Convert four hexadecimal digits to their integer value
 *
 * @s: text to convert; only the first four characters are examined
 *
 * Both upper- and lower-case digits are accepted.  Scanning stops at the
 * first character that is not a hex digit, so a NUL terminator within
 * the first four characters is never read past.
 *
 * Returns the value in the range 0..0xFFFF, or -1 if any of the first
 * four characters is not a hexadecimal digit.
 */
static int cvt4hex(const char *s)
{
    int cp = 0;
    int i;

    for (i = 0; i < 4; i++) {
        char c = s[i];
        int digit;

        /* One classification pass; anything outside the ranges is invalid. */
        if (c >= '0' && c <= '9') {
            digit = c - '0';
        } else if (c >= 'a' && c <= 'f') {
            digit = 10 + c - 'a';
        } else if (c >= 'A' && c <= 'F') {
            digit = 10 + c - 'A';
        } else {
            return -1;
        }
        cp = (cp << 4) | digit;
    }
    return cp;
}
95
96 /**
97 * parse_string(): Parse a JSON string
98 *
99 * From RFC 8259 "The JavaScript Object Notation (JSON) Data
100 * Interchange Format":
101 *
102 * char = unescaped /
103 * escape (
104 * %x22 / ; " quotation mark U+0022
105 * %x5C / ; \ reverse solidus U+005C
106 * %x2F / ; / solidus U+002F
107 * %x62 / ; b backspace U+0008
108 * %x66 / ; f form feed U+000C
109 * %x6E / ; n line feed U+000A
110 * %x72 / ; r carriage return U+000D
111 * %x74 / ; t tab U+0009
112 * %x75 4HEXDIG ) ; uXXXX U+XXXX
113 * escape = %x5C ; \
114 * quotation-mark = %x22 ; "
115 * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
116 *
117 * Extensions over RFC 8259:
118 * - Extra escape sequence in strings:
119 * 0x27 (apostrophe) is recognized after escape, too
120 * - Single-quoted strings:
121 * Like double-quoted strings, except they're delimited by %x27
122 * (apostrophe) instead of %x22 (quotation mark), and can't contain
123 * unescaped apostrophe, but can contain unescaped quotation mark.
124 *
125 * Note:
126 * - Encoding is modified UTF-8.
127 * - Invalid Unicode characters are rejected.
128 * - Control characters \x00..\x1F are rejected by the lexer.
129 */
130 static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
131 {
132 const char *ptr = token->str;
133 QString *str;
134 char quote;
135 const char *beg;
136 int cp, trailing;
137 char *end;
138 ssize_t len;
139 char utf8_buf[5];
140
141 assert(*ptr == '"' || *ptr == '\'');
142 quote = *ptr++;
143 str = qstring_new();
144
145 while (*ptr != quote) {
146 assert(*ptr);
147 switch (*ptr) {
148 case '\\':
149 beg = ptr++;
150 switch (*ptr++) {
151 case '"':
152 qstring_append_chr(str, '"');
153 break;
154 case '\'':
155 qstring_append_chr(str, '\'');
156 break;
157 case '\\':
158 qstring_append_chr(str, '\\');
159 break;
160 case '/':
161 qstring_append_chr(str, '/');
162 break;
163 case 'b':
164 qstring_append_chr(str, '\b');
165 break;
166 case 'f':
167 qstring_append_chr(str, '\f');
168 break;
169 case 'n':
170 qstring_append_chr(str, '\n');
171 break;
172 case 'r':
173 qstring_append_chr(str, '\r');
174 break;
175 case 't':
176 qstring_append_chr(str, '\t');
177 break;
178 case 'u':
179 cp = cvt4hex(ptr);
180 ptr += 4;
181
182 /* handle surrogate pairs */
183 if (cp >= 0xD800 && cp <= 0xDBFF
184 && ptr[0] == '\\' && ptr[1] == 'u') {
185 /* leading surrogate followed by \u */
186 cp = 0x10000 + ((cp & 0x3FF) << 10);
187 trailing = cvt4hex(ptr + 2);
188 if (trailing >= 0xDC00 && trailing <= 0xDFFF) {
189 /* followed by trailing surrogate */
190 cp |= trailing & 0x3FF;
191 ptr += 6;
192 } else {
193 cp = -1; /* invalid */
194 }
195 }
196
197 if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) {
198 parse_error(ctxt, token,
199 "%.*s is not a valid Unicode character",
200 (int)(ptr - beg), beg);
201 goto out;
202 }
203 qstring_append(str, utf8_buf);
204 break;
205 default:
206 parse_error(ctxt, token, "invalid escape sequence in string");
207 goto out;
208 }
209 break;
210 case '%':
211 if (ctxt->ap && ptr[1] != '%') {
212 parse_error(ctxt, token, "can't interpolate into string");
213 goto out;
214 }
215 ptr++;
216 /* fall through */
217 default:
218 cp = mod_utf8_codepoint(ptr, 6, &end);
219 if (cp < 0) {
220 parse_error(ctxt, token, "invalid UTF-8 sequence in string");
221 goto out;
222 }
223 ptr = end;
224 len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp);
225 assert(len >= 0);
226 qstring_append(str, utf8_buf);
227 }
228 }
229
230 return str;
231
232 out:
233 qobject_unref(str);
234 return NULL;
235 }
236
237 /* Note: the token object returned by parser_context_peek_token or
238 * parser_context_pop_token is deleted as soon as parser_context_pop_token
239 * is called again.
240 */
241 static JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
242 {
243 g_free(ctxt->current);
244 ctxt->current = g_queue_pop_head(ctxt->buf);
245 return ctxt->current;
246 }
247
248 static JSONToken *parser_context_peek_token(JSONParserContext *ctxt)
249 {
250 return g_queue_peek_head(ctxt->buf);
251 }
252
253 /**
254 * Parsing rules
255 */
256 static int parse_pair(JSONParserContext *ctxt, QDict *dict)
257 {
258 QObject *value;
259 QString *key = NULL;
260 JSONToken *peek, *token;
261
262 peek = parser_context_peek_token(ctxt);
263 if (peek == NULL) {
264 parse_error(ctxt, NULL, "premature EOI");
265 goto out;
266 }
267
268 key = qobject_to(QString, parse_value(ctxt));
269 if (!key) {
270 parse_error(ctxt, peek, "key is not a string in object");
271 goto out;
272 }
273
274 token = parser_context_pop_token(ctxt);
275 if (token == NULL) {
276 parse_error(ctxt, NULL, "premature EOI");
277 goto out;
278 }
279
280 if (token->type != JSON_COLON) {
281 parse_error(ctxt, token, "missing : in object pair");
282 goto out;
283 }
284
285 value = parse_value(ctxt);
286 if (value == NULL) {
287 parse_error(ctxt, token, "Missing value in dict");
288 goto out;
289 }
290
291 if (qdict_haskey(dict, qstring_get_str(key))) {
292 parse_error(ctxt, token, "duplicate key");
293 goto out;
294 }
295
296 qdict_put_obj(dict, qstring_get_str(key), value);
297
298 qobject_unref(key);
299
300 return 0;
301
302 out:
303 qobject_unref(key);
304
305 return -1;
306 }
307
308 static QObject *parse_object(JSONParserContext *ctxt)
309 {
310 QDict *dict = NULL;
311 JSONToken *token, *peek;
312
313 token = parser_context_pop_token(ctxt);
314 assert(token && token->type == JSON_LCURLY);
315
316 dict = qdict_new();
317
318 peek = parser_context_peek_token(ctxt);
319 if (peek == NULL) {
320 parse_error(ctxt, NULL, "premature EOI");
321 goto out;
322 }
323
324 if (peek->type != JSON_RCURLY) {
325 if (parse_pair(ctxt, dict) == -1) {
326 goto out;
327 }
328
329 token = parser_context_pop_token(ctxt);
330 if (token == NULL) {
331 parse_error(ctxt, NULL, "premature EOI");
332 goto out;
333 }
334
335 while (token->type != JSON_RCURLY) {
336 if (token->type != JSON_COMMA) {
337 parse_error(ctxt, token, "expected separator in dict");
338 goto out;
339 }
340
341 if (parse_pair(ctxt, dict) == -1) {
342 goto out;
343 }
344
345 token = parser_context_pop_token(ctxt);
346 if (token == NULL) {
347 parse_error(ctxt, NULL, "premature EOI");
348 goto out;
349 }
350 }
351 } else {
352 (void)parser_context_pop_token(ctxt);
353 }
354
355 return QOBJECT(dict);
356
357 out:
358 qobject_unref(dict);
359 return NULL;
360 }
361
362 static QObject *parse_array(JSONParserContext *ctxt)
363 {
364 QList *list = NULL;
365 JSONToken *token, *peek;
366
367 token = parser_context_pop_token(ctxt);
368 assert(token && token->type == JSON_LSQUARE);
369
370 list = qlist_new();
371
372 peek = parser_context_peek_token(ctxt);
373 if (peek == NULL) {
374 parse_error(ctxt, NULL, "premature EOI");
375 goto out;
376 }
377
378 if (peek->type != JSON_RSQUARE) {
379 QObject *obj;
380
381 obj = parse_value(ctxt);
382 if (obj == NULL) {
383 parse_error(ctxt, token, "expecting value");
384 goto out;
385 }
386
387 qlist_append_obj(list, obj);
388
389 token = parser_context_pop_token(ctxt);
390 if (token == NULL) {
391 parse_error(ctxt, NULL, "premature EOI");
392 goto out;
393 }
394
395 while (token->type != JSON_RSQUARE) {
396 if (token->type != JSON_COMMA) {
397 parse_error(ctxt, token, "expected separator in list");
398 goto out;
399 }
400
401 obj = parse_value(ctxt);
402 if (obj == NULL) {
403 parse_error(ctxt, token, "expecting value");
404 goto out;
405 }
406
407 qlist_append_obj(list, obj);
408
409 token = parser_context_pop_token(ctxt);
410 if (token == NULL) {
411 parse_error(ctxt, NULL, "premature EOI");
412 goto out;
413 }
414 }
415 } else {
416 (void)parser_context_pop_token(ctxt);
417 }
418
419 return QOBJECT(list);
420
421 out:
422 qobject_unref(list);
423 return NULL;
424 }
425
426 static QObject *parse_keyword(JSONParserContext *ctxt)
427 {
428 JSONToken *token;
429
430 token = parser_context_pop_token(ctxt);
431 assert(token && token->type == JSON_KEYWORD);
432
433 if (!strcmp(token->str, "true")) {
434 return QOBJECT(qbool_from_bool(true));
435 } else if (!strcmp(token->str, "false")) {
436 return QOBJECT(qbool_from_bool(false));
437 } else if (!strcmp(token->str, "null")) {
438 return QOBJECT(qnull());
439 }
440 parse_error(ctxt, token, "invalid keyword '%s'", token->str);
441 return NULL;
442 }
443
444 static QObject *parse_interpolation(JSONParserContext *ctxt)
445 {
446 JSONToken *token;
447
448 token = parser_context_pop_token(ctxt);
449 assert(token && token->type == JSON_INTERP);
450
451 if (!strcmp(token->str, "%p")) {
452 return va_arg(*ctxt->ap, QObject *);
453 } else if (!strcmp(token->str, "%i")) {
454 return QOBJECT(qbool_from_bool(va_arg(*ctxt->ap, int)));
455 } else if (!strcmp(token->str, "%d")) {
456 return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int)));
457 } else if (!strcmp(token->str, "%ld")) {
458 return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long)));
459 } else if (!strcmp(token->str, "%lld")) {
460 return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long long)));
461 } else if (!strcmp(token->str, "%" PRId64)) {
462 return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int64_t)));
463 } else if (!strcmp(token->str, "%u")) {
464 return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned int)));
465 } else if (!strcmp(token->str, "%lu")) {
466 return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long)));
467 } else if (!strcmp(token->str, "%llu")) {
468 return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long long)));
469 } else if (!strcmp(token->str, "%" PRIu64)) {
470 return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, uint64_t)));
471 } else if (!strcmp(token->str, "%s")) {
472 return QOBJECT(qstring_from_str(va_arg(*ctxt->ap, const char *)));
473 } else if (!strcmp(token->str, "%f")) {
474 return QOBJECT(qnum_from_double(va_arg(*ctxt->ap, double)));
475 }
476 parse_error(ctxt, token, "invalid interpolation '%s'", token->str);
477 return NULL;
478 }
479
480 static QObject *parse_literal(JSONParserContext *ctxt)
481 {
482 JSONToken *token;
483
484 token = parser_context_pop_token(ctxt);
485 assert(token);
486
487 switch (token->type) {
488 case JSON_STRING:
489 return QOBJECT(parse_string(ctxt, token));
490 case JSON_INTEGER: {
491 /*
492 * Represent JSON_INTEGER as QNUM_I64 if possible, else as
493 * QNUM_U64, else as QNUM_DOUBLE. Note that qemu_strtoi64()
494 * and qemu_strtou64() fail with ERANGE when it's not
495 * possible.
496 *
497 * qnum_get_int() will then work for any signed 64-bit
498 * JSON_INTEGER, qnum_get_uint() for any unsigned 64-bit
499 * integer, and qnum_get_double() both for any JSON_INTEGER
500 * and any JSON_FLOAT (with precision loss for integers beyond
501 * 53 bits)
502 */
503 int ret;
504 int64_t value;
505 uint64_t uvalue;
506
507 ret = qemu_strtoi64(token->str, NULL, 10, &value);
508 if (!ret) {
509 return QOBJECT(qnum_from_int(value));
510 }
511 assert(ret == -ERANGE);
512
513 if (token->str[0] != '-') {
514 ret = qemu_strtou64(token->str, NULL, 10, &uvalue);
515 if (!ret) {
516 return QOBJECT(qnum_from_uint(uvalue));
517 }
518 assert(ret == -ERANGE);
519 }
520 /* fall through to JSON_FLOAT */
521 }
522 case JSON_FLOAT:
523 /* FIXME dependent on locale; a pervasive issue in QEMU */
524 /* FIXME our lexer matches RFC 8259 in forbidding Inf or NaN,
525 * but those might be useful extensions beyond JSON */
526 return QOBJECT(qnum_from_double(strtod(token->str, NULL)));
527 default:
528 abort();
529 }
530 }
531
532 static QObject *parse_value(JSONParserContext *ctxt)
533 {
534 JSONToken *token;
535
536 token = parser_context_peek_token(ctxt);
537 if (token == NULL) {
538 parse_error(ctxt, NULL, "premature EOI");
539 return NULL;
540 }
541
542 switch (token->type) {
543 case JSON_LCURLY:
544 return parse_object(ctxt);
545 case JSON_LSQUARE:
546 return parse_array(ctxt);
547 case JSON_INTERP:
548 return parse_interpolation(ctxt);
549 case JSON_INTEGER:
550 case JSON_FLOAT:
551 case JSON_STRING:
552 return parse_literal(ctxt);
553 case JSON_KEYWORD:
554 return parse_keyword(ctxt);
555 default:
556 parse_error(ctxt, token, "expecting value");
557 return NULL;
558 }
559 }
560
561 JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr)
562 {
563 JSONToken *token = g_malloc(sizeof(JSONToken) + tokstr->len + 1);
564
565 token->type = type;
566 memcpy(token->str, tokstr->str, tokstr->len);
567 token->str[tokstr->len] = 0;
568 token->x = x;
569 token->y = y;
570 return token;
571 }
572
573 QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp)
574 {
575 JSONParserContext ctxt = { .buf = tokens, .ap = ap };
576 QObject *result;
577
578 result = parse_value(&ctxt);
579 assert(ctxt.err || g_queue_is_empty(ctxt.buf));
580
581 error_propagate(errp, ctxt.err);
582
583 while (!g_queue_is_empty(ctxt.buf)) {
584 parser_context_pop_token(&ctxt);
585 }
586 g_free(ctxt.current);
587
588 return result;
589 }