]> git.proxmox.com Git - mirror_qemu.git/blob - qobject/json-parser.c
json: Simplify parse_string()
[mirror_qemu.git] / qobject / json-parser.c
1 /*
2 * JSON Parser
3 *
4 * Copyright IBM, Corp. 2009
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10 * See the COPYING.LIB file in the top-level directory.
11 *
12 */
13
14 #include "qemu/osdep.h"
15 #include "qemu/cutils.h"
16 #include "qemu/unicode.h"
17 #include "qapi/error.h"
18 #include "qemu-common.h"
19 #include "qapi/qmp/qbool.h"
20 #include "qapi/qmp/qdict.h"
21 #include "qapi/qmp/qlist.h"
22 #include "qapi/qmp/qnull.h"
23 #include "qapi/qmp/qnum.h"
24 #include "qapi/qmp/qstring.h"
25 #include "qapi/qmp/json-parser.h"
26 #include "qapi/qmp/json-lexer.h"
27 #include "qapi/qmp/json-streamer.h"
28
29 typedef struct JSONParserContext
30 {
31 Error *err;
32 JSONToken *current;
33 GQueue *buf;
34 } JSONParserContext;
35
/* Assert that @condition does NOT hold. */
#define BUG_ON(condition) assert(!(condition))
37
38 /**
39 * TODO
40 *
41 * 0) make errors meaningful again
42 * 1) add geometry information to tokens
43 * 3) should we return a parsed size?
44 * 4) deal with premature EOI
45 */
46
47 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap);
48
49 /**
50 * Error handler
51 */
52 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
53 JSONToken *token, const char *msg, ...)
54 {
55 va_list ap;
56 char message[1024];
57
58 if (ctxt->err) {
59 return;
60 }
61 va_start(ap, msg);
62 vsnprintf(message, sizeof(message), msg, ap);
63 va_end(ap);
64 error_setg(&ctxt->err, "JSON parse error, %s", message);
65 }
66
67 /**
68 * String helpers
69 *
70 * These helpers are used to unescape strings.
71 */
72 static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
73 {
74 if (wchar <= 0x007F) {
75 BUG_ON(buffer_length < 2);
76
77 buffer[0] = wchar & 0x7F;
78 buffer[1] = 0;
79 } else if (wchar <= 0x07FF) {
80 BUG_ON(buffer_length < 3);
81
82 buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F);
83 buffer[1] = 0x80 | (wchar & 0x3F);
84 buffer[2] = 0;
85 } else {
86 BUG_ON(buffer_length < 4);
87
88 buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F);
89 buffer[1] = 0x80 | ((wchar >> 6) & 0x3F);
90 buffer[2] = 0x80 | (wchar & 0x3F);
91 buffer[3] = 0;
92 }
93 }
94
95 static int hex2decimal(char ch)
96 {
97 if (ch >= '0' && ch <= '9') {
98 return (ch - '0');
99 } else if (ch >= 'a' && ch <= 'f') {
100 return 10 + (ch - 'a');
101 } else if (ch >= 'A' && ch <= 'F') {
102 return 10 + (ch - 'A');
103 }
104 abort();
105 }
106
107 /**
108 * parse_string(): Parse a JSON string
109 *
110 * From RFC 8259 "The JavaScript Object Notation (JSON) Data
111 * Interchange Format":
112 *
113 * char = unescaped /
114 * escape (
115 * %x22 / ; " quotation mark U+0022
116 * %x5C / ; \ reverse solidus U+005C
117 * %x2F / ; / solidus U+002F
118 * %x62 / ; b backspace U+0008
119 * %x66 / ; f form feed U+000C
120 * %x6E / ; n line feed U+000A
121 * %x72 / ; r carriage return U+000D
122 * %x74 / ; t tab U+0009
123 * %x75 4HEXDIG ) ; uXXXX U+XXXX
124 * escape = %x5C ; \
125 * quotation-mark = %x22 ; "
126 * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
127 *
128 * Extensions over RFC 8259:
129 * - Extra escape sequence in strings:
130 * 0x27 (apostrophe) is recognized after escape, too
131 * - Single-quoted strings:
132 * Like double-quoted strings, except they're delimited by %x27
133 * (apostrophe) instead of %x22 (quotation mark), and can't contain
134 * unescaped apostrophe, but can contain unescaped quotation mark.
135 *
136 * Note:
137 * - Encoding is modified UTF-8.
138 * - Invalid Unicode characters are rejected.
139 * - Control characters \x00..\x1F are rejected by the lexer.
140 */
141 static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
142 {
143 const char *ptr = token->str;
144 QString *str;
145 char quote;
146 int cp, i;
147 char *end;
148 ssize_t len;
149 char utf8_buf[5];
150
151 assert(*ptr == '"' || *ptr == '\'');
152 quote = *ptr++;
153 str = qstring_new();
154
155 while (*ptr != quote) {
156 assert(*ptr);
157 if (*ptr == '\\') {
158 ptr++;
159 switch (*ptr++) {
160 case '"':
161 qstring_append_chr(str, '"');
162 break;
163 case '\'':
164 qstring_append_chr(str, '\'');
165 break;
166 case '\\':
167 qstring_append_chr(str, '\\');
168 break;
169 case '/':
170 qstring_append_chr(str, '/');
171 break;
172 case 'b':
173 qstring_append_chr(str, '\b');
174 break;
175 case 'f':
176 qstring_append_chr(str, '\f');
177 break;
178 case 'n':
179 qstring_append_chr(str, '\n');
180 break;
181 case 'r':
182 qstring_append_chr(str, '\r');
183 break;
184 case 't':
185 qstring_append_chr(str, '\t');
186 break;
187 case 'u':
188 cp = 0;
189 for (i = 0; i < 4; i++) {
190 if (!qemu_isxdigit(*ptr)) {
191 parse_error(ctxt, token,
192 "invalid hex escape sequence in string");
193 goto out;
194 }
195 cp <<= 4;
196 cp |= hex2decimal(*ptr);
197 ptr++;
198 }
199
200 wchar_to_utf8(cp, utf8_buf, sizeof(utf8_buf));
201 qstring_append(str, utf8_buf);
202 break;
203 default:
204 parse_error(ctxt, token, "invalid escape sequence in string");
205 goto out;
206 }
207 } else {
208 cp = mod_utf8_codepoint(ptr, 6, &end);
209 if (cp < 0) {
210 parse_error(ctxt, token, "invalid UTF-8 sequence in string");
211 goto out;
212 }
213 ptr = end;
214 len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp);
215 assert(len >= 0);
216 qstring_append(str, utf8_buf);
217 }
218 }
219
220 return str;
221
222 out:
223 qobject_unref(str);
224 return NULL;
225 }
226
227 /* Note: the token object returned by parser_context_peek_token or
228 * parser_context_pop_token is deleted as soon as parser_context_pop_token
229 * is called again.
230 */
231 static JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
232 {
233 g_free(ctxt->current);
234 assert(!g_queue_is_empty(ctxt->buf));
235 ctxt->current = g_queue_pop_head(ctxt->buf);
236 return ctxt->current;
237 }
238
239 static JSONToken *parser_context_peek_token(JSONParserContext *ctxt)
240 {
241 assert(!g_queue_is_empty(ctxt->buf));
242 return g_queue_peek_head(ctxt->buf);
243 }
244
245 static JSONParserContext *parser_context_new(GQueue *tokens)
246 {
247 JSONParserContext *ctxt;
248
249 if (!tokens) {
250 return NULL;
251 }
252
253 ctxt = g_malloc0(sizeof(JSONParserContext));
254 ctxt->buf = tokens;
255
256 return ctxt;
257 }
258
259 /* to support error propagation, ctxt->err must be freed separately */
260 static void parser_context_free(JSONParserContext *ctxt)
261 {
262 if (ctxt) {
263 while (!g_queue_is_empty(ctxt->buf)) {
264 parser_context_pop_token(ctxt);
265 }
266 g_free(ctxt->current);
267 g_queue_free(ctxt->buf);
268 g_free(ctxt);
269 }
270 }
271
272 /**
273 * Parsing rules
274 */
275 static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap)
276 {
277 QObject *value;
278 QString *key = NULL;
279 JSONToken *peek, *token;
280
281 peek = parser_context_peek_token(ctxt);
282 if (peek == NULL) {
283 parse_error(ctxt, NULL, "premature EOI");
284 goto out;
285 }
286
287 key = qobject_to(QString, parse_value(ctxt, ap));
288 if (!key) {
289 parse_error(ctxt, peek, "key is not a string in object");
290 goto out;
291 }
292
293 token = parser_context_pop_token(ctxt);
294 if (token == NULL) {
295 parse_error(ctxt, NULL, "premature EOI");
296 goto out;
297 }
298
299 if (token->type != JSON_COLON) {
300 parse_error(ctxt, token, "missing : in object pair");
301 goto out;
302 }
303
304 value = parse_value(ctxt, ap);
305 if (value == NULL) {
306 parse_error(ctxt, token, "Missing value in dict");
307 goto out;
308 }
309
310 qdict_put_obj(dict, qstring_get_str(key), value);
311
312 qobject_unref(key);
313
314 return 0;
315
316 out:
317 qobject_unref(key);
318
319 return -1;
320 }
321
322 static QObject *parse_object(JSONParserContext *ctxt, va_list *ap)
323 {
324 QDict *dict = NULL;
325 JSONToken *token, *peek;
326
327 token = parser_context_pop_token(ctxt);
328 assert(token && token->type == JSON_LCURLY);
329
330 dict = qdict_new();
331
332 peek = parser_context_peek_token(ctxt);
333 if (peek == NULL) {
334 parse_error(ctxt, NULL, "premature EOI");
335 goto out;
336 }
337
338 if (peek->type != JSON_RCURLY) {
339 if (parse_pair(ctxt, dict, ap) == -1) {
340 goto out;
341 }
342
343 token = parser_context_pop_token(ctxt);
344 if (token == NULL) {
345 parse_error(ctxt, NULL, "premature EOI");
346 goto out;
347 }
348
349 while (token->type != JSON_RCURLY) {
350 if (token->type != JSON_COMMA) {
351 parse_error(ctxt, token, "expected separator in dict");
352 goto out;
353 }
354
355 if (parse_pair(ctxt, dict, ap) == -1) {
356 goto out;
357 }
358
359 token = parser_context_pop_token(ctxt);
360 if (token == NULL) {
361 parse_error(ctxt, NULL, "premature EOI");
362 goto out;
363 }
364 }
365 } else {
366 (void)parser_context_pop_token(ctxt);
367 }
368
369 return QOBJECT(dict);
370
371 out:
372 qobject_unref(dict);
373 return NULL;
374 }
375
376 static QObject *parse_array(JSONParserContext *ctxt, va_list *ap)
377 {
378 QList *list = NULL;
379 JSONToken *token, *peek;
380
381 token = parser_context_pop_token(ctxt);
382 assert(token && token->type == JSON_LSQUARE);
383
384 list = qlist_new();
385
386 peek = parser_context_peek_token(ctxt);
387 if (peek == NULL) {
388 parse_error(ctxt, NULL, "premature EOI");
389 goto out;
390 }
391
392 if (peek->type != JSON_RSQUARE) {
393 QObject *obj;
394
395 obj = parse_value(ctxt, ap);
396 if (obj == NULL) {
397 parse_error(ctxt, token, "expecting value");
398 goto out;
399 }
400
401 qlist_append_obj(list, obj);
402
403 token = parser_context_pop_token(ctxt);
404 if (token == NULL) {
405 parse_error(ctxt, NULL, "premature EOI");
406 goto out;
407 }
408
409 while (token->type != JSON_RSQUARE) {
410 if (token->type != JSON_COMMA) {
411 parse_error(ctxt, token, "expected separator in list");
412 goto out;
413 }
414
415 obj = parse_value(ctxt, ap);
416 if (obj == NULL) {
417 parse_error(ctxt, token, "expecting value");
418 goto out;
419 }
420
421 qlist_append_obj(list, obj);
422
423 token = parser_context_pop_token(ctxt);
424 if (token == NULL) {
425 parse_error(ctxt, NULL, "premature EOI");
426 goto out;
427 }
428 }
429 } else {
430 (void)parser_context_pop_token(ctxt);
431 }
432
433 return QOBJECT(list);
434
435 out:
436 qobject_unref(list);
437 return NULL;
438 }
439
440 static QObject *parse_keyword(JSONParserContext *ctxt)
441 {
442 JSONToken *token;
443
444 token = parser_context_pop_token(ctxt);
445 assert(token && token->type == JSON_KEYWORD);
446
447 if (!strcmp(token->str, "true")) {
448 return QOBJECT(qbool_from_bool(true));
449 } else if (!strcmp(token->str, "false")) {
450 return QOBJECT(qbool_from_bool(false));
451 } else if (!strcmp(token->str, "null")) {
452 return QOBJECT(qnull());
453 }
454 parse_error(ctxt, token, "invalid keyword '%s'", token->str);
455 return NULL;
456 }
457
458 static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap)
459 {
460 JSONToken *token;
461
462 if (ap == NULL) {
463 return NULL;
464 }
465
466 token = parser_context_pop_token(ctxt);
467 assert(token && token->type == JSON_ESCAPE);
468
469 if (!strcmp(token->str, "%p")) {
470 return va_arg(*ap, QObject *);
471 } else if (!strcmp(token->str, "%i")) {
472 return QOBJECT(qbool_from_bool(va_arg(*ap, int)));
473 } else if (!strcmp(token->str, "%d")) {
474 return QOBJECT(qnum_from_int(va_arg(*ap, int)));
475 } else if (!strcmp(token->str, "%ld")) {
476 return QOBJECT(qnum_from_int(va_arg(*ap, long)));
477 } else if (!strcmp(token->str, "%lld") ||
478 !strcmp(token->str, "%I64d")) {
479 return QOBJECT(qnum_from_int(va_arg(*ap, long long)));
480 } else if (!strcmp(token->str, "%u")) {
481 return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned int)));
482 } else if (!strcmp(token->str, "%lu")) {
483 return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long)));
484 } else if (!strcmp(token->str, "%llu") ||
485 !strcmp(token->str, "%I64u")) {
486 return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long long)));
487 } else if (!strcmp(token->str, "%s")) {
488 return QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
489 } else if (!strcmp(token->str, "%f")) {
490 return QOBJECT(qnum_from_double(va_arg(*ap, double)));
491 }
492 return NULL;
493 }
494
495 static QObject *parse_literal(JSONParserContext *ctxt)
496 {
497 JSONToken *token;
498
499 token = parser_context_pop_token(ctxt);
500 assert(token);
501
502 switch (token->type) {
503 case JSON_STRING:
504 return QOBJECT(parse_string(ctxt, token));
505 case JSON_INTEGER: {
506 /*
507 * Represent JSON_INTEGER as QNUM_I64 if possible, else as
508 * QNUM_U64, else as QNUM_DOUBLE. Note that qemu_strtoi64()
509 * and qemu_strtou64() fail with ERANGE when it's not
510 * possible.
511 *
512 * qnum_get_int() will then work for any signed 64-bit
513 * JSON_INTEGER, qnum_get_uint() for any unsigned 64-bit
514 * integer, and qnum_get_double() both for any JSON_INTEGER
515 * and any JSON_FLOAT (with precision loss for integers beyond
516 * 53 bits)
517 */
518 int ret;
519 int64_t value;
520 uint64_t uvalue;
521
522 ret = qemu_strtoi64(token->str, NULL, 10, &value);
523 if (!ret) {
524 return QOBJECT(qnum_from_int(value));
525 }
526 assert(ret == -ERANGE);
527
528 if (token->str[0] != '-') {
529 ret = qemu_strtou64(token->str, NULL, 10, &uvalue);
530 if (!ret) {
531 return QOBJECT(qnum_from_uint(uvalue));
532 }
533 assert(ret == -ERANGE);
534 }
535 /* fall through to JSON_FLOAT */
536 }
537 case JSON_FLOAT:
538 /* FIXME dependent on locale; a pervasive issue in QEMU */
539 /* FIXME our lexer matches RFC 7159 in forbidding Inf or NaN,
540 * but those might be useful extensions beyond JSON */
541 return QOBJECT(qnum_from_double(strtod(token->str, NULL)));
542 default:
543 abort();
544 }
545 }
546
547 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap)
548 {
549 JSONToken *token;
550
551 token = parser_context_peek_token(ctxt);
552 if (token == NULL) {
553 parse_error(ctxt, NULL, "premature EOI");
554 return NULL;
555 }
556
557 switch (token->type) {
558 case JSON_LCURLY:
559 return parse_object(ctxt, ap);
560 case JSON_LSQUARE:
561 return parse_array(ctxt, ap);
562 case JSON_ESCAPE:
563 return parse_escape(ctxt, ap);
564 case JSON_INTEGER:
565 case JSON_FLOAT:
566 case JSON_STRING:
567 return parse_literal(ctxt);
568 case JSON_KEYWORD:
569 return parse_keyword(ctxt);
570 default:
571 parse_error(ctxt, token, "expecting value");
572 return NULL;
573 }
574 }
575
576 QObject *json_parser_parse(GQueue *tokens, va_list *ap)
577 {
578 return json_parser_parse_err(tokens, ap, NULL);
579 }
580
581 QObject *json_parser_parse_err(GQueue *tokens, va_list *ap, Error **errp)
582 {
583 JSONParserContext *ctxt = parser_context_new(tokens);
584 QObject *result;
585
586 if (!ctxt) {
587 return NULL;
588 }
589
590 result = parse_value(ctxt, ap);
591
592 error_propagate(errp, ctxt->err);
593
594 parser_context_free(ctxt);
595
596 return result;
597 }