]> git.proxmox.com Git - qemu.git/blame - json-parser.c
json-lexer: Handle missing escapes
[qemu.git] / json-parser.c
CommitLineData
4a5fcab7
AL
1/*
2 * JSON Parser
3 *
4 * Copyright IBM, Corp. 2009
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10 * See the COPYING.LIB file in the top-level directory.
11 *
12 */
13
14#include <stdbool.h>
c96c84a9 15#include <stdarg.h>
4a5fcab7
AL
16
17#include "qemu-common.h"
18#include "qstring.h"
19#include "qint.h"
20#include "qdict.h"
21#include "qlist.h"
22#include "qfloat.h"
23#include "qbool.h"
24#include "json-parser.h"
25#include "json-lexer.h"
26
/*
 * Context handed to every parse_*() routine and to parse_error().
 * It has no members at present.
 */
typedef struct JSONParserContext {
} JSONParserContext;

/* Assert that @cond does not hold. */
#define BUG_ON(cond) assert(!(cond))
32
33/**
34 * TODO
35 *
36 * 0) make errors meaningful again
37 * 1) add geometry information to tokens
38 * 3) should we return a parsed size?
39 * 4) deal with premature EOI
40 */
41
42static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap);
43
44/**
45 * Token manipulators
46 *
47 * tokens are dictionaries that contain a type, a string value, and geometry information
48 * about a token identified by the lexer. These are routines that make working with
49 * these objects a bit easier.
50 */
51static const char *token_get_value(QObject *obj)
52{
53 return qdict_get_str(qobject_to_qdict(obj), "token");
54}
55
56static JSONTokenType token_get_type(QObject *obj)
57{
58 return qdict_get_int(qobject_to_qdict(obj), "type");
59}
60
61static int token_is_operator(QObject *obj, char op)
62{
63 const char *val;
64
65 if (token_get_type(obj) != JSON_OPERATOR) {
66 return 0;
67 }
68
69 val = token_get_value(obj);
70
71 return (val[0] == op) && (val[1] == 0);
72}
73
74static int token_is_keyword(QObject *obj, const char *value)
75{
76 if (token_get_type(obj) != JSON_KEYWORD) {
77 return 0;
78 }
79
80 return strcmp(token_get_value(obj), value) == 0;
81}
82
83static int token_is_escape(QObject *obj, const char *value)
84{
85 if (token_get_type(obj) != JSON_ESCAPE) {
86 return 0;
87 }
88
89 return (strcmp(token_get_value(obj), value) == 0);
90}
91
92/**
93 * Error handler
94 */
95static void parse_error(JSONParserContext *ctxt, QObject *token, const char *msg, ...)
96{
c96c84a9
AK
97 va_list ap;
98 va_start(ap, msg);
99 fprintf(stderr, "parse error: ");
100 vfprintf(stderr, msg, ap);
101 fprintf(stderr, "\n");
102 va_end(ap);
4a5fcab7
AL
103}
104
105/**
106 * String helpers
107 *
108 * These helpers are used to unescape strings.
109 */
/*
 * Encode the 16-bit code unit @wchar as a NUL-terminated UTF-8 sequence
 * in @buffer.
 *
 * One, two or three bytes are written before the terminator, depending on
 * the value; BUG_ON() fires when @buffer_length is too small for the
 * sequence plus the terminating NUL.
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    char *out = buffer;

    if (wchar < 0x0080) {
        /* 0xxxxxxx */
        BUG_ON(buffer_length < 2);

        *out++ = wchar & 0x7F;
    } else if (wchar < 0x0800) {
        /* 110xxxxx 10xxxxxx */
        BUG_ON(buffer_length < 3);

        *out++ = 0xC0 | ((wchar >> 6) & 0x1F);
        *out++ = 0x80 | (wchar & 0x3F);
    } else {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        BUG_ON(buffer_length < 4);

        *out++ = 0xE0 | ((wchar >> 12) & 0x0F);
        *out++ = 0x80 | ((wchar >> 6) & 0x3F);
        *out++ = 0x80 | (wchar & 0x3F);
    }

    *out = 0;
}
132
/*
 * Convert one hexadecimal digit (either case) to its value 0..15.
 * Returns -1 when @ch is not a hexadecimal digit.
 */
static int hex2decimal(char ch)
{
    if ('0' <= ch && ch <= '9') {
        return ch - '0';
    }
    if ('a' <= ch && ch <= 'f') {
        return (ch - 'a') + 10;
    }
    if ('A' <= ch && ch <= 'F') {
        return (ch - 'A') + 10;
    }

    return -1;
}
145
146/**
147 * parse_string(): Parse a json string and return a QObject
148 *
149 * string
150 * ""
151 * " chars "
152 * chars
153 * char
154 * char chars
155 * char
156 * any-Unicode-character-
157 * except-"-or-\-or-
158 * control-character
159 * \"
160 * \\
161 * \/
162 * \b
163 * \f
164 * \n
165 * \r
166 * \t
167 * \u four-hex-digits
168 */
169static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
170{
171 const char *ptr = token_get_value(token);
172 QString *str;
173 int double_quote = 1;
174
175 if (*ptr == '"') {
176 double_quote = 1;
177 } else {
178 double_quote = 0;
179 }
180 ptr++;
181
182 str = qstring_new();
183 while (*ptr &&
184 ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
185 if (*ptr == '\\') {
186 ptr++;
187
188 switch (*ptr) {
189 case '"':
190 qstring_append(str, "\"");
191 ptr++;
192 break;
193 case '\'':
194 qstring_append(str, "'");
195 ptr++;
196 break;
197 case '\\':
198 qstring_append(str, "\\");
199 ptr++;
200 break;
201 case '/':
202 qstring_append(str, "/");
203 ptr++;
204 break;
205 case 'b':
206 qstring_append(str, "\b");
207 ptr++;
208 break;
209 case 'n':
210 qstring_append(str, "\n");
211 ptr++;
212 break;
213 case 'r':
214 qstring_append(str, "\r");
215 ptr++;
216 break;
217 case 't':
218 qstring_append(str, "\t");
219 ptr++;
220 break;
221 case 'u': {
222 uint16_t unicode_char = 0;
223 char utf8_char[4];
224 int i = 0;
225
226 ptr++;
227
228 for (i = 0; i < 4; i++) {
229 if (qemu_isxdigit(*ptr)) {
230 unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
231 } else {
232 parse_error(ctxt, token,
233 "invalid hex escape sequence in string");
234 goto out;
235 }
236 ptr++;
237 }
238
239 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
240 qstring_append(str, utf8_char);
241 } break;
242 default:
243 parse_error(ctxt, token, "invalid escape sequence in string");
244 goto out;
245 }
246 } else {
247 char dummy[2];
248
249 dummy[0] = *ptr++;
250 dummy[1] = 0;
251
252 qstring_append(str, dummy);
253 }
254 }
255
4a5fcab7
AL
256 return str;
257
258out:
259 QDECREF(str);
260 return NULL;
261}
262
263/**
264 * Parsing rules
265 */
266static int parse_pair(JSONParserContext *ctxt, QDict *dict, QList **tokens, va_list *ap)
267{
268 QObject *key, *token = NULL, *value, *peek;
269 QList *working = qlist_copy(*tokens);
270
271 peek = qlist_peek(working);
272 key = parse_value(ctxt, &working, ap);
d758d90f 273 if (!key || qobject_type(key) != QTYPE_QSTRING) {
4a5fcab7
AL
274 parse_error(ctxt, peek, "key is not a string in object");
275 goto out;
276 }
277
278 token = qlist_pop(working);
279 if (!token_is_operator(token, ':')) {
280 parse_error(ctxt, token, "missing : in object pair");
281 goto out;
282 }
283
284 value = parse_value(ctxt, &working, ap);
285 if (value == NULL) {
286 parse_error(ctxt, token, "Missing value in dict");
287 goto out;
288 }
289
290 qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);
291
292 qobject_decref(token);
293 qobject_decref(key);
294 QDECREF(*tokens);
295 *tokens = working;
296
297 return 0;
298
299out:
300 qobject_decref(token);
301 qobject_decref(key);
302 QDECREF(working);
303
304 return -1;
305}
306
307static QObject *parse_object(JSONParserContext *ctxt, QList **tokens, va_list *ap)
308{
309 QDict *dict = NULL;
310 QObject *token, *peek;
311 QList *working = qlist_copy(*tokens);
312
313 token = qlist_pop(working);
314 if (!token_is_operator(token, '{')) {
315 goto out;
316 }
317 qobject_decref(token);
318 token = NULL;
319
320 dict = qdict_new();
321
322 peek = qlist_peek(working);
323 if (!token_is_operator(peek, '}')) {
324 if (parse_pair(ctxt, dict, &working, ap) == -1) {
325 goto out;
326 }
327
328 token = qlist_pop(working);
329 while (!token_is_operator(token, '}')) {
330 if (!token_is_operator(token, ',')) {
331 parse_error(ctxt, token, "expected separator in dict");
332 goto out;
333 }
334 qobject_decref(token);
335 token = NULL;
336
337 if (parse_pair(ctxt, dict, &working, ap) == -1) {
338 goto out;
339 }
340
341 token = qlist_pop(working);
342 }
343 qobject_decref(token);
344 token = NULL;
345 } else {
346 token = qlist_pop(working);
347 qobject_decref(token);
348 token = NULL;
349 }
350
351 QDECREF(*tokens);
352 *tokens = working;
353
354 return QOBJECT(dict);
355
356out:
357 qobject_decref(token);
358 QDECREF(working);
359 QDECREF(dict);
360 return NULL;
361}
362
363static QObject *parse_array(JSONParserContext *ctxt, QList **tokens, va_list *ap)
364{
365 QList *list = NULL;
366 QObject *token, *peek;
367 QList *working = qlist_copy(*tokens);
368
369 token = qlist_pop(working);
370 if (!token_is_operator(token, '[')) {
371 goto out;
372 }
373 qobject_decref(token);
374 token = NULL;
375
376 list = qlist_new();
377
378 peek = qlist_peek(working);
379 if (!token_is_operator(peek, ']')) {
380 QObject *obj;
381
382 obj = parse_value(ctxt, &working, ap);
383 if (obj == NULL) {
384 parse_error(ctxt, token, "expecting value");
385 goto out;
386 }
387
388 qlist_append_obj(list, obj);
389
390 token = qlist_pop(working);
391 while (!token_is_operator(token, ']')) {
392 if (!token_is_operator(token, ',')) {
393 parse_error(ctxt, token, "expected separator in list");
394 goto out;
395 }
396
397 qobject_decref(token);
398 token = NULL;
399
400 obj = parse_value(ctxt, &working, ap);
401 if (obj == NULL) {
402 parse_error(ctxt, token, "expecting value");
403 goto out;
404 }
405
406 qlist_append_obj(list, obj);
407
408 token = qlist_pop(working);
409 }
410
411 qobject_decref(token);
412 token = NULL;
413 } else {
414 token = qlist_pop(working);
415 qobject_decref(token);
416 token = NULL;
417 }
418
419 QDECREF(*tokens);
420 *tokens = working;
421
422 return QOBJECT(list);
423
424out:
425 qobject_decref(token);
426 QDECREF(working);
427 QDECREF(list);
428 return NULL;
429}
430
431static QObject *parse_keyword(JSONParserContext *ctxt, QList **tokens)
432{
433 QObject *token, *ret;
434 QList *working = qlist_copy(*tokens);
435
436 token = qlist_pop(working);
437
438 if (token_get_type(token) != JSON_KEYWORD) {
439 goto out;
440 }
441
442 if (token_is_keyword(token, "true")) {
443 ret = QOBJECT(qbool_from_int(true));
444 } else if (token_is_keyword(token, "false")) {
445 ret = QOBJECT(qbool_from_int(false));
446 } else {
447 parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
448 goto out;
449 }
450
451 qobject_decref(token);
452 QDECREF(*tokens);
453 *tokens = working;
454
455 return ret;
456
457out:
458 qobject_decref(token);
459 QDECREF(working);
460
461 return NULL;
462}
463
464static QObject *parse_escape(JSONParserContext *ctxt, QList **tokens, va_list *ap)
465{
466 QObject *token = NULL, *obj;
467 QList *working = qlist_copy(*tokens);
468
469 if (ap == NULL) {
470 goto out;
471 }
472
473 token = qlist_pop(working);
474
475 if (token_is_escape(token, "%p")) {
476 obj = va_arg(*ap, QObject *);
477 } else if (token_is_escape(token, "%i")) {
478 obj = QOBJECT(qbool_from_int(va_arg(*ap, int)));
479 } else if (token_is_escape(token, "%d")) {
480 obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
481 } else if (token_is_escape(token, "%ld")) {
482 obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
2c0d4b36
RT
483 } else if (token_is_escape(token, "%lld") ||
484 token_is_escape(token, "%I64d")) {
4a5fcab7
AL
485 obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
486 } else if (token_is_escape(token, "%s")) {
487 obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
488 } else if (token_is_escape(token, "%f")) {
489 obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
490 } else {
491 goto out;
492 }
493
494 qobject_decref(token);
495 QDECREF(*tokens);
496 *tokens = working;
497
498 return obj;
499
500out:
501 qobject_decref(token);
502 QDECREF(working);
503
504 return NULL;
505}
506
507static QObject *parse_literal(JSONParserContext *ctxt, QList **tokens)
508{
509 QObject *token, *obj;
510 QList *working = qlist_copy(*tokens);
511
512 token = qlist_pop(working);
513 switch (token_get_type(token)) {
514 case JSON_STRING:
515 obj = QOBJECT(qstring_from_escaped_str(ctxt, token));
516 break;
517 case JSON_INTEGER:
518 obj = QOBJECT(qint_from_int(strtoll(token_get_value(token), NULL, 10)));
519 break;
520 case JSON_FLOAT:
521 /* FIXME dependent on locale */
522 obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
523 break;
524 default:
525 goto out;
526 }
527
528 qobject_decref(token);
529 QDECREF(*tokens);
530 *tokens = working;
531
532 return obj;
533
534out:
535 qobject_decref(token);
536 QDECREF(working);
537
538 return NULL;
539}
540
541static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap)
542{
543 QObject *obj;
544
545 obj = parse_object(ctxt, tokens, ap);
546 if (obj == NULL) {
547 obj = parse_array(ctxt, tokens, ap);
548 }
549 if (obj == NULL) {
550 obj = parse_escape(ctxt, tokens, ap);
551 }
552 if (obj == NULL) {
553 obj = parse_keyword(ctxt, tokens);
554 }
555 if (obj == NULL) {
556 obj = parse_literal(ctxt, tokens);
557 }
558
559 return obj;
560}
561
562QObject *json_parser_parse(QList *tokens, va_list *ap)
563{
564 JSONParserContext ctxt = {};
565 QList *working = qlist_copy(tokens);
566 QObject *result;
567
568 result = parse_value(&ctxt, &working, ap);
569
570 QDECREF(working);
571
572 return result;
573}