]>
git.proxmox.com Git - qemu.git/blob - json-lexer.c
4 * Copyright IBM, Corp. 2009
7 * Anthony Liguori <aliguori@us.ibm.com>
9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10 * See the COPYING.LIB file in the top-level directory.
18 #include "qemu-common.h"
19 #include "json-lexer.h"
/* Upper bound on lexer->token->length: 64 << 20 = 67108864 (presumably
 * bytes, i.e. 64 MiB -- confirm against the QString definition).  Once a
 * token grows past this, json_lexer_feed_char() emits it as-is and resets
 * the lexer to IN_START, so a single token cannot grow without limit. */
21 #define MAX_TOKEN_SIZE (64ULL << 20)
24 * \"([^\\\"]|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
25 * '([^\\']|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*'
26 * 0|([1-9][0-9]*(.[0-9]+)?([eE]([-+])?[0-9]+))
32 enum json_lexer_state
{
53 IN_NEG_NONZERO_NUMBER
,
/* Table-row helper: expands to a designated initializer that sets entries
 * 0 through 0x7F of a row to STATE.  Uses the GNU C `[first ... last]`
 * range-designator extension.  Entries listed after it in the same row
 * override the value for their own characters. */
65 #define TERMINAL(state) [0 ... 0x7F] = (state)
67 /* Return whether TERMINAL is a terminal state and the transition to it
68 from OLD_STATE required lookahead. This happens whenever the table
69 below uses the TERMINAL macro. */
/* Implementation note: TERMINAL() fills entry 0 of the row, so the check
 * below simply compares entry 0 of OLD_STATE's row against TERMINAL. */
70 #define TERMINAL_NEEDED_LOOKAHEAD(old_state, terminal) \
71 (json_lexer[(old_state)][0] == (terminal))
73 static const uint8_t json_lexer
[][256] = {
74 /* double quote string */
76 ['0' ... '9'] = IN_DQ_STRING
,
77 ['a' ... 'f'] = IN_DQ_STRING
,
78 ['A' ... 'F'] = IN_DQ_STRING
,
81 ['0' ... '9'] = IN_DQ_UCODE3
,
82 ['a' ... 'f'] = IN_DQ_UCODE3
,
83 ['A' ... 'F'] = IN_DQ_UCODE3
,
86 ['0' ... '9'] = IN_DQ_UCODE2
,
87 ['a' ... 'f'] = IN_DQ_UCODE2
,
88 ['A' ... 'F'] = IN_DQ_UCODE2
,
91 ['0' ... '9'] = IN_DQ_UCODE1
,
92 ['a' ... 'f'] = IN_DQ_UCODE1
,
93 ['A' ... 'F'] = IN_DQ_UCODE1
,
95 [IN_DQ_STRING_ESCAPE
] = {
100 ['t'] = IN_DQ_STRING
,
101 ['/'] = IN_DQ_STRING
,
102 ['\\'] = IN_DQ_STRING
,
103 ['\''] = IN_DQ_STRING
,
104 ['\"'] = IN_DQ_STRING
,
105 ['u'] = IN_DQ_UCODE0
,
108 [1 ... 0xBF] = IN_DQ_STRING
,
109 [0xC2 ... 0xF4] = IN_DQ_STRING
,
110 ['\\'] = IN_DQ_STRING_ESCAPE
,
114 /* single quote string */
116 ['0' ... '9'] = IN_SQ_STRING
,
117 ['a' ... 'f'] = IN_SQ_STRING
,
118 ['A' ... 'F'] = IN_SQ_STRING
,
121 ['0' ... '9'] = IN_SQ_UCODE3
,
122 ['a' ... 'f'] = IN_SQ_UCODE3
,
123 ['A' ... 'F'] = IN_SQ_UCODE3
,
126 ['0' ... '9'] = IN_SQ_UCODE2
,
127 ['a' ... 'f'] = IN_SQ_UCODE2
,
128 ['A' ... 'F'] = IN_SQ_UCODE2
,
131 ['0' ... '9'] = IN_SQ_UCODE1
,
132 ['a' ... 'f'] = IN_SQ_UCODE1
,
133 ['A' ... 'F'] = IN_SQ_UCODE1
,
135 [IN_SQ_STRING_ESCAPE
] = {
136 ['b'] = IN_SQ_STRING
,
137 ['f'] = IN_SQ_STRING
,
138 ['n'] = IN_SQ_STRING
,
139 ['r'] = IN_SQ_STRING
,
140 ['t'] = IN_SQ_STRING
,
141 ['/'] = IN_DQ_STRING
,
142 ['\\'] = IN_DQ_STRING
,
143 ['\''] = IN_SQ_STRING
,
144 ['\"'] = IN_SQ_STRING
,
145 ['u'] = IN_SQ_UCODE0
,
148 [1 ... 0xBF] = IN_SQ_STRING
,
149 [0xC2 ... 0xF4] = IN_SQ_STRING
,
150 ['\\'] = IN_SQ_STRING_ESCAPE
,
151 ['\''] = JSON_STRING
,
156 TERMINAL(JSON_INTEGER
),
157 ['0' ... '9'] = IN_ERROR
,
163 TERMINAL(JSON_FLOAT
),
164 ['0' ... '9'] = IN_DIGITS
,
168 ['0' ... '9'] = IN_DIGITS
,
174 ['0' ... '9'] = IN_DIGITS
,
177 [IN_MANTISSA_DIGITS
] = {
178 TERMINAL(JSON_FLOAT
),
179 ['0' ... '9'] = IN_MANTISSA_DIGITS
,
185 ['0' ... '9'] = IN_MANTISSA_DIGITS
,
189 [IN_NONZERO_NUMBER
] = {
190 TERMINAL(JSON_INTEGER
),
191 ['0' ... '9'] = IN_NONZERO_NUMBER
,
197 [IN_NEG_NONZERO_NUMBER
] = {
199 ['1' ... '9'] = IN_NONZERO_NUMBER
,
204 TERMINAL(JSON_KEYWORD
),
205 ['a' ... 'z'] = IN_KEYWORD
,
211 [' '] = IN_WHITESPACE
,
212 ['\t'] = IN_WHITESPACE
,
213 ['\r'] = IN_WHITESPACE
,
214 ['\n'] = IN_WHITESPACE
,
224 ['l'] = IN_ESCAPE_LL
,
232 ['4'] = IN_ESCAPE_I64
,
236 ['6'] = IN_ESCAPE_I6
,
251 ['"'] = IN_DQ_STRING
,
252 ['\''] = IN_SQ_STRING
,
254 ['1' ... '9'] = IN_NONZERO_NUMBER
,
255 ['-'] = IN_NEG_NONZERO_NUMBER
,
256 ['{'] = JSON_OPERATOR
,
257 ['}'] = JSON_OPERATOR
,
258 ['['] = JSON_OPERATOR
,
259 [']'] = JSON_OPERATOR
,
260 [','] = JSON_OPERATOR
,
261 [':'] = JSON_OPERATOR
,
262 ['a' ... 'z'] = IN_KEYWORD
,
264 [' '] = IN_WHITESPACE
,
265 ['\t'] = IN_WHITESPACE
,
266 ['\r'] = IN_WHITESPACE
,
267 ['\n'] = IN_WHITESPACE
,
271 void json_lexer_init(JSONLexer
*lexer
, JSONLexerEmitter func
)
274 lexer
->state
= IN_START
;
275 lexer
->token
= qstring_new();
276 lexer
->x
= lexer
->y
= 0;
279 static int json_lexer_feed_char(JSONLexer
*lexer
, char ch
, bool flush
)
281 int char_consumed
, new_state
;
290 new_state
= json_lexer
[lexer
->state
][(uint8_t)ch
];
291 char_consumed
= !TERMINAL_NEEDED_LOOKAHEAD(lexer
->state
, new_state
);
293 qstring_append_chr(lexer
->token
, ch
);
303 lexer
->emit(lexer
, lexer
->token
, new_state
, lexer
->x
, lexer
->y
);
306 QDECREF(lexer
->token
);
307 lexer
->token
= qstring_new();
308 new_state
= IN_START
;
311 /* XXX: To avoid having previous bad input leaving the parser in an
312 * unresponsive state where we consume unpredictable amounts of
313 * subsequent "good" input, percolate this error state up to the
314 * tokenizer/parser by forcing a NULL object to be emitted, then
317 * Also note that this handling is required for reliable channel
318 * negotiation between QMP and the guest agent, since chr(0xFF)
319 * is placed at the beginning of certain events to ensure proper
320 * delivery when the channel is in an unknown state. chr(0xFF) is
321 * never a valid ASCII/UTF-8 sequence, so this should reliably
322 * induce an error/flush state.
324 lexer
->emit(lexer
, lexer
->token
, JSON_ERROR
, lexer
->x
, lexer
->y
);
325 QDECREF(lexer
->token
);
326 lexer
->token
= qstring_new();
327 new_state
= IN_START
;
328 lexer
->state
= new_state
;
333 lexer
->state
= new_state
;
334 } while (!char_consumed
&& !flush
);
336 /* Do not let a single token grow to an arbitrarily large size,
337 * this is a security consideration.
339 if (lexer
->token
->length
> MAX_TOKEN_SIZE
) {
340 lexer
->emit(lexer
, lexer
->token
, lexer
->state
, lexer
->x
, lexer
->y
);
341 QDECREF(lexer
->token
);
342 lexer
->token
= qstring_new();
343 lexer
->state
= IN_START
;
349 int json_lexer_feed(JSONLexer
*lexer
, const char *buffer
, size_t size
)
353 for (i
= 0; i
< size
; i
++) {
356 err
= json_lexer_feed_char(lexer
, buffer
[i
], false);
/* Flush any partially lexed token.
 *
 * Returns 0 without doing anything when the lexer is already in IN_START.
 * Otherwise it feeds a single 0 byte to json_lexer_feed_char() with
 * flush set to true and returns that call's result. */
365 int json_lexer_flush(JSONLexer
*lexer
)
367 return lexer
->state
== IN_START
? 0 : json_lexer_feed_char(lexer
, 0, true);
370 void json_lexer_destroy(JSONLexer
*lexer
)
372 QDECREF(lexer
->token
);