]> git.proxmox.com Git - qemu.git/blob - json-lexer.c
json-lexer: Handle missing escapes
[qemu.git] / json-lexer.c
1 /*
2 * JSON lexer
3 *
4 * Copyright IBM, Corp. 2009
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10 * See the COPYING.LIB file in the top-level directory.
11 *
12 */
13
14 #include "qstring.h"
15 #include "qlist.h"
16 #include "qdict.h"
17 #include "qint.h"
18 #include "qemu-common.h"
19 #include "json-lexer.h"
20
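/*
 * Tokens recognized by the state machine below, written as plain
 * regular expressions (not C-string escaped):
 *
 * "([^\\"]|\\["'\\/bfnrt]|\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])*"
 * '([^\\']|\\["'\\/bfnrt]|\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])*'
 * -?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][-+]?[0-9]+)?
 *     (except that an exponent may not directly follow a lone 0: "0e5"
 *      is not a single number token, while "0.0e5" is)
 * [{}\[\],:]
 * [a-z]+
 *
 */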
29
/*
 * States of the lexer's state machine.  Each value is a row index into
 * the json_lexer[][] transition table.
 *
 * ERROR must remain 0: table entries without an explicit initializer are
 * zero and therefore mean "no valid transition".
 */
enum json_lexer_state {
    ERROR = 0,                  /* no valid transition */

    IN_DONE_STRING,             /* closing quote consumed, awaiting lookahead */

    /* double-quoted string; UCODEn = n hex digits of \uXXXX consumed */
    IN_DQ_UCODE3,
    IN_DQ_UCODE2,
    IN_DQ_UCODE1,
    IN_DQ_UCODE0,
    IN_DQ_STRING_ESCAPE,        /* backslash consumed inside "..." */
    IN_DQ_STRING,               /* inside "..." */

    /* single-quoted string; UCODEn = n hex digits of \uXXXX consumed */
    IN_SQ_UCODE3,
    IN_SQ_UCODE2,
    IN_SQ_UCODE1,
    IN_SQ_UCODE0,
    IN_SQ_STRING_ESCAPE,        /* backslash consumed inside '...' */
    IN_SQ_STRING,               /* inside '...' */

    /* numbers */
    IN_ZERO,                    /* a leading 0 consumed */
    IN_DIGITS,                  /* one or more exponent digits consumed */
    IN_DIGIT,                   /* exponent sign consumed, digit required */
    IN_EXP_E,                   /* 'e' or 'E' consumed */
    IN_MANTISSA,                /* '.' consumed, digit required */
    IN_MANTISSA_DIGITS,         /* one or more fraction digits consumed */
    IN_NONZERO_NUMBER,          /* integer part starting with 1-9 */
    IN_NEG_NONZERO_NUMBER,      /* '-' consumed, digit required */

    IN_KEYWORD,                 /* run of lowercase letters */

    /* '%' escapes */
    IN_ESCAPE,                  /* '%' consumed */
    IN_ESCAPE_L,                /* "%l" consumed */
    IN_ESCAPE_LL,               /* "%ll" consumed */
    IN_ESCAPE_I,                /* "%I" consumed */
    IN_ESCAPE_I6,               /* "%I6" consumed */
    IN_ESCAPE_I64,              /* "%I64" consumed */
    IN_ESCAPE_DONE,             /* escape complete, awaiting lookahead */

    IN_WHITESPACE,              /* run of blanks, tabs, CRs and newlines */
    IN_OPERATOR_DONE,           /* single-character operator consumed */
    IN_START,                   /* between tokens */
};
65
66 #define TERMINAL(state) [0 ... 0x7F] = (state)
67
68 static const uint8_t json_lexer[][256] = {
69 [IN_DONE_STRING] = {
70 TERMINAL(JSON_STRING),
71 },
72
73 /* double quote string */
74 [IN_DQ_UCODE3] = {
75 ['0' ... '9'] = IN_DQ_STRING,
76 ['a' ... 'f'] = IN_DQ_STRING,
77 ['A' ... 'F'] = IN_DQ_STRING,
78 },
79 [IN_DQ_UCODE2] = {
80 ['0' ... '9'] = IN_DQ_UCODE3,
81 ['a' ... 'f'] = IN_DQ_UCODE3,
82 ['A' ... 'F'] = IN_DQ_UCODE3,
83 },
84 [IN_DQ_UCODE1] = {
85 ['0' ... '9'] = IN_DQ_UCODE2,
86 ['a' ... 'f'] = IN_DQ_UCODE2,
87 ['A' ... 'F'] = IN_DQ_UCODE2,
88 },
89 [IN_DQ_UCODE0] = {
90 ['0' ... '9'] = IN_DQ_UCODE1,
91 ['a' ... 'f'] = IN_DQ_UCODE1,
92 ['A' ... 'F'] = IN_DQ_UCODE1,
93 },
94 [IN_DQ_STRING_ESCAPE] = {
95 ['b'] = IN_DQ_STRING,
96 ['f'] = IN_DQ_STRING,
97 ['n'] = IN_DQ_STRING,
98 ['r'] = IN_DQ_STRING,
99 ['t'] = IN_DQ_STRING,
100 ['/'] = IN_DQ_STRING,
101 ['\\'] = IN_DQ_STRING,
102 ['\''] = IN_DQ_STRING,
103 ['\"'] = IN_DQ_STRING,
104 ['u'] = IN_DQ_UCODE0,
105 },
106 [IN_DQ_STRING] = {
107 [1 ... 0xFF] = IN_DQ_STRING,
108 ['\\'] = IN_DQ_STRING_ESCAPE,
109 ['"'] = IN_DONE_STRING,
110 },
111
112 /* single quote string */
113 [IN_SQ_UCODE3] = {
114 ['0' ... '9'] = IN_SQ_STRING,
115 ['a' ... 'f'] = IN_SQ_STRING,
116 ['A' ... 'F'] = IN_SQ_STRING,
117 },
118 [IN_SQ_UCODE2] = {
119 ['0' ... '9'] = IN_SQ_UCODE3,
120 ['a' ... 'f'] = IN_SQ_UCODE3,
121 ['A' ... 'F'] = IN_SQ_UCODE3,
122 },
123 [IN_SQ_UCODE1] = {
124 ['0' ... '9'] = IN_SQ_UCODE2,
125 ['a' ... 'f'] = IN_SQ_UCODE2,
126 ['A' ... 'F'] = IN_SQ_UCODE2,
127 },
128 [IN_SQ_UCODE0] = {
129 ['0' ... '9'] = IN_SQ_UCODE1,
130 ['a' ... 'f'] = IN_SQ_UCODE1,
131 ['A' ... 'F'] = IN_SQ_UCODE1,
132 },
133 [IN_SQ_STRING_ESCAPE] = {
134 ['b'] = IN_SQ_STRING,
135 ['f'] = IN_SQ_STRING,
136 ['n'] = IN_SQ_STRING,
137 ['r'] = IN_SQ_STRING,
138 ['t'] = IN_SQ_STRING,
139 ['/'] = IN_DQ_STRING,
140 ['\\'] = IN_DQ_STRING,
141 ['\''] = IN_SQ_STRING,
142 ['\"'] = IN_SQ_STRING,
143 ['u'] = IN_SQ_UCODE0,
144 },
145 [IN_SQ_STRING] = {
146 [1 ... 0xFF] = IN_SQ_STRING,
147 ['\\'] = IN_SQ_STRING_ESCAPE,
148 ['\''] = IN_DONE_STRING,
149 },
150
151 /* Zero */
152 [IN_ZERO] = {
153 TERMINAL(JSON_INTEGER),
154 ['0' ... '9'] = ERROR,
155 ['.'] = IN_MANTISSA,
156 },
157
158 /* Float */
159 [IN_DIGITS] = {
160 TERMINAL(JSON_FLOAT),
161 ['0' ... '9'] = IN_DIGITS,
162 },
163
164 [IN_DIGIT] = {
165 ['0' ... '9'] = IN_DIGITS,
166 },
167
168 [IN_EXP_E] = {
169 ['-'] = IN_DIGIT,
170 ['+'] = IN_DIGIT,
171 ['0' ... '9'] = IN_DIGITS,
172 },
173
174 [IN_MANTISSA_DIGITS] = {
175 TERMINAL(JSON_FLOAT),
176 ['0' ... '9'] = IN_MANTISSA_DIGITS,
177 ['e'] = IN_EXP_E,
178 ['E'] = IN_EXP_E,
179 },
180
181 [IN_MANTISSA] = {
182 ['0' ... '9'] = IN_MANTISSA_DIGITS,
183 },
184
185 /* Number */
186 [IN_NONZERO_NUMBER] = {
187 TERMINAL(JSON_INTEGER),
188 ['0' ... '9'] = IN_NONZERO_NUMBER,
189 ['e'] = IN_EXP_E,
190 ['E'] = IN_EXP_E,
191 ['.'] = IN_MANTISSA,
192 },
193
194 [IN_NEG_NONZERO_NUMBER] = {
195 ['0'] = IN_ZERO,
196 ['1' ... '9'] = IN_NONZERO_NUMBER,
197 },
198
199 /* keywords */
200 [IN_KEYWORD] = {
201 TERMINAL(JSON_KEYWORD),
202 ['a' ... 'z'] = IN_KEYWORD,
203 },
204
205 /* whitespace */
206 [IN_WHITESPACE] = {
207 TERMINAL(JSON_SKIP),
208 [' '] = IN_WHITESPACE,
209 ['\t'] = IN_WHITESPACE,
210 ['\r'] = IN_WHITESPACE,
211 ['\n'] = IN_WHITESPACE,
212 },
213
214 /* operator */
215 [IN_OPERATOR_DONE] = {
216 TERMINAL(JSON_OPERATOR),
217 },
218
219 /* escape */
220 [IN_ESCAPE_DONE] = {
221 TERMINAL(JSON_ESCAPE),
222 },
223
224 [IN_ESCAPE_LL] = {
225 ['d'] = IN_ESCAPE_DONE,
226 },
227
228 [IN_ESCAPE_L] = {
229 ['d'] = IN_ESCAPE_DONE,
230 ['l'] = IN_ESCAPE_LL,
231 },
232
233 [IN_ESCAPE_I64] = {
234 ['d'] = IN_ESCAPE_DONE,
235 },
236
237 [IN_ESCAPE_I6] = {
238 ['4'] = IN_ESCAPE_I64,
239 },
240
241 [IN_ESCAPE_I] = {
242 ['6'] = IN_ESCAPE_I6,
243 },
244
245 [IN_ESCAPE] = {
246 ['d'] = IN_ESCAPE_DONE,
247 ['i'] = IN_ESCAPE_DONE,
248 ['p'] = IN_ESCAPE_DONE,
249 ['s'] = IN_ESCAPE_DONE,
250 ['f'] = IN_ESCAPE_DONE,
251 ['l'] = IN_ESCAPE_L,
252 ['I'] = IN_ESCAPE_I,
253 },
254
255 /* top level rule */
256 [IN_START] = {
257 ['"'] = IN_DQ_STRING,
258 ['\''] = IN_SQ_STRING,
259 ['0'] = IN_ZERO,
260 ['1' ... '9'] = IN_NONZERO_NUMBER,
261 ['-'] = IN_NEG_NONZERO_NUMBER,
262 ['{'] = IN_OPERATOR_DONE,
263 ['}'] = IN_OPERATOR_DONE,
264 ['['] = IN_OPERATOR_DONE,
265 [']'] = IN_OPERATOR_DONE,
266 [','] = IN_OPERATOR_DONE,
267 [':'] = IN_OPERATOR_DONE,
268 ['a' ... 'z'] = IN_KEYWORD,
269 ['%'] = IN_ESCAPE,
270 [' '] = IN_WHITESPACE,
271 ['\t'] = IN_WHITESPACE,
272 ['\r'] = IN_WHITESPACE,
273 ['\n'] = IN_WHITESPACE,
274 },
275 };
276
277 void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
278 {
279 lexer->emit = func;
280 lexer->state = IN_START;
281 lexer->token = qstring_new();
282 lexer->x = lexer->y = 0;
283 }
284
285 static int json_lexer_feed_char(JSONLexer *lexer, char ch)
286 {
287 char buf[2];
288
289 lexer->x++;
290 if (ch == '\n') {
291 lexer->x = 0;
292 lexer->y++;
293 }
294
295 lexer->state = json_lexer[lexer->state][(uint8_t)ch];
296
297 switch (lexer->state) {
298 case JSON_OPERATOR:
299 case JSON_ESCAPE:
300 case JSON_INTEGER:
301 case JSON_FLOAT:
302 case JSON_KEYWORD:
303 case JSON_STRING:
304 lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y);
305 case JSON_SKIP:
306 lexer->state = json_lexer[IN_START][(uint8_t)ch];
307 QDECREF(lexer->token);
308 lexer->token = qstring_new();
309 break;
310 case ERROR:
311 return -EINVAL;
312 default:
313 break;
314 }
315
316 buf[0] = ch;
317 buf[1] = 0;
318
319 qstring_append(lexer->token, buf);
320
321 return 0;
322 }
323
324 int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
325 {
326 size_t i;
327
328 for (i = 0; i < size; i++) {
329 int err;
330
331 err = json_lexer_feed_char(lexer, buffer[i]);
332 if (err < 0) {
333 return err;
334 }
335 }
336
337 return 0;
338 }
339
340 int json_lexer_flush(JSONLexer *lexer)
341 {
342 return json_lexer_feed_char(lexer, 0);
343 }
344
/*
 * Release the resources held by @lexer: drops the lexer's reference to
 * its token buffer.  The JSONLexer structure itself is not freed here.
 */
void json_lexer_destroy(JSONLexer *lexer)
{
    QDECREF(lexer->token);
}