]> git.proxmox.com Git - qemu.git/blob - json-lexer.c
tcg-sparc: Use TCG_TARGET_REG_BITS in conditional compilation.
[qemu.git] / json-lexer.c
1 /*
2 * JSON lexer
3 *
4 * Copyright IBM, Corp. 2009
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10 * See the COPYING.LIB file in the top-level directory.
11 *
12 */
13
14 #include "qstring.h"
15 #include "qlist.h"
16 #include "qdict.h"
17 #include "qint.h"
18 #include "qemu-common.h"
19 #include "json-lexer.h"
20
21 /*
22 * \"([^\\\"]|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
23 * '([^\\']|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*'
24 * -?(0(\.[0-9]+([eE][-+]?[0-9]+)?)?|[1-9][0-9]*(\.[0-9]+)?([eE][-+]?[0-9]+)?)
25 * [{}\[\],:]
26 * [a-z]+
27 *
28 */
29
/*
 * States of the table-driven lexer.
 *
 * Each enumerator is used as a row index into the json_lexer[] transition
 * table, so the order (and therefore the numeric value) of the entries
 * must not change.  ERROR is explicitly zero, which means every table
 * cell without an initializer is an error transition.
 */
enum json_lexer_state {
    ERROR = 0,

    /* closing quote of a string has been read */
    IN_DONE_STRING,

    /* double-quoted string: \uXXXX digits (UCODE0 expects the first
     * hex digit, UCODE3 the last), backslash escape, string body */
    IN_DQ_UCODE3,
    IN_DQ_UCODE2,
    IN_DQ_UCODE1,
    IN_DQ_UCODE0,
    IN_DQ_STRING_ESCAPE,
    IN_DQ_STRING,

    /* single-quoted string: same layout as the double-quoted states */
    IN_SQ_UCODE3,
    IN_SQ_UCODE2,
    IN_SQ_UCODE1,
    IN_SQ_UCODE0,
    IN_SQ_STRING_ESCAPE,
    IN_SQ_STRING,

    /* numbers */
    IN_ZERO,                /* a leading '0' */
    IN_DIGITS,              /* exponent digits */
    IN_DIGIT,               /* after the exponent sign, a digit is required */
    IN_EXP_E,               /* after 'e' / 'E' */
    IN_MANTISSA,            /* after '.', a digit is required */
    IN_MANTISSA_DIGITS,     /* fraction digits */
    IN_NONZERO_NUMBER,      /* integer part starting with 1-9 */
    IN_NEG_NONZERO_NUMBER,  /* after '-'; accepts '0' as well as 1-9 */

    /* run of lowercase letters */
    IN_KEYWORD,

    /* '%' escapes: after '%', after "%l", after "%ll", complete */
    IN_ESCAPE,
    IN_ESCAPE_L,
    IN_ESCAPE_LL,
    IN_ESCAPE_DONE,

    IN_WHITESPACE,
    IN_OPERATOR_DONE,

    /* initial state, and the state a new token is dispatched from */
    IN_START,
};
62
/*
 * Row initializer for a state in which a token is already complete:
 * every byte in the ASCII range 0x00-0x7F maps to 'tok'.  Designators
 * that follow it in the same row override individual bytes; bytes
 * 0x80-0xFF are left at zero.  Relies on the GNU range-designator
 * extension.
 */
#define TERMINAL(tok) [0x00 ... 0x7F] = (tok)
64
65 static const uint8_t json_lexer[][256] = {
66 [IN_DONE_STRING] = {
67 TERMINAL(JSON_STRING),
68 },
69
70 /* double quote string */
71 [IN_DQ_UCODE3] = {
72 ['0' ... '9'] = IN_DQ_STRING,
73 ['a' ... 'f'] = IN_DQ_STRING,
74 ['A' ... 'F'] = IN_DQ_STRING,
75 },
76 [IN_DQ_UCODE2] = {
77 ['0' ... '9'] = IN_DQ_UCODE3,
78 ['a' ... 'f'] = IN_DQ_UCODE3,
79 ['A' ... 'F'] = IN_DQ_UCODE3,
80 },
81 [IN_DQ_UCODE1] = {
82 ['0' ... '9'] = IN_DQ_UCODE2,
83 ['a' ... 'f'] = IN_DQ_UCODE2,
84 ['A' ... 'F'] = IN_DQ_UCODE2,
85 },
86 [IN_DQ_UCODE0] = {
87 ['0' ... '9'] = IN_DQ_UCODE1,
88 ['a' ... 'f'] = IN_DQ_UCODE1,
89 ['A' ... 'F'] = IN_DQ_UCODE1,
90 },
91 [IN_DQ_STRING_ESCAPE] = {
92 ['b'] = IN_DQ_STRING,
93 ['f'] = IN_DQ_STRING,
94 ['n'] = IN_DQ_STRING,
95 ['r'] = IN_DQ_STRING,
96 ['t'] = IN_DQ_STRING,
97 ['\''] = IN_DQ_STRING,
98 ['\"'] = IN_DQ_STRING,
99 ['u'] = IN_DQ_UCODE0,
100 },
101 [IN_DQ_STRING] = {
102 [1 ... 0xFF] = IN_DQ_STRING,
103 ['\\'] = IN_DQ_STRING_ESCAPE,
104 ['"'] = IN_DONE_STRING,
105 },
106
107 /* single quote string */
108 [IN_SQ_UCODE3] = {
109 ['0' ... '9'] = IN_SQ_STRING,
110 ['a' ... 'f'] = IN_SQ_STRING,
111 ['A' ... 'F'] = IN_SQ_STRING,
112 },
113 [IN_SQ_UCODE2] = {
114 ['0' ... '9'] = IN_SQ_UCODE3,
115 ['a' ... 'f'] = IN_SQ_UCODE3,
116 ['A' ... 'F'] = IN_SQ_UCODE3,
117 },
118 [IN_SQ_UCODE1] = {
119 ['0' ... '9'] = IN_SQ_UCODE2,
120 ['a' ... 'f'] = IN_SQ_UCODE2,
121 ['A' ... 'F'] = IN_SQ_UCODE2,
122 },
123 [IN_SQ_UCODE0] = {
124 ['0' ... '9'] = IN_SQ_UCODE1,
125 ['a' ... 'f'] = IN_SQ_UCODE1,
126 ['A' ... 'F'] = IN_SQ_UCODE1,
127 },
128 [IN_SQ_STRING_ESCAPE] = {
129 ['b'] = IN_SQ_STRING,
130 ['f'] = IN_SQ_STRING,
131 ['n'] = IN_SQ_STRING,
132 ['r'] = IN_SQ_STRING,
133 ['t'] = IN_SQ_STRING,
134 ['\''] = IN_SQ_STRING,
135 ['\"'] = IN_SQ_STRING,
136 ['u'] = IN_SQ_UCODE0,
137 },
138 [IN_SQ_STRING] = {
139 [1 ... 0xFF] = IN_SQ_STRING,
140 ['\\'] = IN_SQ_STRING_ESCAPE,
141 ['\''] = IN_DONE_STRING,
142 },
143
144 /* Zero */
145 [IN_ZERO] = {
146 TERMINAL(JSON_INTEGER),
147 ['0' ... '9'] = ERROR,
148 ['.'] = IN_MANTISSA,
149 },
150
151 /* Float */
152 [IN_DIGITS] = {
153 TERMINAL(JSON_FLOAT),
154 ['0' ... '9'] = IN_DIGITS,
155 },
156
157 [IN_DIGIT] = {
158 ['0' ... '9'] = IN_DIGITS,
159 },
160
161 [IN_EXP_E] = {
162 ['-'] = IN_DIGIT,
163 ['+'] = IN_DIGIT,
164 ['0' ... '9'] = IN_DIGITS,
165 },
166
167 [IN_MANTISSA_DIGITS] = {
168 TERMINAL(JSON_FLOAT),
169 ['0' ... '9'] = IN_MANTISSA_DIGITS,
170 ['e'] = IN_EXP_E,
171 ['E'] = IN_EXP_E,
172 },
173
174 [IN_MANTISSA] = {
175 ['0' ... '9'] = IN_MANTISSA_DIGITS,
176 },
177
178 /* Number */
179 [IN_NONZERO_NUMBER] = {
180 TERMINAL(JSON_INTEGER),
181 ['0' ... '9'] = IN_NONZERO_NUMBER,
182 ['e'] = IN_EXP_E,
183 ['E'] = IN_EXP_E,
184 ['.'] = IN_MANTISSA,
185 },
186
187 [IN_NEG_NONZERO_NUMBER] = {
188 ['0'] = IN_ZERO,
189 ['1' ... '9'] = IN_NONZERO_NUMBER,
190 },
191
192 /* keywords */
193 [IN_KEYWORD] = {
194 TERMINAL(JSON_KEYWORD),
195 ['a' ... 'z'] = IN_KEYWORD,
196 },
197
198 /* whitespace */
199 [IN_WHITESPACE] = {
200 TERMINAL(JSON_SKIP),
201 [' '] = IN_WHITESPACE,
202 ['\t'] = IN_WHITESPACE,
203 ['\r'] = IN_WHITESPACE,
204 ['\n'] = IN_WHITESPACE,
205 },
206
207 /* operator */
208 [IN_OPERATOR_DONE] = {
209 TERMINAL(JSON_OPERATOR),
210 },
211
212 /* escape */
213 [IN_ESCAPE_DONE] = {
214 TERMINAL(JSON_ESCAPE),
215 },
216
217 [IN_ESCAPE_LL] = {
218 ['d'] = IN_ESCAPE_DONE,
219 },
220
221 [IN_ESCAPE_L] = {
222 ['d'] = IN_ESCAPE_DONE,
223 ['l'] = IN_ESCAPE_LL,
224 },
225
226 [IN_ESCAPE] = {
227 ['d'] = IN_ESCAPE_DONE,
228 ['i'] = IN_ESCAPE_DONE,
229 ['p'] = IN_ESCAPE_DONE,
230 ['s'] = IN_ESCAPE_DONE,
231 ['f'] = IN_ESCAPE_DONE,
232 ['l'] = IN_ESCAPE_L,
233 },
234
235 /* top level rule */
236 [IN_START] = {
237 ['"'] = IN_DQ_STRING,
238 ['\''] = IN_SQ_STRING,
239 ['0'] = IN_ZERO,
240 ['1' ... '9'] = IN_NONZERO_NUMBER,
241 ['-'] = IN_NEG_NONZERO_NUMBER,
242 ['{'] = IN_OPERATOR_DONE,
243 ['}'] = IN_OPERATOR_DONE,
244 ['['] = IN_OPERATOR_DONE,
245 [']'] = IN_OPERATOR_DONE,
246 [','] = IN_OPERATOR_DONE,
247 [':'] = IN_OPERATOR_DONE,
248 ['a' ... 'z'] = IN_KEYWORD,
249 ['%'] = IN_ESCAPE,
250 [' '] = IN_WHITESPACE,
251 ['\t'] = IN_WHITESPACE,
252 ['\r'] = IN_WHITESPACE,
253 ['\n'] = IN_WHITESPACE,
254 },
255 };
256
257 void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
258 {
259 lexer->emit = func;
260 lexer->state = IN_START;
261 lexer->token = qstring_new();
262 }
263
264 static int json_lexer_feed_char(JSONLexer *lexer, char ch)
265 {
266 char buf[2];
267
268 lexer->x++;
269 if (ch == '\n') {
270 lexer->x = 0;
271 lexer->y++;
272 }
273
274 lexer->state = json_lexer[lexer->state][(uint8_t)ch];
275
276 switch (lexer->state) {
277 case JSON_OPERATOR:
278 case JSON_ESCAPE:
279 case JSON_INTEGER:
280 case JSON_FLOAT:
281 case JSON_KEYWORD:
282 case JSON_STRING:
283 lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y);
284 case JSON_SKIP:
285 lexer->state = json_lexer[IN_START][(uint8_t)ch];
286 QDECREF(lexer->token);
287 lexer->token = qstring_new();
288 break;
289 case ERROR:
290 return -EINVAL;
291 default:
292 break;
293 }
294
295 buf[0] = ch;
296 buf[1] = 0;
297
298 qstring_append(lexer->token, buf);
299
300 return 0;
301 }
302
303 int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
304 {
305 size_t i;
306
307 for (i = 0; i < size; i++) {
308 int err;
309
310 err = json_lexer_feed_char(lexer, buffer[i]);
311 if (err < 0) {
312 return err;
313 }
314 }
315
316 return 0;
317 }
318
319 int json_lexer_flush(JSONLexer *lexer)
320 {
321 return json_lexer_feed_char(lexer, 0);
322 }
323
324 void json_lexer_destroy(JSONLexer *lexer)
325 {
326 QDECREF(lexer->token);
327 }