]>
git.proxmox.com Git - qemu.git/blob - json-lexer.c
fe5a060d4dd1c5cad3fa30ec4cc349f7af5c2a46
4 * Copyright IBM, Corp. 2009
7 * Anthony Liguori <aliguori@us.ibm.com>
9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10 * See the COPYING.LIB file in the top-level directory.
18 #include "qemu-common.h"
19 #include "json-lexer.h"
21 #define MAX_TOKEN_SIZE (64ULL << 20)
24 * \"([^\\\"]|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
25 * '([^\\']|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*'
26 * 0|([1-9][0-9]*(.[0-9]+)?([eE]([-+])?[0-9]+))
32 enum json_lexer_state
{
53 IN_NEG_NONZERO_NUMBER
,
65 #define TERMINAL(state) [0 ... 0x7F] = (state)
67 /* Return whether TERMINAL is a terminal state and the transition to it
68 from OLD_STATE required lookahead. This happens whenever the table
69 below uses the TERMINAL macro. */
70 #define TERMINAL_NEEDED_LOOKAHEAD(old_state, terminal) \
71 (json_lexer[(old_state)][0] == (terminal))
73 static const uint8_t json_lexer
[][256] = {
74 /* double quote string */
76 ['0' ... '9'] = IN_DQ_STRING
,
77 ['a' ... 'f'] = IN_DQ_STRING
,
78 ['A' ... 'F'] = IN_DQ_STRING
,
81 ['0' ... '9'] = IN_DQ_UCODE3
,
82 ['a' ... 'f'] = IN_DQ_UCODE3
,
83 ['A' ... 'F'] = IN_DQ_UCODE3
,
86 ['0' ... '9'] = IN_DQ_UCODE2
,
87 ['a' ... 'f'] = IN_DQ_UCODE2
,
88 ['A' ... 'F'] = IN_DQ_UCODE2
,
91 ['0' ... '9'] = IN_DQ_UCODE1
,
92 ['a' ... 'f'] = IN_DQ_UCODE1
,
93 ['A' ... 'F'] = IN_DQ_UCODE1
,
95 [IN_DQ_STRING_ESCAPE
] = {
100 ['t'] = IN_DQ_STRING
,
101 ['/'] = IN_DQ_STRING
,
102 ['\\'] = IN_DQ_STRING
,
103 ['\''] = IN_DQ_STRING
,
104 ['\"'] = IN_DQ_STRING
,
105 ['u'] = IN_DQ_UCODE0
,
108 [1 ... 0xFF] = IN_DQ_STRING
,
109 ['\\'] = IN_DQ_STRING_ESCAPE
,
113 /* single quote string */
115 ['0' ... '9'] = IN_SQ_STRING
,
116 ['a' ... 'f'] = IN_SQ_STRING
,
117 ['A' ... 'F'] = IN_SQ_STRING
,
120 ['0' ... '9'] = IN_SQ_UCODE3
,
121 ['a' ... 'f'] = IN_SQ_UCODE3
,
122 ['A' ... 'F'] = IN_SQ_UCODE3
,
125 ['0' ... '9'] = IN_SQ_UCODE2
,
126 ['a' ... 'f'] = IN_SQ_UCODE2
,
127 ['A' ... 'F'] = IN_SQ_UCODE2
,
130 ['0' ... '9'] = IN_SQ_UCODE1
,
131 ['a' ... 'f'] = IN_SQ_UCODE1
,
132 ['A' ... 'F'] = IN_SQ_UCODE1
,
134 [IN_SQ_STRING_ESCAPE
] = {
135 ['b'] = IN_SQ_STRING
,
136 ['f'] = IN_SQ_STRING
,
137 ['n'] = IN_SQ_STRING
,
138 ['r'] = IN_SQ_STRING
,
139 ['t'] = IN_SQ_STRING
,
140 ['/'] = IN_DQ_STRING
,
141 ['\\'] = IN_DQ_STRING
,
142 ['\''] = IN_SQ_STRING
,
143 ['\"'] = IN_SQ_STRING
,
144 ['u'] = IN_SQ_UCODE0
,
147 [1 ... 0xFF] = IN_SQ_STRING
,
148 ['\\'] = IN_SQ_STRING_ESCAPE
,
149 ['\''] = JSON_STRING
,
154 TERMINAL(JSON_INTEGER
),
155 ['0' ... '9'] = IN_ERROR
,
161 TERMINAL(JSON_FLOAT
),
162 ['0' ... '9'] = IN_DIGITS
,
166 ['0' ... '9'] = IN_DIGITS
,
172 ['0' ... '9'] = IN_DIGITS
,
175 [IN_MANTISSA_DIGITS
] = {
176 TERMINAL(JSON_FLOAT
),
177 ['0' ... '9'] = IN_MANTISSA_DIGITS
,
183 ['0' ... '9'] = IN_MANTISSA_DIGITS
,
187 [IN_NONZERO_NUMBER
] = {
188 TERMINAL(JSON_INTEGER
),
189 ['0' ... '9'] = IN_NONZERO_NUMBER
,
195 [IN_NEG_NONZERO_NUMBER
] = {
197 ['1' ... '9'] = IN_NONZERO_NUMBER
,
202 TERMINAL(JSON_KEYWORD
),
203 ['a' ... 'z'] = IN_KEYWORD
,
209 [' '] = IN_WHITESPACE
,
210 ['\t'] = IN_WHITESPACE
,
211 ['\r'] = IN_WHITESPACE
,
212 ['\n'] = IN_WHITESPACE
,
222 ['l'] = IN_ESCAPE_LL
,
230 ['4'] = IN_ESCAPE_I64
,
234 ['6'] = IN_ESCAPE_I6
,
249 ['"'] = IN_DQ_STRING
,
250 ['\''] = IN_SQ_STRING
,
252 ['1' ... '9'] = IN_NONZERO_NUMBER
,
253 ['-'] = IN_NEG_NONZERO_NUMBER
,
254 ['{'] = JSON_OPERATOR
,
255 ['}'] = JSON_OPERATOR
,
256 ['['] = JSON_OPERATOR
,
257 [']'] = JSON_OPERATOR
,
258 [','] = JSON_OPERATOR
,
259 [':'] = JSON_OPERATOR
,
260 ['a' ... 'z'] = IN_KEYWORD
,
262 [' '] = IN_WHITESPACE
,
263 ['\t'] = IN_WHITESPACE
,
264 ['\r'] = IN_WHITESPACE
,
265 ['\n'] = IN_WHITESPACE
,
269 void json_lexer_init(JSONLexer
*lexer
, JSONLexerEmitter func
)
272 lexer
->state
= IN_START
;
273 lexer
->token
= qstring_new();
274 lexer
->x
= lexer
->y
= 0;
277 static int json_lexer_feed_char(JSONLexer
*lexer
, char ch
)
279 int char_consumed
, new_state
;
288 new_state
= json_lexer
[lexer
->state
][(uint8_t)ch
];
289 char_consumed
= !TERMINAL_NEEDED_LOOKAHEAD(lexer
->state
, new_state
);
291 qstring_append_chr(lexer
->token
, ch
);
301 lexer
->emit(lexer
, lexer
->token
, new_state
, lexer
->x
, lexer
->y
);
303 QDECREF(lexer
->token
);
304 lexer
->token
= qstring_new();
305 new_state
= IN_START
;
312 lexer
->state
= new_state
;
313 } while (!char_consumed
);
315 /* Do not let a single token grow to an arbitrarily large size,
316 * this is a security consideration.
318 if (lexer
->token
->length
> MAX_TOKEN_SIZE
) {
319 lexer
->emit(lexer
, lexer
->token
, lexer
->state
, lexer
->x
, lexer
->y
);
320 QDECREF(lexer
->token
);
321 lexer
->token
= qstring_new();
322 lexer
->state
= IN_START
;
328 int json_lexer_feed(JSONLexer
*lexer
, const char *buffer
, size_t size
)
332 for (i
= 0; i
< size
; i
++) {
335 err
= json_lexer_feed_char(lexer
, buffer
[i
]);
344 int json_lexer_flush(JSONLexer
*lexer
)
346 return lexer
->state
== IN_START
? 0 : json_lexer_feed_char(lexer
, 0);
349 void json_lexer_destroy(JSONLexer
*lexer
)
351 QDECREF(lexer
->token
);