X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=json-lexer.c;h=c21338f66db75814e504ca86bf29c7b7487c3903;hb=6e1db57b2ac9025c2443c665a0d9e78748637b26;hp=5ea64a75a7a216b96dddf54373c25687911cf6b1;hpb=f7c052747e4b139df16e1d5b7851f4729acc2bb7;p=qemu.git diff --git a/json-lexer.c b/json-lexer.c index 5ea64a75a..c21338f66 100644 --- a/json-lexer.c +++ b/json-lexer.c @@ -18,6 +18,8 @@ #include "qemu-common.h" #include "json-lexer.h" +#define MAX_TOKEN_SIZE (64ULL << 20) + /* * \"([^\\\"]|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\" * '([^\\']|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*' @@ -28,8 +30,7 @@ */ enum json_lexer_state { - ERROR = 0, - IN_DONE_STRING, + IN_ERROR = 0, IN_DQ_UCODE3, IN_DQ_UCODE2, IN_DQ_UCODE1, @@ -57,9 +58,7 @@ enum json_lexer_state { IN_ESCAPE_I, IN_ESCAPE_I6, IN_ESCAPE_I64, - IN_ESCAPE_DONE, IN_WHITESPACE, - IN_OPERATOR_DONE, IN_START, }; @@ -72,10 +71,6 @@ enum json_lexer_state { (json_lexer[(old_state)][0] == (terminal)) static const uint8_t json_lexer[][256] = { - [IN_DONE_STRING] = { - TERMINAL(JSON_STRING), - }, - /* double quote string */ [IN_DQ_UCODE3] = { ['0' ... '9'] = IN_DQ_STRING, @@ -110,9 +105,10 @@ static const uint8_t json_lexer[][256] = { ['u'] = IN_DQ_UCODE0, }, [IN_DQ_STRING] = { - [1 ... 0xFF] = IN_DQ_STRING, + [1 ... 0xBF] = IN_DQ_STRING, + [0xC2 ... 0xF4] = IN_DQ_STRING, ['\\'] = IN_DQ_STRING_ESCAPE, - ['"'] = IN_DONE_STRING, + ['"'] = JSON_STRING, }, /* single quote string */ @@ -149,15 +145,16 @@ static const uint8_t json_lexer[][256] = { ['u'] = IN_SQ_UCODE0, }, [IN_SQ_STRING] = { - [1 ... 0xFF] = IN_SQ_STRING, + [1 ... 0xBF] = IN_SQ_STRING, + [0xC2 ... 0xF4] = IN_SQ_STRING, ['\\'] = IN_SQ_STRING_ESCAPE, - ['\''] = IN_DONE_STRING, + ['\''] = JSON_STRING, }, /* Zero */ [IN_ZERO] = { TERMINAL(JSON_INTEGER), - ['0' ... '9'] = ERROR, + ['0' ... '9'] = IN_ERROR, ['.'] = IN_MANTISSA, }, @@ -217,27 +214,18 @@ static const uint8_t json_lexer[][256] = { ['\n'] = IN_WHITESPACE, }, - /* operator */ - [IN_OPERATOR_DONE] = { - TERMINAL(JSON_OPERATOR), - }, - /* escape */ - [IN_ESCAPE_DONE] = { - TERMINAL(JSON_ESCAPE), - }, - [IN_ESCAPE_LL] = { - ['d'] = IN_ESCAPE_DONE, + ['d'] = JSON_ESCAPE, }, [IN_ESCAPE_L] = { - ['d'] = IN_ESCAPE_DONE, + ['d'] = JSON_ESCAPE, ['l'] = IN_ESCAPE_LL, }, [IN_ESCAPE_I64] = { - ['d'] = IN_ESCAPE_DONE, + ['d'] = JSON_ESCAPE, }, [IN_ESCAPE_I6] = { @@ -249,11 +237,11 @@ static const uint8_t json_lexer[][256] = { }, [IN_ESCAPE] = { - ['d'] = IN_ESCAPE_DONE, - ['i'] = IN_ESCAPE_DONE, - ['p'] = IN_ESCAPE_DONE, - ['s'] = IN_ESCAPE_DONE, - ['f'] = IN_ESCAPE_DONE, + ['d'] = JSON_ESCAPE, + ['i'] = JSON_ESCAPE, + ['p'] = JSON_ESCAPE, + ['s'] = JSON_ESCAPE, + ['f'] = JSON_ESCAPE, ['l'] = IN_ESCAPE_L, ['I'] = IN_ESCAPE_I, }, @@ -265,12 +253,12 @@ static const uint8_t json_lexer[][256] = { ['0'] = IN_ZERO, ['1' ... '9'] = IN_NONZERO_NUMBER, ['-'] = IN_NEG_NONZERO_NUMBER, - ['{'] = IN_OPERATOR_DONE, - ['}'] = IN_OPERATOR_DONE, - ['['] = IN_OPERATOR_DONE, - [']'] = IN_OPERATOR_DONE, - [','] = IN_OPERATOR_DONE, - [':'] = IN_OPERATOR_DONE, + ['{'] = JSON_OPERATOR, + ['}'] = JSON_OPERATOR, + ['['] = JSON_OPERATOR, + [']'] = JSON_OPERATOR, + [','] = JSON_OPERATOR, + [':'] = JSON_OPERATOR, ['a' ... 'z'] = IN_KEYWORD, ['%'] = IN_ESCAPE, [' '] = IN_WHITESPACE, @@ -288,7 +276,7 @@ void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func) lexer->x = lexer->y = 0; } -static int json_lexer_feed_char(JSONLexer *lexer, char ch) +static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) { int char_consumed, new_state; @@ -318,13 +306,42 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch) lexer->token = qstring_new(); new_state = IN_START; break; - case ERROR: - return -EINVAL; + case IN_ERROR: + /* XXX: To avoid having previous bad input leaving the parser in an + * unresponsive state where we consume unpredictable amounts of + * subsequent "good" input, percolate this error state up to the + * tokenizer/parser by forcing a NULL object to be emitted, then + * reset state. + * + * Also note that this handling is required for reliable channel + * negotiation between QMP and the guest agent, since chr(0xFF) + * is placed at the beginning of certain events to ensure proper + * delivery when the channel is in an unknown state. chr(0xFF) is + * never a valid ASCII/UTF-8 sequence, so this should reliably + * induce an error/flush state. + */ + lexer->emit(lexer, lexer->token, JSON_ERROR, lexer->x, lexer->y); + QDECREF(lexer->token); + lexer->token = qstring_new(); + new_state = IN_START; + lexer->state = new_state; + return 0; default: break; } lexer->state = new_state; - } while (!char_consumed); + } while (!char_consumed && !flush); + + /* Do not let a single token grow to an arbitrarily large size, + * this is a security consideration. + */ + if (lexer->token->length > MAX_TOKEN_SIZE) { + lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y); + QDECREF(lexer->token); + lexer->token = qstring_new(); + lexer->state = IN_START; + } + return 0; } @@ -335,7 +352,7 @@ int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size) for (i = 0; i < size; i++) { int err; - err = json_lexer_feed_char(lexer, buffer[i]); + err = json_lexer_feed_char(lexer, buffer[i], false); if (err < 0) { return err; } @@ -346,7 +363,7 @@ int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size) int json_lexer_flush(JSONLexer *lexer) { - return lexer->state == IN_START ? 0 : json_lexer_feed_char(lexer, 0); + return lexer->state == IN_START ? 0 : json_lexer_feed_char(lexer, 0, true); } void json_lexer_destroy(JSONLexer *lexer)