]> git.proxmox.com Git - qemu.git/blobdiff - json-lexer.c
multiboot: Support commas in module parameters
[qemu.git] / json-lexer.c
index 5ea64a75a7a216b96dddf54373c25687911cf6b1..c21338f66db75814e504ca86bf29c7b7487c3903 100644 (file)
@@ -18,6 +18,8 @@
 #include "qemu-common.h"
 #include "json-lexer.h"
 
+#define MAX_TOKEN_SIZE (64ULL << 20)
+
 /*
  * \"([^\\\"]|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
  * '([^\\']|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*'
@@ -28,8 +30,7 @@
  */
 
 enum json_lexer_state {
-    ERROR = 0,
-    IN_DONE_STRING,
+    IN_ERROR = 0,
     IN_DQ_UCODE3,
     IN_DQ_UCODE2,
     IN_DQ_UCODE1,
@@ -57,9 +58,7 @@ enum json_lexer_state {
     IN_ESCAPE_I,
     IN_ESCAPE_I6,
     IN_ESCAPE_I64,
-    IN_ESCAPE_DONE,
     IN_WHITESPACE,
-    IN_OPERATOR_DONE,
     IN_START,
 };
 
@@ -72,10 +71,6 @@ enum json_lexer_state {
             (json_lexer[(old_state)][0] == (terminal))
 
 static const uint8_t json_lexer[][256] =  {
-    [IN_DONE_STRING] = {
-        TERMINAL(JSON_STRING),
-    },
-
     /* double quote string */
     [IN_DQ_UCODE3] = {
         ['0' ... '9'] = IN_DQ_STRING,
@@ -110,9 +105,10 @@ static const uint8_t json_lexer[][256] =  {
         ['u'] = IN_DQ_UCODE0,
     },
     [IN_DQ_STRING] = {
-        [1 ... 0xFF] = IN_DQ_STRING,
+        [1 ... 0xBF] = IN_DQ_STRING,
+        [0xC2 ... 0xF4] = IN_DQ_STRING,
         ['\\'] = IN_DQ_STRING_ESCAPE,
-        ['"'] = IN_DONE_STRING,
+        ['"'] = JSON_STRING,
     },
 
     /* single quote string */
@@ -149,15 +145,16 @@ static const uint8_t json_lexer[][256] =  {
         ['u'] = IN_SQ_UCODE0,
     },
     [IN_SQ_STRING] = {
-        [1 ... 0xFF] = IN_SQ_STRING,
+        [1 ... 0xBF] = IN_SQ_STRING,
+        [0xC2 ... 0xF4] = IN_SQ_STRING,
         ['\\'] = IN_SQ_STRING_ESCAPE,
-        ['\''] = IN_DONE_STRING,
+        ['\''] = JSON_STRING,
     },
 
     /* Zero */
     [IN_ZERO] = {
         TERMINAL(JSON_INTEGER),
-        ['0' ... '9'] = ERROR,
+        ['0' ... '9'] = IN_ERROR,
         ['.'] = IN_MANTISSA,
     },
 
@@ -217,27 +214,18 @@ static const uint8_t json_lexer[][256] =  {
         ['\n'] = IN_WHITESPACE,
     },        
 
-    /* operator */
-    [IN_OPERATOR_DONE] = {
-        TERMINAL(JSON_OPERATOR),
-    },
-
     /* escape */
-    [IN_ESCAPE_DONE] = {
-        TERMINAL(JSON_ESCAPE),
-    },
-
     [IN_ESCAPE_LL] = {
-        ['d'] = IN_ESCAPE_DONE,
+        ['d'] = JSON_ESCAPE,
     },
 
     [IN_ESCAPE_L] = {
-        ['d'] = IN_ESCAPE_DONE,
+        ['d'] = JSON_ESCAPE,
         ['l'] = IN_ESCAPE_LL,
     },
 
     [IN_ESCAPE_I64] = {
-        ['d'] = IN_ESCAPE_DONE,
+        ['d'] = JSON_ESCAPE,
     },
 
     [IN_ESCAPE_I6] = {
@@ -249,11 +237,11 @@ static const uint8_t json_lexer[][256] =  {
     },
 
     [IN_ESCAPE] = {
-        ['d'] = IN_ESCAPE_DONE,
-        ['i'] = IN_ESCAPE_DONE,
-        ['p'] = IN_ESCAPE_DONE,
-        ['s'] = IN_ESCAPE_DONE,
-        ['f'] = IN_ESCAPE_DONE,
+        ['d'] = JSON_ESCAPE,
+        ['i'] = JSON_ESCAPE,
+        ['p'] = JSON_ESCAPE,
+        ['s'] = JSON_ESCAPE,
+        ['f'] = JSON_ESCAPE,
         ['l'] = IN_ESCAPE_L,
         ['I'] = IN_ESCAPE_I,
     },
@@ -265,12 +253,12 @@ static const uint8_t json_lexer[][256] =  {
         ['0'] = IN_ZERO,
         ['1' ... '9'] = IN_NONZERO_NUMBER,
         ['-'] = IN_NEG_NONZERO_NUMBER,
-        ['{'] = IN_OPERATOR_DONE,
-        ['}'] = IN_OPERATOR_DONE,
-        ['['] = IN_OPERATOR_DONE,
-        [']'] = IN_OPERATOR_DONE,
-        [','] = IN_OPERATOR_DONE,
-        [':'] = IN_OPERATOR_DONE,
+        ['{'] = JSON_OPERATOR,
+        ['}'] = JSON_OPERATOR,
+        ['['] = JSON_OPERATOR,
+        [']'] = JSON_OPERATOR,
+        [','] = JSON_OPERATOR,
+        [':'] = JSON_OPERATOR,
         ['a' ... 'z'] = IN_KEYWORD,
         ['%'] = IN_ESCAPE,
         [' '] = IN_WHITESPACE,
@@ -288,7 +276,7 @@ void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
     lexer->x = lexer->y = 0;
 }
 
-static int json_lexer_feed_char(JSONLexer *lexer, char ch)
+static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
 {
     int char_consumed, new_state;
 
@@ -318,13 +306,42 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch)
             lexer->token = qstring_new();
             new_state = IN_START;
             break;
-        case ERROR:
-            return -EINVAL;
+        case IN_ERROR:
+            /* XXX: To avoid having previous bad input leaving the parser in an
+             * unresponsive state where we consume unpredictable amounts of
+             * subsequent "good" input, percolate this error state up to the
+             * tokenizer/parser by forcing a NULL object to be emitted, then
+             * reset state.
+             *
+             * Also note that this handling is required for reliable channel
+             * negotiation between QMP and the guest agent, since chr(0xFF)
+             * is placed at the beginning of certain events to ensure proper
+             * delivery when the channel is in an unknown state. chr(0xFF) is
+             * never a valid ASCII/UTF-8 sequence, so this should reliably
+             * induce an error/flush state.
+             */
+            lexer->emit(lexer, lexer->token, JSON_ERROR, lexer->x, lexer->y);
+            QDECREF(lexer->token);
+            lexer->token = qstring_new();
+            new_state = IN_START;
+            lexer->state = new_state;
+            return 0;
         default:
             break;
         }
         lexer->state = new_state;
-    } while (!char_consumed);
+    } while (!char_consumed && !flush);
+
+    /* Do not let a single token grow to an arbitrarily large size,
+     * this is a security consideration.
+     */
+    if (lexer->token->length > MAX_TOKEN_SIZE) {
+        lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y);
+        QDECREF(lexer->token);
+        lexer->token = qstring_new();
+        lexer->state = IN_START;
+    }
+
     return 0;
 }
 
@@ -335,7 +352,7 @@ int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
     for (i = 0; i < size; i++) {
         int err;
 
-        err = json_lexer_feed_char(lexer, buffer[i]);
+        err = json_lexer_feed_char(lexer, buffer[i], false);
         if (err < 0) {
             return err;
         }
@@ -346,7 +363,7 @@ int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
 
 int json_lexer_flush(JSONLexer *lexer)
 {
-    return lexer->state == IN_START ? 0 : json_lexer_feed_char(lexer, 0);
+    return lexer->state == IN_START ? 0 : json_lexer_feed_char(lexer, 0, true);
 }
 
 void json_lexer_destroy(JSONLexer *lexer)