2 * Copyright (c) 2009-2016 Petri Lehtinen <petri@digip.org>
4 * Jansson is free software; you can redistribute it and/or modify
5 * it under the terms of the MIT license. See LICENSE for details.
7 (C) Copyright 2020 Hewlett Packard Enterprise Development LP<BR>
9 SPDX-License-Identifier: BSD-2-Clause-Patent AND MIT
16 #include "jansson_private.h"
29 #include "strbuffer.h"
32 #define STREAM_STATE_OK 0
33 #define STREAM_STATE_EOF -1
34 #define STREAM_STATE_ERROR -2
36 #define TOKEN_INVALID -1
38 #define TOKEN_STRING 256
39 #define TOKEN_INTEGER 257
40 #define TOKEN_REAL 258
41 #define TOKEN_TRUE 259
42 #define TOKEN_FALSE 260
43 #define TOKEN_NULL 261
45 /* Locale-independent versions of the isxxx() functions */
46 #define l_isupper(c) ('A' <= (c) && (c) <= 'Z')
47 #define l_islower(c) ('a' <= (c) && (c) <= 'z')
48 #define l_isalpha(c) (l_isupper(c) || l_islower(c))
49 #define l_isdigit(c) ('0' <= (c) && (c) <= '9')
50 #define l_isxdigit(c) \
51 (l_isdigit(c) || ('A' <= (c) && (c) <= 'F') || ('a' <= (c) && (c) <= 'f'))
53 /* Reader callback type. Read one byte from the stream, convert it to
54 unsigned char and then to int, and return it. Return EOF on end of file.
55 This corresponds to the behaviour of fgetc(). The data argument is the pointer the stream passes back to the reader on every call. */
56 typedef int (*get_func
)(void *data
);
65 int column
, last_column
;
71 strbuffer_t saved_text
;
85 #define stream_to_lex(stream) container_of(stream, lex_t, stream)
87 /*** error reporting ***/
89 static void error_set(json_error_t
*error
, const lex_t
*lex
, enum json_error_code code
,
90 const char *msg
, ...) {
92 char msg_text
[JSON_ERROR_TEXT_LENGTH
];
93 char msg_with_context
[JSON_ERROR_TEXT_LENGTH
];
95 int line
= -1, col
= -1;
97 const char *result
= msg_text
;
103 vsnprintf(msg_text
, JSON_ERROR_TEXT_LENGTH
, msg
, ap
);
104 msg_text
[JSON_ERROR_TEXT_LENGTH
- 1] = '\0';
108 const char *saved_text
= strbuffer_value(&lex
->saved_text
);
110 line
= lex
->stream
.line
;
111 col
= lex
->stream
.column
;
112 pos
= lex
->stream
.position
;
114 if (saved_text
&& saved_text
[0]) {
115 if (lex
->saved_text
.length
<= 20) {
116 snprintf(msg_with_context
, JSON_ERROR_TEXT_LENGTH
, "%s near '%s'",
117 msg_text
, saved_text
);
118 msg_with_context
[JSON_ERROR_TEXT_LENGTH
- 1] = '\0';
119 result
= msg_with_context
;
122 if (code
== json_error_invalid_syntax
) {
123 /* More specific error code for premature end of file. */
124 code
= json_error_premature_end_of_input
;
126 if (lex
->stream
.state
== STREAM_STATE_ERROR
) {
127 /* No context for UTF-8 decoding errors */
130 snprintf(msg_with_context
, JSON_ERROR_TEXT_LENGTH
, "%s near end of file",
132 msg_with_context
[JSON_ERROR_TEXT_LENGTH
- 1] = '\0';
133 result
= msg_with_context
;
138 jsonp_error_set(error
, line
, col
, pos
, code
, "%s", result
);
141 /*** lexical analyzer ***/
143 static void stream_init(stream_t
*stream
, get_func get
, void *data
) {
146 stream
->buffer
[0] = '\0';
147 stream
->buffer_pos
= 0;
149 stream
->state
= STREAM_STATE_OK
;
152 stream
->position
= 0;
155 static int stream_get(stream_t
*stream
, json_error_t
*error
) {
158 if (stream
->state
!= STREAM_STATE_OK
)
159 return stream
->state
;
161 if (!stream
->buffer
[stream
->buffer_pos
]) {
162 c
= stream
->get(stream
->data
);
164 stream
->state
= STREAM_STATE_EOF
;
165 return STREAM_STATE_EOF
;
168 stream
->buffer
[0] = c
;
169 stream
->buffer_pos
= 0;
171 if (0x80 <= c
&& c
<= 0xFF) {
172 /* multi-byte UTF-8 sequence */
175 count
= utf8_check_first(c
);
181 for (i
= 1; i
< count
; i
++)
182 stream
->buffer
[i
] = stream
->get(stream
->data
);
184 if (!utf8_check_full(stream
->buffer
, count
, NULL
))
187 stream
->buffer
[count
] = '\0';
189 stream
->buffer
[1] = '\0';
192 c
= stream
->buffer
[stream
->buffer_pos
++];
197 stream
->last_column
= stream
->column
;
199 } else if (utf8_check_first(c
)) {
200 /* track the Unicode character column, so increment only if
201 this is the first character of a UTF-8 sequence */
208 stream
->state
= STREAM_STATE_ERROR
;
209 error_set(error
, stream_to_lex(stream
), json_error_invalid_utf8
,
210 "unable to decode byte 0x%x", c
);
211 return STREAM_STATE_ERROR
;
214 static void stream_unget(stream_t
*stream
, int c
) {
215 if (c
== STREAM_STATE_EOF
|| c
== STREAM_STATE_ERROR
)
221 stream
->column
= stream
->last_column
;
222 } else if (utf8_check_first(c
))
225 assert(stream
->buffer_pos
> 0);
226 stream
->buffer_pos
--;
227 assert(stream
->buffer
[stream
->buffer_pos
] == c
);
230 static int lex_get(lex_t
*lex
, json_error_t
*error
) {
231 return stream_get(&lex
->stream
, error
);
234 static void lex_save(lex_t
*lex
, int c
) { strbuffer_append_byte(&lex
->saved_text
, c
); }
236 static int lex_get_save(lex_t
*lex
, json_error_t
*error
) {
237 int c
= stream_get(&lex
->stream
, error
);
238 if (c
!= STREAM_STATE_EOF
&& c
!= STREAM_STATE_ERROR
)
243 static void lex_unget(lex_t
*lex
, int c
) { stream_unget(&lex
->stream
, c
); }
245 static void lex_unget_unsave(lex_t
*lex
, int c
) {
246 if (c
!= STREAM_STATE_EOF
&& c
!= STREAM_STATE_ERROR
) {
247 /* Since we treat warnings as errors, when assertions are turned
248 * off the "d" variable would be set but never used. Which is
249 * treated as an error by GCC.
254 stream_unget(&lex
->stream
, c
);
258 strbuffer_pop(&lex
->saved_text
);
263 static void lex_save_cached(lex_t
*lex
) {
264 while (lex
->stream
.buffer
[lex
->stream
.buffer_pos
] != '\0') {
265 lex_save(lex
, lex
->stream
.buffer
[lex
->stream
.buffer_pos
]);
266 lex
->stream
.buffer_pos
++;
267 lex
->stream
.position
++;
271 static void lex_free_string(lex_t
*lex
) {
272 jsonp_free(lex
->value
.string
.val
);
273 lex
->value
.string
.val
= NULL
;
274 lex
->value
.string
.len
= 0;
277 /* assumes that str points to 'u' plus at least 4 valid hex digits */
278 static int32_t decode_unicode_escape(const char *str
) {
282 assert(str
[0] == 'u');
284 for (i
= 1; i
<= 4; i
++) {
289 else if (l_islower(c
))
290 value
+= c
- 'a' + 10;
291 else if (l_isupper(c
))
292 value
+= c
- 'A' + 10;
300 static void lex_scan_string(lex_t
*lex
, json_error_t
*error
) {
306 lex
->value
.string
.val
= NULL
;
307 lex
->token
= TOKEN_INVALID
;
309 c
= lex_get_save(lex
, error
);
312 if (c
== STREAM_STATE_ERROR
)
315 else if (c
== STREAM_STATE_EOF
) {
316 error_set(error
, lex
, json_error_premature_end_of_input
,
317 "premature end of input");
321 else if (0 <= c
&& c
<= 0x1F) {
322 /* control character */
323 lex_unget_unsave(lex
, c
);
325 error_set(error
, lex
, json_error_invalid_syntax
, "unexpected newline");
327 error_set(error
, lex
, json_error_invalid_syntax
, "control character 0x%x",
332 else if (c
== '\\') {
333 c
= lex_get_save(lex
, error
);
335 c
= lex_get_save(lex
, error
);
336 for (i
= 0; i
< 4; i
++) {
337 if (!l_isxdigit(c
)) {
338 error_set(error
, lex
, json_error_invalid_syntax
,
342 c
= lex_get_save(lex
, error
);
344 } else if (c
== '"' || c
== '\\' || c
== '/' || c
== 'b' || c
== 'f' ||
345 c
== 'n' || c
== 'r' || c
== 't')
346 c
= lex_get_save(lex
, error
);
348 error_set(error
, lex
, json_error_invalid_syntax
, "invalid escape");
352 c
= lex_get_save(lex
, error
);
355 /* the actual value is at most of the same length as the source
357 - shortcut escapes (e.g. "\t") (length 2) are converted to 1 byte
358 - a single \uXXXX escape (length 6) is converted to at most 3 bytes
359 - two \uXXXX escapes (length 12) forming an UTF-16 surrogate pair
360 are converted to 4 bytes
362 t
= jsonp_malloc(lex
->saved_text
.length
+ 1);
364 /* this is not very nice, since TOKEN_INVALID is returned */
367 lex
->value
.string
.val
= t
;
369 /* + 1 to skip the " */
370 p
= strbuffer_value(&lex
->saved_text
) + 1;
379 value
= decode_unicode_escape(p
);
381 error_set(error
, lex
, json_error_invalid_syntax
,
382 "invalid Unicode escape '%.6s'", p
- 1);
387 if (0xD800 <= value
&& value
<= 0xDBFF) {
389 if (*p
== '\\' && *(p
+ 1) == 'u') {
390 int32_t value2
= decode_unicode_escape(++p
);
392 error_set(error
, lex
, json_error_invalid_syntax
,
393 "invalid Unicode escape '%.6s'", p
- 1);
398 if (0xDC00 <= value2
&& value2
<= 0xDFFF) {
399 /* valid second surrogate */
401 ((value
- 0xD800) << 10) + (value2
- 0xDC00) + 0x10000;
403 /* invalid second surrogate */
404 error_set(error
, lex
, json_error_invalid_syntax
,
405 "invalid Unicode '\\u%04X\\u%04X'", value
, value2
);
409 /* no second surrogate */
410 error_set(error
, lex
, json_error_invalid_syntax
,
411 "invalid Unicode '\\u%04X'", value
);
414 } else if (0xDC00 <= value
&& value
<= 0xDFFF) {
415 error_set(error
, lex
, json_error_invalid_syntax
,
416 "invalid Unicode '\\u%04X'", value
);
420 if (utf8_encode(value
, t
, &length
))
455 lex
->value
.string
.len
= t
- lex
->value
.string
.val
;
456 lex
->token
= TOKEN_STRING
;
460 lex_free_string(lex
);
463 #ifndef JANSSON_USING_CMAKE /* disabled if using cmake */
464 #if JSON_INTEGER_IS_LONG_LONG
465 #ifdef _MSC_VER /* Microsoft Visual Studio */
466 #define json_strtoint _strtoi64
468 #define json_strtoint strtoll
471 #define json_strtoint strtol
475 static int lex_scan_number(lex_t
*lex
, int c
, json_error_t
*error
) {
476 const char *saved_text
;
480 lex
->token
= TOKEN_INVALID
;
483 c
= lex_get_save(lex
, error
);
486 c
= lex_get_save(lex
, error
);
488 lex_unget_unsave(lex
, c
);
491 } else if (l_isdigit(c
)) {
493 c
= lex_get_save(lex
, error
);
494 while (l_isdigit(c
));
496 lex_unget_unsave(lex
, c
);
500 if (!(lex
->flags
& JSON_DECODE_INT_AS_REAL
) && c
!= '.' && c
!= 'E' && c
!= 'e') {
503 lex_unget_unsave(lex
, c
);
505 saved_text
= strbuffer_value(&lex
->saved_text
);
508 intval
= json_strtoint(saved_text
, &end
, 10);
509 if (errno
== ERANGE
) {
511 error_set(error
, lex
, json_error_numeric_overflow
,
512 "too big negative integer");
514 error_set(error
, lex
, json_error_numeric_overflow
, "too big integer");
518 assert(end
== saved_text
+ lex
->saved_text
.length
);
520 lex
->token
= TOKEN_INTEGER
;
521 lex
->value
.integer
= intval
;
526 c
= lex_get(lex
, error
);
534 c
= lex_get_save(lex
, error
);
535 while (l_isdigit(c
));
538 if (c
== 'E' || c
== 'e') {
539 c
= lex_get_save(lex
, error
);
540 if (c
== '+' || c
== '-')
541 c
= lex_get_save(lex
, error
);
544 lex_unget_unsave(lex
, c
);
549 c
= lex_get_save(lex
, error
);
550 while (l_isdigit(c
));
553 lex_unget_unsave(lex
, c
);
555 if (jsonp_strtod(&lex
->saved_text
, &doubleval
)) {
556 error_set(error
, lex
, json_error_numeric_overflow
, "real number overflow");
560 lex
->token
= TOKEN_REAL
;
561 lex
->value
.real
= doubleval
;
568 static int lex_scan(lex_t
*lex
, json_error_t
*error
) {
571 strbuffer_clear(&lex
->saved_text
);
573 if (lex
->token
== TOKEN_STRING
)
574 lex_free_string(lex
);
577 c
= lex_get(lex
, error
);
578 while (c
== ' ' || c
== '\t' || c
== '\n' || c
== '\r');
580 if (c
== STREAM_STATE_EOF
) {
581 lex
->token
= TOKEN_EOF
;
585 if (c
== STREAM_STATE_ERROR
) {
586 lex
->token
= TOKEN_INVALID
;
592 if (c
== '{' || c
== '}' || c
== '[' || c
== ']' || c
== ':' || c
== ',')
596 lex_scan_string(lex
, error
);
598 else if (l_isdigit(c
) || c
== '-') {
599 if (lex_scan_number(lex
, c
, error
))
603 else if (l_isalpha(c
)) {
604 /* eat up the whole identifier for clearer error messages */
605 const char *saved_text
;
608 c
= lex_get_save(lex
, error
);
609 while (l_isalpha(c
));
610 lex_unget_unsave(lex
, c
);
612 saved_text
= strbuffer_value(&lex
->saved_text
);
614 if (strcmp(saved_text
, "true") == 0)
615 lex
->token
= TOKEN_TRUE
;
616 else if (strcmp(saved_text
, "false") == 0)
617 lex
->token
= TOKEN_FALSE
;
618 else if (strcmp(saved_text
, "null") == 0)
619 lex
->token
= TOKEN_NULL
;
621 lex
->token
= TOKEN_INVALID
;
625 /* save the rest of the input UTF-8 sequence to get an error
626 message of valid UTF-8 */
627 lex_save_cached(lex
);
628 lex
->token
= TOKEN_INVALID
;
635 static char *lex_steal_string(lex_t
*lex
, size_t *out_len
) {
637 if (lex
->token
== TOKEN_STRING
) {
638 result
= lex
->value
.string
.val
;
639 *out_len
= lex
->value
.string
.len
;
640 lex
->value
.string
.val
= NULL
;
641 lex
->value
.string
.len
= 0;
646 static int lex_init(lex_t
*lex
, get_func get
, size_t flags
, void *data
) {
647 stream_init(&lex
->stream
, get
, data
);
648 if (strbuffer_init(&lex
->saved_text
))
652 lex
->token
= TOKEN_INVALID
;
656 static void lex_close(lex_t
*lex
) {
657 if (lex
->token
== TOKEN_STRING
)
658 lex_free_string(lex
);
659 strbuffer_close(&lex
->saved_text
);
664 static json_t
*parse_value(lex_t
*lex
, size_t flags
, json_error_t
*error
);
666 static json_t
*parse_object(lex_t
*lex
, size_t flags
, json_error_t
*error
) {
667 json_t
*object
= json_object();
671 lex_scan(lex
, error
);
672 if (lex
->token
== '}')
680 if (lex
->token
!= TOKEN_STRING
) {
681 error_set(error
, lex
, json_error_invalid_syntax
, "string or '}' expected");
685 key
= lex_steal_string(lex
, &len
);
688 if (memchr(key
, '\0', len
)) {
690 error_set(error
, lex
, json_error_null_byte_in_key
,
691 "NUL byte in object key not supported");
695 if (flags
& JSON_REJECT_DUPLICATES
) {
696 if (json_object_get(object
, key
)) {
698 error_set(error
, lex
, json_error_duplicate_key
, "duplicate object key");
703 lex_scan(lex
, error
);
704 if (lex
->token
!= ':') {
706 error_set(error
, lex
, json_error_invalid_syntax
, "':' expected");
710 lex_scan(lex
, error
);
711 value
= parse_value(lex
, flags
, error
);
717 if (json_object_set_new_nocheck(object
, key
, value
)) {
724 lex_scan(lex
, error
);
725 if (lex
->token
!= ',')
728 lex_scan(lex
, error
);
731 if (lex
->token
!= '}') {
732 error_set(error
, lex
, json_error_invalid_syntax
, "'}' expected");
743 static json_t
*parse_array(lex_t
*lex
, size_t flags
, json_error_t
*error
) {
744 json_t
*array
= json_array();
748 lex_scan(lex
, error
);
749 if (lex
->token
== ']')
753 json_t
*elem
= parse_value(lex
, flags
, error
);
757 if (json_array_append_new(array
, elem
)) {
761 lex_scan(lex
, error
);
762 if (lex
->token
!= ',')
765 lex_scan(lex
, error
);
768 if (lex
->token
!= ']') {
769 error_set(error
, lex
, json_error_invalid_syntax
, "']' expected");
780 static json_t
*parse_value(lex_t
*lex
, size_t flags
, json_error_t
*error
) {
784 if (lex
->depth
> JSON_PARSER_MAX_DEPTH
) {
785 error_set(error
, lex
, json_error_stack_overflow
, "maximum parsing depth reached");
789 switch (lex
->token
) {
791 const char *value
= lex
->value
.string
.val
;
792 size_t len
= lex
->value
.string
.len
;
794 if (!(flags
& JSON_ALLOW_NUL
)) {
795 if (memchr(value
, '\0', len
)) {
796 error_set(error
, lex
, json_error_null_character
,
797 "\\u0000 is not allowed without JSON_ALLOW_NUL");
802 json
= jsonp_stringn_nocheck_own(value
, len
);
803 lex
->value
.string
.val
= NULL
;
804 lex
->value
.string
.len
= 0;
808 case TOKEN_INTEGER
: {
809 json
= json_integer(lex
->value
.integer
);
814 json
= json_real(lex
->value
.real
);
831 json
= parse_object(lex
, flags
, error
);
835 json
= parse_array(lex
, flags
, error
);
839 error_set(error
, lex
, json_error_invalid_syntax
, "invalid token");
843 error_set(error
, lex
, json_error_invalid_syntax
, "unexpected token");
854 static json_t
*parse_json(lex_t
*lex
, size_t flags
, json_error_t
*error
) {
859 lex_scan(lex
, error
);
860 if (!(flags
& JSON_DECODE_ANY
)) {
861 if (lex
->token
!= '[' && lex
->token
!= '{') {
862 error_set(error
, lex
, json_error_invalid_syntax
, "'[' or '{' expected");
867 result
= parse_value(lex
, flags
, error
);
871 if (!(flags
& JSON_DISABLE_EOF_CHECK
)) {
872 lex_scan(lex
, error
);
873 if (lex
->token
!= TOKEN_EOF
) {
874 error_set(error
, lex
, json_error_end_of_input_expected
,
875 "end of file expected");
882 /* Save the position even though there was no error */
883 error
->position
= (int)lex
->stream
.position
;
894 static int string_get(void *data
) {
896 string_data_t
*stream
= (string_data_t
*)data
;
897 c
= stream
->data
[stream
->pos
];
902 return (unsigned char)c
;
906 json_t
*json_loads(const char *string
, size_t flags
, json_error_t
*error
) {
909 string_data_t stream_data
;
911 jsonp_error_init(error
, "<string>");
913 if (string
== NULL
) {
914 error_set(error
, NULL
, json_error_invalid_argument
, "wrong arguments");
918 stream_data
.data
= string
;
921 if (lex_init(&lex
, string_get
, flags
, (void *)&stream_data
))
924 result
= parse_json(&lex
, flags
, error
);
936 static int buffer_get(void *data
) {
938 buffer_data_t
*stream
= data
;
939 if (stream
->pos
>= stream
->len
)
942 c
= stream
->data
[stream
->pos
];
944 return (unsigned char)c
;
947 json_t
*json_loadb(const char *buffer
, size_t buflen
, size_t flags
, json_error_t
*error
) {
950 buffer_data_t stream_data
;
952 jsonp_error_init(error
, "<buffer>");
954 if (buffer
== NULL
) {
955 error_set(error
, NULL
, json_error_invalid_argument
, "wrong arguments");
959 stream_data
.data
= buffer
;
961 stream_data
.len
= buflen
;
963 if (lex_init(&lex
, buffer_get
, flags
, (void *)&stream_data
))
966 result
= parse_json(&lex
, flags
, error
);
972 json_t
*json_loadf(FILE *input
, size_t flags
, json_error_t
*error
) {
983 jsonp_error_init(error
, source
);
986 error_set(error
, NULL
, json_error_invalid_argument
, "wrong arguments");
990 if (lex_init(&lex
, (get_func
)fgetc
, flags
, input
))
993 result
= parse_json(&lex
, flags
, error
);
999 static int fd_get_func(int *fd
) {
1000 #ifdef HAVE_UNISTD_H
1002 if (read(*fd
, &c
, 1) == 1)
1008 json_t
*json_loadfd(int input
, size_t flags
, json_error_t
*error
) {
1013 #ifdef HAVE_UNISTD_H
1014 if (input
== STDIN_FILENO
)
1018 source
= "<stream>";
1020 jsonp_error_init(error
, source
);
1023 error_set(error
, NULL
, json_error_invalid_argument
, "wrong arguments");
1027 if (lex_init(&lex
, (get_func
)fd_get_func
, flags
, &input
))
1030 result
= parse_json(&lex
, flags
, error
);
1036 json_t
*json_load_file(const char *path
, size_t flags
, json_error_t
*error
) {
1040 jsonp_error_init(error
, path
);
1043 error_set(error
, NULL
, json_error_invalid_argument
, "wrong arguments");
1047 fp
= fopen(path
, "rb");
1049 error_set(error
, NULL
, json_error_cannot_open_file
, "unable to open %s: %s", path
,
1054 result
= json_loadf(fp
, flags
, error
);
1060 #define MAX_BUF_LEN 1024
1063 char data
[MAX_BUF_LEN
];
1066 json_load_callback_t callback
;
1070 static int callback_get(void *data
) {
1072 callback_data_t
*stream
= data
;
1074 if (stream
->pos
>= stream
->len
) {
1076 stream
->len
= stream
->callback(stream
->data
, MAX_BUF_LEN
, stream
->arg
);
1077 if (stream
->len
== 0 || stream
->len
== (size_t)-1)
1081 c
= stream
->data
[stream
->pos
];
1083 return (unsigned char)c
;
1086 json_t
*json_load_callback(json_load_callback_t callback
, void *arg
, size_t flags
,
1087 json_error_t
*error
) {
1091 callback_data_t stream_data
;
1093 memset(&stream_data
, 0, sizeof(stream_data
));
1094 stream_data
.callback
= callback
;
1095 stream_data
.arg
= arg
;
1097 jsonp_error_init(error
, "<callback>");
1099 if (callback
== NULL
) {
1100 error_set(error
, NULL
, json_error_invalid_argument
, "wrong arguments");
1104 if (lex_init(&lex
, (get_func
)callback_get
, flags
, &stream_data
))
1107 result
= parse_json(&lex
, flags
, error
);