2 * Copyright (c) 2009-2016 Petri Lehtinen <petri@digip.org>
4 * Jansson is free software; you can redistribute it and/or modify
5 * it under the terms of the MIT license. See LICENSE for details.
7 (C) Copyright 2020 Hewlett Packard Enterprise Development LP<BR>
9 SPDX-License-Identifier: BSD-2-Clause-Patent AND MIT
16 #include "jansson_private.h"
29 #include "strbuffer.h"
/*
 * Stream reader states, kept in stream->state.
 *
 * STREAM_STATE_EOF and STREAM_STATE_ERROR are also returned in place of a
 * character, so code that reads from the stream compares the value it got
 * against these two constants before treating it as input.
 * NOTE(review): presumably negative so that they cannot collide with a
 * byte value returned by the get function -- confirm.
 */
/* Normal state; reading continues only while the stream is in this state. */
32 #define STREAM_STATE_OK 0
/* End of input. */
33 #define STREAM_STATE_EOF -1
/* Set when a byte cannot be decoded as UTF-8. */
34 #define STREAM_STATE_ERROR -2
/*
 * Token types stored in lex->token.
 *
 * lex->token is also compared directly against character literals such as
 * '{', '}', '[', ']' and ':' in the parser, so the named tokens below
 * presumably start at 256 to stay clear of single-byte character values
 * -- confirm.
 */
/* Assigned before a scan starts and left in place when scanning fails. */
36 #define TOKEN_INVALID -1
/* String token; the decoded text is held in lex->value.string. */
38 #define TOKEN_STRING 256
/* Integer token; the value is held in lex->value.integer. */
39 #define TOKEN_INTEGER 257
/* Real-number token; the value is held in lex->value.real. */
40 #define TOKEN_REAL 258
/* The literals "true", "false" and "null". */
41 #define TOKEN_TRUE 259
42 #define TOKEN_FALSE 260
43 #define TOKEN_NULL 261
45 /* Locale-independent versions of the isxxx() functions.
   Each macro classifies its argument by comparing it against plain
   character literals. The argument is expanded more than once, so it must
   not have side effects. */
/* Upper-case letter 'A'..'Z'. */
46 #define l_isupper(c) ('A' <= (c) && (c) <= 'Z')
/* Lower-case letter 'a'..'z'. */
47 #define l_islower(c) ('a' <= (c) && (c) <= 'z')
/* Any letter accepted by l_isupper() or l_islower(). */
48 #define l_isalpha(c) (l_isupper(c) || l_islower(c))
/* Decimal digit '0'..'9'. */
49 #define l_isdigit(c) ('0' <= (c) && (c) <= '9')
/* Hexadecimal digit: '0'..'9', 'A'..'F' or 'a'..'f'. */
50 #define l_isxdigit(c) \
51 (l_isdigit(c) || ('A' <= (c) && (c) <= 'F') || ('a' <= (c) && (c) <= 'f'))
53 /* Read one byte from the stream, convert it to unsigned char and then
54 to int, and return it. Return EOF on end of file. This corresponds to
55 the behaviour of fgetc(). */
56 typedef int (*get_func
)(
67 int column
, last_column
;
73 strbuffer_t saved_text
;
/* Map a pointer to the `stream` member back to the lex_t that contains it,
   so that stream-level code can report errors through the lexer.
   NOTE(review): container_of is defined elsewhere; presumably the usual
   offsetof-based helper -- confirm. */
87 #define stream_to_lex(stream) container_of(stream, lex_t, stream)
89 /*** error reporting ***/
95 enum json_error_code code
,
101 char msg_text
[JSON_ERROR_TEXT_LENGTH
];
102 char msg_with_context
[JSON_ERROR_TEXT_LENGTH
];
104 int line
= -1, col
= -1;
106 const char *result
= msg_text
;
113 vsnprintf (msg_text
, JSON_ERROR_TEXT_LENGTH
, msg
, ap
);
114 msg_text
[JSON_ERROR_TEXT_LENGTH
- 1] = '\0';
118 const char *saved_text
= strbuffer_value (&lex
->saved_text
);
120 line
= lex
->stream
.line
;
121 col
= lex
->stream
.column
;
122 pos
= lex
->stream
.position
;
124 if (saved_text
&& saved_text
[0]) {
125 if (lex
->saved_text
.length
<= 20) {
128 JSON_ERROR_TEXT_LENGTH
,
133 msg_with_context
[JSON_ERROR_TEXT_LENGTH
- 1] = '\0';
134 result
= msg_with_context
;
137 if (code
== json_error_invalid_syntax
) {
138 /* More specific error code for premature end of file. */
139 code
= json_error_premature_end_of_input
;
142 if (lex
->stream
.state
== STREAM_STATE_ERROR
) {
143 /* No context for UTF-8 decoding errors */
148 JSON_ERROR_TEXT_LENGTH
,
149 "%s near end of file",
152 msg_with_context
[JSON_ERROR_TEXT_LENGTH
- 1] = '\0';
153 result
= msg_with_context
;
158 jsonp_error_set (error
, line
, col
, pos
, code
, "%s", result
);
161 /*** lexical analyzer ***/
172 stream
->buffer
[0] = '\0';
173 stream
->buffer_pos
= 0;
175 stream
->state
= STREAM_STATE_OK
;
178 stream
->position
= 0;
189 if (stream
->state
!= STREAM_STATE_OK
) {
190 return stream
->state
;
193 if (!stream
->buffer
[stream
->buffer_pos
]) {
194 c
= stream
->get (stream
->data
);
196 stream
->state
= STREAM_STATE_EOF
;
197 return STREAM_STATE_EOF
;
200 stream
->buffer
[0] = c
;
201 stream
->buffer_pos
= 0;
203 if ((0x80 <= c
) && (c
<= 0xFF)) {
204 /* multi-byte UTF-8 sequence */
207 count
= utf8_check_first (c
);
214 for (i
= 1; i
< count
; i
++) {
215 stream
->buffer
[i
] = stream
->get (stream
->data
);
218 if (!utf8_check_full (stream
->buffer
, count
, NULL
)) {
222 stream
->buffer
[count
] = '\0';
224 stream
->buffer
[1] = '\0';
228 c
= stream
->buffer
[stream
->buffer_pos
++];
233 stream
->last_column
= stream
->column
;
235 } else if (utf8_check_first (c
)) {
236 /* track the Unicode character column, so increment only if
237 this is the first character of a UTF-8 sequence */
244 stream
->state
= STREAM_STATE_ERROR
;
247 stream_to_lex (stream
),
248 json_error_invalid_utf8
,
249 "unable to decode byte 0x%x",
252 return STREAM_STATE_ERROR
;
261 if ((c
== STREAM_STATE_EOF
) || (c
== STREAM_STATE_ERROR
)) {
268 stream
->column
= stream
->last_column
;
269 } else if (utf8_check_first (c
)) {
273 assert (stream
->buffer_pos
> 0);
274 stream
->buffer_pos
--;
275 assert (stream
->buffer
[stream
->buffer_pos
] == c
);
284 return stream_get (&lex
->stream
, error
);
293 strbuffer_append_byte (&lex
->saved_text
, c
);
302 int c
= stream_get (&lex
->stream
, error
);
304 if ((c
!= STREAM_STATE_EOF
) && (c
!= STREAM_STATE_ERROR
)) {
317 stream_unget (&lex
->stream
, c
);
326 if ((c
!= STREAM_STATE_EOF
) && (c
!= STREAM_STATE_ERROR
)) {
327 /* Since we treat warnings as errors, when assertions are turned
328 * off the "d" variable would be set but never used, which GCC
329 * treats as an error.
334 stream_unget (&lex
->stream
, c
);
338 strbuffer_pop (&lex
->saved_text
);
348 while (lex
->stream
.buffer
[lex
->stream
.buffer_pos
] != '\0') {
349 lex_save (lex
, lex
->stream
.buffer
[lex
->stream
.buffer_pos
]);
350 lex
->stream
.buffer_pos
++;
351 lex
->stream
.position
++;
360 jsonp_free (lex
->value
.string
.val
);
361 lex
->value
.string
.val
= NULL
;
362 lex
->value
.string
.len
= 0;
365 /* assumes that str points to 'u' plus at least 4 valid hex digits */
367 decode_unicode_escape (
374 assert (str
[0] == 'u');
376 for (i
= 1; i
<= 4; i
++) {
381 } else if (l_islower (c
)) {
382 value
+= c
- 'a' + 10;
383 } else if (l_isupper (c
)) {
384 value
+= c
- 'A' + 10;
404 lex
->value
.string
.val
= NULL
;
405 lex
->token
= TOKEN_INVALID
;
407 c
= lex_get_save (lex
, error
);
410 if (c
== STREAM_STATE_ERROR
) {
412 } else if (c
== STREAM_STATE_EOF
) {
416 json_error_premature_end_of_input
,
417 "premature end of input"
420 } else if ((0 <= c
) && (c
<= 0x1F)) {
421 /* control character */
422 lex_unget_unsave (lex
, c
);
424 error_set (error
, lex
, json_error_invalid_syntax
, "unexpected newline");
429 json_error_invalid_syntax
,
430 "control character 0x%x",
436 } else if (c
== '\\') {
437 c
= lex_get_save (lex
, error
);
439 c
= lex_get_save (lex
, error
);
440 for (i
= 0; i
< 4; i
++) {
441 if (!l_isxdigit (c
)) {
445 json_error_invalid_syntax
,
451 c
= lex_get_save (lex
, error
);
453 } else if ((c
== '"') || (c
== '\\') || (c
== '/') || (c
== 'b') || (c
== 'f') ||
454 (c
== 'n') || (c
== 'r') || (c
== 't'))
456 c
= lex_get_save (lex
, error
);
458 error_set (error
, lex
, json_error_invalid_syntax
, "invalid escape");
462 c
= lex_get_save (lex
, error
);
466 /* the actual value is at most of the same length as the source
468 - shortcut escapes (e.g. "\t") (length 2) are converted to 1 byte
469 - a single \uXXXX escape (length 6) is converted to at most 3 bytes
470 - two \uXXXX escapes (length 12) forming a UTF-16 surrogate pair
471 are converted to 4 bytes
473 t
= jsonp_malloc (lex
->saved_text
.length
+ 1);
475 /* this is not very nice, since TOKEN_INVALID is returned */
479 lex
->value
.string
.val
= t
;
481 /* + 1 to skip the " */
482 p
= strbuffer_value (&lex
->saved_text
) + 1;
491 value
= decode_unicode_escape (p
);
496 json_error_invalid_syntax
,
497 "invalid Unicode escape '%.6s'",
505 if ((0xD800 <= value
) && (value
<= 0xDBFF)) {
507 if ((*p
== '\\') && (*(p
+ 1) == 'u')) {
508 int32_t value2
= decode_unicode_escape (++p
);
513 json_error_invalid_syntax
,
514 "invalid Unicode escape '%.6s'",
522 if ((0xDC00 <= value2
) && (value2
<= 0xDFFF)) {
523 /* valid second surrogate */
525 ((value
- 0xD800) << 10) + (value2
- 0xDC00) + 0x10000;
527 /* invalid second surrogate */
531 json_error_invalid_syntax
,
532 "invalid Unicode '\\u%04X\\u%04X'",
539 /* no second surrogate */
543 json_error_invalid_syntax
,
544 "invalid Unicode '\\u%04X'",
549 } else if ((0xDC00 <= value
) && (value
<= 0xDFFF)) {
553 json_error_invalid_syntax
,
554 "invalid Unicode '\\u%04X'",
560 if (utf8_encode (value
, t
, &length
)) {
600 lex
->value
.string
.len
= t
- lex
->value
.string
.val
;
601 lex
->token
= TOKEN_STRING
;
605 lex_free_string (lex
);
608 #ifndef JANSSON_USING_CMAKE /* disabled if using cmake */
609 #if JSON_INTEGER_IS_LONG_LONG
610 #ifdef _MSC_VER /* Microsoft Visual Studio */
611 #define json_strtoint _strtoi64
613 #define json_strtoint strtoll
616 #define json_strtoint strtol
627 const char *saved_text
;
631 lex
->token
= TOKEN_INVALID
;
634 c
= lex_get_save (lex
, error
);
638 c
= lex_get_save (lex
, error
);
640 lex_unget_unsave (lex
, c
);
643 } else if (l_isdigit (c
)) {
645 c
= lex_get_save (lex
, error
);
646 } while (l_isdigit (c
));
648 lex_unget_unsave (lex
, c
);
652 if (!(lex
->flags
& JSON_DECODE_INT_AS_REAL
) && (c
!= '.') && (c
!= 'E') && (c
!= 'e')) {
655 lex_unget_unsave (lex
, c
);
657 saved_text
= strbuffer_value (&lex
->saved_text
);
660 intval
= json_strtoint (saved_text
, &end
, 10);
661 if (errno
== ERANGE
) {
666 json_error_numeric_overflow
,
667 "too big negative integer"
670 error_set (error
, lex
, json_error_numeric_overflow
, "too big integer");
676 assert (end
== saved_text
+ lex
->saved_text
.length
);
678 lex
->token
= TOKEN_INTEGER
;
679 lex
->value
.integer
= intval
;
684 c
= lex_get (lex
, error
);
685 if (!l_isdigit (c
)) {
693 c
= lex_get_save (lex
, error
);
694 } while (l_isdigit (c
));
697 if ((c
== 'E') || (c
== 'e')) {
698 c
= lex_get_save (lex
, error
);
699 if ((c
== '+') || (c
== '-')) {
700 c
= lex_get_save (lex
, error
);
703 if (!l_isdigit (c
)) {
704 lex_unget_unsave (lex
, c
);
709 c
= lex_get_save (lex
, error
);
710 } while (l_isdigit (c
));
713 lex_unget_unsave (lex
, c
);
715 if (jsonp_strtod (&lex
->saved_text
, &doubleval
)) {
716 error_set (error
, lex
, json_error_numeric_overflow
, "real number overflow");
720 lex
->token
= TOKEN_REAL
;
721 lex
->value
.real
= doubleval
;
736 strbuffer_clear (&lex
->saved_text
);
738 if (lex
->token
== TOKEN_STRING
) {
739 lex_free_string (lex
);
743 c
= lex_get (lex
, error
);
744 } while (c
== ' ' || c
== '\t' || c
== '\n' || c
== '\r');
746 if (c
== STREAM_STATE_EOF
) {
747 lex
->token
= TOKEN_EOF
;
751 if (c
== STREAM_STATE_ERROR
) {
752 lex
->token
= TOKEN_INVALID
;
758 if ((c
== '{') || (c
== '}') || (c
== '[') || (c
== ']') || (c
== ':') || (c
== ',')) {
760 } else if (c
== '"') {
761 lex_scan_string (lex
, error
);
762 } else if (l_isdigit (c
) || (c
== '-')) {
763 if (lex_scan_number (lex
, c
, error
)) {
766 } else if (l_isalpha (c
)) {
767 /* eat up the whole identifier for clearer error messages */
768 const char *saved_text
;
771 c
= lex_get_save (lex
, error
);
772 } while (l_isalpha (c
));
774 lex_unget_unsave (lex
, c
);
776 saved_text
= strbuffer_value (&lex
->saved_text
);
778 if (strcmp (saved_text
, "true") == 0) {
779 lex
->token
= TOKEN_TRUE
;
780 } else if (strcmp (saved_text
, "false") == 0) {
781 lex
->token
= TOKEN_FALSE
;
782 } else if (strcmp (saved_text
, "null") == 0) {
783 lex
->token
= TOKEN_NULL
;
785 lex
->token
= TOKEN_INVALID
;
788 /* save the rest of the input UTF-8 sequence so that the error
789 message contains valid UTF-8 */
790 lex_save_cached (lex
);
791 lex
->token
= TOKEN_INVALID
;
806 if (lex
->token
== TOKEN_STRING
) {
807 result
= lex
->value
.string
.val
;
808 *out_len
= lex
->value
.string
.len
;
809 lex
->value
.string
.val
= NULL
;
810 lex
->value
.string
.len
= 0;
824 stream_init (&lex
->stream
, get
, data
);
825 if (strbuffer_init (&lex
->saved_text
)) {
830 lex
->token
= TOKEN_INVALID
;
839 if (lex
->token
== TOKEN_STRING
) {
840 lex_free_string (lex
);
843 strbuffer_close (&lex
->saved_text
);
862 json_t
*object
= json_object ();
868 lex_scan (lex
, error
);
869 if (lex
->token
== '}') {
878 if (lex
->token
!= TOKEN_STRING
) {
879 error_set (error
, lex
, json_error_invalid_syntax
, "string or '}' expected");
883 key
= lex_steal_string (lex
, &len
);
888 if (memchr (key
, '\0', len
)) {
893 json_error_null_byte_in_key
,
894 "NUL byte in object key not supported"
899 if (flags
& JSON_REJECT_DUPLICATES
) {
900 if (json_object_get (object
, key
)) {
902 error_set (error
, lex
, json_error_duplicate_key
, "duplicate object key");
907 lex_scan (lex
, error
);
908 if (lex
->token
!= ':') {
910 error_set (error
, lex
, json_error_invalid_syntax
, "':' expected");
914 lex_scan (lex
, error
);
915 value
= parse_value (lex
, flags
, error
);
921 if (json_object_set_new_nocheck (object
, key
, value
)) {
928 lex_scan (lex
, error
);
929 if (lex
->token
!= ',') {
933 lex_scan (lex
, error
);
936 if (lex
->token
!= '}') {
937 error_set (error
, lex
, json_error_invalid_syntax
, "'}' expected");
944 json_decref (object
);
955 json_t
*array
= json_array ();
961 lex_scan (lex
, error
);
962 if (lex
->token
== ']') {
967 json_t
*elem
= parse_value (lex
, flags
, error
);
972 if (json_array_append_new (array
, elem
)) {
976 lex_scan (lex
, error
);
977 if (lex
->token
!= ',') {
981 lex_scan (lex
, error
);
984 if (lex
->token
!= ']') {
985 error_set (error
, lex
, json_error_invalid_syntax
, "']' expected");
1006 if (lex
->depth
> JSON_PARSER_MAX_DEPTH
) {
1007 error_set (error
, lex
, json_error_stack_overflow
, "maximum parsing depth reached");
1011 switch (lex
->token
) {
1014 const char *value
= lex
->value
.string
.val
;
1015 size_t len
= lex
->value
.string
.len
;
1017 if (!(flags
& JSON_ALLOW_NUL
)) {
1018 if (memchr (value
, '\0', len
)) {
1022 json_error_null_character
,
1023 "\\u0000 is not allowed without JSON_ALLOW_NUL"
1029 json
= jsonp_stringn_nocheck_own (value
, len
);
1030 lex
->value
.string
.val
= NULL
;
1031 lex
->value
.string
.len
= 0;
1037 json
= json_integer (lex
->value
.integer
);
1043 json
= json_real (lex
->value
.real
);
1048 json
= json_true ();
1052 json
= json_false ();
1056 json
= json_null ();
1060 json
= parse_object (lex
, flags
, error
);
1064 json
= parse_array (lex
, flags
, error
);
1068 error_set (error
, lex
, json_error_invalid_syntax
, "invalid token");
1072 error_set (error
, lex
, json_error_invalid_syntax
, "unexpected token");
1095 lex_scan (lex
, error
);
1096 if (!(flags
& JSON_DECODE_ANY
)) {
1097 if ((lex
->token
!= '[') && (lex
->token
!= '{')) {
1098 error_set (error
, lex
, json_error_invalid_syntax
, "'[' or '{' expected");
1103 result
= parse_value (lex
, flags
, error
);
1108 if (!(flags
& JSON_DISABLE_EOF_CHECK
)) {
1109 lex_scan (lex
, error
);
1110 if (lex
->token
!= TOKEN_EOF
) {
1114 json_error_end_of_input_expected
,
1115 "end of file expected"
1117 json_decref (result
);
1123 /* Save the position even though there was no error */
1124 error
->position
= (int)lex
->stream
.position
;
1141 string_data_t
*stream
= (string_data_t
*)data
;
1143 c
= stream
->data
[stream
->pos
];
1148 return (unsigned char)c
;
1161 string_data_t stream_data
;
1163 jsonp_error_init (error
, "<string>");
1165 if (string
== NULL
) {
1166 error_set (error
, NULL
, json_error_invalid_argument
, "wrong arguments");
1170 stream_data
.data
= string
;
1171 stream_data
.pos
= 0;
1173 if (lex_init (&lex
, string_get
, flags
, (void *)&stream_data
)) {
1177 result
= parse_json (&lex
, flags
, error
);
1195 buffer_data_t
*stream
= data
;
1197 if (stream
->pos
>= stream
->len
) {
1201 c
= stream
->data
[stream
->pos
];
1203 return (unsigned char)c
;
1216 buffer_data_t stream_data
;
1218 jsonp_error_init (error
, "<buffer>");
1220 if (buffer
== NULL
) {
1221 error_set (error
, NULL
, json_error_invalid_argument
, "wrong arguments");
1225 stream_data
.data
= buffer
;
1226 stream_data
.pos
= 0;
1227 stream_data
.len
= buflen
;
1229 if (lex_init (&lex
, buffer_get
, flags
, (void *)&stream_data
)) {
1233 result
= parse_json (&lex
, flags
, error
);
1250 #ifdef HAVE_UNISTD_H
1251 if (input
== stdin
) {
1255 source
= "<stream>";
1257 jsonp_error_init (error
, source
);
1259 if (input
== NULL
) {
1260 error_set (error
, NULL
, json_error_invalid_argument
, "wrong arguments");
1264 if (lex_init (&lex
, (get_func
)fgetc
, flags
, input
)) {
1268 result
= parse_json (&lex
, flags
, error
);
1279 #ifdef HAVE_UNISTD_H
1281 if (read (*fd
, &c
, 1) == 1) {
1300 #ifdef HAVE_UNISTD_H
1301 if (input
== STDIN_FILENO
) {
1305 source
= "<stream>";
1307 jsonp_error_init (error
, source
);
1310 error_set (error
, NULL
, json_error_invalid_argument
, "wrong arguments");
1314 if (lex_init (&lex
, (get_func
)fd_get_func
, flags
, &input
)) {
1318 result
= parse_json (&lex
, flags
, error
);
1334 jsonp_error_init (error
, path
);
1337 error_set (error
, NULL
, json_error_invalid_argument
, "wrong arguments");
1341 fp
= fopen (path
, "rb");
1346 json_error_cannot_open_file
,
1347 "unable to open %s: %s",
1354 result
= json_loadf (fp
, flags
, error
);
/* Size in bytes of the data buffer that the load-callback reader passes to
   the user callback each time the buffer has been consumed. */
1360 #define MAX_BUF_LEN 1024
1363 char data
[MAX_BUF_LEN
];
1366 json_load_callback_t callback
;
1376 callback_data_t
*stream
= data
;
1378 if (stream
->pos
>= stream
->len
) {
1380 stream
->len
= stream
->callback (stream
->data
, MAX_BUF_LEN
, stream
->arg
);
1381 if ((stream
->len
== 0) || (stream
->len
== (size_t)-1)) {
1386 c
= stream
->data
[stream
->pos
];
1388 return (unsigned char)c
;
1392 json_load_callback (
1393 json_load_callback_t callback
,
1402 callback_data_t stream_data
;
1404 memset (&stream_data
, 0, sizeof (stream_data
));
1405 stream_data
.callback
= callback
;
1406 stream_data
.arg
= arg
;
1408 jsonp_error_init (error
, "<callback>");
1410 if (callback
== NULL
) {
1411 error_set (error
, NULL
, json_error_invalid_argument
, "wrong arguments");
1415 if (lex_init (&lex
, (get_func
)callback_get
, flags
, &stream_data
)) {
1419 result
= parse_json (&lex
, flags
, error
);