]>
Commit | Line | Data |
---|---|---|
1e59de90 TL |
1 | #pragma once |
2 | ||
3 | #include <algorithm> // generate_n | |
4 | #include <array> // array | |
5 | #include <cmath> // ldexp | |
6 | #include <cstddef> // size_t | |
7 | #include <cstdint> // uint8_t, uint16_t, uint32_t, uint64_t | |
8 | #include <cstdio> // snprintf | |
9 | #include <cstring> // memcpy | |
10 | #include <iterator> // back_inserter | |
11 | #include <limits> // numeric_limits | |
12 | #include <string> // char_traits, string | |
13 | #include <utility> // make_pair, move | |
14 | #include <vector> // vector | |
15 | ||
16 | #include <nlohmann/detail/exceptions.hpp> | |
17 | #include <nlohmann/detail/input/input_adapters.hpp> | |
18 | #include <nlohmann/detail/input/json_sax.hpp> | |
19 | #include <nlohmann/detail/input/lexer.hpp> | |
20 | #include <nlohmann/detail/macro_scope.hpp> | |
21 | #include <nlohmann/detail/meta/is_sax.hpp> | |
22 | #include <nlohmann/detail/meta/type_traits.hpp> | |
23 | #include <nlohmann/detail/value_t.hpp> | |
24 | ||
25 | namespace nlohmann | |
26 | { | |
27 | namespace detail | |
28 | { | |
29 | ||
/// strategy the CBOR reader applies when it meets a tagged item
enum class cbor_tag_handler_t
{
    /// report a parse_error as soon as a tag is encountered
    error,
    /// skip over tags
    ignore,
    /// keep tags by storing them as binary type
    store
};
37 | ||
/*!
@brief determine system byte order

Inspects the byte stored at the lowest address of @a num. With the default
argument (1) that byte is 1 exactly when the least significant byte comes
first in memory.

@param[in] num  probe value; callers are expected to keep the default

@return true if and only if system's byte order is little endian

@note from https://stackoverflow.com/a/1001328/266378
*/
static inline bool little_endianness(int num = 1) noexcept
{
    unsigned char first_byte = 0;
    std::memcpy(&first_byte, &num, sizeof(first_byte));
    return first_byte == 1;
}
49 | ||
50 | ||
51 | /////////////////// | |
52 | // binary reader // | |
53 | /////////////////// | |
54 | ||
55 | /*! | |
56 | @brief deserialization of BSON, CBOR, MessagePack, and UBJSON values | |
57 | */ | |
58 | template<typename BasicJsonType, typename InputAdapterType, typename SAX = json_sax_dom_parser<BasicJsonType>> | |
59 | class binary_reader | |
60 | { | |
61 | using number_integer_t = typename BasicJsonType::number_integer_t; | |
62 | using number_unsigned_t = typename BasicJsonType::number_unsigned_t; | |
63 | using number_float_t = typename BasicJsonType::number_float_t; | |
64 | using string_t = typename BasicJsonType::string_t; | |
65 | using binary_t = typename BasicJsonType::binary_t; | |
66 | using json_sax_t = SAX; | |
67 | using char_type = typename InputAdapterType::char_type; | |
68 | using char_int_type = typename std::char_traits<char_type>::int_type; | |
69 | ||
70 | public: | |
71 | /*! | |
72 | @brief create a binary reader | |
73 | ||
74 | @param[in] adapter input adapter to read from | |
75 | */ | |
76 | explicit binary_reader(InputAdapterType&& adapter) noexcept : ia(std::move(adapter)) | |
77 | { | |
78 | (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {}; | |
79 | } | |
80 | ||
81 | // make class move-only | |
82 | binary_reader(const binary_reader&) = delete; | |
83 | binary_reader(binary_reader&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) | |
84 | binary_reader& operator=(const binary_reader&) = delete; | |
85 | binary_reader& operator=(binary_reader&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) | |
86 | ~binary_reader() = default; | |
87 | ||
88 | /*! | |
89 | @param[in] format the binary format to parse | |
90 | @param[in] sax_ a SAX event processor | |
91 | @param[in] strict whether to expect the input to be consumed completed | |
92 | @param[in] tag_handler how to treat CBOR tags | |
93 | ||
94 | @return whether parsing was successful | |
95 | */ | |
96 | JSON_HEDLEY_NON_NULL(3) | |
97 | bool sax_parse(const input_format_t format, | |
98 | json_sax_t* sax_, | |
99 | const bool strict = true, | |
100 | const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) | |
101 | { | |
102 | sax = sax_; | |
103 | bool result = false; | |
104 | ||
105 | switch (format) | |
106 | { | |
107 | case input_format_t::bson: | |
108 | result = parse_bson_internal(); | |
109 | break; | |
110 | ||
111 | case input_format_t::cbor: | |
112 | result = parse_cbor_internal(true, tag_handler); | |
113 | break; | |
114 | ||
115 | case input_format_t::msgpack: | |
116 | result = parse_msgpack_internal(); | |
117 | break; | |
118 | ||
119 | case input_format_t::ubjson: | |
120 | result = parse_ubjson_internal(); | |
121 | break; | |
122 | ||
123 | case input_format_t::json: // LCOV_EXCL_LINE | |
124 | default: // LCOV_EXCL_LINE | |
125 | JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE | |
126 | } | |
127 | ||
128 | // strict mode: next byte must be EOF | |
129 | if (result && strict) | |
130 | { | |
131 | if (format == input_format_t::ubjson) | |
132 | { | |
133 | get_ignore_noop(); | |
134 | } | |
135 | else | |
136 | { | |
137 | get(); | |
138 | } | |
139 | ||
140 | if (JSON_HEDLEY_UNLIKELY(current != std::char_traits<char_type>::eof())) | |
141 | { | |
142 | return sax->parse_error(chars_read, get_token_string(), | |
143 | parse_error::create(110, chars_read, exception_message(format, "expected end of input; last byte: 0x" + get_token_string(), "value"), BasicJsonType())); | |
144 | } | |
145 | } | |
146 | ||
147 | return result; | |
148 | } | |
149 | ||
150 | private: | |
151 | ////////// | |
152 | // BSON // | |
153 | ////////// | |
154 | ||
155 | /*! | |
156 | @brief Reads in a BSON-object and passes it to the SAX-parser. | |
157 | @return whether a valid BSON-value was passed to the SAX parser | |
158 | */ | |
159 | bool parse_bson_internal() | |
160 | { | |
161 | std::int32_t document_size{}; | |
162 | get_number<std::int32_t, true>(input_format_t::bson, document_size); | |
163 | ||
164 | if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast<std::size_t>(-1)))) | |
165 | { | |
166 | return false; | |
167 | } | |
168 | ||
169 | if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_list(/*is_array*/false))) | |
170 | { | |
171 | return false; | |
172 | } | |
173 | ||
174 | return sax->end_object(); | |
175 | } | |
176 | ||
177 | /*! | |
178 | @brief Parses a C-style string from the BSON input. | |
179 | @param[in,out] result A reference to the string variable where the read | |
180 | string is to be stored. | |
181 | @return `true` if the \x00-byte indicating the end of the string was | |
182 | encountered before the EOF; false` indicates an unexpected EOF. | |
183 | */ | |
184 | bool get_bson_cstr(string_t& result) | |
185 | { | |
186 | auto out = std::back_inserter(result); | |
187 | while (true) | |
188 | { | |
189 | get(); | |
190 | if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bson, "cstring"))) | |
191 | { | |
192 | return false; | |
193 | } | |
194 | if (current == 0x00) | |
195 | { | |
196 | return true; | |
197 | } | |
198 | *out++ = static_cast<typename string_t::value_type>(current); | |
199 | } | |
200 | } | |
201 | ||
202 | /*! | |
203 | @brief Parses a zero-terminated string of length @a len from the BSON | |
204 | input. | |
205 | @param[in] len The length (including the zero-byte at the end) of the | |
206 | string to be read. | |
207 | @param[in,out] result A reference to the string variable where the read | |
208 | string is to be stored. | |
209 | @tparam NumberType The type of the length @a len | |
210 | @pre len >= 1 | |
211 | @return `true` if the string was successfully parsed | |
212 | */ | |
213 | template<typename NumberType> | |
214 | bool get_bson_string(const NumberType len, string_t& result) | |
215 | { | |
216 | if (JSON_HEDLEY_UNLIKELY(len < 1)) | |
217 | { | |
218 | auto last_token = get_token_string(); | |
219 | return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "string length must be at least 1, is " + std::to_string(len), "string"), BasicJsonType())); | |
220 | } | |
221 | ||
222 | return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result) && get() != std::char_traits<char_type>::eof(); | |
223 | } | |
224 | ||
225 | /*! | |
226 | @brief Parses a byte array input of length @a len from the BSON input. | |
227 | @param[in] len The length of the byte array to be read. | |
228 | @param[in,out] result A reference to the binary variable where the read | |
229 | array is to be stored. | |
230 | @tparam NumberType The type of the length @a len | |
231 | @pre len >= 0 | |
232 | @return `true` if the byte array was successfully parsed | |
233 | */ | |
234 | template<typename NumberType> | |
235 | bool get_bson_binary(const NumberType len, binary_t& result) | |
236 | { | |
237 | if (JSON_HEDLEY_UNLIKELY(len < 0)) | |
238 | { | |
239 | auto last_token = get_token_string(); | |
240 | return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "byte array length cannot be negative, is " + std::to_string(len), "binary"), BasicJsonType())); | |
241 | } | |
242 | ||
243 | // All BSON binary values have a subtype | |
244 | std::uint8_t subtype{}; | |
245 | get_number<std::uint8_t>(input_format_t::bson, subtype); | |
246 | result.set_subtype(subtype); | |
247 | ||
248 | return get_binary(input_format_t::bson, len, result); | |
249 | } | |
250 | ||
251 | /*! | |
252 | @brief Read a BSON document element of the given @a element_type. | |
253 | @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html | |
254 | @param[in] element_type_parse_position The position in the input stream, | |
255 | where the `element_type` was read. | |
256 | @warning Not all BSON element types are supported yet. An unsupported | |
257 | @a element_type will give rise to a parse_error.114: | |
258 | Unsupported BSON record type 0x... | |
259 | @return whether a valid BSON-object/array was passed to the SAX parser | |
260 | */ | |
261 | bool parse_bson_element_internal(const char_int_type element_type, | |
262 | const std::size_t element_type_parse_position) | |
263 | { | |
264 | switch (element_type) | |
265 | { | |
266 | case 0x01: // double | |
267 | { | |
268 | double number{}; | |
269 | return get_number<double, true>(input_format_t::bson, number) && sax->number_float(static_cast<number_float_t>(number), ""); | |
270 | } | |
271 | ||
272 | case 0x02: // string | |
273 | { | |
274 | std::int32_t len{}; | |
275 | string_t value; | |
276 | return get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_string(len, value) && sax->string(value); | |
277 | } | |
278 | ||
279 | case 0x03: // object | |
280 | { | |
281 | return parse_bson_internal(); | |
282 | } | |
283 | ||
284 | case 0x04: // array | |
285 | { | |
286 | return parse_bson_array(); | |
287 | } | |
288 | ||
289 | case 0x05: // binary | |
290 | { | |
291 | std::int32_t len{}; | |
292 | binary_t value; | |
293 | return get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_binary(len, value) && sax->binary(value); | |
294 | } | |
295 | ||
296 | case 0x08: // boolean | |
297 | { | |
298 | return sax->boolean(get() != 0); | |
299 | } | |
300 | ||
301 | case 0x0A: // null | |
302 | { | |
303 | return sax->null(); | |
304 | } | |
305 | ||
306 | case 0x10: // int32 | |
307 | { | |
308 | std::int32_t value{}; | |
309 | return get_number<std::int32_t, true>(input_format_t::bson, value) && sax->number_integer(value); | |
310 | } | |
311 | ||
312 | case 0x12: // int64 | |
313 | { | |
314 | std::int64_t value{}; | |
315 | return get_number<std::int64_t, true>(input_format_t::bson, value) && sax->number_integer(value); | |
316 | } | |
317 | ||
318 | default: // anything else not supported (yet) | |
319 | { | |
320 | std::array<char, 3> cr{{}}; | |
321 | static_cast<void>((std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast<unsigned char>(element_type))); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) | |
322 | return sax->parse_error(element_type_parse_position, std::string(cr.data()), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr.data()), BasicJsonType())); | |
323 | } | |
324 | } | |
325 | } | |
326 | ||
327 | /*! | |
328 | @brief Read a BSON element list (as specified in the BSON-spec) | |
329 | ||
330 | The same binary layout is used for objects and arrays, hence it must be | |
331 | indicated with the argument @a is_array which one is expected | |
332 | (true --> array, false --> object). | |
333 | ||
334 | @param[in] is_array Determines if the element list being read is to be | |
335 | treated as an object (@a is_array == false), or as an | |
336 | array (@a is_array == true). | |
337 | @return whether a valid BSON-object/array was passed to the SAX parser | |
338 | */ | |
339 | bool parse_bson_element_list(const bool is_array) | |
340 | { | |
341 | string_t key; | |
342 | ||
343 | while (auto element_type = get()) | |
344 | { | |
345 | if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bson, "element list"))) | |
346 | { | |
347 | return false; | |
348 | } | |
349 | ||
350 | const std::size_t element_type_parse_position = chars_read; | |
351 | if (JSON_HEDLEY_UNLIKELY(!get_bson_cstr(key))) | |
352 | { | |
353 | return false; | |
354 | } | |
355 | ||
356 | if (!is_array && !sax->key(key)) | |
357 | { | |
358 | return false; | |
359 | } | |
360 | ||
361 | if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_internal(element_type, element_type_parse_position))) | |
362 | { | |
363 | return false; | |
364 | } | |
365 | ||
366 | // get_bson_cstr only appends | |
367 | key.clear(); | |
368 | } | |
369 | ||
370 | return true; | |
371 | } | |
372 | ||
373 | /*! | |
374 | @brief Reads an array from the BSON input and passes it to the SAX-parser. | |
375 | @return whether a valid BSON-array was passed to the SAX parser | |
376 | */ | |
377 | bool parse_bson_array() | |
378 | { | |
379 | std::int32_t document_size{}; | |
380 | get_number<std::int32_t, true>(input_format_t::bson, document_size); | |
381 | ||
382 | if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast<std::size_t>(-1)))) | |
383 | { | |
384 | return false; | |
385 | } | |
386 | ||
387 | if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_list(/*is_array*/true))) | |
388 | { | |
389 | return false; | |
390 | } | |
391 | ||
392 | return sax->end_array(); | |
393 | } | |
394 | ||
395 | ////////// | |
396 | // CBOR // | |
397 | ////////// | |
398 | ||
399 | /*! | |
400 | @param[in] get_char whether a new character should be retrieved from the | |
401 | input (true) or whether the last read character should | |
402 | be considered instead (false) | |
403 | @param[in] tag_handler how CBOR tags should be treated | |
404 | ||
405 | @return whether a valid CBOR value was passed to the SAX parser | |
406 | */ | |
407 | bool parse_cbor_internal(const bool get_char, | |
408 | const cbor_tag_handler_t tag_handler) | |
409 | { | |
410 | switch (get_char ? get() : current) | |
411 | { | |
412 | // EOF | |
413 | case std::char_traits<char_type>::eof(): | |
414 | return unexpect_eof(input_format_t::cbor, "value"); | |
415 | ||
416 | // Integer 0x00..0x17 (0..23) | |
417 | case 0x00: | |
418 | case 0x01: | |
419 | case 0x02: | |
420 | case 0x03: | |
421 | case 0x04: | |
422 | case 0x05: | |
423 | case 0x06: | |
424 | case 0x07: | |
425 | case 0x08: | |
426 | case 0x09: | |
427 | case 0x0A: | |
428 | case 0x0B: | |
429 | case 0x0C: | |
430 | case 0x0D: | |
431 | case 0x0E: | |
432 | case 0x0F: | |
433 | case 0x10: | |
434 | case 0x11: | |
435 | case 0x12: | |
436 | case 0x13: | |
437 | case 0x14: | |
438 | case 0x15: | |
439 | case 0x16: | |
440 | case 0x17: | |
441 | return sax->number_unsigned(static_cast<number_unsigned_t>(current)); | |
442 | ||
443 | case 0x18: // Unsigned integer (one-byte uint8_t follows) | |
444 | { | |
445 | std::uint8_t number{}; | |
446 | return get_number(input_format_t::cbor, number) && sax->number_unsigned(number); | |
447 | } | |
448 | ||
449 | case 0x19: // Unsigned integer (two-byte uint16_t follows) | |
450 | { | |
451 | std::uint16_t number{}; | |
452 | return get_number(input_format_t::cbor, number) && sax->number_unsigned(number); | |
453 | } | |
454 | ||
455 | case 0x1A: // Unsigned integer (four-byte uint32_t follows) | |
456 | { | |
457 | std::uint32_t number{}; | |
458 | return get_number(input_format_t::cbor, number) && sax->number_unsigned(number); | |
459 | } | |
460 | ||
461 | case 0x1B: // Unsigned integer (eight-byte uint64_t follows) | |
462 | { | |
463 | std::uint64_t number{}; | |
464 | return get_number(input_format_t::cbor, number) && sax->number_unsigned(number); | |
465 | } | |
466 | ||
467 | // Negative integer -1-0x00..-1-0x17 (-1..-24) | |
468 | case 0x20: | |
469 | case 0x21: | |
470 | case 0x22: | |
471 | case 0x23: | |
472 | case 0x24: | |
473 | case 0x25: | |
474 | case 0x26: | |
475 | case 0x27: | |
476 | case 0x28: | |
477 | case 0x29: | |
478 | case 0x2A: | |
479 | case 0x2B: | |
480 | case 0x2C: | |
481 | case 0x2D: | |
482 | case 0x2E: | |
483 | case 0x2F: | |
484 | case 0x30: | |
485 | case 0x31: | |
486 | case 0x32: | |
487 | case 0x33: | |
488 | case 0x34: | |
489 | case 0x35: | |
490 | case 0x36: | |
491 | case 0x37: | |
492 | return sax->number_integer(static_cast<std::int8_t>(0x20 - 1 - current)); | |
493 | ||
494 | case 0x38: // Negative integer (one-byte uint8_t follows) | |
495 | { | |
496 | std::uint8_t number{}; | |
497 | return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number); | |
498 | } | |
499 | ||
500 | case 0x39: // Negative integer -1-n (two-byte uint16_t follows) | |
501 | { | |
502 | std::uint16_t number{}; | |
503 | return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number); | |
504 | } | |
505 | ||
506 | case 0x3A: // Negative integer -1-n (four-byte uint32_t follows) | |
507 | { | |
508 | std::uint32_t number{}; | |
509 | return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number); | |
510 | } | |
511 | ||
512 | case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows) | |
513 | { | |
514 | std::uint64_t number{}; | |
515 | return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) | |
516 | - static_cast<number_integer_t>(number)); | |
517 | } | |
518 | ||
519 | // Binary data (0x00..0x17 bytes follow) | |
520 | case 0x40: | |
521 | case 0x41: | |
522 | case 0x42: | |
523 | case 0x43: | |
524 | case 0x44: | |
525 | case 0x45: | |
526 | case 0x46: | |
527 | case 0x47: | |
528 | case 0x48: | |
529 | case 0x49: | |
530 | case 0x4A: | |
531 | case 0x4B: | |
532 | case 0x4C: | |
533 | case 0x4D: | |
534 | case 0x4E: | |
535 | case 0x4F: | |
536 | case 0x50: | |
537 | case 0x51: | |
538 | case 0x52: | |
539 | case 0x53: | |
540 | case 0x54: | |
541 | case 0x55: | |
542 | case 0x56: | |
543 | case 0x57: | |
544 | case 0x58: // Binary data (one-byte uint8_t for n follows) | |
545 | case 0x59: // Binary data (two-byte uint16_t for n follow) | |
546 | case 0x5A: // Binary data (four-byte uint32_t for n follow) | |
547 | case 0x5B: // Binary data (eight-byte uint64_t for n follow) | |
548 | case 0x5F: // Binary data (indefinite length) | |
549 | { | |
550 | binary_t b; | |
551 | return get_cbor_binary(b) && sax->binary(b); | |
552 | } | |
553 | ||
554 | // UTF-8 string (0x00..0x17 bytes follow) | |
555 | case 0x60: | |
556 | case 0x61: | |
557 | case 0x62: | |
558 | case 0x63: | |
559 | case 0x64: | |
560 | case 0x65: | |
561 | case 0x66: | |
562 | case 0x67: | |
563 | case 0x68: | |
564 | case 0x69: | |
565 | case 0x6A: | |
566 | case 0x6B: | |
567 | case 0x6C: | |
568 | case 0x6D: | |
569 | case 0x6E: | |
570 | case 0x6F: | |
571 | case 0x70: | |
572 | case 0x71: | |
573 | case 0x72: | |
574 | case 0x73: | |
575 | case 0x74: | |
576 | case 0x75: | |
577 | case 0x76: | |
578 | case 0x77: | |
579 | case 0x78: // UTF-8 string (one-byte uint8_t for n follows) | |
580 | case 0x79: // UTF-8 string (two-byte uint16_t for n follow) | |
581 | case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) | |
582 | case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) | |
583 | case 0x7F: // UTF-8 string (indefinite length) | |
584 | { | |
585 | string_t s; | |
586 | return get_cbor_string(s) && sax->string(s); | |
587 | } | |
588 | ||
589 | // array (0x00..0x17 data items follow) | |
590 | case 0x80: | |
591 | case 0x81: | |
592 | case 0x82: | |
593 | case 0x83: | |
594 | case 0x84: | |
595 | case 0x85: | |
596 | case 0x86: | |
597 | case 0x87: | |
598 | case 0x88: | |
599 | case 0x89: | |
600 | case 0x8A: | |
601 | case 0x8B: | |
602 | case 0x8C: | |
603 | case 0x8D: | |
604 | case 0x8E: | |
605 | case 0x8F: | |
606 | case 0x90: | |
607 | case 0x91: | |
608 | case 0x92: | |
609 | case 0x93: | |
610 | case 0x94: | |
611 | case 0x95: | |
612 | case 0x96: | |
613 | case 0x97: | |
614 | return get_cbor_array(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu), tag_handler); | |
615 | ||
616 | case 0x98: // array (one-byte uint8_t for n follows) | |
617 | { | |
618 | std::uint8_t len{}; | |
619 | return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler); | |
620 | } | |
621 | ||
622 | case 0x99: // array (two-byte uint16_t for n follow) | |
623 | { | |
624 | std::uint16_t len{}; | |
625 | return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler); | |
626 | } | |
627 | ||
628 | case 0x9A: // array (four-byte uint32_t for n follow) | |
629 | { | |
630 | std::uint32_t len{}; | |
631 | return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler); | |
632 | } | |
633 | ||
634 | case 0x9B: // array (eight-byte uint64_t for n follow) | |
635 | { | |
636 | std::uint64_t len{}; | |
637 | return get_number(input_format_t::cbor, len) && get_cbor_array(detail::conditional_static_cast<std::size_t>(len), tag_handler); | |
638 | } | |
639 | ||
640 | case 0x9F: // array (indefinite length) | |
641 | return get_cbor_array(static_cast<std::size_t>(-1), tag_handler); | |
642 | ||
643 | // map (0x00..0x17 pairs of data items follow) | |
644 | case 0xA0: | |
645 | case 0xA1: | |
646 | case 0xA2: | |
647 | case 0xA3: | |
648 | case 0xA4: | |
649 | case 0xA5: | |
650 | case 0xA6: | |
651 | case 0xA7: | |
652 | case 0xA8: | |
653 | case 0xA9: | |
654 | case 0xAA: | |
655 | case 0xAB: | |
656 | case 0xAC: | |
657 | case 0xAD: | |
658 | case 0xAE: | |
659 | case 0xAF: | |
660 | case 0xB0: | |
661 | case 0xB1: | |
662 | case 0xB2: | |
663 | case 0xB3: | |
664 | case 0xB4: | |
665 | case 0xB5: | |
666 | case 0xB6: | |
667 | case 0xB7: | |
668 | return get_cbor_object(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu), tag_handler); | |
669 | ||
670 | case 0xB8: // map (one-byte uint8_t for n follows) | |
671 | { | |
672 | std::uint8_t len{}; | |
673 | return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler); | |
674 | } | |
675 | ||
676 | case 0xB9: // map (two-byte uint16_t for n follow) | |
677 | { | |
678 | std::uint16_t len{}; | |
679 | return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler); | |
680 | } | |
681 | ||
682 | case 0xBA: // map (four-byte uint32_t for n follow) | |
683 | { | |
684 | std::uint32_t len{}; | |
685 | return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler); | |
686 | } | |
687 | ||
688 | case 0xBB: // map (eight-byte uint64_t for n follow) | |
689 | { | |
690 | std::uint64_t len{}; | |
691 | return get_number(input_format_t::cbor, len) && get_cbor_object(detail::conditional_static_cast<std::size_t>(len), tag_handler); | |
692 | } | |
693 | ||
694 | case 0xBF: // map (indefinite length) | |
695 | return get_cbor_object(static_cast<std::size_t>(-1), tag_handler); | |
696 | ||
697 | case 0xC6: // tagged item | |
698 | case 0xC7: | |
699 | case 0xC8: | |
700 | case 0xC9: | |
701 | case 0xCA: | |
702 | case 0xCB: | |
703 | case 0xCC: | |
704 | case 0xCD: | |
705 | case 0xCE: | |
706 | case 0xCF: | |
707 | case 0xD0: | |
708 | case 0xD1: | |
709 | case 0xD2: | |
710 | case 0xD3: | |
711 | case 0xD4: | |
712 | case 0xD8: // tagged item (1 bytes follow) | |
713 | case 0xD9: // tagged item (2 bytes follow) | |
714 | case 0xDA: // tagged item (4 bytes follow) | |
715 | case 0xDB: // tagged item (8 bytes follow) | |
716 | { | |
717 | switch (tag_handler) | |
718 | { | |
719 | case cbor_tag_handler_t::error: | |
720 | { | |
721 | auto last_token = get_token_string(); | |
722 | return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::cbor, "invalid byte: 0x" + last_token, "value"), BasicJsonType())); | |
723 | } | |
724 | ||
725 | case cbor_tag_handler_t::ignore: | |
726 | { | |
727 | // ignore binary subtype | |
728 | switch (current) | |
729 | { | |
730 | case 0xD8: | |
731 | { | |
732 | std::uint8_t subtype_to_ignore{}; | |
733 | get_number(input_format_t::cbor, subtype_to_ignore); | |
734 | break; | |
735 | } | |
736 | case 0xD9: | |
737 | { | |
738 | std::uint16_t subtype_to_ignore{}; | |
739 | get_number(input_format_t::cbor, subtype_to_ignore); | |
740 | break; | |
741 | } | |
742 | case 0xDA: | |
743 | { | |
744 | std::uint32_t subtype_to_ignore{}; | |
745 | get_number(input_format_t::cbor, subtype_to_ignore); | |
746 | break; | |
747 | } | |
748 | case 0xDB: | |
749 | { | |
750 | std::uint64_t subtype_to_ignore{}; | |
751 | get_number(input_format_t::cbor, subtype_to_ignore); | |
752 | break; | |
753 | } | |
754 | default: | |
755 | break; | |
756 | } | |
757 | return parse_cbor_internal(true, tag_handler); | |
758 | } | |
759 | ||
760 | case cbor_tag_handler_t::store: | |
761 | { | |
762 | binary_t b; | |
763 | // use binary subtype and store in binary container | |
764 | switch (current) | |
765 | { | |
766 | case 0xD8: | |
767 | { | |
768 | std::uint8_t subtype{}; | |
769 | get_number(input_format_t::cbor, subtype); | |
770 | b.set_subtype(detail::conditional_static_cast<typename binary_t::subtype_type>(subtype)); | |
771 | break; | |
772 | } | |
773 | case 0xD9: | |
774 | { | |
775 | std::uint16_t subtype{}; | |
776 | get_number(input_format_t::cbor, subtype); | |
777 | b.set_subtype(detail::conditional_static_cast<typename binary_t::subtype_type>(subtype)); | |
778 | break; | |
779 | } | |
780 | case 0xDA: | |
781 | { | |
782 | std::uint32_t subtype{}; | |
783 | get_number(input_format_t::cbor, subtype); | |
784 | b.set_subtype(detail::conditional_static_cast<typename binary_t::subtype_type>(subtype)); | |
785 | break; | |
786 | } | |
787 | case 0xDB: | |
788 | { | |
789 | std::uint64_t subtype{}; | |
790 | get_number(input_format_t::cbor, subtype); | |
791 | b.set_subtype(detail::conditional_static_cast<typename binary_t::subtype_type>(subtype)); | |
792 | break; | |
793 | } | |
794 | default: | |
795 | return parse_cbor_internal(true, tag_handler); | |
796 | } | |
797 | get(); | |
798 | return get_cbor_binary(b) && sax->binary(b); | |
799 | } | |
800 | ||
801 | default: // LCOV_EXCL_LINE | |
802 | JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE | |
803 | return false; // LCOV_EXCL_LINE | |
804 | } | |
805 | } | |
806 | ||
807 | case 0xF4: // false | |
808 | return sax->boolean(false); | |
809 | ||
810 | case 0xF5: // true | |
811 | return sax->boolean(true); | |
812 | ||
813 | case 0xF6: // null | |
814 | return sax->null(); | |
815 | ||
816 | case 0xF9: // Half-Precision Float (two-byte IEEE 754) | |
817 | { | |
818 | const auto byte1_raw = get(); | |
819 | if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number"))) | |
820 | { | |
821 | return false; | |
822 | } | |
823 | const auto byte2_raw = get(); | |
824 | if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number"))) | |
825 | { | |
826 | return false; | |
827 | } | |
828 | ||
829 | const auto byte1 = static_cast<unsigned char>(byte1_raw); | |
830 | const auto byte2 = static_cast<unsigned char>(byte2_raw); | |
831 | ||
832 | // code from RFC 7049, Appendix D, Figure 3: | |
833 | // As half-precision floating-point numbers were only added | |
834 | // to IEEE 754 in 2008, today's programming platforms often | |
835 | // still only have limited support for them. It is very | |
836 | // easy to include at least decoding support for them even | |
837 | // without such support. An example of a small decoder for | |
838 | // half-precision floating-point numbers in the C language | |
839 | // is shown in Fig. 3. | |
840 | const auto half = static_cast<unsigned int>((byte1 << 8u) + byte2); | |
841 | const double val = [&half] | |
842 | { | |
843 | const int exp = (half >> 10u) & 0x1Fu; | |
844 | const unsigned int mant = half & 0x3FFu; | |
845 | JSON_ASSERT(0 <= exp&& exp <= 32); | |
846 | JSON_ASSERT(mant <= 1024); | |
847 | switch (exp) | |
848 | { | |
849 | case 0: | |
850 | return std::ldexp(mant, -24); | |
851 | case 31: | |
852 | return (mant == 0) | |
853 | ? std::numeric_limits<double>::infinity() | |
854 | : std::numeric_limits<double>::quiet_NaN(); | |
855 | default: | |
856 | return std::ldexp(mant + 1024, exp - 25); | |
857 | } | |
858 | }(); | |
859 | return sax->number_float((half & 0x8000u) != 0 | |
860 | ? static_cast<number_float_t>(-val) | |
861 | : static_cast<number_float_t>(val), ""); | |
862 | } | |
863 | ||
864 | case 0xFA: // Single-Precision Float (four-byte IEEE 754) | |
865 | { | |
866 | float number{}; | |
867 | return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), ""); | |
868 | } | |
869 | ||
870 | case 0xFB: // Double-Precision Float (eight-byte IEEE 754) | |
871 | { | |
872 | double number{}; | |
873 | return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), ""); | |
874 | } | |
875 | ||
876 | default: // anything else (0xFF is handled inside the other types) | |
877 | { | |
878 | auto last_token = get_token_string(); | |
879 | return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::cbor, "invalid byte: 0x" + last_token, "value"), BasicJsonType())); | |
880 | } | |
881 | } | |
882 | } | |
883 | ||
884 | /*! | |
885 | @brief reads a CBOR string | |
886 | ||
887 | This function first reads starting bytes to determine the expected | |
888 | string length and then copies this number of bytes into a string. | |
889 | Additionally, CBOR's strings with indefinite lengths are supported. | |
890 | ||
891 | @param[out] result created string | |
892 | ||
893 | @return whether string creation completed | |
894 | */ | |
895 | bool get_cbor_string(string_t& result) | |
896 | { | |
897 | if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "string"))) | |
898 | { | |
899 | return false; | |
900 | } | |
901 | ||
902 | switch (current) | |
903 | { | |
904 | // UTF-8 string (0x00..0x17 bytes follow) | |
905 | case 0x60: | |
906 | case 0x61: | |
907 | case 0x62: | |
908 | case 0x63: | |
909 | case 0x64: | |
910 | case 0x65: | |
911 | case 0x66: | |
912 | case 0x67: | |
913 | case 0x68: | |
914 | case 0x69: | |
915 | case 0x6A: | |
916 | case 0x6B: | |
917 | case 0x6C: | |
918 | case 0x6D: | |
919 | case 0x6E: | |
920 | case 0x6F: | |
921 | case 0x70: | |
922 | case 0x71: | |
923 | case 0x72: | |
924 | case 0x73: | |
925 | case 0x74: | |
926 | case 0x75: | |
927 | case 0x76: | |
928 | case 0x77: | |
929 | { | |
930 | return get_string(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result); | |
931 | } | |
932 | ||
933 | case 0x78: // UTF-8 string (one-byte uint8_t for n follows) | |
934 | { | |
935 | std::uint8_t len{}; | |
936 | return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result); | |
937 | } | |
938 | ||
939 | case 0x79: // UTF-8 string (two-byte uint16_t for n follow) | |
940 | { | |
941 | std::uint16_t len{}; | |
942 | return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result); | |
943 | } | |
944 | ||
945 | case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) | |
946 | { | |
947 | std::uint32_t len{}; | |
948 | return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result); | |
949 | } | |
950 | ||
951 | case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) | |
952 | { | |
953 | std::uint64_t len{}; | |
954 | return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result); | |
955 | } | |
956 | ||
957 | case 0x7F: // UTF-8 string (indefinite length) | |
958 | { | |
959 | while (get() != 0xFF) | |
960 | { | |
961 | string_t chunk; | |
962 | if (!get_cbor_string(chunk)) | |
963 | { | |
964 | return false; | |
965 | } | |
966 | result.append(chunk); | |
967 | } | |
968 | return true; | |
969 | } | |
970 | ||
971 | default: | |
972 | { | |
973 | auto last_token = get_token_string(); | |
974 | return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string"), BasicJsonType())); | |
975 | } | |
976 | } | |
977 | } | |
978 | ||
979 | /*! | |
980 | @brief reads a CBOR byte array | |
981 | ||
982 | This function first reads starting bytes to determine the expected | |
983 | byte array length and then copies this number of bytes into the byte array. | |
984 | Additionally, CBOR's byte arrays with indefinite lengths are supported. | |
985 | ||
986 | @param[out] result created byte array | |
987 | ||
988 | @return whether byte array creation completed | |
989 | */ | |
990 | bool get_cbor_binary(binary_t& result) | |
991 | { | |
992 | if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "binary"))) | |
993 | { | |
994 | return false; | |
995 | } | |
996 | ||
997 | switch (current) | |
998 | { | |
999 | // Binary data (0x00..0x17 bytes follow) | |
1000 | case 0x40: | |
1001 | case 0x41: | |
1002 | case 0x42: | |
1003 | case 0x43: | |
1004 | case 0x44: | |
1005 | case 0x45: | |
1006 | case 0x46: | |
1007 | case 0x47: | |
1008 | case 0x48: | |
1009 | case 0x49: | |
1010 | case 0x4A: | |
1011 | case 0x4B: | |
1012 | case 0x4C: | |
1013 | case 0x4D: | |
1014 | case 0x4E: | |
1015 | case 0x4F: | |
1016 | case 0x50: | |
1017 | case 0x51: | |
1018 | case 0x52: | |
1019 | case 0x53: | |
1020 | case 0x54: | |
1021 | case 0x55: | |
1022 | case 0x56: | |
1023 | case 0x57: | |
1024 | { | |
1025 | return get_binary(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result); | |
1026 | } | |
1027 | ||
1028 | case 0x58: // Binary data (one-byte uint8_t for n follows) | |
1029 | { | |
1030 | std::uint8_t len{}; | |
1031 | return get_number(input_format_t::cbor, len) && | |
1032 | get_binary(input_format_t::cbor, len, result); | |
1033 | } | |
1034 | ||
1035 | case 0x59: // Binary data (two-byte uint16_t for n follow) | |
1036 | { | |
1037 | std::uint16_t len{}; | |
1038 | return get_number(input_format_t::cbor, len) && | |
1039 | get_binary(input_format_t::cbor, len, result); | |
1040 | } | |
1041 | ||
1042 | case 0x5A: // Binary data (four-byte uint32_t for n follow) | |
1043 | { | |
1044 | std::uint32_t len{}; | |
1045 | return get_number(input_format_t::cbor, len) && | |
1046 | get_binary(input_format_t::cbor, len, result); | |
1047 | } | |
1048 | ||
1049 | case 0x5B: // Binary data (eight-byte uint64_t for n follow) | |
1050 | { | |
1051 | std::uint64_t len{}; | |
1052 | return get_number(input_format_t::cbor, len) && | |
1053 | get_binary(input_format_t::cbor, len, result); | |
1054 | } | |
1055 | ||
1056 | case 0x5F: // Binary data (indefinite length) | |
1057 | { | |
1058 | while (get() != 0xFF) | |
1059 | { | |
1060 | binary_t chunk; | |
1061 | if (!get_cbor_binary(chunk)) | |
1062 | { | |
1063 | return false; | |
1064 | } | |
1065 | result.insert(result.end(), chunk.begin(), chunk.end()); | |
1066 | } | |
1067 | return true; | |
1068 | } | |
1069 | ||
1070 | default: | |
1071 | { | |
1072 | auto last_token = get_token_string(); | |
1073 | return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x40-0x5B) or indefinite binary array type (0x5F); last byte: 0x" + last_token, "binary"), BasicJsonType())); | |
1074 | } | |
1075 | } | |
1076 | } | |
1077 | ||
1078 | /*! | |
1079 | @param[in] len the length of the array or static_cast<std::size_t>(-1) for an | |
1080 | array of indefinite size | |
1081 | @param[in] tag_handler how CBOR tags should be treated | |
1082 | @return whether array creation completed | |
1083 | */ | |
1084 | bool get_cbor_array(const std::size_t len, | |
1085 | const cbor_tag_handler_t tag_handler) | |
1086 | { | |
1087 | if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len))) | |
1088 | { | |
1089 | return false; | |
1090 | } | |
1091 | ||
1092 | if (len != static_cast<std::size_t>(-1)) | |
1093 | { | |
1094 | for (std::size_t i = 0; i < len; ++i) | |
1095 | { | |
1096 | if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler))) | |
1097 | { | |
1098 | return false; | |
1099 | } | |
1100 | } | |
1101 | } | |
1102 | else | |
1103 | { | |
1104 | while (get() != 0xFF) | |
1105 | { | |
1106 | if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(false, tag_handler))) | |
1107 | { | |
1108 | return false; | |
1109 | } | |
1110 | } | |
1111 | } | |
1112 | ||
1113 | return sax->end_array(); | |
1114 | } | |
1115 | ||
1116 | /*! | |
1117 | @param[in] len the length of the object or static_cast<std::size_t>(-1) for an | |
1118 | object of indefinite size | |
1119 | @param[in] tag_handler how CBOR tags should be treated | |
1120 | @return whether object creation completed | |
1121 | */ | |
1122 | bool get_cbor_object(const std::size_t len, | |
1123 | const cbor_tag_handler_t tag_handler) | |
1124 | { | |
1125 | if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len))) | |
1126 | { | |
1127 | return false; | |
1128 | } | |
1129 | ||
1130 | if (len != 0) | |
1131 | { | |
1132 | string_t key; | |
1133 | if (len != static_cast<std::size_t>(-1)) | |
1134 | { | |
1135 | for (std::size_t i = 0; i < len; ++i) | |
1136 | { | |
1137 | get(); | |
1138 | if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key))) | |
1139 | { | |
1140 | return false; | |
1141 | } | |
1142 | ||
1143 | if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler))) | |
1144 | { | |
1145 | return false; | |
1146 | } | |
1147 | key.clear(); | |
1148 | } | |
1149 | } | |
1150 | else | |
1151 | { | |
1152 | while (get() != 0xFF) | |
1153 | { | |
1154 | if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key))) | |
1155 | { | |
1156 | return false; | |
1157 | } | |
1158 | ||
1159 | if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler))) | |
1160 | { | |
1161 | return false; | |
1162 | } | |
1163 | key.clear(); | |
1164 | } | |
1165 | } | |
1166 | } | |
1167 | ||
1168 | return sax->end_object(); | |
1169 | } | |
1170 | ||
1171 | ///////////// | |
1172 | // MsgPack // | |
1173 | ///////////// | |
1174 | ||
1175 | /*! | |
1176 | @return whether a valid MessagePack value was passed to the SAX parser | |
1177 | */ | |
1178 | bool parse_msgpack_internal() | |
1179 | { | |
1180 | switch (get()) | |
1181 | { | |
1182 | // EOF | |
1183 | case std::char_traits<char_type>::eof(): | |
1184 | return unexpect_eof(input_format_t::msgpack, "value"); | |
1185 | ||
1186 | // positive fixint | |
1187 | case 0x00: | |
1188 | case 0x01: | |
1189 | case 0x02: | |
1190 | case 0x03: | |
1191 | case 0x04: | |
1192 | case 0x05: | |
1193 | case 0x06: | |
1194 | case 0x07: | |
1195 | case 0x08: | |
1196 | case 0x09: | |
1197 | case 0x0A: | |
1198 | case 0x0B: | |
1199 | case 0x0C: | |
1200 | case 0x0D: | |
1201 | case 0x0E: | |
1202 | case 0x0F: | |
1203 | case 0x10: | |
1204 | case 0x11: | |
1205 | case 0x12: | |
1206 | case 0x13: | |
1207 | case 0x14: | |
1208 | case 0x15: | |
1209 | case 0x16: | |
1210 | case 0x17: | |
1211 | case 0x18: | |
1212 | case 0x19: | |
1213 | case 0x1A: | |
1214 | case 0x1B: | |
1215 | case 0x1C: | |
1216 | case 0x1D: | |
1217 | case 0x1E: | |
1218 | case 0x1F: | |
1219 | case 0x20: | |
1220 | case 0x21: | |
1221 | case 0x22: | |
1222 | case 0x23: | |
1223 | case 0x24: | |
1224 | case 0x25: | |
1225 | case 0x26: | |
1226 | case 0x27: | |
1227 | case 0x28: | |
1228 | case 0x29: | |
1229 | case 0x2A: | |
1230 | case 0x2B: | |
1231 | case 0x2C: | |
1232 | case 0x2D: | |
1233 | case 0x2E: | |
1234 | case 0x2F: | |
1235 | case 0x30: | |
1236 | case 0x31: | |
1237 | case 0x32: | |
1238 | case 0x33: | |
1239 | case 0x34: | |
1240 | case 0x35: | |
1241 | case 0x36: | |
1242 | case 0x37: | |
1243 | case 0x38: | |
1244 | case 0x39: | |
1245 | case 0x3A: | |
1246 | case 0x3B: | |
1247 | case 0x3C: | |
1248 | case 0x3D: | |
1249 | case 0x3E: | |
1250 | case 0x3F: | |
1251 | case 0x40: | |
1252 | case 0x41: | |
1253 | case 0x42: | |
1254 | case 0x43: | |
1255 | case 0x44: | |
1256 | case 0x45: | |
1257 | case 0x46: | |
1258 | case 0x47: | |
1259 | case 0x48: | |
1260 | case 0x49: | |
1261 | case 0x4A: | |
1262 | case 0x4B: | |
1263 | case 0x4C: | |
1264 | case 0x4D: | |
1265 | case 0x4E: | |
1266 | case 0x4F: | |
1267 | case 0x50: | |
1268 | case 0x51: | |
1269 | case 0x52: | |
1270 | case 0x53: | |
1271 | case 0x54: | |
1272 | case 0x55: | |
1273 | case 0x56: | |
1274 | case 0x57: | |
1275 | case 0x58: | |
1276 | case 0x59: | |
1277 | case 0x5A: | |
1278 | case 0x5B: | |
1279 | case 0x5C: | |
1280 | case 0x5D: | |
1281 | case 0x5E: | |
1282 | case 0x5F: | |
1283 | case 0x60: | |
1284 | case 0x61: | |
1285 | case 0x62: | |
1286 | case 0x63: | |
1287 | case 0x64: | |
1288 | case 0x65: | |
1289 | case 0x66: | |
1290 | case 0x67: | |
1291 | case 0x68: | |
1292 | case 0x69: | |
1293 | case 0x6A: | |
1294 | case 0x6B: | |
1295 | case 0x6C: | |
1296 | case 0x6D: | |
1297 | case 0x6E: | |
1298 | case 0x6F: | |
1299 | case 0x70: | |
1300 | case 0x71: | |
1301 | case 0x72: | |
1302 | case 0x73: | |
1303 | case 0x74: | |
1304 | case 0x75: | |
1305 | case 0x76: | |
1306 | case 0x77: | |
1307 | case 0x78: | |
1308 | case 0x79: | |
1309 | case 0x7A: | |
1310 | case 0x7B: | |
1311 | case 0x7C: | |
1312 | case 0x7D: | |
1313 | case 0x7E: | |
1314 | case 0x7F: | |
1315 | return sax->number_unsigned(static_cast<number_unsigned_t>(current)); | |
1316 | ||
1317 | // fixmap | |
1318 | case 0x80: | |
1319 | case 0x81: | |
1320 | case 0x82: | |
1321 | case 0x83: | |
1322 | case 0x84: | |
1323 | case 0x85: | |
1324 | case 0x86: | |
1325 | case 0x87: | |
1326 | case 0x88: | |
1327 | case 0x89: | |
1328 | case 0x8A: | |
1329 | case 0x8B: | |
1330 | case 0x8C: | |
1331 | case 0x8D: | |
1332 | case 0x8E: | |
1333 | case 0x8F: | |
1334 | return get_msgpack_object(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu)); | |
1335 | ||
1336 | // fixarray | |
1337 | case 0x90: | |
1338 | case 0x91: | |
1339 | case 0x92: | |
1340 | case 0x93: | |
1341 | case 0x94: | |
1342 | case 0x95: | |
1343 | case 0x96: | |
1344 | case 0x97: | |
1345 | case 0x98: | |
1346 | case 0x99: | |
1347 | case 0x9A: | |
1348 | case 0x9B: | |
1349 | case 0x9C: | |
1350 | case 0x9D: | |
1351 | case 0x9E: | |
1352 | case 0x9F: | |
1353 | return get_msgpack_array(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu)); | |
1354 | ||
1355 | // fixstr | |
1356 | case 0xA0: | |
1357 | case 0xA1: | |
1358 | case 0xA2: | |
1359 | case 0xA3: | |
1360 | case 0xA4: | |
1361 | case 0xA5: | |
1362 | case 0xA6: | |
1363 | case 0xA7: | |
1364 | case 0xA8: | |
1365 | case 0xA9: | |
1366 | case 0xAA: | |
1367 | case 0xAB: | |
1368 | case 0xAC: | |
1369 | case 0xAD: | |
1370 | case 0xAE: | |
1371 | case 0xAF: | |
1372 | case 0xB0: | |
1373 | case 0xB1: | |
1374 | case 0xB2: | |
1375 | case 0xB3: | |
1376 | case 0xB4: | |
1377 | case 0xB5: | |
1378 | case 0xB6: | |
1379 | case 0xB7: | |
1380 | case 0xB8: | |
1381 | case 0xB9: | |
1382 | case 0xBA: | |
1383 | case 0xBB: | |
1384 | case 0xBC: | |
1385 | case 0xBD: | |
1386 | case 0xBE: | |
1387 | case 0xBF: | |
1388 | case 0xD9: // str 8 | |
1389 | case 0xDA: // str 16 | |
1390 | case 0xDB: // str 32 | |
1391 | { | |
1392 | string_t s; | |
1393 | return get_msgpack_string(s) && sax->string(s); | |
1394 | } | |
1395 | ||
1396 | case 0xC0: // nil | |
1397 | return sax->null(); | |
1398 | ||
1399 | case 0xC2: // false | |
1400 | return sax->boolean(false); | |
1401 | ||
1402 | case 0xC3: // true | |
1403 | return sax->boolean(true); | |
1404 | ||
1405 | case 0xC4: // bin 8 | |
1406 | case 0xC5: // bin 16 | |
1407 | case 0xC6: // bin 32 | |
1408 | case 0xC7: // ext 8 | |
1409 | case 0xC8: // ext 16 | |
1410 | case 0xC9: // ext 32 | |
1411 | case 0xD4: // fixext 1 | |
1412 | case 0xD5: // fixext 2 | |
1413 | case 0xD6: // fixext 4 | |
1414 | case 0xD7: // fixext 8 | |
1415 | case 0xD8: // fixext 16 | |
1416 | { | |
1417 | binary_t b; | |
1418 | return get_msgpack_binary(b) && sax->binary(b); | |
1419 | } | |
1420 | ||
1421 | case 0xCA: // float 32 | |
1422 | { | |
1423 | float number{}; | |
1424 | return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), ""); | |
1425 | } | |
1426 | ||
1427 | case 0xCB: // float 64 | |
1428 | { | |
1429 | double number{}; | |
1430 | return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), ""); | |
1431 | } | |
1432 | ||
1433 | case 0xCC: // uint 8 | |
1434 | { | |
1435 | std::uint8_t number{}; | |
1436 | return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number); | |
1437 | } | |
1438 | ||
1439 | case 0xCD: // uint 16 | |
1440 | { | |
1441 | std::uint16_t number{}; | |
1442 | return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number); | |
1443 | } | |
1444 | ||
1445 | case 0xCE: // uint 32 | |
1446 | { | |
1447 | std::uint32_t number{}; | |
1448 | return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number); | |
1449 | } | |
1450 | ||
1451 | case 0xCF: // uint 64 | |
1452 | { | |
1453 | std::uint64_t number{}; | |
1454 | return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number); | |
1455 | } | |
1456 | ||
1457 | case 0xD0: // int 8 | |
1458 | { | |
1459 | std::int8_t number{}; | |
1460 | return get_number(input_format_t::msgpack, number) && sax->number_integer(number); | |
1461 | } | |
1462 | ||
1463 | case 0xD1: // int 16 | |
1464 | { | |
1465 | std::int16_t number{}; | |
1466 | return get_number(input_format_t::msgpack, number) && sax->number_integer(number); | |
1467 | } | |
1468 | ||
1469 | case 0xD2: // int 32 | |
1470 | { | |
1471 | std::int32_t number{}; | |
1472 | return get_number(input_format_t::msgpack, number) && sax->number_integer(number); | |
1473 | } | |
1474 | ||
1475 | case 0xD3: // int 64 | |
1476 | { | |
1477 | std::int64_t number{}; | |
1478 | return get_number(input_format_t::msgpack, number) && sax->number_integer(number); | |
1479 | } | |
1480 | ||
1481 | case 0xDC: // array 16 | |
1482 | { | |
1483 | std::uint16_t len{}; | |
1484 | return get_number(input_format_t::msgpack, len) && get_msgpack_array(static_cast<std::size_t>(len)); | |
1485 | } | |
1486 | ||
1487 | case 0xDD: // array 32 | |
1488 | { | |
1489 | std::uint32_t len{}; | |
1490 | return get_number(input_format_t::msgpack, len) && get_msgpack_array(static_cast<std::size_t>(len)); | |
1491 | } | |
1492 | ||
1493 | case 0xDE: // map 16 | |
1494 | { | |
1495 | std::uint16_t len{}; | |
1496 | return get_number(input_format_t::msgpack, len) && get_msgpack_object(static_cast<std::size_t>(len)); | |
1497 | } | |
1498 | ||
1499 | case 0xDF: // map 32 | |
1500 | { | |
1501 | std::uint32_t len{}; | |
1502 | return get_number(input_format_t::msgpack, len) && get_msgpack_object(static_cast<std::size_t>(len)); | |
1503 | } | |
1504 | ||
1505 | // negative fixint | |
1506 | case 0xE0: | |
1507 | case 0xE1: | |
1508 | case 0xE2: | |
1509 | case 0xE3: | |
1510 | case 0xE4: | |
1511 | case 0xE5: | |
1512 | case 0xE6: | |
1513 | case 0xE7: | |
1514 | case 0xE8: | |
1515 | case 0xE9: | |
1516 | case 0xEA: | |
1517 | case 0xEB: | |
1518 | case 0xEC: | |
1519 | case 0xED: | |
1520 | case 0xEE: | |
1521 | case 0xEF: | |
1522 | case 0xF0: | |
1523 | case 0xF1: | |
1524 | case 0xF2: | |
1525 | case 0xF3: | |
1526 | case 0xF4: | |
1527 | case 0xF5: | |
1528 | case 0xF6: | |
1529 | case 0xF7: | |
1530 | case 0xF8: | |
1531 | case 0xF9: | |
1532 | case 0xFA: | |
1533 | case 0xFB: | |
1534 | case 0xFC: | |
1535 | case 0xFD: | |
1536 | case 0xFE: | |
1537 | case 0xFF: | |
1538 | return sax->number_integer(static_cast<std::int8_t>(current)); | |
1539 | ||
1540 | default: // anything else | |
1541 | { | |
1542 | auto last_token = get_token_string(); | |
1543 | return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::msgpack, "invalid byte: 0x" + last_token, "value"), BasicJsonType())); | |
1544 | } | |
1545 | } | |
1546 | } | |
1547 | ||
1548 | /*! | |
1549 | @brief reads a MessagePack string | |
1550 | ||
1551 | This function first reads starting bytes to determine the expected | |
1552 | string length and then copies this number of bytes into a string. | |
1553 | ||
1554 | @param[out] result created string | |
1555 | ||
1556 | @return whether string creation completed | |
1557 | */ | |
1558 | bool get_msgpack_string(string_t& result) | |
1559 | { | |
1560 | if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::msgpack, "string"))) | |
1561 | { | |
1562 | return false; | |
1563 | } | |
1564 | ||
1565 | switch (current) | |
1566 | { | |
1567 | // fixstr | |
1568 | case 0xA0: | |
1569 | case 0xA1: | |
1570 | case 0xA2: | |
1571 | case 0xA3: | |
1572 | case 0xA4: | |
1573 | case 0xA5: | |
1574 | case 0xA6: | |
1575 | case 0xA7: | |
1576 | case 0xA8: | |
1577 | case 0xA9: | |
1578 | case 0xAA: | |
1579 | case 0xAB: | |
1580 | case 0xAC: | |
1581 | case 0xAD: | |
1582 | case 0xAE: | |
1583 | case 0xAF: | |
1584 | case 0xB0: | |
1585 | case 0xB1: | |
1586 | case 0xB2: | |
1587 | case 0xB3: | |
1588 | case 0xB4: | |
1589 | case 0xB5: | |
1590 | case 0xB6: | |
1591 | case 0xB7: | |
1592 | case 0xB8: | |
1593 | case 0xB9: | |
1594 | case 0xBA: | |
1595 | case 0xBB: | |
1596 | case 0xBC: | |
1597 | case 0xBD: | |
1598 | case 0xBE: | |
1599 | case 0xBF: | |
1600 | { | |
1601 | return get_string(input_format_t::msgpack, static_cast<unsigned int>(current) & 0x1Fu, result); | |
1602 | } | |
1603 | ||
1604 | case 0xD9: // str 8 | |
1605 | { | |
1606 | std::uint8_t len{}; | |
1607 | return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result); | |
1608 | } | |
1609 | ||
1610 | case 0xDA: // str 16 | |
1611 | { | |
1612 | std::uint16_t len{}; | |
1613 | return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result); | |
1614 | } | |
1615 | ||
1616 | case 0xDB: // str 32 | |
1617 | { | |
1618 | std::uint32_t len{}; | |
1619 | return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result); | |
1620 | } | |
1621 | ||
1622 | default: | |
1623 | { | |
1624 | auto last_token = get_token_string(); | |
1625 | return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::msgpack, "expected length specification (0xA0-0xBF, 0xD9-0xDB); last byte: 0x" + last_token, "string"), BasicJsonType())); | |
1626 | } | |
1627 | } | |
1628 | } | |
1629 | ||
1630 | /*! | |
1631 | @brief reads a MessagePack byte array | |
1632 | ||
1633 | This function first reads starting bytes to determine the expected | |
1634 | byte array length and then copies this number of bytes into a byte array. | |
1635 | ||
1636 | @param[out] result created byte array | |
1637 | ||
1638 | @return whether byte array creation completed | |
1639 | */ | |
1640 | bool get_msgpack_binary(binary_t& result) | |
1641 | { | |
1642 | // helper function to set the subtype | |
1643 | auto assign_and_return_true = [&result](std::int8_t subtype) | |
1644 | { | |
1645 | result.set_subtype(static_cast<std::uint8_t>(subtype)); | |
1646 | return true; | |
1647 | }; | |
1648 | ||
1649 | switch (current) | |
1650 | { | |
1651 | case 0xC4: // bin 8 | |
1652 | { | |
1653 | std::uint8_t len{}; | |
1654 | return get_number(input_format_t::msgpack, len) && | |
1655 | get_binary(input_format_t::msgpack, len, result); | |
1656 | } | |
1657 | ||
1658 | case 0xC5: // bin 16 | |
1659 | { | |
1660 | std::uint16_t len{}; | |
1661 | return get_number(input_format_t::msgpack, len) && | |
1662 | get_binary(input_format_t::msgpack, len, result); | |
1663 | } | |
1664 | ||
1665 | case 0xC6: // bin 32 | |
1666 | { | |
1667 | std::uint32_t len{}; | |
1668 | return get_number(input_format_t::msgpack, len) && | |
1669 | get_binary(input_format_t::msgpack, len, result); | |
1670 | } | |
1671 | ||
1672 | case 0xC7: // ext 8 | |
1673 | { | |
1674 | std::uint8_t len{}; | |
1675 | std::int8_t subtype{}; | |
1676 | return get_number(input_format_t::msgpack, len) && | |
1677 | get_number(input_format_t::msgpack, subtype) && | |
1678 | get_binary(input_format_t::msgpack, len, result) && | |
1679 | assign_and_return_true(subtype); | |
1680 | } | |
1681 | ||
1682 | case 0xC8: // ext 16 | |
1683 | { | |
1684 | std::uint16_t len{}; | |
1685 | std::int8_t subtype{}; | |
1686 | return get_number(input_format_t::msgpack, len) && | |
1687 | get_number(input_format_t::msgpack, subtype) && | |
1688 | get_binary(input_format_t::msgpack, len, result) && | |
1689 | assign_and_return_true(subtype); | |
1690 | } | |
1691 | ||
1692 | case 0xC9: // ext 32 | |
1693 | { | |
1694 | std::uint32_t len{}; | |
1695 | std::int8_t subtype{}; | |
1696 | return get_number(input_format_t::msgpack, len) && | |
1697 | get_number(input_format_t::msgpack, subtype) && | |
1698 | get_binary(input_format_t::msgpack, len, result) && | |
1699 | assign_and_return_true(subtype); | |
1700 | } | |
1701 | ||
1702 | case 0xD4: // fixext 1 | |
1703 | { | |
1704 | std::int8_t subtype{}; | |
1705 | return get_number(input_format_t::msgpack, subtype) && | |
1706 | get_binary(input_format_t::msgpack, 1, result) && | |
1707 | assign_and_return_true(subtype); | |
1708 | } | |
1709 | ||
1710 | case 0xD5: // fixext 2 | |
1711 | { | |
1712 | std::int8_t subtype{}; | |
1713 | return get_number(input_format_t::msgpack, subtype) && | |
1714 | get_binary(input_format_t::msgpack, 2, result) && | |
1715 | assign_and_return_true(subtype); | |
1716 | } | |
1717 | ||
1718 | case 0xD6: // fixext 4 | |
1719 | { | |
1720 | std::int8_t subtype{}; | |
1721 | return get_number(input_format_t::msgpack, subtype) && | |
1722 | get_binary(input_format_t::msgpack, 4, result) && | |
1723 | assign_and_return_true(subtype); | |
1724 | } | |
1725 | ||
1726 | case 0xD7: // fixext 8 | |
1727 | { | |
1728 | std::int8_t subtype{}; | |
1729 | return get_number(input_format_t::msgpack, subtype) && | |
1730 | get_binary(input_format_t::msgpack, 8, result) && | |
1731 | assign_and_return_true(subtype); | |
1732 | } | |
1733 | ||
1734 | case 0xD8: // fixext 16 | |
1735 | { | |
1736 | std::int8_t subtype{}; | |
1737 | return get_number(input_format_t::msgpack, subtype) && | |
1738 | get_binary(input_format_t::msgpack, 16, result) && | |
1739 | assign_and_return_true(subtype); | |
1740 | } | |
1741 | ||
1742 | default: // LCOV_EXCL_LINE | |
1743 | return false; // LCOV_EXCL_LINE | |
1744 | } | |
1745 | } | |
1746 | ||
1747 | /*! | |
1748 | @param[in] len the length of the array | |
1749 | @return whether array creation completed | |
1750 | */ | |
1751 | bool get_msgpack_array(const std::size_t len) | |
1752 | { | |
1753 | if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len))) | |
1754 | { | |
1755 | return false; | |
1756 | } | |
1757 | ||
1758 | for (std::size_t i = 0; i < len; ++i) | |
1759 | { | |
1760 | if (JSON_HEDLEY_UNLIKELY(!parse_msgpack_internal())) | |
1761 | { | |
1762 | return false; | |
1763 | } | |
1764 | } | |
1765 | ||
1766 | return sax->end_array(); | |
1767 | } | |
1768 | ||
1769 | /*! | |
1770 | @param[in] len the length of the object | |
1771 | @return whether object creation completed | |
1772 | */ | |
1773 | bool get_msgpack_object(const std::size_t len) | |
1774 | { | |
1775 | if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len))) | |
1776 | { | |
1777 | return false; | |
1778 | } | |
1779 | ||
1780 | string_t key; | |
1781 | for (std::size_t i = 0; i < len; ++i) | |
1782 | { | |
1783 | get(); | |
1784 | if (JSON_HEDLEY_UNLIKELY(!get_msgpack_string(key) || !sax->key(key))) | |
1785 | { | |
1786 | return false; | |
1787 | } | |
1788 | ||
1789 | if (JSON_HEDLEY_UNLIKELY(!parse_msgpack_internal())) | |
1790 | { | |
1791 | return false; | |
1792 | } | |
1793 | key.clear(); | |
1794 | } | |
1795 | ||
1796 | return sax->end_object(); | |
1797 | } | |
1798 | ||
1799 | //////////// | |
1800 | // UBJSON // | |
1801 | //////////// | |
1802 | ||
1803 | /*! | |
1804 | @param[in] get_char whether a new character should be retrieved from the | |
1805 | input (true, default) or whether the last read | |
1806 | character should be considered instead | |
1807 | ||
1808 | @return whether a valid UBJSON value was passed to the SAX parser | |
1809 | */ | |
1810 | bool parse_ubjson_internal(const bool get_char = true) | |
1811 | { | |
1812 | return get_ubjson_value(get_char ? get_ignore_noop() : current); | |
1813 | } | |
1814 | ||
1815 | /*! | |
1816 | @brief reads a UBJSON string | |
1817 | ||
1818 | This function is either called after reading the 'S' byte explicitly | |
1819 | indicating a string, or in case of an object key where the 'S' byte can be | |
1820 | left out. | |
1821 | ||
1822 | @param[out] result created string | |
1823 | @param[in] get_char whether a new character should be retrieved from the | |
1824 | input (true, default) or whether the last read | |
1825 | character should be considered instead | |
1826 | ||
1827 | @return whether string creation completed | |
1828 | */ | |
1829 | bool get_ubjson_string(string_t& result, const bool get_char = true) | |
1830 | { | |
1831 | if (get_char) | |
1832 | { | |
1833 | get(); // TODO(niels): may we ignore N here? | |
1834 | } | |
1835 | ||
1836 | if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "value"))) | |
1837 | { | |
1838 | return false; | |
1839 | } | |
1840 | ||
1841 | switch (current) | |
1842 | { | |
1843 | case 'U': | |
1844 | { | |
1845 | std::uint8_t len{}; | |
1846 | return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result); | |
1847 | } | |
1848 | ||
1849 | case 'i': | |
1850 | { | |
1851 | std::int8_t len{}; | |
1852 | return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result); | |
1853 | } | |
1854 | ||
1855 | case 'I': | |
1856 | { | |
1857 | std::int16_t len{}; | |
1858 | return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result); | |
1859 | } | |
1860 | ||
1861 | case 'l': | |
1862 | { | |
1863 | std::int32_t len{}; | |
1864 | return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result); | |
1865 | } | |
1866 | ||
1867 | case 'L': | |
1868 | { | |
1869 | std::int64_t len{}; | |
1870 | return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result); | |
1871 | } | |
1872 | ||
1873 | default: | |
1874 | auto last_token = get_token_string(); | |
1875 | return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L); last byte: 0x" + last_token, "string"), BasicJsonType())); | |
1876 | } | |
1877 | } | |
1878 | ||
1879 | /*! | |
1880 | @param[out] result determined size | |
1881 | @return whether size determination completed | |
1882 | */ | |
1883 | bool get_ubjson_size_value(std::size_t& result) | |
1884 | { | |
1885 | switch (get_ignore_noop()) | |
1886 | { | |
1887 | case 'U': | |
1888 | { | |
1889 | std::uint8_t number{}; | |
1890 | if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number))) | |
1891 | { | |
1892 | return false; | |
1893 | } | |
1894 | result = static_cast<std::size_t>(number); | |
1895 | return true; | |
1896 | } | |
1897 | ||
1898 | case 'i': | |
1899 | { | |
1900 | std::int8_t number{}; | |
1901 | if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number))) | |
1902 | { | |
1903 | return false; | |
1904 | } | |
1905 | result = static_cast<std::size_t>(number); // NOLINT(bugprone-signed-char-misuse,cert-str34-c): number is not a char | |
1906 | return true; | |
1907 | } | |
1908 | ||
1909 | case 'I': | |
1910 | { | |
1911 | std::int16_t number{}; | |
1912 | if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number))) | |
1913 | { | |
1914 | return false; | |
1915 | } | |
1916 | result = static_cast<std::size_t>(number); | |
1917 | return true; | |
1918 | } | |
1919 | ||
1920 | case 'l': | |
1921 | { | |
1922 | std::int32_t number{}; | |
1923 | if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number))) | |
1924 | { | |
1925 | return false; | |
1926 | } | |
1927 | result = static_cast<std::size_t>(number); | |
1928 | return true; | |
1929 | } | |
1930 | ||
1931 | case 'L': | |
1932 | { | |
1933 | std::int64_t number{}; | |
1934 | if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number))) | |
1935 | { | |
1936 | return false; | |
1937 | } | |
1938 | result = static_cast<std::size_t>(number); | |
1939 | return true; | |
1940 | } | |
1941 | ||
1942 | default: | |
1943 | { | |
1944 | auto last_token = get_token_string(); | |
1945 | return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L) after '#'; last byte: 0x" + last_token, "size"), BasicJsonType())); | |
1946 | } | |
1947 | } | |
1948 | } | |
1949 | ||
1950 | /*! | |
1951 | @brief determine the type and size for a container | |
1952 | ||
1953 | In the optimized UBJSON format, a type and a size can be provided to allow | |
1954 | for a more compact representation. | |
1955 | ||
1956 | @param[out] result pair of the size and the type | |
1957 | ||
1958 | @return whether pair creation completed | |
1959 | */ | |
1960 | bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result) | |
1961 | { | |
1962 | result.first = string_t::npos; // size | |
1963 | result.second = 0; // type | |
1964 | ||
1965 | get_ignore_noop(); | |
1966 | ||
1967 | if (current == '$') | |
1968 | { | |
1969 | result.second = get(); // must not ignore 'N', because 'N' maybe the type | |
1970 | if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "type"))) | |
1971 | { | |
1972 | return false; | |
1973 | } | |
1974 | ||
1975 | get_ignore_noop(); | |
1976 | if (JSON_HEDLEY_UNLIKELY(current != '#')) | |
1977 | { | |
1978 | if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "value"))) | |
1979 | { | |
1980 | return false; | |
1981 | } | |
1982 | auto last_token = get_token_string(); | |
1983 | return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "expected '#' after type information; last byte: 0x" + last_token, "size"), BasicJsonType())); | |
1984 | } | |
1985 | ||
1986 | return get_ubjson_size_value(result.first); | |
1987 | } | |
1988 | ||
1989 | if (current == '#') | |
1990 | { | |
1991 | return get_ubjson_size_value(result.first); | |
1992 | } | |
1993 | ||
1994 | return true; | |
1995 | } | |
1996 | ||
1997 | /*! | |
1998 | @param prefix the previously read or set type prefix | |
1999 | @return whether value creation completed | |
2000 | */ | |
2001 | bool get_ubjson_value(const char_int_type prefix) | |
2002 | { | |
2003 | switch (prefix) | |
2004 | { | |
2005 | case std::char_traits<char_type>::eof(): // EOF | |
2006 | return unexpect_eof(input_format_t::ubjson, "value"); | |
2007 | ||
2008 | case 'T': // true | |
2009 | return sax->boolean(true); | |
2010 | case 'F': // false | |
2011 | return sax->boolean(false); | |
2012 | ||
2013 | case 'Z': // null | |
2014 | return sax->null(); | |
2015 | ||
2016 | case 'U': | |
2017 | { | |
2018 | std::uint8_t number{}; | |
2019 | return get_number(input_format_t::ubjson, number) && sax->number_unsigned(number); | |
2020 | } | |
2021 | ||
2022 | case 'i': | |
2023 | { | |
2024 | std::int8_t number{}; | |
2025 | return get_number(input_format_t::ubjson, number) && sax->number_integer(number); | |
2026 | } | |
2027 | ||
2028 | case 'I': | |
2029 | { | |
2030 | std::int16_t number{}; | |
2031 | return get_number(input_format_t::ubjson, number) && sax->number_integer(number); | |
2032 | } | |
2033 | ||
2034 | case 'l': | |
2035 | { | |
2036 | std::int32_t number{}; | |
2037 | return get_number(input_format_t::ubjson, number) && sax->number_integer(number); | |
2038 | } | |
2039 | ||
2040 | case 'L': | |
2041 | { | |
2042 | std::int64_t number{}; | |
2043 | return get_number(input_format_t::ubjson, number) && sax->number_integer(number); | |
2044 | } | |
2045 | ||
2046 | case 'd': | |
2047 | { | |
2048 | float number{}; | |
2049 | return get_number(input_format_t::ubjson, number) && sax->number_float(static_cast<number_float_t>(number), ""); | |
2050 | } | |
2051 | ||
2052 | case 'D': | |
2053 | { | |
2054 | double number{}; | |
2055 | return get_number(input_format_t::ubjson, number) && sax->number_float(static_cast<number_float_t>(number), ""); | |
2056 | } | |
2057 | ||
2058 | case 'H': | |
2059 | { | |
2060 | return get_ubjson_high_precision_number(); | |
2061 | } | |
2062 | ||
2063 | case 'C': // char | |
2064 | { | |
2065 | get(); | |
2066 | if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "char"))) | |
2067 | { | |
2068 | return false; | |
2069 | } | |
2070 | if (JSON_HEDLEY_UNLIKELY(current > 127)) | |
2071 | { | |
2072 | auto last_token = get_token_string(); | |
2073 | return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + last_token, "char"), BasicJsonType())); | |
2074 | } | |
2075 | string_t s(1, static_cast<typename string_t::value_type>(current)); | |
2076 | return sax->string(s); | |
2077 | } | |
2078 | ||
2079 | case 'S': // string | |
2080 | { | |
2081 | string_t s; | |
2082 | return get_ubjson_string(s) && sax->string(s); | |
2083 | } | |
2084 | ||
2085 | case '[': // array | |
2086 | return get_ubjson_array(); | |
2087 | ||
2088 | case '{': // object | |
2089 | return get_ubjson_object(); | |
2090 | ||
2091 | default: // anything else | |
2092 | { | |
2093 | auto last_token = get_token_string(); | |
2094 | return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "invalid byte: 0x" + last_token, "value"), BasicJsonType())); | |
2095 | } | |
2096 | } | |
2097 | } | |
2098 | ||
2099 | /*! | |
2100 | @return whether array creation completed | |
2101 | */ | |
2102 | bool get_ubjson_array() | |
2103 | { | |
2104 | std::pair<std::size_t, char_int_type> size_and_type; | |
2105 | if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type))) | |
2106 | { | |
2107 | return false; | |
2108 | } | |
2109 | ||
2110 | if (size_and_type.first != string_t::npos) | |
2111 | { | |
2112 | if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first))) | |
2113 | { | |
2114 | return false; | |
2115 | } | |
2116 | ||
2117 | if (size_and_type.second != 0) | |
2118 | { | |
2119 | if (size_and_type.second != 'N') | |
2120 | { | |
2121 | for (std::size_t i = 0; i < size_and_type.first; ++i) | |
2122 | { | |
2123 | if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second))) | |
2124 | { | |
2125 | return false; | |
2126 | } | |
2127 | } | |
2128 | } | |
2129 | } | |
2130 | else | |
2131 | { | |
2132 | for (std::size_t i = 0; i < size_and_type.first; ++i) | |
2133 | { | |
2134 | if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal())) | |
2135 | { | |
2136 | return false; | |
2137 | } | |
2138 | } | |
2139 | } | |
2140 | } | |
2141 | else | |
2142 | { | |
2143 | if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast<std::size_t>(-1)))) | |
2144 | { | |
2145 | return false; | |
2146 | } | |
2147 | ||
2148 | while (current != ']') | |
2149 | { | |
2150 | if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal(false))) | |
2151 | { | |
2152 | return false; | |
2153 | } | |
2154 | get_ignore_noop(); | |
2155 | } | |
2156 | } | |
2157 | ||
2158 | return sax->end_array(); | |
2159 | } | |
2160 | ||
2161 | /*! | |
2162 | @return whether object creation completed | |
2163 | */ | |
2164 | bool get_ubjson_object() | |
2165 | { | |
2166 | std::pair<std::size_t, char_int_type> size_and_type; | |
2167 | if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type))) | |
2168 | { | |
2169 | return false; | |
2170 | } | |
2171 | ||
2172 | string_t key; | |
2173 | if (size_and_type.first != string_t::npos) | |
2174 | { | |
2175 | if (JSON_HEDLEY_UNLIKELY(!sax->start_object(size_and_type.first))) | |
2176 | { | |
2177 | return false; | |
2178 | } | |
2179 | ||
2180 | if (size_and_type.second != 0) | |
2181 | { | |
2182 | for (std::size_t i = 0; i < size_and_type.first; ++i) | |
2183 | { | |
2184 | if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key))) | |
2185 | { | |
2186 | return false; | |
2187 | } | |
2188 | if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second))) | |
2189 | { | |
2190 | return false; | |
2191 | } | |
2192 | key.clear(); | |
2193 | } | |
2194 | } | |
2195 | else | |
2196 | { | |
2197 | for (std::size_t i = 0; i < size_and_type.first; ++i) | |
2198 | { | |
2199 | if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key))) | |
2200 | { | |
2201 | return false; | |
2202 | } | |
2203 | if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal())) | |
2204 | { | |
2205 | return false; | |
2206 | } | |
2207 | key.clear(); | |
2208 | } | |
2209 | } | |
2210 | } | |
2211 | else | |
2212 | { | |
2213 | if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast<std::size_t>(-1)))) | |
2214 | { | |
2215 | return false; | |
2216 | } | |
2217 | ||
2218 | while (current != '}') | |
2219 | { | |
2220 | if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key, false) || !sax->key(key))) | |
2221 | { | |
2222 | return false; | |
2223 | } | |
2224 | if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal())) | |
2225 | { | |
2226 | return false; | |
2227 | } | |
2228 | get_ignore_noop(); | |
2229 | key.clear(); | |
2230 | } | |
2231 | } | |
2232 | ||
2233 | return sax->end_object(); | |
2234 | } | |
2235 | ||
2236 | // Note, no reader for UBJSON binary types is implemented because they do | |
2237 | // not exist | |
2238 | ||
2239 | bool get_ubjson_high_precision_number() | |
2240 | { | |
2241 | // get size of following number string | |
2242 | std::size_t size{}; | |
2243 | auto res = get_ubjson_size_value(size); | |
2244 | if (JSON_HEDLEY_UNLIKELY(!res)) | |
2245 | { | |
2246 | return res; | |
2247 | } | |
2248 | ||
2249 | // get number string | |
2250 | std::vector<char> number_vector; | |
2251 | for (std::size_t i = 0; i < size; ++i) | |
2252 | { | |
2253 | get(); | |
2254 | if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "number"))) | |
2255 | { | |
2256 | return false; | |
2257 | } | |
2258 | number_vector.push_back(static_cast<char>(current)); | |
2259 | } | |
2260 | ||
2261 | // parse number string | |
2262 | using ia_type = decltype(detail::input_adapter(number_vector)); | |
2263 | auto number_lexer = detail::lexer<BasicJsonType, ia_type>(detail::input_adapter(number_vector), false); | |
2264 | const auto result_number = number_lexer.scan(); | |
2265 | const auto number_string = number_lexer.get_token_string(); | |
2266 | const auto result_remainder = number_lexer.scan(); | |
2267 | ||
2268 | using token_type = typename detail::lexer_base<BasicJsonType>::token_type; | |
2269 | ||
2270 | if (JSON_HEDLEY_UNLIKELY(result_remainder != token_type::end_of_input)) | |
2271 | { | |
2272 | return sax->parse_error(chars_read, number_string, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + number_lexer.get_token_string(), "high-precision number"), BasicJsonType())); | |
2273 | } | |
2274 | ||
2275 | switch (result_number) | |
2276 | { | |
2277 | case token_type::value_integer: | |
2278 | return sax->number_integer(number_lexer.get_number_integer()); | |
2279 | case token_type::value_unsigned: | |
2280 | return sax->number_unsigned(number_lexer.get_number_unsigned()); | |
2281 | case token_type::value_float: | |
2282 | return sax->number_float(number_lexer.get_number_float(), std::move(number_string)); | |
2283 | case token_type::uninitialized: | |
2284 | case token_type::literal_true: | |
2285 | case token_type::literal_false: | |
2286 | case token_type::literal_null: | |
2287 | case token_type::value_string: | |
2288 | case token_type::begin_array: | |
2289 | case token_type::begin_object: | |
2290 | case token_type::end_array: | |
2291 | case token_type::end_object: | |
2292 | case token_type::name_separator: | |
2293 | case token_type::value_separator: | |
2294 | case token_type::parse_error: | |
2295 | case token_type::end_of_input: | |
2296 | case token_type::literal_or_value: | |
2297 | default: | |
2298 | return sax->parse_error(chars_read, number_string, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + number_lexer.get_token_string(), "high-precision number"), BasicJsonType())); | |
2299 | } | |
2300 | } | |
2301 | ||
2302 | /////////////////////// | |
2303 | // Utility functions // | |
2304 | /////////////////////// | |
2305 | ||
2306 | /*! | |
2307 | @brief get next character from the input | |
2308 | ||
2309 | This function provides the interface to the used input adapter. It does | |
not throw in case the input reached EOF, but returns the negative-valued
`std::char_traits<char_type>::eof()` in that case.
2312 | ||
2313 | @return character read from the input | |
2314 | */ | |
2315 | char_int_type get() | |
2316 | { | |
2317 | ++chars_read; | |
2318 | return current = ia.get_character(); | |
2319 | } | |
2320 | ||
2321 | /*! | |
2322 | @return character read from the input after ignoring all 'N' entries | |
2323 | */ | |
2324 | char_int_type get_ignore_noop() | |
2325 | { | |
2326 | do | |
2327 | { | |
2328 | get(); | |
2329 | } | |
2330 | while (current == 'N'); | |
2331 | ||
2332 | return current; | |
2333 | } | |
2334 | ||
/*!
@brief read a number from the input

@tparam NumberType the type of the number
@tparam InputIsLittleEndian whether the bytes of the number are stored in
        little endian order in the input (default: false, i.e., big endian)
@param[in] format   the current format (for diagnostics)
@param[out] result  number of type @a NumberType

@return whether conversion completed

@note This function needs to respect the system's endianness, because
      bytes in CBOR, MessagePack, and UBJSON are stored in network order
      (big endian) and therefore need reordering on little endian systems.
*/
2348 | template<typename NumberType, bool InputIsLittleEndian = false> | |
2349 | bool get_number(const input_format_t format, NumberType& result) | |
2350 | { | |
2351 | // step 1: read input into array with system's byte order | |
2352 | std::array<std::uint8_t, sizeof(NumberType)> vec{}; | |
2353 | for (std::size_t i = 0; i < sizeof(NumberType); ++i) | |
2354 | { | |
2355 | get(); | |
2356 | if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "number"))) | |
2357 | { | |
2358 | return false; | |
2359 | } | |
2360 | ||
2361 | // reverse byte order prior to conversion if necessary | |
2362 | if (is_little_endian != InputIsLittleEndian) | |
2363 | { | |
2364 | vec[sizeof(NumberType) - i - 1] = static_cast<std::uint8_t>(current); | |
2365 | } | |
2366 | else | |
2367 | { | |
2368 | vec[i] = static_cast<std::uint8_t>(current); // LCOV_EXCL_LINE | |
2369 | } | |
2370 | } | |
2371 | ||
2372 | // step 2: convert array into number of type T and return | |
2373 | std::memcpy(&result, vec.data(), sizeof(NumberType)); | |
2374 | return true; | |
2375 | } | |
2376 | ||
2377 | /*! | |
2378 | @brief create a string by reading characters from the input | |
2379 | ||
2380 | @tparam NumberType the type of the number | |
2381 | @param[in] format the current format (for diagnostics) | |
2382 | @param[in] len number of characters to read | |
2383 | @param[out] result string created by reading @a len bytes | |
2384 | ||
2385 | @return whether string creation completed | |
2386 | ||
2387 | @note We can not reserve @a len bytes for the result, because @a len | |
2388 | may be too large. Usually, @ref unexpect_eof() detects the end of | |
2389 | the input before we run out of string memory. | |
2390 | */ | |
2391 | template<typename NumberType> | |
2392 | bool get_string(const input_format_t format, | |
2393 | const NumberType len, | |
2394 | string_t& result) | |
2395 | { | |
2396 | bool success = true; | |
2397 | for (NumberType i = 0; i < len; i++) | |
2398 | { | |
2399 | get(); | |
2400 | if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "string"))) | |
2401 | { | |
2402 | success = false; | |
2403 | break; | |
2404 | } | |
2405 | result.push_back(static_cast<typename string_t::value_type>(current)); | |
2406 | } | |
2407 | return success; | |
2408 | } | |
2409 | ||
2410 | /*! | |
2411 | @brief create a byte array by reading bytes from the input | |
2412 | ||
2413 | @tparam NumberType the type of the number | |
2414 | @param[in] format the current format (for diagnostics) | |
2415 | @param[in] len number of bytes to read | |
2416 | @param[out] result byte array created by reading @a len bytes | |
2417 | ||
2418 | @return whether byte array creation completed | |
2419 | ||
2420 | @note We can not reserve @a len bytes for the result, because @a len | |
2421 | may be too large. Usually, @ref unexpect_eof() detects the end of | |
2422 | the input before we run out of memory. | |
2423 | */ | |
2424 | template<typename NumberType> | |
2425 | bool get_binary(const input_format_t format, | |
2426 | const NumberType len, | |
2427 | binary_t& result) | |
2428 | { | |
2429 | bool success = true; | |
2430 | for (NumberType i = 0; i < len; i++) | |
2431 | { | |
2432 | get(); | |
2433 | if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "binary"))) | |
2434 | { | |
2435 | success = false; | |
2436 | break; | |
2437 | } | |
2438 | result.push_back(static_cast<std::uint8_t>(current)); | |
2439 | } | |
2440 | return success; | |
2441 | } | |
2442 | ||
2443 | /*! | |
2444 | @param[in] format the current format (for diagnostics) | |
2445 | @param[in] context further context information (for diagnostics) | |
2446 | @return whether the last read character is not EOF | |
2447 | */ | |
2448 | JSON_HEDLEY_NON_NULL(3) | |
2449 | bool unexpect_eof(const input_format_t format, const char* context) const | |
2450 | { | |
2451 | if (JSON_HEDLEY_UNLIKELY(current == std::char_traits<char_type>::eof())) | |
2452 | { | |
2453 | return sax->parse_error(chars_read, "<end of file>", | |
2454 | parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context), BasicJsonType())); | |
2455 | } | |
2456 | return true; | |
2457 | } | |
2458 | ||
2459 | /*! | |
2460 | @return a string representation of the last read byte | |
2461 | */ | |
2462 | std::string get_token_string() const | |
2463 | { | |
2464 | std::array<char, 3> cr{{}}; | |
2465 | static_cast<void>((std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast<unsigned char>(current))); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) | |
2466 | return std::string{cr.data()}; | |
2467 | } | |
2468 | ||
2469 | /*! | |
2470 | @param[in] format the current format | |
2471 | @param[in] detail a detailed error message | |
2472 | @param[in] context further context information | |
2473 | @return a message string to use in the parse_error exceptions | |
2474 | */ | |
2475 | std::string exception_message(const input_format_t format, | |
2476 | const std::string& detail, | |
2477 | const std::string& context) const | |
2478 | { | |
2479 | std::string error_msg = "syntax error while parsing "; | |
2480 | ||
2481 | switch (format) | |
2482 | { | |
2483 | case input_format_t::cbor: | |
2484 | error_msg += "CBOR"; | |
2485 | break; | |
2486 | ||
2487 | case input_format_t::msgpack: | |
2488 | error_msg += "MessagePack"; | |
2489 | break; | |
2490 | ||
2491 | case input_format_t::ubjson: | |
2492 | error_msg += "UBJSON"; | |
2493 | break; | |
2494 | ||
2495 | case input_format_t::bson: | |
2496 | error_msg += "BSON"; | |
2497 | break; | |
2498 | ||
2499 | case input_format_t::json: // LCOV_EXCL_LINE | |
2500 | default: // LCOV_EXCL_LINE | |
2501 | JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE | |
2502 | } | |
2503 | ||
2504 | return error_msg + " " + context + ": " + detail; | |
2505 | } | |
2506 | ||
2507 | private: | |
2508 | /// input adapter | |
2509 | InputAdapterType ia; | |
2510 | ||
2511 | /// the current character | |
2512 | char_int_type current = std::char_traits<char_type>::eof(); | |
2513 | ||
2514 | /// the number of characters read | |
2515 | std::size_t chars_read = 0; | |
2516 | ||
2517 | /// whether we can assume little endianness | |
2518 | const bool is_little_endian = little_endianness(); | |
2519 | ||
2520 | /// the SAX parser | |
2521 | json_sax_t* sax = nullptr; | |
2522 | }; | |
2523 | } // namespace detail | |
2524 | } // namespace nlohmann |