]>
git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/third-party/fbson/FbsonJsonParser.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
7 * This file defines FbsonJsonParserT (template) and FbsonJsonParser.
9 * FbsonJsonParserT is a template class which implements a JSON parser.
10 * FbsonJsonParserT parses JSON text, and serializes it to FBSON binary format
11 * by using FbsonWriterT object. By default, FbsonJsonParserT creates a new
12 * FbsonWriterT object with an output stream object. However, you can also
13 * pass in your FbsonWriterT or any stream object that implements some basic
14 * interface of std::ostream (see FbsonStream.h).
16 * FbsonJsonParser specializes FbsonJsonParserT with FbsonOutStream type (see
17 * FbsonStream.h). So unless you want to provide your own output stream
18 * type, use the FbsonJsonParser object.
21 * FbsonJsonParserT parses JSON string, and directly serializes into FBSON
22 * packed bytes. There are three ways to parse a JSON string: (1) using
23 * c-string, (2) using string with len, (3) using std::istream object. You can
24 * use a custom streambuf to redirect output. FbsonInBuffer is a streambuf used
25 * internally if the input is a raw character buffer.
27 * You can reuse an FbsonJsonParserT object to parse/serialize multiple JSON
28 * strings, and the previous FBSON will be overwritten.
30 * If parsing fails (returned false), the error code will be set to one of
31 * FbsonErrType, and can be retrieved by calling getErrorCode().
33 * ** External dictionary **
34 * While parsing a JSON string, you can pass a callback function to map a key
35 * string to an id, and store the dictionary id in FBSON to save space. The
36 * purpose of using an external dictionary is more towards a collection of
37 * documents (which has common keys) rather than a single document, so that
38 * space saving will be significant.
41 * Note: FBSON serialization doesn't assume endianness of the server. However
42 * you will need to ensure that the endianness at the reader side is the same
43 * as that at the writer side (if they are on different machines). Otherwise,
44 * proper conversion is needed when a number value is returned to the
47 * @author Tian Xia <tianx@fb.com>
54 #include "FbsonDocument.h"
55 #include "FbsonWriter.h"
// Characters that end a number token: the number-parsing loops below keep
// consuming input only while strchr(kJsonDelim, ch) does not match.
59 const char* const kJsonDelim
= " ,]}\t\r\n";
// Whitespace characters; trim() keeps looping while the next character of the
// input stream is one of these.
60 const char* const kWhiteSpace
= " \t\n\r";
65 enum class FbsonErrType
{
88 * Template FbsonJsonParserT
90 template <class OS_TYPE
>
91 class FbsonJsonParserT
{
// Default constructor: the stored error code starts out as E_NONE.
93 FbsonJsonParserT() : err_(FbsonErrType::E_NONE
) {}
// Constructs the parser with its writer initialized from the caller-supplied
// output stream `os`; the stored error code starts out as E_NONE.
95 explicit FbsonJsonParserT(OS_TYPE
& os
)
96 : writer_(os
), err_(FbsonErrType::E_NONE
) {}
98 // parse a UTF-8 JSON string
99 bool parse(const std::string
& str
, hDictInsert handler
= nullptr) {
100 return parse(str
.c_str(), (unsigned int)str
.size(), handler
);
103 // parse a UTF-8 JSON c-style string (NULL terminated)
104 bool parse(const char* c_str
, hDictInsert handler
= nullptr) {
105 return parse(c_str
, (unsigned int)strlen(c_str
), handler
);
108 // parse a UTF-8 JSON string with length
109 bool parse(const char* pch
, unsigned int len
, hDictInsert handler
= nullptr) {
110 if (!pch
|| len
== 0) {
111 err_
= FbsonErrType::E_EMPTY_STR
;
115 FbsonInBuffer
sb(pch
, len
);
116 std::istream
in(&sb
);
117 return parse(in
, handler
);
120 // parse UTF-8 JSON text from an input stream
121 bool parse(std::istream
& in
, hDictInsert handler
= nullptr) {
124 // reset output stream
129 if (in
.peek() == '{') {
131 res
= parseObject(in
, handler
);
132 } else if (in
.peek() == '[') {
134 res
= parseArray(in
, handler
);
136 err_
= FbsonErrType::E_INVALID_DOCU
;
140 if (res
&& !in
.eof()) {
141 err_
= FbsonErrType::E_INVALID_DOCU
;
// Returns a mutable reference to the FbsonWriterT that the parse routines
// write into.
148 FbsonWriterT
<OS_TYPE
>& getWriter() { return writer_
; }
// Returns the currently stored error code; the parse routines assign one of
// the FbsonErrType values to it when they fail.
150 FbsonErrType
getErrorCode() { return err_
; }
// Resets the stored error code to FbsonErrType::E_NONE.
153 void clearErr() { err_
= FbsonErrType::E_NONE
; }
156 // parse a JSON object (comma-separated list of key-value pairs)
157 bool parseObject(std::istream
& in
, hDictInsert handler
) {
158 if (!writer_
.writeStartObject()) {
159 err_
= FbsonErrType::E_OUTPUT_FAIL
;
165 if (in
.peek() == '}') {
168 if (!writer_
.writeEndObject()) {
169 err_
= FbsonErrType::E_OUTPUT_FAIL
;
176 if (in
.get() != '"') {
177 err_
= FbsonErrType::E_INVALID_KEY
;
181 if (!parseKVPair(in
, handler
)) {
190 if (!writer_
.writeEndObject()) {
191 err_
= FbsonErrType::E_OUTPUT_FAIL
;
195 } else if (ch
!= ',') {
196 err_
= FbsonErrType::E_INVALID_OBJ
;
203 err_
= FbsonErrType::E_INVALID_OBJ
;
207 // parse a JSON array (comma-separated list of values)
208 bool parseArray(std::istream
& in
, hDictInsert handler
) {
209 if (!writer_
.writeStartArray()) {
210 err_
= FbsonErrType::E_OUTPUT_FAIL
;
216 if (in
.peek() == ']') {
219 if (!writer_
.writeEndArray()) {
220 err_
= FbsonErrType::E_OUTPUT_FAIL
;
227 if (!parseValue(in
, handler
)) {
236 if (!writer_
.writeEndArray()) {
237 err_
= FbsonErrType::E_OUTPUT_FAIL
;
241 } else if (ch
!= ',') {
242 err_
= FbsonErrType::E_INVALID_ARR
;
249 err_
= FbsonErrType::E_INVALID_ARR
;
253 // parse a key-value pair, separated by ":"
254 bool parseKVPair(std::istream
& in
, hDictInsert handler
) {
255 if (parseKey(in
, handler
) && parseValue(in
, handler
)) {
262 // parse a key (must be string)
263 bool parseKey(std::istream
& in
, hDictInsert handler
) {
264 char key
[FbsonKeyValue::sMaxKeyLen
];
266 while (in
.good() && in
.peek() != '"' && i
< FbsonKeyValue::sMaxKeyLen
) {
270 if (!in
.good() || in
.peek() != '"' || i
== 0) {
271 err_
= FbsonErrType::E_INVALID_KEY
;
275 in
.ignore(); // discard '"'
279 key_id
= handler(key
, i
);
283 writer_
.writeKey(key
, i
);
285 writer_
.writeKey(key_id
);
290 if (in
.get() != ':') {
291 err_
= FbsonErrType::E_INVALID_OBJ
;
299 bool parseValue(std::istream
& in
, hDictInsert handler
) {
320 res
= parseFalse(in
);
325 res
= parseString(in
);
330 res
= parseObject(in
, handler
);
335 res
= parseArray(in
, handler
);
339 res
= parseNumber(in
);
348 bool parseNull(std::istream
& in
) {
349 if (tolower(in
.get()) == 'u' && tolower(in
.get()) == 'l' &&
350 tolower(in
.get()) == 'l') {
355 err_
= FbsonErrType::E_INVALID_VALUE
;
360 bool parseTrue(std::istream
& in
) {
361 if (tolower(in
.get()) == 'r' && tolower(in
.get()) == 'u' &&
362 tolower(in
.get()) == 'e') {
363 writer_
.writeBool(true);
367 err_
= FbsonErrType::E_INVALID_VALUE
;
372 bool parseFalse(std::istream
& in
) {
373 if (tolower(in
.get()) == 'a' && tolower(in
.get()) == 'l' &&
374 tolower(in
.get()) == 's' && tolower(in
.get()) == 'e') {
375 writer_
.writeBool(false);
379 err_
= FbsonErrType::E_INVALID_VALUE
;
384 bool parseString(std::istream
& in
) {
385 if (!writer_
.writeStartString()) {
386 err_
= FbsonErrType::E_OUTPUT_FAIL
;
390 bool escaped
= false;
391 char buffer
[4096]; // write 4KB at a time
395 if (ch
!= '"' || escaped
) {
396 buffer
[nread
++] = ch
;
399 if (!writer_
.writeString(buffer
, nread
)) {
400 err_
= FbsonErrType::E_OUTPUT_FAIL
;
406 if (ch
== '\\' || escaped
) {
410 // write all remaining bytes in the buffer
412 if (!writer_
.writeString(buffer
, nread
)) {
413 err_
= FbsonErrType::E_OUTPUT_FAIL
;
417 // end writing string
418 if (!writer_
.writeEndString()) {
419 err_
= FbsonErrType::E_OUTPUT_FAIL
;
426 err_
= FbsonErrType::E_INVALID_STR
;
431 // Number format can be hex, octal, or decimal (including float).
432 // Only decimal can have (+/-) sign prefix.
433 bool parseNumber(std::istream
& in
) {
439 if (in
.peek() == 'x' || in
.peek() == 'X') {
442 } else if (in
.peek() == '.') {
444 ret
= parseDouble(in
, 0, 0, 1);
446 ret
= parseOctal(in
);
453 ret
= parseDecimal(in
, -1);
458 #if defined(__clang__)
459 [[clang::fallthrough
]];
460 #elif defined(__GNUC__) && __GNUC__ >= 7
461 [[gnu::fallthrough
]];
464 ret
= parseDecimal(in
, 1);
471 // parse a number in hex format
472 bool parseHex(std::istream
& in
) {
475 char ch
= tolower(in
.peek());
476 while (in
.good() && !strchr(kJsonDelim
, ch
) && (++num_digits
) <= 16) {
477 if (ch
>= '0' && ch
<= '9') {
478 val
= (val
<< 4) + (ch
- '0');
479 } else if (ch
>= 'a' && ch
<= 'f') {
480 val
= (val
<< 4) + (ch
- 'a' + 10);
481 } else { // unrecognized hex digit
482 err_
= FbsonErrType::E_INVALID_HEX
;
487 ch
= tolower(in
.peek());
491 if (num_digits
<= 2) {
492 size
= writer_
.writeInt8((int8_t)val
);
493 } else if (num_digits
<= 4) {
494 size
= writer_
.writeInt16((int16_t)val
);
495 } else if (num_digits
<= 8) {
496 size
= writer_
.writeInt32((int32_t)val
);
497 } else if (num_digits
<= 16) {
498 size
= writer_
.writeInt64(val
);
500 err_
= FbsonErrType::E_HEX_OVERFLOW
;
505 err_
= FbsonErrType::E_OUTPUT_FAIL
;
512 // parse a number in octal format
513 bool parseOctal(std::istream
& in
) {
516 while (in
.good() && !strchr(kJsonDelim
, ch
)) {
517 if (ch
>= '0' && ch
<= '7') {
518 val
= val
* 8 + (ch
- '0');
520 err_
= FbsonErrType::E_INVALID_OCTAL
;
524 // check if the number overflows
526 err_
= FbsonErrType::E_OCTAL_OVERFLOW
;
535 if (val
<= std::numeric_limits
<int8_t>::max()) {
536 size
= writer_
.writeInt8((int8_t)val
);
537 } else if (val
<= std::numeric_limits
<int16_t>::max()) {
538 size
= writer_
.writeInt16((int16_t)val
);
539 } else if (val
<= std::numeric_limits
<int32_t>::max()) {
540 size
= writer_
.writeInt32((int32_t)val
);
541 } else { // val <= INT64_MAX
542 size
= writer_
.writeInt64(val
);
546 err_
= FbsonErrType::E_OUTPUT_FAIL
;
553 // parse a number in decimal (including float)
554 bool parseDecimal(std::istream
& in
, int sign
) {
559 while (in
.good() && (ch
= in
.peek()) == '0')
562 while (in
.good() && !strchr(kJsonDelim
, ch
)) {
563 if (ch
>= '0' && ch
<= '9') {
564 val
= val
* 10 + (ch
- '0');
566 } else if (ch
== '.') {
567 // note we don't pop out '.'
568 return parseDouble(in
, static_cast<double>(val
), precision
, sign
);
570 err_
= FbsonErrType::E_INVALID_DECIMAL
;
576 // if the number overflows int64_t, first parse it as double iff we see a
577 // decimal point later. Otherwise, will treat it as overflow
578 if (val
< 0 && val
> std::numeric_limits
<int64_t>::min()) {
579 return parseDouble(in
, static_cast<double>(val
), precision
, sign
);
590 if (val
>= std::numeric_limits
<int8_t>::min() &&
591 val
<= std::numeric_limits
<int8_t>::max()) {
592 size
= writer_
.writeInt8((int8_t)val
);
593 } else if (val
>= std::numeric_limits
<int16_t>::min() &&
594 val
<= std::numeric_limits
<int16_t>::max()) {
595 size
= writer_
.writeInt16((int16_t)val
);
596 } else if (val
>= std::numeric_limits
<int32_t>::min() &&
597 val
<= std::numeric_limits
<int32_t>::max()) {
598 size
= writer_
.writeInt32((int32_t)val
);
599 } else { // val <= INT64_MAX
600 size
= writer_
.writeInt64(val
);
604 err_
= FbsonErrType::E_OUTPUT_FAIL
;
611 // parse IEEE 754 double precision:
612 // Significand precision length - 15
613 // Maximum exponent value - 308
615 // "If a decimal string with at most 15 significant digits is converted to
616 // IEEE 754 double precision representation and then converted back to a
617 // string with the same number of significant digits, then the final string
618 // should match the original"
619 bool parseDouble(std::istream
& in
, double val
, int precision
, int sign
) {
620 int integ
= precision
;
622 bool is_frac
= false;
632 while (in
.good() && !strchr(kJsonDelim
, ch
)) {
633 if (ch
>= '0' && ch
<= '9') {
634 if (precision
< 15) {
635 val
= val
* 10 + (ch
- '0');
642 } else if (!is_frac
) {
645 } else if (ch
== 'e' || ch
== 'E') {
648 if (!parseExponent(in
, exp2
)) {
653 // check if exponent overflows
654 if (exp
> 308 || exp
< -308) {
655 err_
= FbsonErrType::E_EXPONENT_OVERFLOW
;
668 err_
= FbsonErrType::E_DECIMAL_OVERFLOW
;
672 val
*= std::pow(10, exp
- frac
);
673 if (std::isnan(val
) || std::isinf(val
)) {
674 err_
= FbsonErrType::E_DOUBLE_OVERFLOW
;
682 if (writer_
.writeDouble(val
) == 0) {
683 err_
= FbsonErrType::E_OUTPUT_FAIL
;
690 // parse the exponent part of a double number
691 bool parseExponent(std::istream
& in
, int& exp
) {
698 } else if (ch
== '-') {
705 while (in
.good() && !strchr(kJsonDelim
, ch
)) {
706 if (ch
>= '0' && ch
<= '9') {
707 exp
= exp
* 10 + (ch
- '0');
709 err_
= FbsonErrType::E_INVALID_EXPONENT
;
714 err_
= FbsonErrType::E_EXPONENT_OVERFLOW
;
729 void trim(std::istream
& in
) {
730 while (in
.good() && strchr(kWhiteSpace
, in
.peek())) {
// Writer through which every parse* routine emits its output (writeKey,
// writeBool, writeInt8/16/32/64, writeDouble, writeString, ...).
736 FbsonWriterT
<OS_TYPE
> writer_
;
// Convenience alias: the JSON parser template instantiated with
// FbsonOutStream as its output stream type.
740 typedef FbsonJsonParserT
<FbsonOutStream
> FbsonJsonParser
;