]>
git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/third-party/fbson/FbsonJsonParser.h
2 * Copyright (c) 2011-present, Facebook, Inc.
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
12 * This file defines FbsonJsonParserT (template) and FbsonJsonParser.
14 * FbsonJsonParserT is a template class which implements a JSON parser.
15 * FbsonJsonParserT parses JSON text, and serializes it to FBSON binary format
16 * by using FbsonWriterT object. By default, FbsonJsonParserT creates a new
17 * FbsonWriterT object with an output stream object. However, you can also
18 * pass in your FbsonWriterT or any stream object that implements some basic
19 * interface of std::ostream (see FbsonStream.h).
21 * FbsonJsonParser specializes FbsonJsonParserT with FbsonOutStream type (see
22 * FbsonStream.h). So unless you want to provide your own, different output stream
23 * type, use FbsonJsonParser object.
26 * FbsonJsonParserT parses JSON string, and directly serializes into FBSON
27 * packed bytes. There are three ways to parse a JSON string: (1) using
28 * c-string, (2) using string with len, (3) using std::istream object. You can
29 * use a custom streambuf to redirect output. FbsonInBuffer is a streambuf used
30 * internally if the input is a raw character buffer.
32 * You can reuse an FbsonJsonParserT object to parse/serialize multiple JSON
33 * strings, and the previous FBSON will be overwritten.
35 * If parsing fails (returned false), the error code will be set to one of
36 * FbsonErrType, and can be retrieved by calling getErrorCode().
38 * ** External dictionary **
39 * While parsing a JSON string, you can pass a callback function to map a key
40 * string to an id, and store the dictionary id in FBSON to save space. The
41 * purpose of using an external dictionary is more towards a collection of
42 * documents (which has common keys) rather than a single document, so that
43 * space saving will be significant.
46 * Note: FBSON serialization doesn't assume endianness of the server. However
47 * you will need to ensure that the endianness at the reader side is the same
48 * as that at the writer side (if they are on different machines). Otherwise,
49 * proper conversion is needed when a number value is returned to the
52 * @author Tian Xia <tianx@fb.com>
55 #ifndef FBSON_FBSONPARSER_H
56 #define FBSON_FBSONPARSER_H
60 #include "FbsonDocument.h"
61 #include "FbsonWriter.h"
65 const char* const kJsonDelim
= " ,]}\t\r\n";
66 const char* const kWhiteSpace
= " \t\n\r";
71 enum class FbsonErrType
{
94 * Template FbsonJsonParserT
96 template <class OS_TYPE
>
97 class FbsonJsonParserT
{
99 FbsonJsonParserT() : err_(FbsonErrType::E_NONE
) {}
101 explicit FbsonJsonParserT(OS_TYPE
& os
)
102 : writer_(os
), err_(FbsonErrType::E_NONE
) {}
104 // parse a UTF-8 JSON string
105 bool parse(const std::string
& str
, hDictInsert handler
= nullptr) {
106 return parse(str
.c_str(), (unsigned int)str
.size(), handler
);
109 // parse a UTF-8 JSON c-style string (NULL terminated)
110 bool parse(const char* c_str
, hDictInsert handler
= nullptr) {
111 return parse(c_str
, (unsigned int)strlen(c_str
), handler
);
114 // parse a UTF-8 JSON string with length
115 bool parse(const char* pch
, unsigned int len
, hDictInsert handler
= nullptr) {
116 if (!pch
|| len
== 0) {
117 err_
= FbsonErrType::E_EMPTY_STR
;
121 FbsonInBuffer
sb(pch
, len
);
122 std::istream
in(&sb
);
123 return parse(in
, handler
);
126 // parse UTF-8 JSON text from an input stream
127 bool parse(std::istream
& in
, hDictInsert handler
= nullptr) {
130 // reset output stream
135 if (in
.peek() == '{') {
137 res
= parseObject(in
, handler
);
138 } else if (in
.peek() == '[') {
140 res
= parseArray(in
, handler
);
142 err_
= FbsonErrType::E_INVALID_DOCU
;
146 if (res
&& !in
.eof()) {
147 err_
= FbsonErrType::E_INVALID_DOCU
;
154 FbsonWriterT
<OS_TYPE
>& getWriter() { return writer_
; }
156 FbsonErrType
getErrorCode() { return err_
; }
159 void clearErr() { err_
= FbsonErrType::E_NONE
; }
162 // parse a JSON object (comma-separated list of key-value pairs)
163 bool parseObject(std::istream
& in
, hDictInsert handler
) {
164 if (!writer_
.writeStartObject()) {
165 err_
= FbsonErrType::E_OUTPUT_FAIL
;
171 if (in
.peek() == '}') {
174 if (!writer_
.writeEndObject()) {
175 err_
= FbsonErrType::E_OUTPUT_FAIL
;
182 if (in
.get() != '"') {
183 err_
= FbsonErrType::E_INVALID_KEY
;
187 if (!parseKVPair(in
, handler
)) {
196 if (!writer_
.writeEndObject()) {
197 err_
= FbsonErrType::E_OUTPUT_FAIL
;
201 } else if (ch
!= ',') {
202 err_
= FbsonErrType::E_INVALID_OBJ
;
209 err_
= FbsonErrType::E_INVALID_OBJ
;
213 // parse a JSON array (comma-separated list of values)
214 bool parseArray(std::istream
& in
, hDictInsert handler
) {
215 if (!writer_
.writeStartArray()) {
216 err_
= FbsonErrType::E_OUTPUT_FAIL
;
222 if (in
.peek() == ']') {
225 if (!writer_
.writeEndArray()) {
226 err_
= FbsonErrType::E_OUTPUT_FAIL
;
233 if (!parseValue(in
, handler
)) {
242 if (!writer_
.writeEndArray()) {
243 err_
= FbsonErrType::E_OUTPUT_FAIL
;
247 } else if (ch
!= ',') {
248 err_
= FbsonErrType::E_INVALID_ARR
;
255 err_
= FbsonErrType::E_INVALID_ARR
;
259 // parse a key-value pair, separated by ":"
260 bool parseKVPair(std::istream
& in
, hDictInsert handler
) {
261 if (parseKey(in
, handler
) && parseValue(in
, handler
)) {
268 // parse a key (must be string)
269 bool parseKey(std::istream
& in
, hDictInsert handler
) {
270 char key
[FbsonKeyValue::sMaxKeyLen
];
272 while (in
.good() && in
.peek() != '"' && i
< FbsonKeyValue::sMaxKeyLen
) {
276 if (!in
.good() || in
.peek() != '"' || i
== 0) {
277 err_
= FbsonErrType::E_INVALID_KEY
;
281 in
.ignore(); // discard '"'
285 key_id
= handler(key
, i
);
289 writer_
.writeKey(key
, i
);
291 writer_
.writeKey(key_id
);
296 if (in
.get() != ':') {
297 err_
= FbsonErrType::E_INVALID_OBJ
;
305 bool parseValue(std::istream
& in
, hDictInsert handler
) {
326 res
= parseFalse(in
);
331 res
= parseString(in
);
336 res
= parseObject(in
, handler
);
341 res
= parseArray(in
, handler
);
345 res
= parseNumber(in
);
354 bool parseNull(std::istream
& in
) {
355 if (tolower(in
.get()) == 'u' && tolower(in
.get()) == 'l' &&
356 tolower(in
.get()) == 'l') {
361 err_
= FbsonErrType::E_INVALID_VALUE
;
366 bool parseTrue(std::istream
& in
) {
367 if (tolower(in
.get()) == 'r' && tolower(in
.get()) == 'u' &&
368 tolower(in
.get()) == 'e') {
369 writer_
.writeBool(true);
373 err_
= FbsonErrType::E_INVALID_VALUE
;
378 bool parseFalse(std::istream
& in
) {
379 if (tolower(in
.get()) == 'a' && tolower(in
.get()) == 'l' &&
380 tolower(in
.get()) == 's' && tolower(in
.get()) == 'e') {
381 writer_
.writeBool(false);
385 err_
= FbsonErrType::E_INVALID_VALUE
;
390 bool parseString(std::istream
& in
) {
391 if (!writer_
.writeStartString()) {
392 err_
= FbsonErrType::E_OUTPUT_FAIL
;
396 bool escaped
= false;
397 char buffer
[4096]; // write 4KB at a time
401 if (ch
!= '"' || escaped
) {
402 buffer
[nread
++] = ch
;
405 if (!writer_
.writeString(buffer
, nread
)) {
406 err_
= FbsonErrType::E_OUTPUT_FAIL
;
412 if (ch
== '\\' || escaped
) {
416 // write all remaining bytes in the buffer
418 if (!writer_
.writeString(buffer
, nread
)) {
419 err_
= FbsonErrType::E_OUTPUT_FAIL
;
423 // end writing string
424 if (!writer_
.writeEndString()) {
425 err_
= FbsonErrType::E_OUTPUT_FAIL
;
432 err_
= FbsonErrType::E_INVALID_STR
;
437 // Number format can be hex, octal, or decimal (including float).
438 // Only decimal can have (+/-) sign prefix.
439 bool parseNumber(std::istream
& in
) {
445 if (in
.peek() == 'x' || in
.peek() == 'X') {
448 } else if (in
.peek() == '.') {
450 ret
= parseDouble(in
, 0, 0, 1);
452 ret
= parseOctal(in
);
459 ret
= parseDecimal(in
, -1);
466 ret
= parseDecimal(in
, 1);
473 // parse a number in hex format
474 bool parseHex(std::istream
& in
) {
477 char ch
= tolower(in
.peek());
478 while (in
.good() && !strchr(kJsonDelim
, ch
) && (++num_digits
) <= 16) {
479 if (ch
>= '0' && ch
<= '9') {
480 val
= (val
<< 4) + (ch
- '0');
481 } else if (ch
>= 'a' && ch
<= 'f') {
482 val
= (val
<< 4) + (ch
- 'a' + 10);
483 } else { // unrecognized hex digit
484 err_
= FbsonErrType::E_INVALID_HEX
;
489 ch
= tolower(in
.peek());
493 if (num_digits
<= 2) {
494 size
= writer_
.writeInt8((int8_t)val
);
495 } else if (num_digits
<= 4) {
496 size
= writer_
.writeInt16((int16_t)val
);
497 } else if (num_digits
<= 8) {
498 size
= writer_
.writeInt32((int32_t)val
);
499 } else if (num_digits
<= 16) {
500 size
= writer_
.writeInt64(val
);
502 err_
= FbsonErrType::E_HEX_OVERFLOW
;
507 err_
= FbsonErrType::E_OUTPUT_FAIL
;
514 // parse a number in octal format
515 bool parseOctal(std::istream
& in
) {
518 while (in
.good() && !strchr(kJsonDelim
, ch
)) {
519 if (ch
>= '0' && ch
<= '7') {
520 val
= val
* 8 + (ch
- '0');
522 err_
= FbsonErrType::E_INVALID_OCTAL
;
526 // check if the number overflows
528 err_
= FbsonErrType::E_OCTAL_OVERFLOW
;
537 if (val
<= std::numeric_limits
<int8_t>::max()) {
538 size
= writer_
.writeInt8((int8_t)val
);
539 } else if (val
<= std::numeric_limits
<int16_t>::max()) {
540 size
= writer_
.writeInt16((int16_t)val
);
541 } else if (val
<= std::numeric_limits
<int32_t>::max()) {
542 size
= writer_
.writeInt32((int32_t)val
);
543 } else { // val <= INT64_MAX
544 size
= writer_
.writeInt64(val
);
548 err_
= FbsonErrType::E_OUTPUT_FAIL
;
555 // parse a number in decimal (including float)
556 bool parseDecimal(std::istream
& in
, int sign
) {
561 while (in
.good() && (ch
= in
.peek()) == '0')
564 while (in
.good() && !strchr(kJsonDelim
, ch
)) {
565 if (ch
>= '0' && ch
<= '9') {
566 val
= val
* 10 + (ch
- '0');
568 } else if (ch
== '.') {
569 // note we don't pop out '.'
570 return parseDouble(in
, static_cast<double>(val
), precision
, sign
);
572 err_
= FbsonErrType::E_INVALID_DECIMAL
;
578 // if the number overflows int64_t, first parse it as double iff we see a
579 // decimal point later. Otherwise, will treat it as overflow
580 if (val
< 0 && val
> std::numeric_limits
<int64_t>::min()) {
581 return parseDouble(in
, static_cast<double>(val
), precision
, sign
);
592 if (val
>= std::numeric_limits
<int8_t>::min() &&
593 val
<= std::numeric_limits
<int8_t>::max()) {
594 size
= writer_
.writeInt8((int8_t)val
);
595 } else if (val
>= std::numeric_limits
<int16_t>::min() &&
596 val
<= std::numeric_limits
<int16_t>::max()) {
597 size
= writer_
.writeInt16((int16_t)val
);
598 } else if (val
>= std::numeric_limits
<int32_t>::min() &&
599 val
<= std::numeric_limits
<int32_t>::max()) {
600 size
= writer_
.writeInt32((int32_t)val
);
601 } else { // val <= INT64_MAX
602 size
= writer_
.writeInt64(val
);
606 err_
= FbsonErrType::E_OUTPUT_FAIL
;
613 // parse IEEE 754 double precision:
614 // Significand precision length - 15
615 // Maximum exponent value - 308
617 // "If a decimal string with at most 15 significant digits is converted to
618 // IEEE 754 double precision representation and then converted back to a
619 // string with the same number of significant digits, then the final string
620 // should match the original"
621 bool parseDouble(std::istream
& in
, double val
, int precision
, int sign
) {
622 int integ
= precision
;
624 bool is_frac
= false;
634 while (in
.good() && !strchr(kJsonDelim
, ch
)) {
635 if (ch
>= '0' && ch
<= '9') {
636 if (precision
< 15) {
637 val
= val
* 10 + (ch
- '0');
644 } else if (!is_frac
) {
647 } else if (ch
== 'e' || ch
== 'E') {
650 if (!parseExponent(in
, exp2
)) {
655 // check if exponent overflows
656 if (exp
> 308 || exp
< -308) {
657 err_
= FbsonErrType::E_EXPONENT_OVERFLOW
;
670 err_
= FbsonErrType::E_DECIMAL_OVERFLOW
;
674 val
*= std::pow(10, exp
- frac
);
675 if (std::isnan(val
) || std::isinf(val
)) {
676 err_
= FbsonErrType::E_DOUBLE_OVERFLOW
;
684 if (writer_
.writeDouble(val
) == 0) {
685 err_
= FbsonErrType::E_OUTPUT_FAIL
;
692 // parse the exponent part of a double number
693 bool parseExponent(std::istream
& in
, int& exp
) {
700 } else if (ch
== '-') {
707 while (in
.good() && !strchr(kJsonDelim
, ch
)) {
708 if (ch
>= '0' && ch
<= '9') {
709 exp
= exp
* 10 + (ch
- '0');
711 err_
= FbsonErrType::E_INVALID_EXPONENT
;
716 err_
= FbsonErrType::E_EXPONENT_OVERFLOW
;
731 void trim(std::istream
& in
) {
732 while (in
.good() && strchr(kWhiteSpace
, in
.peek())) {
738 FbsonWriterT
<OS_TYPE
> writer_
;
742 typedef FbsonJsonParserT
<FbsonOutStream
> FbsonJsonParser
;
746 #endif // FBSON_FBSONPARSER_H