]>
git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/third-party/fbson/FbsonDocument.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
7 * This header defines FbsonDocument, FbsonKeyValue, and various value classes
8 * which are derived from FbsonValue, and a forward iterator for container
9 * values - essentially everything that is related to FBSON binary data
12 * Implementation notes:
14 * None of the classes in this header file can be instantiated directly (i.e.
15 * you cannot create a FbsonKeyValue or FbsonValue object - all constructors
16 * are declared non-public). We use the classes as wrappers on the packed FBSON
17 * bytes (serialized), and cast the classes (types) to the underlying packed
20 * For the same reason, we cannot define any FBSON value class to be virtual,
21 * since we never call constructors, and will not instantiate vtbl and vptrs.
23 * Therefore, the classes are defined as packed structures (i.e. no data
24 * alignment and padding), and the private member variables of the classes are
25 * defined precisely in the same order as the FBSON spec. This ensures we
26 * access the packed FBSON bytes correctly.
28 * The packed structures are highly optimized for in-place operations with low
29 * overhead. The reads (and in-place writes) are performed directly on packed
30 * bytes. There is no memory allocation at all at runtime.
32 * For updates/writes of values that will expand the original FBSON size, the
33 * write will fail, and the caller needs to handle buffer increase.
36 * Both ObjectVal class and ArrayVal class have iterator type that you can use
37 * to declare an iterator on a container object to go through the key-value
38 * pairs or value list. The iterator has both non-const and const types.
40 * Note: iterators are forward direction only.
43 * Querying into containers is through the member functions find (for key/value
44 * pairs) and get (for array elements), and is in streaming style. We don't
45 * need to read/scan the whole FBSON packed bytes in order to return results.
46 * Once the key/index is found, we will stop search. You can use text to query
47 * both objects and array (for array, text will be converted to integer index),
48 * and use index to retrieve from array. Array index is 0-based.
50 * ** External dictionary **
51 * During query processing, you can also pass a callback function, so the
52 * search will first try to check if the key string exists in the dictionary.
53 * If so, search will be based on the id instead of the key string.
55 * @author Tian Xia <tianx@fb.com>
70 // forward declaration
75 * FbsonDocument is the main object that accesses and queries FBSON packed
76 * bytes. NOTE: FbsonDocument only allows object container as the top level
77 * FBSON value. However, you can use the static method "createValue" to get any
78 * FbsonValue object from the packed bytes.
80 * FbsonDocument object also dereferences to an object container value
81 * (ObjectVal) once FBSON is loaded.
84 * FbsonDocument is usable after loading packed bytes (memory location) into
85 * the object. We only need the header and first few bytes of the payload after
86 * header to verify the FBSON.
88 * Note: creating an FbsonDocument (through createDocument) does not allocate
89 * any memory. The document object is an efficient wrapper on the packed bytes
90 * which is accessed directly.
93 * Query is through dereferencing into ObjectVal.
97 // create an FbsonDocument object from FBSON packed bytes
98 static FbsonDocument
* createDocument(const char* pb
, uint32_t size
);
100 // create an FbsonValue from FBSON packed bytes
101 static FbsonValue
* createValue(const char* pb
, uint32_t size
);
103 uint8_t version() { return header_
.ver_
; }
105 FbsonValue
* getValue() { return ((FbsonValue
*)payload_
); }
107 ObjectVal
* operator->() { return ((ObjectVal
*)payload_
); }
109 const ObjectVal
* operator->() const { return ((const ObjectVal
*)payload_
); }
113 * FbsonHeader class defines FBSON header (internal to FbsonDocument).
115 * Currently it only contains version information (1-byte). We may expand the
116 * header to include checksum of the FBSON binary for more security.
126 FbsonDocument(const FbsonDocument
&) = delete;
127 FbsonDocument
& operator=(const FbsonDocument
&) = delete;
131 * FbsonFwdIteratorT implements FBSON's iterator template.
133 * Note: it is a FORWARD-only iterator due to the design of the FBSON format.
135 template <class Iter_Type
, class Cont_Type
>
136 class FbsonFwdIteratorT
{
137 typedef Iter_Type iterator
;
138 typedef typename
std::iterator_traits
<Iter_Type
>::pointer pointer
;
139 typedef typename
std::iterator_traits
<Iter_Type
>::reference reference
;
142 explicit FbsonFwdIteratorT(const iterator
& i
) : current_(i
) {}
144 // allow non-const to const iterator conversion (same container type)
145 template <class Iter_Ty
>
146 FbsonFwdIteratorT(const FbsonFwdIteratorT
<Iter_Ty
, Cont_Type
>& rhs
)
147 : current_(rhs
.base()) {}
149 bool operator==(const FbsonFwdIteratorT
& rhs
) const {
150 return (current_
== rhs
.current_
);
153 bool operator!=(const FbsonFwdIteratorT
& rhs
) const {
154 return !operator==(rhs
);
157 bool operator<(const FbsonFwdIteratorT
& rhs
) const {
158 return (current_
< rhs
.current_
);
161 bool operator>(const FbsonFwdIteratorT
& rhs
) const { return !operator<(rhs
); }
163 FbsonFwdIteratorT
& operator++() {
164 current_
= (iterator
)(((char*)current_
) + current_
->numPackedBytes());
168 FbsonFwdIteratorT
operator++(int) {
170 current_
= (iterator
)(((char*)current_
) + current_
->numPackedBytes());
174 explicit operator pointer() { return current_
; }
176 reference
operator*() const { return *current_
; }
178 pointer
operator->() const { return current_
; }
180 iterator
base() const { return current_
; }
186 typedef int (*hDictInsert
)(const char* key
, unsigned len
);
187 typedef int (*hDictFind
)(const char* key
, unsigned len
);
190 * FbsonType defines 10 primitive types and 2 container types, as described
193 * primitive_value ::=
194 * 0x00 //null value (0 byte)
195 * | 0x01 //boolean true (0 byte)
196 * | 0x02 //boolean false (0 byte)
197 * | 0x03 int8 //char/int8 (1 byte)
198 * | 0x04 int16 //int16 (2 bytes)
199 * | 0x05 int32 //int32 (4 bytes)
200 * | 0x06 int64 //int64 (8 bytes)
201 * | 0x07 double //floating point (8 bytes)
202 * | 0x08 string //variable length string
203 * | 0x09 binary //variable length binary
206 * 0x0A int32 key_value_list //object, int32 is the total bytes of the object
207 * | 0x0B int32 value_list //array, int32 is the total bytes of the array
209 enum class FbsonType
: char {
225 typedef std::underlying_type
<FbsonType
>::type FbsonTypeUnder
;
228 * FbsonKeyValue class defines FBSON key type, as described below.
231 * 0x00 int8 //1-byte dictionary id
232 * | int8 (byte*) //int8 (>0) is the size of the key string
234 * value ::= primitive_value | container
236 * FbsonKeyValue can be either an id mapping to the key string in an external
237 * dictionary, or it is the original key string. Whether to read an id or a
238 * string is decided by the first byte (size_).
240 * Note: a key object must be followed by a value object. Therefore, a key
241 * object implicitly refers to a key-value pair, and you can get the value
242 * object right after the key object. The function numPackedBytes hence
243 * indicates the total size of the key-value pair, so that we are able to go
244 * to the next pair from the key.
246 * ** Dictionary size **
247 * By default, the dictionary size is 255 (1-byte). Users can define
248 * "USE_LARGE_DICT" to increase the dictionary size to 65535 (2-byte).
250 class FbsonKeyValue
{
252 #ifdef USE_LARGE_DICT
253 static const int sMaxKeyId
= 65535;
254 typedef uint16_t keyid_type
;
256 static const int sMaxKeyId
= 255;
257 typedef uint8_t keyid_type
;
258 #endif // #ifdef USE_LARGE_DICT
260 static const uint8_t sMaxKeyLen
= 64;
262 // size of the key. 0 indicates it is stored as id
263 uint8_t klen() const { return size_
; }
265 // get the key string. Note the string may not be null terminated.
266 const char* getKeyStr() const { return key_
.str_
; }
268 keyid_type
getKeyId() const { return key_
.id_
; }
270 unsigned int keyPackedBytes() const {
271 return size_
? (sizeof(size_
) + size_
)
272 : (sizeof(size_
) + sizeof(keyid_type
));
275 FbsonValue
* value() const {
276 return (FbsonValue
*)(((char*)this) + keyPackedBytes());
279 // size of the total packed bytes (key+value)
280 unsigned int numPackedBytes() const;
294 * FbsonValue is the base class of all FBSON types. It contains only one member
295 * variable - type info, which can be retrieved by member functions is[Type]()
300 static const uint32_t sMaxValueLen
= 1 << 24; // 16M
302 bool isNull() const { return (type_
== FbsonType::T_Null
); }
303 bool isTrue() const { return (type_
== FbsonType::T_True
); }
304 bool isFalse() const { return (type_
== FbsonType::T_False
); }
305 bool isInt8() const { return (type_
== FbsonType::T_Int8
); }
306 bool isInt16() const { return (type_
== FbsonType::T_Int16
); }
307 bool isInt32() const { return (type_
== FbsonType::T_Int32
); }
308 bool isInt64() const { return (type_
== FbsonType::T_Int64
); }
309 bool isDouble() const { return (type_
== FbsonType::T_Double
); }
310 bool isString() const { return (type_
== FbsonType::T_String
); }
311 bool isBinary() const { return (type_
== FbsonType::T_Binary
); }
312 bool isObject() const { return (type_
== FbsonType::T_Object
); }
313 bool isArray() const { return (type_
== FbsonType::T_Array
); }
315 FbsonType
type() const { return type_
; }
317 // size of the total packed bytes
318 unsigned int numPackedBytes() const;
320 // size of the value in bytes
321 unsigned int size() const;
323 // get the raw byte array of the value
324 const char* getValuePtr() const;
326 // find the FBSON value by a key path string (null terminated)
327 FbsonValue
* findPath(const char* key_path
,
328 const char* delim
= ".",
329 hDictFind handler
= nullptr) {
330 return findPath(key_path
, (unsigned int)strlen(key_path
), delim
, handler
);
333 // find the FBSON value by a key path string (with length)
334 FbsonValue
* findPath(const char* key_path
,
340 FbsonType type_
; // type info
346 * NumberValT is the template class (derived from FbsonValue) of all number
347 * types (integers and double).
350 class NumberValT
: public FbsonValue
{
352 T
val() const { return num_
; }
354 unsigned int numPackedBytes() const { return sizeof(FbsonValue
) + sizeof(T
); }
356 // catch all unknown specializations of the template class
357 bool setVal(T
/*value*/) { return false; }
365 typedef NumberValT
<int8_t> Int8Val
;
367 // override setVal for Int8Val
369 inline bool Int8Val::setVal(int8_t value
) {
378 typedef NumberValT
<int16_t> Int16Val
;
380 // override setVal for Int16Val
382 inline bool Int16Val::setVal(int16_t value
) {
391 typedef NumberValT
<int32_t> Int32Val
;
393 // override setVal for Int32Val
395 inline bool Int32Val::setVal(int32_t value
) {
404 typedef NumberValT
<int64_t> Int64Val
;
406 // override setVal for Int64Val
408 inline bool Int64Val::setVal(int64_t value
) {
417 typedef NumberValT
<double> DoubleVal
;
419 // override setVal for DoubleVal
421 inline bool DoubleVal::setVal(double value
) {
431 * BlobVal is the base class (derived from FbsonValue) for string and binary
432 * types. The size_ indicates the total bytes of the payload_.
434 class BlobVal
: public FbsonValue
{
436 // size of the blob payload only
437 unsigned int getBlobLen() const { return size_
; }
439 // return the blob as byte array
440 const char* getBlob() const { return payload_
; }
442 // size of the total packed bytes
443 unsigned int numPackedBytes() const {
444 return sizeof(FbsonValue
) + sizeof(size_
) + size_
;
451 // set new blob bytes
452 bool internalSetVal(const char* blob
, uint32_t blobSize
) {
453 // if we cannot fit the new blob, fail the operation
454 if (blobSize
> size_
) {
458 memcpy(payload_
, blob
, blobSize
);
460 // Set the rest of the bytes to 0. Note we cannot change the size_ of the
461 // current payload, as all values are packed.
462 memset(payload_
+ blobSize
, 0, size_
- blobSize
);
470 // Disable as this class can only be allocated dynamically
471 BlobVal(const BlobVal
&) = delete;
472 BlobVal
& operator=(const BlobVal
&) = delete;
478 class BinaryVal
: public BlobVal
{
480 bool setVal(const char* blob
, uint32_t blobSize
) {
485 return internalSetVal(blob
, blobSize
);
494 * Note: FBSON string may not be a c-string (NULL-terminated)
496 class StringVal
: public BlobVal
{
498 bool setVal(const char* str
, uint32_t blobSize
) {
503 return internalSetVal(str
, blobSize
);
511 * ContainerVal is the base class (derived from FbsonValue) for object and
512 * array types. The size_ indicates the total bytes of the payload_.
514 class ContainerVal
: public FbsonValue
{
516 // size of the container payload only
517 unsigned int getContainerSize() const { return size_
; }
519 // return the container payload as byte array
520 const char* getPayload() const { return payload_
; }
522 // size of the total packed bytes
523 unsigned int numPackedBytes() const {
524 return sizeof(FbsonValue
) + sizeof(size_
) + size_
;
533 ContainerVal(const ContainerVal
&) = delete;
534 ContainerVal
& operator=(const ContainerVal
&) = delete;
540 class ObjectVal
: public ContainerVal
{
542 // find the FBSON value by a key string (null terminated)
543 FbsonValue
* find(const char* key
, hDictFind handler
= nullptr) const {
547 return find(key
, (unsigned int)strlen(key
), handler
);
550 // find the FBSON value by a key string (with length)
551 FbsonValue
* find(const char* key
,
553 hDictFind handler
= nullptr) const {
558 if (handler
&& (key_id
= handler(key
, klen
)) >= 0) {
562 return internalFind(key
, klen
);
565 // find the FBSON value by a key dictionary ID
566 FbsonValue
* find(int key_id
) const {
567 if (key_id
< 0 || key_id
> FbsonKeyValue::sMaxKeyId
)
570 const char* pch
= payload_
;
571 const char* fence
= payload_
+ size_
;
573 while (pch
< fence
) {
574 FbsonKeyValue
* pkey
= (FbsonKeyValue
*)(pch
);
575 if (!pkey
->klen() && key_id
== pkey
->getKeyId()) {
576 return pkey
->value();
578 pch
+= pkey
->numPackedBytes();
581 assert(pch
== fence
);
586 typedef FbsonKeyValue value_type
;
587 typedef value_type
* pointer
;
588 typedef const value_type
* const_pointer
;
589 typedef FbsonFwdIteratorT
<pointer
, ObjectVal
> iterator
;
590 typedef FbsonFwdIteratorT
<const_pointer
, ObjectVal
> const_iterator
;
592 iterator
begin() { return iterator((pointer
)payload_
); }
594 const_iterator
begin() const { return const_iterator((pointer
)payload_
); }
596 iterator
end() { return iterator((pointer
)(payload_
+ size_
)); }
598 const_iterator
end() const {
599 return const_iterator((pointer
)(payload_
+ size_
));
603 FbsonValue
* internalFind(const char* key
, unsigned int klen
) const {
604 const char* pch
= payload_
;
605 const char* fence
= payload_
+ size_
;
607 while (pch
< fence
) {
608 FbsonKeyValue
* pkey
= (FbsonKeyValue
*)(pch
);
609 if (klen
== pkey
->klen() && strncmp(key
, pkey
->getKeyStr(), klen
) == 0) {
610 return pkey
->value();
612 pch
+= pkey
->numPackedBytes();
615 assert(pch
== fence
);
627 class ArrayVal
: public ContainerVal
{
629 // get the FBSON value at index
630 FbsonValue
* get(int idx
) const {
634 const char* pch
= payload_
;
635 const char* fence
= payload_
+ size_
;
637 while (pch
< fence
&& idx
-- > 0)
638 pch
+= ((FbsonValue
*)pch
)->numPackedBytes();
641 return (FbsonValue
*)pch
;
643 assert(pch
== fence
);
648 // Get number of elements in array
649 unsigned int numElem() const {
650 const char* pch
= payload_
;
651 const char* fence
= payload_
+ size_
;
653 unsigned int num
= 0;
654 while (pch
< fence
) {
656 pch
+= ((FbsonValue
*)pch
)->numPackedBytes();
659 assert(pch
== fence
);
664 typedef FbsonValue value_type
;
665 typedef value_type
* pointer
;
666 typedef const value_type
* const_pointer
;
667 typedef FbsonFwdIteratorT
<pointer
, ArrayVal
> iterator
;
668 typedef FbsonFwdIteratorT
<const_pointer
, ArrayVal
> const_iterator
;
670 iterator
begin() { return iterator((pointer
)payload_
); }
672 const_iterator
begin() const { return const_iterator((pointer
)payload_
); }
674 iterator
end() { return iterator((pointer
)(payload_
+ size_
)); }
676 const_iterator
end() const {
677 return const_iterator((pointer
)(payload_
+ size_
));
684 inline FbsonDocument
* FbsonDocument::createDocument(const char* pb
,
686 if (!pb
|| size
< sizeof(FbsonHeader
) + sizeof(FbsonValue
)) {
690 FbsonDocument
* doc
= (FbsonDocument
*)pb
;
691 if (doc
->header_
.ver_
!= FBSON_VER
) {
695 FbsonValue
* val
= (FbsonValue
*)doc
->payload_
;
696 if (!val
->isObject() || size
!= sizeof(FbsonHeader
) + val
->numPackedBytes()) {
703 inline FbsonValue
* FbsonDocument::createValue(const char* pb
, uint32_t size
) {
704 if (!pb
|| size
< sizeof(FbsonHeader
) + sizeof(FbsonValue
)) {
708 FbsonDocument
* doc
= (FbsonDocument
*)pb
;
709 if (doc
->header_
.ver_
!= FBSON_VER
) {
713 FbsonValue
* val
= (FbsonValue
*)doc
->payload_
;
714 if (size
!= sizeof(FbsonHeader
) + val
->numPackedBytes()) {
721 inline unsigned int FbsonKeyValue::numPackedBytes() const {
722 unsigned int ks
= keyPackedBytes();
723 FbsonValue
* val
= (FbsonValue
*)(((char*)this) + ks
);
724 return ks
+ val
->numPackedBytes();
727 // Poor man's "virtual" function FbsonValue::numPackedBytes
728 inline unsigned int FbsonValue::numPackedBytes() const {
730 case FbsonType::T_Null
:
731 case FbsonType::T_True
:
732 case FbsonType::T_False
: {
733 return sizeof(type_
);
736 case FbsonType::T_Int8
: {
737 return sizeof(type_
) + sizeof(int8_t);
739 case FbsonType::T_Int16
: {
740 return sizeof(type_
) + sizeof(int16_t);
742 case FbsonType::T_Int32
: {
743 return sizeof(type_
) + sizeof(int32_t);
745 case FbsonType::T_Int64
: {
746 return sizeof(type_
) + sizeof(int64_t);
748 case FbsonType::T_Double
: {
749 return sizeof(type_
) + sizeof(double);
751 case FbsonType::T_String
:
752 case FbsonType::T_Binary
: {
753 return ((BlobVal
*)(this))->numPackedBytes();
756 case FbsonType::T_Object
:
757 case FbsonType::T_Array
: {
758 return ((ContainerVal
*)(this))->numPackedBytes();
765 inline unsigned int FbsonValue::size() const {
767 case FbsonType::T_Int8
: {
768 return sizeof(int8_t);
770 case FbsonType::T_Int16
: {
771 return sizeof(int16_t);
773 case FbsonType::T_Int32
: {
774 return sizeof(int32_t);
776 case FbsonType::T_Int64
: {
777 return sizeof(int64_t);
779 case FbsonType::T_Double
: {
780 return sizeof(double);
782 case FbsonType::T_String
:
783 case FbsonType::T_Binary
: {
784 return ((BlobVal
*)(this))->getBlobLen();
787 case FbsonType::T_Object
:
788 case FbsonType::T_Array
: {
789 return ((ContainerVal
*)(this))->getContainerSize();
791 case FbsonType::T_Null
:
792 case FbsonType::T_True
:
793 case FbsonType::T_False
:
799 inline const char* FbsonValue::getValuePtr() const {
801 case FbsonType::T_Int8
:
802 case FbsonType::T_Int16
:
803 case FbsonType::T_Int32
:
804 case FbsonType::T_Int64
:
805 case FbsonType::T_Double
:
806 return ((char*)this) + sizeof(FbsonType
);
808 case FbsonType::T_String
:
809 case FbsonType::T_Binary
:
810 return ((BlobVal
*)(this))->getBlob();
812 case FbsonType::T_Object
:
813 case FbsonType::T_Array
:
814 return ((ContainerVal
*)(this))->getPayload();
816 case FbsonType::T_Null
:
817 case FbsonType::T_True
:
818 case FbsonType::T_False
:
824 inline FbsonValue
* FbsonValue::findPath(const char* key_path
,
826 const char* delim
= ".",
827 hDictFind handler
= nullptr) {
828 if (!key_path
|| !kp_len
)
832 delim
= "."; // default delimiter
834 FbsonValue
* pval
= this;
835 const char* fence
= key_path
+ kp_len
;
836 char idx_buf
[21]; // buffer to parse array index (integer value)
838 while (pval
&& key_path
< fence
) {
839 const char* key
= key_path
;
840 unsigned int klen
= 0;
841 // find the current key
842 for (; key_path
!= fence
&& *key_path
!= *delim
; ++key_path
, ++klen
)
848 switch (pval
->type_
) {
849 case FbsonType::T_Object
: {
850 pval
= ((ObjectVal
*)pval
)->find(key
, klen
, handler
);
854 case FbsonType::T_Array
: {
855 // parse string into an integer (array index)
856 if (klen
>= sizeof(idx_buf
))
859 memcpy(idx_buf
, key
, klen
);
863 int index
= (int)strtol(idx_buf
, &end
, 10);
865 pval
= ((fbson::ArrayVal
*)pval
)->get(index
);
867 // incorrect index string
876 // skip the delimiter
877 if (key_path
< fence
) {
879 if (key_path
== fence
)
880 // we have a trailing delimiter at the end