1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under the BSD-style license found in the
3 // LICENSE file in the root directory of this source tree. An additional grant
4 // of patent rights can be found in the PATENTS file in the same directory.
6 #include "utilities/col_buf_encoder.h"
13 ColBufEncoder::~ColBufEncoder() {}
// Conditionally passes `val` through EndianTransform(val, size).
//
// The transform is applied only when the requested byte order and
// port::kLittleEndian disagree:
//   - big_endian is true  and port::kLittleEndian is true, or
//   - big_endian is false and port::kLittleEndian is false.
// In the two remaining combinations neither branch fires and `val` is not
// modified by the statements shown here.
//
// NOTE(review): port::kLittleEndian presumably reports the host byte order and
// `size` presumably is the width of the value in bytes -- confirm against
// their declarations. The function is declared to return uint64_t; presumably
// it returns the (possibly transformed) `val` -- confirm.
17 inline uint64_t DecodeFixed64WithEndian(uint64_t val
, bool big_endian
,
// Big-endian input requested while port::kLittleEndian is set.
19 if (big_endian
&& port::kLittleEndian
) {
20 val
= EndianTransform(val
, size
);
// Little-endian input requested while port::kLittleEndian is clear.
21 } else if (!big_endian
&& !port::kLittleEndian
) {
22 val
= EndianTransform(val
, size
);
29 const std::string
&ColBufEncoder::GetData() { return buffer_
; }
// Factory: chooses a concrete encoder by comparing col_declaration.col_type
// with the string literals below and returns a freshly `new`-allocated
// instance of the matching class.
//
//   "FixedLength"     -> FixedLengthColBufEncoder(size, col_compression_type,
//                                                 nullable, big_endian)
//   "VariableLength"  -> VariableLengthColBufEncoder()
//   "VariableChunk"   -> VariableChunkColBufEncoder(col_compression_type)
//   "LongFixedLength" -> LongFixedLengthColBufEncoder(size, nullable)
//
// The comparison is an exact string equality test on col_type.
//
// NOTE(review): the object is created with a raw `new`, so the caller
// presumably takes ownership and must delete it -- confirm at call sites.
// NOTE(review): what is returned for an unrecognized col_type is not shown in
// the statements below -- confirm before relying on it.
31 ColBufEncoder
*ColBufEncoder::NewColBufEncoder(
32 const ColDeclaration
&col_declaration
) {
33 if (col_declaration
.col_type
== "FixedLength") {
34 return new FixedLengthColBufEncoder(
35 col_declaration
.size
, col_declaration
.col_compression_type
,
36 col_declaration
.nullable
, col_declaration
.big_endian
);
37 } else if (col_declaration
.col_type
== "VariableLength") {
// This encoder takes no configuration from the declaration.
38 return new VariableLengthColBufEncoder();
39 } else if (col_declaration
.col_type
== "VariableChunk") {
// Only the compression type is forwarded for chunked columns.
40 return new VariableChunkColBufEncoder(col_declaration
.col_compression_type
);
41 } else if (col_declaration
.col_type
== "LongFixedLength") {
// No compression type or endianness is forwarded for long fixed columns.
42 return new LongFixedLengthColBufEncoder(col_declaration
.size
,
43 col_declaration
.nullable
);
45 // Unrecognized column type
// Appends one fixed-width value, read from `buf`, to buffer_ using the
// encoding selected by col_compression_type_.
//
// Statements shown below do the following:
//   1. Copy size_ bytes from `buf` into a zeroed uint64_t and normalize its
//      byte order with DecodeFixed64WithEndian(read_val, big_endian_, size_).
//   2. For kColDeltaVarint / kColRleDeltaVarint, compute the signed difference
//      against last_val_ and zigzag-encode it.
//      For kColDict / kColRleDict, look read_val up in dictionary_; a value
//      not yet present is given the next index (dictionary_.size()) and is
//      recorded in both dictionary_ and dict_vec_.
//   3. For run-length types (IsRunLength), when the value to write differs
//      from run_val_, write out the finished run (run value, then
//      run_length_) and make the new value the current run value. For other
//      types, write the value straight to buffer_.
//
// The preprocessor prelude disables the undefined-behavior sanitizer for this
// function on clang >= 3.9 and GCC >= 4.9.
// NOTE(review): presumably this is because of the left shift of a possibly
// negative int64_t in the zigzag step -- confirm.
// NOTE(review): the declared return type is size_t; the returned value is not
// among the statements shown -- confirm what callers receive.
49 #if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 9)
50 __attribute__((__no_sanitize__("undefined")))
51 #elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9)
52 __attribute__((__no_sanitize_undefined__
))
54 size_t FixedLengthColBufEncoder::Append(const char *buf
) {
// Start from 0 so that any bytes beyond the size_ bytes copied stay zero.
63 uint64_t read_val
= 0;
64 memcpy(&read_val
, buf
, size_
);
65 read_val
= DecodeFixed64WithEndian(read_val
, big_endian_
, size_
);
67 // Determine write value
// Default: the decoded value itself is what gets written.
68 uint64_t write_val
= read_val
;
69 if (col_compression_type_
== kColDeltaVarint
||
70 col_compression_type_
== kColRleDeltaVarint
) {
// Unsigned subtraction stored into a signed 64-bit delta.
71 int64_t delta
= read_val
- last_val_
;
72 // Encode signed delta value
// Zigzag mapping: the sign is folded into bit 0 so that deltas of small
// magnitude, positive or negative, become small non-negative numbers.
73 delta
= (delta
<< 1) ^ (delta
>> 63);
76 } else if (col_compression_type_
== kColDict
||
77 col_compression_type_
== kColRleDict
) {
78 auto iter
= dictionary_
.find(read_val
);
80 if (iter
== dictionary_
.end()) {
81 // Add new entry to dictionary
// The new index is the current entry count, so indices are assigned in
// first-seen order; dict_vec_ keeps the values in that same order.
82 dict_val
= dictionary_
.size();
83 dictionary_
.insert(std::make_pair(read_val
, dict_val
));
84 dict_vec_
.push_back(read_val
);
// Value already known: reuse the index stored for it.
86 dict_val
= iter
->second
;
92 if (IsRunLength(col_compression_type_
)) {
// NOTE(review): run_length_ == -1 presumably marks "no run started yet" --
// confirm against the member's initializer.
93 if (run_length_
== -1) {
// The incoming value differs from the current run value.
97 } else if (write_val
!= run_val_
) {
100 if (col_compression_type_
== kColRle
) {
// Raw form: the first size_ bytes of run_val_'s in-memory representation.
// NOTE(review): those are the low-order bytes only on a little-endian host --
// confirm this is intended.
101 buffer_
.append(reinterpret_cast<char *>(&run_val_
), size_
);
// Varint form of the run value, written with PutVarint64.
103 PutVarint64(&buffer_
, run_val_
);
// The run length follows the run value, written with PutVarint64.
106 PutVarint64(&buffer_
, run_length_
);
// The incoming value becomes the current run value.
107 run_val_
= write_val
;
112 } else { // non run-length encodings
113 if (col_compression_type_
== kColNoCompression
) {
// Uncompressed: the first size_ bytes of write_val's in-memory representation.
114 buffer_
.append(reinterpret_cast<char *>(&write_val
), size_
);
// Written with PutVarint64 rather than as raw bytes.
116 PutVarint64(&buffer_
, write_val
);
// Completes the encoded column held in buffer_.
//
// For dictionary encodings (kColDict / kColRleDict), a header is built from
// the number of dictionary entries followed by every entry of dict_vec_, each
// written with PutVarint64, and that header is placed in front of the existing
// buffer_ contents.
// For run-length encodings (IsRunLength), the run value still held in
// run_val_ is written (raw size_ bytes for kColRle, PutVarint64 shown for the
// varint form) followed by run_length_.
//
// NOTE(review): `header` is used below but its declaration is not among the
// statements shown; it is presumably a local std::string -- confirm.
122 void FixedLengthColBufEncoder::Finish() {
123 if (col_compression_type_
== kColDict
||
124 col_compression_type_
== kColRleDict
) {
// Entry count first, so a reader can tell how many entries follow.
126 PutVarint64(&header
, dict_vec_
.size());
127 // Put dictionary in the header
// dict_vec_ is walked in its stored order.
128 for (auto item
: dict_vec_
) {
129 PutVarint64(&header
, item
);
// Prepend: builds a new string from header + old contents and assigns it back.
131 buffer_
= header
+ buffer_
;
133 if (IsRunLength(col_compression_type_
)) {
134 // Finish last run value
135 if (col_compression_type_
== kColRle
) {
// Raw form: the first size_ bytes of run_val_'s in-memory representation.
136 buffer_
.append(reinterpret_cast<char *>(&run_val_
), size_
);
138 PutVarint64(&buffer_
, run_val_
);
// The run length follows the run value.
140 PutVarint64(&buffer_
, run_length_
);
// Appends one value of size_ bytes from `buf` to buffer_.
//
// The statements shown test `buf` against nullptr and write a single marker
// byte to buffer_: the byte 0 in the null case, and the byte 1 otherwise.
// The payload is appended as size_ raw bytes copied from `buf`.
//
// NOTE(review): the conditions guarding the marker bytes, and the value
// returned (declared size_t), are not fully visible in the statements shown --
// confirm before relying on either.
144 size_t LongFixedLengthColBufEncoder::Append(const char *buf
) {
146 if (buf
== nullptr) {
// One byte with value 0.
147 buffer_
.append(1, 0);
// One byte with value 1.
150 buffer_
.append(1, 1);
// Payload: size_ bytes copied verbatim, with no byte-order conversion here.
153 buffer_
.append(buf
, size_
);
157 void LongFixedLengthColBufEncoder::Finish() {}
// Appends one variable-length value from `buf` to buffer_.
//
// The statements shown copy a single byte from `buf` into buffer_ and then
// copy `length` further bytes from `buf`.
//
// NOTE(review): the declaration and assignment of `length`, any advance of
// `buf` between the two appends, and the returned value (declared size_t) are
// not among the statements shown. Presumably the first byte is a length
// prefix -- confirm.
159 size_t VariableLengthColBufEncoder::Append(const char *buf
) {
// Copy exactly one byte from buf.
162 buffer_
.append(buf
, 1);
// Copy `length` bytes from buf.
164 buffer_
.append(buf
, length
);
168 void VariableLengthColBufEncoder::Finish() {}
// Appends one chunk-encoded value from `buf` to buffer_ and returns how far
// `buf` has moved from its starting position (buf - orig_buf).
//
// The statements shown loop while `mark` equals 0xFF. Each pass copies 8 bytes
// from `buf` into `val` and derives chunk_size = 8 - (0xFF - mark), which is 8
// when mark is 0xFF and smaller for smaller marks. The chunk is staged in
// tmp_buffer either as a dictionary index (kColDict, written with
// PutVarint64) or as chunk_size raw bytes of `val`, and chunk_size is added to
// `length`. After the loop, `length` is written to buffer_ with PutVarint64,
// followed by the staged bytes.
//
// NOTE(review): the declarations of `mark`, `val`, `length` and `dict_val`,
// the statements that read `mark` and advance `buf`, and the branch structure
// between the dictionary and raw paths are not among the statements shown --
// confirm against the full source.
170 size_t VariableChunkColBufEncoder::Append(const char *buf
) {
// Remember the starting position so the distance advanced can be returned.
171 const char *orig_buf
= buf
;
// Chunks are staged here because the total length is written before them.
174 std::string tmp_buffer
;
175 while (mark
== 0xFF) {
// Always reads a full 8 bytes from buf.
177 memcpy(&val
, buf
, 8);
// 8 when mark == 0xFF; each step below 0xFF reduces the size by one.
181 int8_t chunk_size
= 8 - (0xFF - mark
);
182 if (col_compression_type_
== kColDict
) {
183 auto iter
= dictionary_
.find(val
);
185 if (iter
== dictionary_
.end()) {
// New value: its index is the current entry count; dict_vec_ records the
// values in first-seen order.
186 dict_val
= dictionary_
.size();
187 dictionary_
.insert(std::make_pair(val
, dict_val
));
188 dict_vec_
.push_back(val
);
// Known value: reuse its stored index.
190 dict_val
= iter
->second
;
192 PutVarint64(&tmp_buffer
, dict_val
);
// Raw form: the first chunk_size bytes of val's in-memory representation.
194 tmp_buffer
.append(reinterpret_cast<char *>(&val
), chunk_size
);
196 length
+= chunk_size
;
// Accumulated length first, then the staged chunk data.
199 PutVarint64(&buffer_
, length
);
200 buffer_
.append(tmp_buffer
);
201 return buf
- orig_buf
;
// Completes the encoded column held in buffer_.
//
// Only kColDict needs work here: a header is built from the number of
// dictionary entries followed by every entry of dict_vec_, each written with
// PutVarint64, and that header is placed in front of the existing buffer_
// contents.
//
// NOTE(review): `header` is used below but its declaration is not among the
// statements shown; it is presumably a local std::string -- confirm.
204 void VariableChunkColBufEncoder::Finish() {
205 if (col_compression_type_
== kColDict
) {
// Entry count first, so a reader can tell how many entries follow.
207 PutVarint64(&header
, dict_vec_
.size());
// dict_vec_ is walked in its stored order.
208 for (auto item
: dict_vec_
) {
209 PutVarint64(&header
, item
);
// Prepend: builds a new string from header + old contents and assigns it back.
211 buffer_
= header
+ buffer_
;
215 } // namespace rocksdb