]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/utilities/col_buf_encoder.cc
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / rocksdb / utilities / col_buf_encoder.cc
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under the BSD-style license found in the
3 // LICENSE file in the root directory of this source tree. An additional grant
4 // of patent rights can be found in the PATENTS file in the same directory.
5
6 #include "utilities/col_buf_encoder.h"
7 #include <cstring>
8 #include <string>
9 #include "port/port.h"
10
11 namespace rocksdb {
12
13 ColBufEncoder::~ColBufEncoder() {}
14
15 namespace {
16
17 inline uint64_t DecodeFixed64WithEndian(uint64_t val, bool big_endian,
18 size_t size) {
19 if (big_endian && port::kLittleEndian) {
20 val = EndianTransform(val, size);
21 } else if (!big_endian && !port::kLittleEndian) {
22 val = EndianTransform(val, size);
23 }
24 return val;
25 }
26
27 } // namespace
28
29 const std::string &ColBufEncoder::GetData() { return buffer_; }
30
31 ColBufEncoder *ColBufEncoder::NewColBufEncoder(
32 const ColDeclaration &col_declaration) {
33 if (col_declaration.col_type == "FixedLength") {
34 return new FixedLengthColBufEncoder(
35 col_declaration.size, col_declaration.col_compression_type,
36 col_declaration.nullable, col_declaration.big_endian);
37 } else if (col_declaration.col_type == "VariableLength") {
38 return new VariableLengthColBufEncoder();
39 } else if (col_declaration.col_type == "VariableChunk") {
40 return new VariableChunkColBufEncoder(col_declaration.col_compression_type);
41 } else if (col_declaration.col_type == "LongFixedLength") {
42 return new LongFixedLengthColBufEncoder(col_declaration.size,
43 col_declaration.nullable);
44 }
45 // Unrecognized column type
46 return nullptr;
47 }
48
49 #if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 9)
50 __attribute__((__no_sanitize__("undefined")))
51 #elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9)
52 __attribute__((__no_sanitize_undefined__))
53 #endif
54 size_t FixedLengthColBufEncoder::Append(const char *buf) {
55 if (nullable_) {
56 if (buf == nullptr) {
57 buffer_.append(1, 0);
58 return 0;
59 } else {
60 buffer_.append(1, 1);
61 }
62 }
63 uint64_t read_val = 0;
64 memcpy(&read_val, buf, size_);
65 read_val = DecodeFixed64WithEndian(read_val, big_endian_, size_);
66
67 // Determine write value
68 uint64_t write_val = read_val;
69 if (col_compression_type_ == kColDeltaVarint ||
70 col_compression_type_ == kColRleDeltaVarint) {
71 int64_t delta = read_val - last_val_;
72 // Encode signed delta value
73 delta = (delta << 1) ^ (delta >> 63);
74 write_val = delta;
75 last_val_ = read_val;
76 } else if (col_compression_type_ == kColDict ||
77 col_compression_type_ == kColRleDict) {
78 auto iter = dictionary_.find(read_val);
79 uint64_t dict_val;
80 if (iter == dictionary_.end()) {
81 // Add new entry to dictionary
82 dict_val = dictionary_.size();
83 dictionary_.insert(std::make_pair(read_val, dict_val));
84 dict_vec_.push_back(read_val);
85 } else {
86 dict_val = iter->second;
87 }
88 write_val = dict_val;
89 }
90
91 // Write into buffer
92 if (IsRunLength(col_compression_type_)) {
93 if (run_length_ == -1) {
94 // First element
95 run_val_ = write_val;
96 run_length_ = 1;
97 } else if (write_val != run_val_) {
98 // End of run
99 // Write run value
100 if (col_compression_type_ == kColRle) {
101 buffer_.append(reinterpret_cast<char *>(&run_val_), size_);
102 } else {
103 PutVarint64(&buffer_, run_val_);
104 }
105 // Write run length
106 PutVarint64(&buffer_, run_length_);
107 run_val_ = write_val;
108 run_length_ = 1;
109 } else {
110 run_length_++;
111 }
112 } else { // non run-length encodings
113 if (col_compression_type_ == kColNoCompression) {
114 buffer_.append(reinterpret_cast<char *>(&write_val), size_);
115 } else {
116 PutVarint64(&buffer_, write_val);
117 }
118 }
119 return size_;
120 }
121
122 void FixedLengthColBufEncoder::Finish() {
123 if (col_compression_type_ == kColDict ||
124 col_compression_type_ == kColRleDict) {
125 std::string header;
126 PutVarint64(&header, dict_vec_.size());
127 // Put dictionary in the header
128 for (auto item : dict_vec_) {
129 PutVarint64(&header, item);
130 }
131 buffer_ = header + buffer_;
132 }
133 if (IsRunLength(col_compression_type_)) {
134 // Finish last run value
135 if (col_compression_type_ == kColRle) {
136 buffer_.append(reinterpret_cast<char *>(&run_val_), size_);
137 } else {
138 PutVarint64(&buffer_, run_val_);
139 }
140 PutVarint64(&buffer_, run_length_);
141 }
142 }
143
144 size_t LongFixedLengthColBufEncoder::Append(const char *buf) {
145 if (nullable_) {
146 if (buf == nullptr) {
147 buffer_.append(1, 0);
148 return 0;
149 } else {
150 buffer_.append(1, 1);
151 }
152 }
153 buffer_.append(buf, size_);
154 return size_;
155 }
156
157 void LongFixedLengthColBufEncoder::Finish() {}
158
159 size_t VariableLengthColBufEncoder::Append(const char *buf) {
160 uint8_t length = 0;
161 length = *buf;
162 buffer_.append(buf, 1);
163 buf += 1;
164 buffer_.append(buf, length);
165 return length + 1;
166 }
167
168 void VariableLengthColBufEncoder::Finish() {}
169
170 size_t VariableChunkColBufEncoder::Append(const char *buf) {
171 const char *orig_buf = buf;
172 uint8_t mark = 0xFF;
173 size_t length = 0;
174 std::string tmp_buffer;
175 while (mark == 0xFF) {
176 uint64_t val;
177 memcpy(&val, buf, 8);
178 buf += 8;
179 mark = *buf;
180 buf += 1;
181 int8_t chunk_size = 8 - (0xFF - mark);
182 if (col_compression_type_ == kColDict) {
183 auto iter = dictionary_.find(val);
184 uint64_t dict_val;
185 if (iter == dictionary_.end()) {
186 dict_val = dictionary_.size();
187 dictionary_.insert(std::make_pair(val, dict_val));
188 dict_vec_.push_back(val);
189 } else {
190 dict_val = iter->second;
191 }
192 PutVarint64(&tmp_buffer, dict_val);
193 } else {
194 tmp_buffer.append(reinterpret_cast<char *>(&val), chunk_size);
195 }
196 length += chunk_size;
197 }
198
199 PutVarint64(&buffer_, length);
200 buffer_.append(tmp_buffer);
201 return buf - orig_buf;
202 }
203
204 void VariableChunkColBufEncoder::Finish() {
205 if (col_compression_type_ == kColDict) {
206 std::string header;
207 PutVarint64(&header, dict_vec_.size());
208 for (auto item : dict_vec_) {
209 PutVarint64(&header, item);
210 }
211 buffer_ = header + buffer_;
212 }
213 }
214
215 } // namespace rocksdb