]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/utilities/col_buf_encoder.h
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / rocksdb / utilities / col_buf_encoder.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5
6 #pragma once
#include <cstdio>
#include <cstring>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "util/coding.h"
14
15 namespace rocksdb {
16
// Column-aware compression schemes that can be selected for a column.
// The "Rle" variants combine run-length encoding with the named base scheme.
// The numeric values are the same ones the enumerators receive implicitly
// (0 through 7, in declaration order); they are only spelled out here.
enum ColCompressionType {
  kColNoCompression = 0,
  kColRle = 1,
  kColVarint = 2,
  kColRleVarint = 3,
  kColDeltaVarint = 4,
  kColRleDeltaVarint = 5,
  kColDict = 6,
  kColRleDict = 7
};
27
28 struct ColDeclaration;
29
30 // ColBufEncoder is a class to encode column buffers. It can be populated from a
31 // ColDeclaration. Each time it takes a column value into Append() method to
32 // encode the column and store it into an internal buffer. After all rows for
33 // this column are consumed, a Finish() should be called to add header and
34 // remaining data.
35 class ColBufEncoder {
36 public:
37 // Read a column, encode data and append into internal buffer.
38 virtual size_t Append(const char *buf) = 0;
39 virtual ~ColBufEncoder() = 0;
40 // Get the internal column buffer. Should only be called after Finish().
41 const std::string &GetData();
42 // Finish encoding. Add header and remaining data.
43 virtual void Finish() = 0;
44 // Populate a ColBufEncoder from ColDeclaration.
45 static ColBufEncoder *NewColBufEncoder(const ColDeclaration &col_declaration);
46
47 protected:
48 std::string buffer_;
49 static inline bool IsRunLength(ColCompressionType type) {
50 return type == kColRle || type == kColRleVarint ||
51 type == kColRleDeltaVarint || type == kColRleDict;
52 }
53 };
54
55 // Encoder for fixed length column buffer. In fixed length column buffer, the
56 // size of the column should not exceed 8 bytes.
57 // The following encodings are supported:
58 // Varint: Variable length integer. See util/coding.h for more details
59 // Rle (Run length encoding): encode a sequence of contiguous value as
60 // [run_value][run_length]. Can be combined with Varint
61 // Delta: Encode value to its delta with its adjacent entry. Use varint to
62 // possibly reduce stored bytes. Can be combined with Rle.
63 // Dictionary: Use a dictionary to record all possible values in the block and
64 // encode them with an ID started from 0. IDs are encoded as varint. A column
65 // with dictionary encoding will have a header to store all actual values,
66 // ordered by their dictionary value, and the data will be replaced by
67 // dictionary value. Can be combined with Rle.
68 class FixedLengthColBufEncoder : public ColBufEncoder {
69 public:
70 explicit FixedLengthColBufEncoder(
71 size_t size, ColCompressionType col_compression_type = kColNoCompression,
72 bool nullable = false, bool big_endian = false)
73 : size_(size),
74 col_compression_type_(col_compression_type),
75 nullable_(nullable),
76 big_endian_(big_endian),
77 last_val_(0),
78 run_length_(-1),
79 run_val_(0) {}
80
81 size_t Append(const char *buf) override;
82 void Finish() override;
83 ~FixedLengthColBufEncoder() {}
84
85 private:
86 size_t size_;
87 ColCompressionType col_compression_type_;
88 // If set as true, the input value can be null (represented as nullptr). When
89 // nullable is true, use one more byte before actual value to indicate if the
90 // current value is null.
91 bool nullable_;
92 // If set as true, input value will be treated as big endian encoded.
93 bool big_endian_;
94
95 // for encoding
96 uint64_t last_val_;
97 int16_t run_length_;
98 uint64_t run_val_;
99 // Map to store dictionary for dictionary encoding
100 std::unordered_map<uint64_t, uint64_t> dictionary_;
101 // Vector of dictionary keys.
102 std::vector<uint64_t> dict_vec_;
103 };
104
105 // Long fixed length column buffer is a variant of fixed length buffer to hold
106 // fixed length buffer with more than 8 bytes. We do not support any special
107 // encoding schemes in LongFixedLengthColBufEncoder.
108 class LongFixedLengthColBufEncoder : public ColBufEncoder {
109 public:
110 LongFixedLengthColBufEncoder(size_t size, bool nullable)
111 : size_(size), nullable_(nullable) {}
112 size_t Append(const char *buf) override;
113 void Finish() override;
114
115 ~LongFixedLengthColBufEncoder() {}
116
117 private:
118 size_t size_;
119 bool nullable_;
120 };
121
122 // Variable length column buffer holds a format of variable length column. In
123 // this format, a column is composed of one byte length k, followed by data with
124 // k bytes long data.
125 class VariableLengthColBufEncoder : public ColBufEncoder {
126 public:
127 size_t Append(const char *buf) override;
128 void Finish() override;
129
130 ~VariableLengthColBufEncoder() {}
131 };
132
133 // Variable chunk column buffer holds another format of variable length column.
134 // In this format, a column contains multiple chunks of data, each of which is
135 // composed of 8 bytes long data, and one byte as a mask to indicate whether we
136 // have more data to come. If no more data coming, the mask is set as 0xFF. If
137 // the chunk is the last chunk and has only k valid bytes, the mask is set as
138 // 0xFF - (8 - k).
139 class VariableChunkColBufEncoder : public VariableLengthColBufEncoder {
140 public:
141 size_t Append(const char *buf) override;
142 void Finish() override;
143 explicit VariableChunkColBufEncoder(ColCompressionType col_compression_type)
144 : col_compression_type_(col_compression_type) {}
145 VariableChunkColBufEncoder() : col_compression_type_(kColNoCompression) {}
146
147 private:
148 ColCompressionType col_compression_type_;
149 // Map to store dictionary for dictionary encoding
150 std::unordered_map<uint64_t, uint64_t> dictionary_;
151 // Vector of dictionary keys.
152 std::vector<uint64_t> dict_vec_;
153 };
154
155 // ColDeclaration declares a column's type, algorithm of column-aware encoding,
156 // and other column data like endian and nullability.
157 struct ColDeclaration {
158 explicit ColDeclaration(
159 std::string _col_type,
160 ColCompressionType _col_compression_type = kColNoCompression,
161 size_t _size = 0, bool _nullable = false, bool _big_endian = false)
162 : col_type(_col_type),
163 col_compression_type(_col_compression_type),
164 size(_size),
165 nullable(_nullable),
166 big_endian(_big_endian) {}
167 std::string col_type;
168 ColCompressionType col_compression_type;
169 size_t size;
170 bool nullable;
171 bool big_endian;
172 };
173
174 // KVPairColDeclarations is a class to hold column declaration of columns in
175 // key and value.
176 struct KVPairColDeclarations {
177 std::vector<ColDeclaration> *key_col_declarations;
178 std::vector<ColDeclaration> *value_col_declarations;
179 ColDeclaration *value_checksum_declaration;
180 KVPairColDeclarations(std::vector<ColDeclaration> *_key_col_declarations,
181 std::vector<ColDeclaration> *_value_col_declarations,
182 ColDeclaration *_value_checksum_declaration)
183 : key_col_declarations(_key_col_declarations),
184 value_col_declarations(_value_col_declarations),
185 value_checksum_declaration(_value_checksum_declaration) {}
186 };
187
188 // Similar to KVPairDeclarations, KVPairColBufEncoders is used to hold column
189 // buffer encoders of all columns in key and value.
190 struct KVPairColBufEncoders {
191 std::vector<std::unique_ptr<ColBufEncoder>> key_col_bufs;
192 std::vector<std::unique_ptr<ColBufEncoder>> value_col_bufs;
193 std::unique_ptr<ColBufEncoder> value_checksum_buf;
194
195 explicit KVPairColBufEncoders(const KVPairColDeclarations &kvp_cd) {
196 for (auto kcd : *kvp_cd.key_col_declarations) {
197 key_col_bufs.emplace_back(
198 std::move(ColBufEncoder::NewColBufEncoder(kcd)));
199 }
200 for (auto vcd : *kvp_cd.value_col_declarations) {
201 value_col_bufs.emplace_back(
202 std::move(ColBufEncoder::NewColBufEncoder(vcd)));
203 }
204 value_checksum_buf.reset(
205 ColBufEncoder::NewColBufEncoder(*kvp_cd.value_checksum_declaration));
206 }
207
208 // Helper function to call Finish()
209 void Finish() {
210 for (auto &col_buf : key_col_bufs) {
211 col_buf->Finish();
212 }
213 for (auto &col_buf : value_col_bufs) {
214 col_buf->Finish();
215 }
216 value_checksum_buf->Finish();
217 }
218 };
219 } // namespace rocksdb