]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/table/plain_table_builder.cc
import 14.2.4 nautilus point release
[ceph.git] / ceph / src / rocksdb / table / plain_table_builder.cc
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5
6#ifndef ROCKSDB_LITE
7#include "table/plain_table_builder.h"
8
9#include <assert.h>
10
11#include <string>
12#include <limits>
13#include <map>
14
15#include "rocksdb/comparator.h"
16#include "rocksdb/env.h"
17#include "rocksdb/filter_policy.h"
18#include "rocksdb/options.h"
19#include "rocksdb/table.h"
20#include "table/plain_table_factory.h"
21#include "db/dbformat.h"
22#include "table/block_builder.h"
23#include "table/bloom_block.h"
24#include "table/plain_table_index.h"
25#include "table/format.h"
26#include "table/meta_blocks.h"
27#include "util/coding.h"
28#include "util/crc32c.h"
29#include "util/file_reader_writer.h"
30#include "util/stop_watch.h"
31
32namespace rocksdb {
33
34namespace {
35
36// a utility that helps writing block content to the file
37// @offset will advance if @block_contents was successfully written.
38// @block_handle the block handle this particular block.
39Status WriteBlock(const Slice& block_contents, WritableFileWriter* file,
40 uint64_t* offset, BlockHandle* block_handle) {
41 block_handle->set_offset(*offset);
42 block_handle->set_size(block_contents.size());
43 Status s = file->Append(block_contents);
44
45 if (s.ok()) {
46 *offset += block_contents.size();
47 }
48 return s;
49}
50
51} // namespace
52
// Magic numbers for the plain table format.
//
// kPlainTableMagicNumber was picked by running
//    echo rocksdb.table.plain | sha1sum
// and taking the leading 64 bits.
extern const uint64_t kPlainTableMagicNumber = 0x8242229663BF9564ULL;
// The legacy value is the one PlainTableBuilder::Finish() passes to the
// footer it writes.
extern const uint64_t kLegacyPlainTableMagicNumber = 0x4F3418EB7A8F13B8ULL;
58
59PlainTableBuilder::PlainTableBuilder(
11fdf7f2 60 const ImmutableCFOptions& ioptions, const MutableCFOptions& moptions,
7c673cae
FG
61 const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>*
62 int_tbl_prop_collector_factories,
63 uint32_t column_family_id, WritableFileWriter* file, uint32_t user_key_len,
64 EncodingType encoding_type, size_t index_sparseness,
65 uint32_t bloom_bits_per_key, const std::string& column_family_name,
66 uint32_t num_probes, size_t huge_page_tlb_size, double hash_table_ratio,
67 bool store_index_in_file)
68 : ioptions_(ioptions),
11fdf7f2 69 moptions_(moptions),
7c673cae
FG
70 bloom_block_(num_probes),
71 file_(file),
72 bloom_bits_per_key_(bloom_bits_per_key),
73 huge_page_tlb_size_(huge_page_tlb_size),
11fdf7f2 74 encoder_(encoding_type, user_key_len, moptions.prefix_extractor.get(),
7c673cae
FG
75 index_sparseness),
76 store_index_in_file_(store_index_in_file),
11fdf7f2 77 prefix_extractor_(moptions.prefix_extractor.get()) {
7c673cae
FG
78 // Build index block and save it in the file if hash_table_ratio > 0
79 if (store_index_in_file_) {
80 assert(hash_table_ratio > 0 || IsTotalOrderMode());
11fdf7f2
TL
81 index_builder_.reset(new PlainTableIndexBuilder(
82 &arena_, ioptions, moptions.prefix_extractor.get(), index_sparseness,
83 hash_table_ratio, huge_page_tlb_size_));
7c673cae
FG
84 properties_.user_collected_properties
85 [PlainTablePropertyNames::kBloomVersion] = "1"; // For future use
86 }
87
88 properties_.fixed_key_len = user_key_len;
89
90 // for plain table, we put all the data in a big chuck.
91 properties_.num_data_blocks = 1;
92 // Fill it later if store_index_in_file_ == true
93 properties_.index_size = 0;
94 properties_.filter_size = 0;
95 // To support roll-back to previous version, now still use version 0 for
96 // plain encoding.
97 properties_.format_version = (encoding_type == kPlain) ? 0 : 1;
98 properties_.column_family_id = column_family_id;
99 properties_.column_family_name = column_family_name;
11fdf7f2
TL
100 properties_.prefix_extractor_name = moptions_.prefix_extractor != nullptr
101 ? moptions_.prefix_extractor->Name()
7c673cae
FG
102 : "nullptr";
103
104 std::string val;
105 PutFixed32(&val, static_cast<uint32_t>(encoder_.GetEncodingType()));
106 properties_.user_collected_properties
107 [PlainTablePropertyNames::kEncodingType] = val;
108
109 for (auto& collector_factories : *int_tbl_prop_collector_factories) {
110 table_properties_collectors_.emplace_back(
111 collector_factories->CreateIntTblPropCollector(column_family_id));
112 }
113}
114
115PlainTableBuilder::~PlainTableBuilder() {
116}
117
118void PlainTableBuilder::Add(const Slice& key, const Slice& value) {
119 // temp buffer for metadata bytes between key and value.
120 char meta_bytes_buf[6];
121 size_t meta_bytes_buf_size = 0;
122
123 ParsedInternalKey internal_key;
124 if (!ParseInternalKey(key, &internal_key)) {
125 assert(false);
126 return;
127 }
128 if (internal_key.type == kTypeRangeDeletion) {
129 status_ = Status::NotSupported("Range deletion unsupported");
130 return;
131 }
132
133 // Store key hash
134 if (store_index_in_file_) {
11fdf7f2 135 if (moptions_.prefix_extractor == nullptr) {
7c673cae
FG
136 keys_or_prefixes_hashes_.push_back(GetSliceHash(internal_key.user_key));
137 } else {
138 Slice prefix =
11fdf7f2 139 moptions_.prefix_extractor->Transform(internal_key.user_key);
7c673cae
FG
140 keys_or_prefixes_hashes_.push_back(GetSliceHash(prefix));
141 }
142 }
143
144 // Write value
145 assert(offset_ <= std::numeric_limits<uint32_t>::max());
146 auto prev_offset = static_cast<uint32_t>(offset_);
147 // Write out the key
148 encoder_.AppendKey(key, file_, &offset_, meta_bytes_buf,
149 &meta_bytes_buf_size);
150 if (SaveIndexInFile()) {
151 index_builder_->AddKeyPrefix(GetPrefix(internal_key), prev_offset);
152 }
153
154 // Write value length
155 uint32_t value_size = static_cast<uint32_t>(value.size());
156 char* end_ptr =
157 EncodeVarint32(meta_bytes_buf + meta_bytes_buf_size, value_size);
158 assert(end_ptr <= meta_bytes_buf + sizeof(meta_bytes_buf));
159 meta_bytes_buf_size = end_ptr - meta_bytes_buf;
160 file_->Append(Slice(meta_bytes_buf, meta_bytes_buf_size));
161
162 // Write value
163 file_->Append(value);
164 offset_ += value_size + meta_bytes_buf_size;
165
166 properties_.num_entries++;
167 properties_.raw_key_size += key.size();
168 properties_.raw_value_size += value.size();
494da23a
TL
169 if (internal_key.type == kTypeDeletion ||
170 internal_key.type == kTypeSingleDeletion) {
171 properties_.num_deletions++;
172 } else if (internal_key.type == kTypeMerge) {
173 properties_.num_merge_operands++;
174 }
7c673cae
FG
175
176 // notify property collectors
177 NotifyCollectTableCollectorsOnAdd(
178 key, value, offset_, table_properties_collectors_, ioptions_.info_log);
179}
180
181Status PlainTableBuilder::status() const { return status_; }
182
183Status PlainTableBuilder::Finish() {
184 assert(!closed_);
185 closed_ = true;
186
187 properties_.data_size = offset_;
188
189 // Write the following blocks
190 // 1. [meta block: bloom] - optional
191 // 2. [meta block: index] - optional
192 // 3. [meta block: properties]
193 // 4. [metaindex block]
194 // 5. [footer]
195
196 MetaIndexBuilder meta_index_builer;
197
198 if (store_index_in_file_ && (properties_.num_entries > 0)) {
199 assert(properties_.num_entries <= std::numeric_limits<uint32_t>::max());
200 Status s;
201 BlockHandle bloom_block_handle;
202 if (bloom_bits_per_key_ > 0) {
203 bloom_block_.SetTotalBits(
204 &arena_,
205 static_cast<uint32_t>(properties_.num_entries) * bloom_bits_per_key_,
206 ioptions_.bloom_locality, huge_page_tlb_size_, ioptions_.info_log);
207
208 PutVarint32(&properties_.user_collected_properties
209 [PlainTablePropertyNames::kNumBloomBlocks],
210 bloom_block_.GetNumBlocks());
211
212 bloom_block_.AddKeysHashes(keys_or_prefixes_hashes_);
213
214 Slice bloom_finish_result = bloom_block_.Finish();
215
216 properties_.filter_size = bloom_finish_result.size();
217 s = WriteBlock(bloom_finish_result, file_, &offset_, &bloom_block_handle);
218
219 if (!s.ok()) {
220 return s;
221 }
222 meta_index_builer.Add(BloomBlockBuilder::kBloomBlock, bloom_block_handle);
223 }
224 BlockHandle index_block_handle;
225 Slice index_finish_result = index_builder_->Finish();
226
227 properties_.index_size = index_finish_result.size();
228 s = WriteBlock(index_finish_result, file_, &offset_, &index_block_handle);
229
230 if (!s.ok()) {
231 return s;
232 }
233
234 meta_index_builer.Add(PlainTableIndexBuilder::kPlainTableIndexBlock,
235 index_block_handle);
236 }
237
238 // Calculate bloom block size and index block size
239 PropertyBlockBuilder property_block_builder;
240 // -- Add basic properties
241 property_block_builder.AddTableProperty(properties_);
242
243 property_block_builder.Add(properties_.user_collected_properties);
244
245 // -- Add user collected properties
246 NotifyCollectTableCollectorsOnFinish(table_properties_collectors_,
247 ioptions_.info_log,
248 &property_block_builder);
249
250 // -- Write property block
251 BlockHandle property_block_handle;
252 auto s = WriteBlock(
253 property_block_builder.Finish(),
254 file_,
255 &offset_,
256 &property_block_handle
257 );
258 if (!s.ok()) {
259 return s;
260 }
261 meta_index_builer.Add(kPropertiesBlock, property_block_handle);
262
263 // -- write metaindex block
264 BlockHandle metaindex_block_handle;
265 s = WriteBlock(
266 meta_index_builer.Finish(),
267 file_,
268 &offset_,
269 &metaindex_block_handle
270 );
271 if (!s.ok()) {
272 return s;
273 }
274
275 // Write Footer
276 // no need to write out new footer if we're using default checksum
277 Footer footer(kLegacyPlainTableMagicNumber, 0);
278 footer.set_metaindex_handle(metaindex_block_handle);
279 footer.set_index_handle(BlockHandle::NullBlockHandle());
280 std::string footer_encoding;
281 footer.EncodeTo(&footer_encoding);
282 s = file_->Append(footer_encoding);
283 if (s.ok()) {
284 offset_ += footer_encoding.size();
285 }
286
287 return s;
288}
289
290void PlainTableBuilder::Abandon() {
291 closed_ = true;
292}
293
294uint64_t PlainTableBuilder::NumEntries() const {
295 return properties_.num_entries;
296}
297
298uint64_t PlainTableBuilder::FileSize() const {
299 return offset_;
300}
301
302} // namespace rocksdb
303#endif // ROCKSDB_LITE