]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/table/plain_table_builder.cc
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / rocksdb / table / plain_table_builder.cc
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under the BSD-style license found in the
3 // LICENSE file in the root directory of this source tree. An additional grant
4 // of patent rights can be found in the PATENTS file in the same directory.
5
6 #ifndef ROCKSDB_LITE
7 #include "table/plain_table_builder.h"
8
9 #include <assert.h>
10
11 #include <string>
12 #include <limits>
13 #include <map>
14
15 #include "rocksdb/comparator.h"
16 #include "rocksdb/env.h"
17 #include "rocksdb/filter_policy.h"
18 #include "rocksdb/options.h"
19 #include "rocksdb/table.h"
20 #include "table/plain_table_factory.h"
21 #include "db/dbformat.h"
22 #include "table/block_builder.h"
23 #include "table/bloom_block.h"
24 #include "table/plain_table_index.h"
25 #include "table/format.h"
26 #include "table/meta_blocks.h"
27 #include "util/coding.h"
28 #include "util/crc32c.h"
29 #include "util/file_reader_writer.h"
30 #include "util/stop_watch.h"
31
32 namespace rocksdb {
33
34 namespace {
35
36 // a utility that helps writing block content to the file
37 // @offset will advance if @block_contents was successfully written.
38 // @block_handle the block handle this particular block.
39 Status WriteBlock(const Slice& block_contents, WritableFileWriter* file,
40 uint64_t* offset, BlockHandle* block_handle) {
41 block_handle->set_offset(*offset);
42 block_handle->set_size(block_contents.size());
43 Status s = file->Append(block_contents);
44
45 if (s.ok()) {
46 *offset += block_contents.size();
47 }
48 return s;
49 }
50
51 } // namespace
52
// kPlainTableMagicNumber was derived by running
//    echo rocksdb.table.plain | sha1sum
// and keeping the leading 64 bits of the digest.
extern const uint64_t kPlainTableMagicNumber = 0x8242229663bf9564ULL;
// Legacy magic number; this is the value Finish() puts into the footer.
extern const uint64_t kLegacyPlainTableMagicNumber = 0x4f3418eb7a8f13b8ULL;
58
59 PlainTableBuilder::PlainTableBuilder(
60 const ImmutableCFOptions& ioptions,
61 const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>*
62 int_tbl_prop_collector_factories,
63 uint32_t column_family_id, WritableFileWriter* file, uint32_t user_key_len,
64 EncodingType encoding_type, size_t index_sparseness,
65 uint32_t bloom_bits_per_key, const std::string& column_family_name,
66 uint32_t num_probes, size_t huge_page_tlb_size, double hash_table_ratio,
67 bool store_index_in_file)
68 : ioptions_(ioptions),
69 bloom_block_(num_probes),
70 file_(file),
71 bloom_bits_per_key_(bloom_bits_per_key),
72 huge_page_tlb_size_(huge_page_tlb_size),
73 encoder_(encoding_type, user_key_len, ioptions.prefix_extractor,
74 index_sparseness),
75 store_index_in_file_(store_index_in_file),
76 prefix_extractor_(ioptions.prefix_extractor) {
77 // Build index block and save it in the file if hash_table_ratio > 0
78 if (store_index_in_file_) {
79 assert(hash_table_ratio > 0 || IsTotalOrderMode());
80 index_builder_.reset(
81 new PlainTableIndexBuilder(&arena_, ioptions, index_sparseness,
82 hash_table_ratio, huge_page_tlb_size_));
83 properties_.user_collected_properties
84 [PlainTablePropertyNames::kBloomVersion] = "1"; // For future use
85 }
86
87 properties_.fixed_key_len = user_key_len;
88
89 // for plain table, we put all the data in a big chuck.
90 properties_.num_data_blocks = 1;
91 // Fill it later if store_index_in_file_ == true
92 properties_.index_size = 0;
93 properties_.filter_size = 0;
94 // To support roll-back to previous version, now still use version 0 for
95 // plain encoding.
96 properties_.format_version = (encoding_type == kPlain) ? 0 : 1;
97 properties_.column_family_id = column_family_id;
98 properties_.column_family_name = column_family_name;
99 properties_.prefix_extractor_name = ioptions_.prefix_extractor != nullptr
100 ? ioptions_.prefix_extractor->Name()
101 : "nullptr";
102
103 std::string val;
104 PutFixed32(&val, static_cast<uint32_t>(encoder_.GetEncodingType()));
105 properties_.user_collected_properties
106 [PlainTablePropertyNames::kEncodingType] = val;
107
108 for (auto& collector_factories : *int_tbl_prop_collector_factories) {
109 table_properties_collectors_.emplace_back(
110 collector_factories->CreateIntTblPropCollector(column_family_id));
111 }
112 }
113
114 PlainTableBuilder::~PlainTableBuilder() {
115 }
116
117 void PlainTableBuilder::Add(const Slice& key, const Slice& value) {
118 // temp buffer for metadata bytes between key and value.
119 char meta_bytes_buf[6];
120 size_t meta_bytes_buf_size = 0;
121
122 ParsedInternalKey internal_key;
123 if (!ParseInternalKey(key, &internal_key)) {
124 assert(false);
125 return;
126 }
127 if (internal_key.type == kTypeRangeDeletion) {
128 status_ = Status::NotSupported("Range deletion unsupported");
129 return;
130 }
131
132 // Store key hash
133 if (store_index_in_file_) {
134 if (ioptions_.prefix_extractor == nullptr) {
135 keys_or_prefixes_hashes_.push_back(GetSliceHash(internal_key.user_key));
136 } else {
137 Slice prefix =
138 ioptions_.prefix_extractor->Transform(internal_key.user_key);
139 keys_or_prefixes_hashes_.push_back(GetSliceHash(prefix));
140 }
141 }
142
143 // Write value
144 assert(offset_ <= std::numeric_limits<uint32_t>::max());
145 auto prev_offset = static_cast<uint32_t>(offset_);
146 // Write out the key
147 encoder_.AppendKey(key, file_, &offset_, meta_bytes_buf,
148 &meta_bytes_buf_size);
149 if (SaveIndexInFile()) {
150 index_builder_->AddKeyPrefix(GetPrefix(internal_key), prev_offset);
151 }
152
153 // Write value length
154 uint32_t value_size = static_cast<uint32_t>(value.size());
155 char* end_ptr =
156 EncodeVarint32(meta_bytes_buf + meta_bytes_buf_size, value_size);
157 assert(end_ptr <= meta_bytes_buf + sizeof(meta_bytes_buf));
158 meta_bytes_buf_size = end_ptr - meta_bytes_buf;
159 file_->Append(Slice(meta_bytes_buf, meta_bytes_buf_size));
160
161 // Write value
162 file_->Append(value);
163 offset_ += value_size + meta_bytes_buf_size;
164
165 properties_.num_entries++;
166 properties_.raw_key_size += key.size();
167 properties_.raw_value_size += value.size();
168
169 // notify property collectors
170 NotifyCollectTableCollectorsOnAdd(
171 key, value, offset_, table_properties_collectors_, ioptions_.info_log);
172 }
173
174 Status PlainTableBuilder::status() const { return status_; }
175
176 Status PlainTableBuilder::Finish() {
177 assert(!closed_);
178 closed_ = true;
179
180 properties_.data_size = offset_;
181
182 // Write the following blocks
183 // 1. [meta block: bloom] - optional
184 // 2. [meta block: index] - optional
185 // 3. [meta block: properties]
186 // 4. [metaindex block]
187 // 5. [footer]
188
189 MetaIndexBuilder meta_index_builer;
190
191 if (store_index_in_file_ && (properties_.num_entries > 0)) {
192 assert(properties_.num_entries <= std::numeric_limits<uint32_t>::max());
193 Status s;
194 BlockHandle bloom_block_handle;
195 if (bloom_bits_per_key_ > 0) {
196 bloom_block_.SetTotalBits(
197 &arena_,
198 static_cast<uint32_t>(properties_.num_entries) * bloom_bits_per_key_,
199 ioptions_.bloom_locality, huge_page_tlb_size_, ioptions_.info_log);
200
201 PutVarint32(&properties_.user_collected_properties
202 [PlainTablePropertyNames::kNumBloomBlocks],
203 bloom_block_.GetNumBlocks());
204
205 bloom_block_.AddKeysHashes(keys_or_prefixes_hashes_);
206
207 Slice bloom_finish_result = bloom_block_.Finish();
208
209 properties_.filter_size = bloom_finish_result.size();
210 s = WriteBlock(bloom_finish_result, file_, &offset_, &bloom_block_handle);
211
212 if (!s.ok()) {
213 return s;
214 }
215 meta_index_builer.Add(BloomBlockBuilder::kBloomBlock, bloom_block_handle);
216 }
217 BlockHandle index_block_handle;
218 Slice index_finish_result = index_builder_->Finish();
219
220 properties_.index_size = index_finish_result.size();
221 s = WriteBlock(index_finish_result, file_, &offset_, &index_block_handle);
222
223 if (!s.ok()) {
224 return s;
225 }
226
227 meta_index_builer.Add(PlainTableIndexBuilder::kPlainTableIndexBlock,
228 index_block_handle);
229 }
230
231 // Calculate bloom block size and index block size
232 PropertyBlockBuilder property_block_builder;
233 // -- Add basic properties
234 property_block_builder.AddTableProperty(properties_);
235
236 property_block_builder.Add(properties_.user_collected_properties);
237
238 // -- Add user collected properties
239 NotifyCollectTableCollectorsOnFinish(table_properties_collectors_,
240 ioptions_.info_log,
241 &property_block_builder);
242
243 // -- Write property block
244 BlockHandle property_block_handle;
245 auto s = WriteBlock(
246 property_block_builder.Finish(),
247 file_,
248 &offset_,
249 &property_block_handle
250 );
251 if (!s.ok()) {
252 return s;
253 }
254 meta_index_builer.Add(kPropertiesBlock, property_block_handle);
255
256 // -- write metaindex block
257 BlockHandle metaindex_block_handle;
258 s = WriteBlock(
259 meta_index_builer.Finish(),
260 file_,
261 &offset_,
262 &metaindex_block_handle
263 );
264 if (!s.ok()) {
265 return s;
266 }
267
268 // Write Footer
269 // no need to write out new footer if we're using default checksum
270 Footer footer(kLegacyPlainTableMagicNumber, 0);
271 footer.set_metaindex_handle(metaindex_block_handle);
272 footer.set_index_handle(BlockHandle::NullBlockHandle());
273 std::string footer_encoding;
274 footer.EncodeTo(&footer_encoding);
275 s = file_->Append(footer_encoding);
276 if (s.ok()) {
277 offset_ += footer_encoding.size();
278 }
279
280 return s;
281 }
282
283 void PlainTableBuilder::Abandon() {
284 closed_ = true;
285 }
286
287 uint64_t PlainTableBuilder::NumEntries() const {
288 return properties_.num_entries;
289 }
290
291 uint64_t PlainTableBuilder::FileSize() const {
292 return offset_;
293 }
294
295 } // namespace rocksdb
296 #endif // ROCKSDB_LITE