]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/table/meta_blocks.cc
import 14.2.4 nautilus point release
[ceph.git] / ceph / src / rocksdb / table / meta_blocks.cc
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5#include "table/meta_blocks.h"
6
7#include <map>
8#include <string>
9
10#include "db/table_properties_collector.h"
11#include "rocksdb/table.h"
12#include "rocksdb/table_properties.h"
13#include "table/block.h"
11fdf7f2 14#include "table/block_fetcher.h"
7c673cae
FG
15#include "table/format.h"
16#include "table/internal_iterator.h"
17#include "table/persistent_cache_helper.h"
18#include "table/table_properties_internal.h"
19#include "util/coding.h"
11fdf7f2 20#include "util/file_reader_writer.h"
7c673cae
FG
21
22namespace rocksdb {
23
24MetaIndexBuilder::MetaIndexBuilder()
25 : meta_index_block_(new BlockBuilder(1 /* restart interval */)) {}
26
27void MetaIndexBuilder::Add(const std::string& key,
28 const BlockHandle& handle) {
29 std::string handle_encoding;
30 handle.EncodeTo(&handle_encoding);
31 meta_block_handles_.insert({key, handle_encoding});
32}
33
34Slice MetaIndexBuilder::Finish() {
35 for (const auto& metablock : meta_block_handles_) {
36 meta_index_block_->Add(metablock.first, metablock.second);
37 }
38 return meta_index_block_->Finish();
39}
40
11fdf7f2
TL
41// Property block will be read sequentially and cached in a heap located
42// object, so there's no need for restart points. Thus we set the restart
43// interval to infinity to save space.
7c673cae 44PropertyBlockBuilder::PropertyBlockBuilder()
11fdf7f2
TL
45 : properties_block_(
46 new BlockBuilder(port::kMaxInt32 /* restart interval */)) {}
7c673cae
FG
47
48void PropertyBlockBuilder::Add(const std::string& name,
49 const std::string& val) {
50 props_.insert({name, val});
51}
52
53void PropertyBlockBuilder::Add(const std::string& name, uint64_t val) {
54 assert(props_.find(name) == props_.end());
55
56 std::string dst;
57 PutVarint64(&dst, val);
58
59 Add(name, dst);
60}
61
62void PropertyBlockBuilder::Add(
63 const UserCollectedProperties& user_collected_properties) {
64 for (const auto& prop : user_collected_properties) {
65 Add(prop.first, prop.second);
66 }
67}
68
69void PropertyBlockBuilder::AddTableProperty(const TableProperties& props) {
70 Add(TablePropertiesNames::kRawKeySize, props.raw_key_size);
71 Add(TablePropertiesNames::kRawValueSize, props.raw_value_size);
72 Add(TablePropertiesNames::kDataSize, props.data_size);
73 Add(TablePropertiesNames::kIndexSize, props.index_size);
11fdf7f2
TL
74 if (props.index_partitions != 0) {
75 Add(TablePropertiesNames::kIndexPartitions, props.index_partitions);
76 Add(TablePropertiesNames::kTopLevelIndexSize, props.top_level_index_size);
77 }
78 Add(TablePropertiesNames::kIndexKeyIsUserKey, props.index_key_is_user_key);
79 Add(TablePropertiesNames::kIndexValueIsDeltaEncoded,
80 props.index_value_is_delta_encoded);
7c673cae 81 Add(TablePropertiesNames::kNumEntries, props.num_entries);
494da23a
TL
82 Add(TablePropertiesNames::kDeletedKeys, props.num_deletions);
83 Add(TablePropertiesNames::kMergeOperands, props.num_merge_operands);
11fdf7f2 84 Add(TablePropertiesNames::kNumRangeDeletions, props.num_range_deletions);
7c673cae
FG
85 Add(TablePropertiesNames::kNumDataBlocks, props.num_data_blocks);
86 Add(TablePropertiesNames::kFilterSize, props.filter_size);
87 Add(TablePropertiesNames::kFormatVersion, props.format_version);
88 Add(TablePropertiesNames::kFixedKeyLen, props.fixed_key_len);
89 Add(TablePropertiesNames::kColumnFamilyId, props.column_family_id);
11fdf7f2
TL
90 Add(TablePropertiesNames::kCreationTime, props.creation_time);
91 Add(TablePropertiesNames::kOldestKeyTime, props.oldest_key_time);
7c673cae
FG
92
93 if (!props.filter_policy_name.empty()) {
94 Add(TablePropertiesNames::kFilterPolicy, props.filter_policy_name);
95 }
96 if (!props.comparator_name.empty()) {
97 Add(TablePropertiesNames::kComparator, props.comparator_name);
98 }
99
100 if (!props.merge_operator_name.empty()) {
101 Add(TablePropertiesNames::kMergeOperator, props.merge_operator_name);
102 }
103 if (!props.prefix_extractor_name.empty()) {
104 Add(TablePropertiesNames::kPrefixExtractorName,
105 props.prefix_extractor_name);
106 }
107 if (!props.property_collectors_names.empty()) {
108 Add(TablePropertiesNames::kPropertyCollectors,
109 props.property_collectors_names);
110 }
111 if (!props.column_family_name.empty()) {
112 Add(TablePropertiesNames::kColumnFamilyName, props.column_family_name);
113 }
114
115 if (!props.compression_name.empty()) {
116 Add(TablePropertiesNames::kCompression, props.compression_name);
117 }
494da23a
TL
118 if (!props.compression_options.empty()) {
119 Add(TablePropertiesNames::kCompressionOptions, props.compression_options);
120 }
7c673cae
FG
121}
122
123Slice PropertyBlockBuilder::Finish() {
124 for (const auto& prop : props_) {
125 properties_block_->Add(prop.first, prop.second);
126 }
127
128 return properties_block_->Finish();
129}
130
131void LogPropertiesCollectionError(
132 Logger* info_log, const std::string& method, const std::string& name) {
133 assert(method == "Add" || method == "Finish");
134
135 std::string msg =
136 "Encountered error when calling TablePropertiesCollector::" +
137 method + "() with collector name: " + name;
138 ROCKS_LOG_ERROR(info_log, "%s", msg.c_str());
139}
140
141bool NotifyCollectTableCollectorsOnAdd(
142 const Slice& key, const Slice& value, uint64_t file_size,
143 const std::vector<std::unique_ptr<IntTblPropCollector>>& collectors,
144 Logger* info_log) {
145 bool all_succeeded = true;
146 for (auto& collector : collectors) {
147 Status s = collector->InternalAdd(key, value, file_size);
148 all_succeeded = all_succeeded && s.ok();
149 if (!s.ok()) {
150 LogPropertiesCollectionError(info_log, "Add" /* method */,
151 collector->Name());
152 }
153 }
154 return all_succeeded;
155}
156
494da23a
TL
157void NotifyCollectTableCollectorsOnBlockAdd(
158 const std::vector<std::unique_ptr<IntTblPropCollector>>& collectors,
159 const uint64_t blockRawBytes, const uint64_t blockCompressedBytesFast,
160 const uint64_t blockCompressedBytesSlow) {
161 for (auto& collector : collectors) {
162 collector->BlockAdd(blockRawBytes, blockCompressedBytesFast,
163 blockCompressedBytesSlow);
164 }
165}
166
7c673cae
FG
167bool NotifyCollectTableCollectorsOnFinish(
168 const std::vector<std::unique_ptr<IntTblPropCollector>>& collectors,
169 Logger* info_log, PropertyBlockBuilder* builder) {
170 bool all_succeeded = true;
171 for (auto& collector : collectors) {
172 UserCollectedProperties user_collected_properties;
173 Status s = collector->Finish(&user_collected_properties);
174
175 all_succeeded = all_succeeded && s.ok();
176 if (!s.ok()) {
177 LogPropertiesCollectionError(info_log, "Finish" /* method */,
178 collector->Name());
179 } else {
180 builder->Add(user_collected_properties);
181 }
182 }
183
184 return all_succeeded;
185}
186
187Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file,
11fdf7f2
TL
188 FilePrefetchBuffer* prefetch_buffer, const Footer& footer,
189 const ImmutableCFOptions& ioptions,
494da23a
TL
190 TableProperties** table_properties, bool verify_checksum,
191 BlockHandle* ret_block_handle,
192 CacheAllocationPtr* verification_buf,
193 bool /*compression_type_missing*/,
194 MemoryAllocator* memory_allocator) {
7c673cae
FG
195 assert(table_properties);
196
197 Slice v = handle_value;
198 BlockHandle handle;
199 if (!handle.DecodeFrom(&v).ok()) {
200 return Status::InvalidArgument("Failed to decode properties block handle");
201 }
202
203 BlockContents block_contents;
204 ReadOptions read_options;
494da23a 205 read_options.verify_checksums = verify_checksum;
7c673cae 206 Status s;
11fdf7f2
TL
207 PersistentCacheOptions cache_options;
208
209 BlockFetcher block_fetcher(
210 file, prefetch_buffer, footer, read_options, handle, &block_contents,
494da23a
TL
211 ioptions, false /* decompress */, false /*maybe_compressed*/,
212 UncompressionDict::GetEmptyDict(), cache_options, memory_allocator);
11fdf7f2 213 s = block_fetcher.ReadBlockContents();
494da23a
TL
214 // property block is never compressed. Need to add uncompress logic if we are
215 // to compress it..
7c673cae
FG
216
217 if (!s.ok()) {
218 return s;
219 }
220
221 Block properties_block(std::move(block_contents),
222 kDisableGlobalSequenceNumber);
11fdf7f2
TL
223 DataBlockIter iter;
224 properties_block.NewIterator<DataBlockIter>(BytewiseComparator(),
225 BytewiseComparator(), &iter);
7c673cae
FG
226
227 auto new_table_properties = new TableProperties();
228 // All pre-defined properties of type uint64_t
229 std::unordered_map<std::string, uint64_t*> predefined_uint64_properties = {
230 {TablePropertiesNames::kDataSize, &new_table_properties->data_size},
231 {TablePropertiesNames::kIndexSize, &new_table_properties->index_size},
11fdf7f2
TL
232 {TablePropertiesNames::kIndexPartitions,
233 &new_table_properties->index_partitions},
234 {TablePropertiesNames::kTopLevelIndexSize,
235 &new_table_properties->top_level_index_size},
236 {TablePropertiesNames::kIndexKeyIsUserKey,
237 &new_table_properties->index_key_is_user_key},
238 {TablePropertiesNames::kIndexValueIsDeltaEncoded,
239 &new_table_properties->index_value_is_delta_encoded},
7c673cae
FG
240 {TablePropertiesNames::kFilterSize, &new_table_properties->filter_size},
241 {TablePropertiesNames::kRawKeySize, &new_table_properties->raw_key_size},
242 {TablePropertiesNames::kRawValueSize,
243 &new_table_properties->raw_value_size},
244 {TablePropertiesNames::kNumDataBlocks,
245 &new_table_properties->num_data_blocks},
246 {TablePropertiesNames::kNumEntries, &new_table_properties->num_entries},
494da23a
TL
247 {TablePropertiesNames::kDeletedKeys,
248 &new_table_properties->num_deletions},
249 {TablePropertiesNames::kMergeOperands,
250 &new_table_properties->num_merge_operands},
11fdf7f2
TL
251 {TablePropertiesNames::kNumRangeDeletions,
252 &new_table_properties->num_range_deletions},
7c673cae
FG
253 {TablePropertiesNames::kFormatVersion,
254 &new_table_properties->format_version},
255 {TablePropertiesNames::kFixedKeyLen,
256 &new_table_properties->fixed_key_len},
257 {TablePropertiesNames::kColumnFamilyId,
258 &new_table_properties->column_family_id},
11fdf7f2
TL
259 {TablePropertiesNames::kCreationTime,
260 &new_table_properties->creation_time},
261 {TablePropertiesNames::kOldestKeyTime,
262 &new_table_properties->oldest_key_time},
7c673cae
FG
263 };
264
265 std::string last_key;
494da23a 266 for (iter.SeekToFirstOrReport(); iter.Valid(); iter.NextOrReport()) {
7c673cae
FG
267 s = iter.status();
268 if (!s.ok()) {
269 break;
270 }
271
272 auto key = iter.key().ToString();
494da23a
TL
273 // properties block should be strictly sorted with no duplicate key.
274 if (!last_key.empty() &&
275 BytewiseComparator()->Compare(key, last_key) <= 0) {
276 s = Status::Corruption("properties unsorted");
277 break;
278 }
7c673cae
FG
279 last_key = key;
280
281 auto raw_val = iter.value();
282 auto pos = predefined_uint64_properties.find(key);
283
284 new_table_properties->properties_offsets.insert(
285 {key, handle.offset() + iter.ValueOffset()});
286
287 if (pos != predefined_uint64_properties.end()) {
494da23a
TL
288 if (key == TablePropertiesNames::kDeletedKeys ||
289 key == TablePropertiesNames::kMergeOperands) {
290 // Insert in user-collected properties for API backwards compatibility
291 new_table_properties->user_collected_properties.insert(
292 {key, raw_val.ToString()});
293 }
7c673cae
FG
294 // handle predefined rocksdb properties
295 uint64_t val;
296 if (!GetVarint64(&raw_val, &val)) {
297 // skip malformed value
298 auto error_msg =
299 "Detect malformed value in properties meta-block:"
300 "\tkey: " + key + "\tval: " + raw_val.ToString();
301 ROCKS_LOG_ERROR(ioptions.info_log, "%s", error_msg.c_str());
302 continue;
303 }
304 *(pos->second) = val;
305 } else if (key == TablePropertiesNames::kFilterPolicy) {
306 new_table_properties->filter_policy_name = raw_val.ToString();
307 } else if (key == TablePropertiesNames::kColumnFamilyName) {
308 new_table_properties->column_family_name = raw_val.ToString();
309 } else if (key == TablePropertiesNames::kComparator) {
310 new_table_properties->comparator_name = raw_val.ToString();
311 } else if (key == TablePropertiesNames::kMergeOperator) {
312 new_table_properties->merge_operator_name = raw_val.ToString();
313 } else if (key == TablePropertiesNames::kPrefixExtractorName) {
314 new_table_properties->prefix_extractor_name = raw_val.ToString();
315 } else if (key == TablePropertiesNames::kPropertyCollectors) {
316 new_table_properties->property_collectors_names = raw_val.ToString();
317 } else if (key == TablePropertiesNames::kCompression) {
318 new_table_properties->compression_name = raw_val.ToString();
494da23a
TL
319 } else if (key == TablePropertiesNames::kCompressionOptions) {
320 new_table_properties->compression_options = raw_val.ToString();
7c673cae
FG
321 } else {
322 // handle user-collected properties
323 new_table_properties->user_collected_properties.insert(
324 {key, raw_val.ToString()});
325 }
326 }
327 if (s.ok()) {
328 *table_properties = new_table_properties;
494da23a
TL
329 if (ret_block_handle != nullptr) {
330 *ret_block_handle = handle;
331 }
332 if (verification_buf != nullptr) {
333 size_t len = handle.size() + kBlockTrailerSize;
334 *verification_buf = rocksdb::AllocateBlock(len, memory_allocator);
335 if (verification_buf->get() != nullptr) {
336 memcpy(verification_buf->get(), block_contents.data.data(), len);
337 }
338 }
7c673cae
FG
339 } else {
340 delete new_table_properties;
341 }
342
343 return s;
344}
345
346Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size,
347 uint64_t table_magic_number,
494da23a 348 const ImmutableCFOptions& ioptions,
11fdf7f2 349 TableProperties** properties,
494da23a
TL
350 bool compression_type_missing,
351 MemoryAllocator* memory_allocator) {
7c673cae
FG
352 // -- Read metaindex block
353 Footer footer;
11fdf7f2
TL
354 auto s = ReadFooterFromFile(file, nullptr /* prefetch_buffer */, file_size,
355 &footer, table_magic_number);
7c673cae
FG
356 if (!s.ok()) {
357 return s;
358 }
359
360 auto metaindex_handle = footer.metaindex_handle();
361 BlockContents metaindex_contents;
362 ReadOptions read_options;
363 read_options.verify_checksums = false;
11fdf7f2
TL
364 PersistentCacheOptions cache_options;
365
366 BlockFetcher block_fetcher(
367 file, nullptr /* prefetch_buffer */, footer, read_options,
368 metaindex_handle, &metaindex_contents, ioptions, false /* decompress */,
494da23a
TL
369 false /*maybe_compressed*/, UncompressionDict::GetEmptyDict(),
370 cache_options, memory_allocator);
11fdf7f2 371 s = block_fetcher.ReadBlockContents();
7c673cae
FG
372 if (!s.ok()) {
373 return s;
374 }
494da23a
TL
375 // property blocks are never compressed. Need to add uncompress logic if we
376 // are to compress it.
7c673cae
FG
377 Block metaindex_block(std::move(metaindex_contents),
378 kDisableGlobalSequenceNumber);
379 std::unique_ptr<InternalIterator> meta_iter(
11fdf7f2
TL
380 metaindex_block.NewIterator<DataBlockIter>(BytewiseComparator(),
381 BytewiseComparator()));
7c673cae
FG
382
383 // -- Read property block
384 bool found_properties_block = true;
385 s = SeekToPropertiesBlock(meta_iter.get(), &found_properties_block);
386 if (!s.ok()) {
387 return s;
388 }
389
390 TableProperties table_properties;
391 if (found_properties_block == true) {
494da23a
TL
392 s = ReadProperties(
393 meta_iter->value(), file, nullptr /* prefetch_buffer */, footer,
394 ioptions, properties, false /* verify_checksum */,
395 nullptr /* ret_block_hanel */, nullptr /* ret_block_contents */,
396 compression_type_missing, memory_allocator);
7c673cae
FG
397 } else {
398 s = Status::NotFound();
399 }
400
401 return s;
402}
403
404Status FindMetaBlock(InternalIterator* meta_index_iter,
405 const std::string& meta_block_name,
406 BlockHandle* block_handle) {
407 meta_index_iter->Seek(meta_block_name);
408 if (meta_index_iter->status().ok() && meta_index_iter->Valid() &&
409 meta_index_iter->key() == meta_block_name) {
410 Slice v = meta_index_iter->value();
411 return block_handle->DecodeFrom(&v);
412 } else {
413 return Status::Corruption("Cannot find the meta block", meta_block_name);
414 }
415}
416
417Status FindMetaBlock(RandomAccessFileReader* file, uint64_t file_size,
418 uint64_t table_magic_number,
494da23a 419 const ImmutableCFOptions& ioptions,
7c673cae 420 const std::string& meta_block_name,
11fdf7f2 421 BlockHandle* block_handle,
494da23a
TL
422 bool /*compression_type_missing*/,
423 MemoryAllocator* memory_allocator) {
7c673cae 424 Footer footer;
11fdf7f2
TL
425 auto s = ReadFooterFromFile(file, nullptr /* prefetch_buffer */, file_size,
426 &footer, table_magic_number);
7c673cae
FG
427 if (!s.ok()) {
428 return s;
429 }
430
431 auto metaindex_handle = footer.metaindex_handle();
432 BlockContents metaindex_contents;
433 ReadOptions read_options;
434 read_options.verify_checksums = false;
11fdf7f2
TL
435 PersistentCacheOptions cache_options;
436 BlockFetcher block_fetcher(
437 file, nullptr /* prefetch_buffer */, footer, read_options,
438 metaindex_handle, &metaindex_contents, ioptions,
494da23a
TL
439 false /* do decompression */, false /*maybe_compressed*/,
440 UncompressionDict::GetEmptyDict(), cache_options, memory_allocator);
11fdf7f2 441 s = block_fetcher.ReadBlockContents();
7c673cae
FG
442 if (!s.ok()) {
443 return s;
444 }
494da23a
TL
445 // meta blocks are never compressed. Need to add uncompress logic if we are to
446 // compress it.
7c673cae
FG
447 Block metaindex_block(std::move(metaindex_contents),
448 kDisableGlobalSequenceNumber);
449
450 std::unique_ptr<InternalIterator> meta_iter;
11fdf7f2
TL
451 meta_iter.reset(metaindex_block.NewIterator<DataBlockIter>(
452 BytewiseComparator(), BytewiseComparator()));
7c673cae
FG
453
454 return FindMetaBlock(meta_iter.get(), meta_block_name, block_handle);
455}
456
11fdf7f2
TL
457Status ReadMetaBlock(RandomAccessFileReader* file,
458 FilePrefetchBuffer* prefetch_buffer, uint64_t file_size,
7c673cae 459 uint64_t table_magic_number,
11fdf7f2 460 const ImmutableCFOptions& ioptions,
7c673cae 461 const std::string& meta_block_name,
494da23a
TL
462 BlockContents* contents, bool /*compression_type_missing*/,
463 MemoryAllocator* memory_allocator) {
7c673cae
FG
464 Status status;
465 Footer footer;
11fdf7f2
TL
466 status = ReadFooterFromFile(file, prefetch_buffer, file_size, &footer,
467 table_magic_number);
7c673cae
FG
468 if (!status.ok()) {
469 return status;
470 }
471
472 // Reading metaindex block
473 auto metaindex_handle = footer.metaindex_handle();
474 BlockContents metaindex_contents;
475 ReadOptions read_options;
476 read_options.verify_checksums = false;
11fdf7f2
TL
477 PersistentCacheOptions cache_options;
478
479 BlockFetcher block_fetcher(file, prefetch_buffer, footer, read_options,
480 metaindex_handle, &metaindex_contents, ioptions,
494da23a
TL
481 false /* decompress */, false /*maybe_compressed*/,
482 UncompressionDict::GetEmptyDict(), cache_options,
483 memory_allocator);
11fdf7f2 484 status = block_fetcher.ReadBlockContents();
7c673cae
FG
485 if (!status.ok()) {
486 return status;
487 }
494da23a
TL
488 // meta block is never compressed. Need to add uncompress logic if we are to
489 // compress it.
7c673cae
FG
490
491 // Finding metablock
492 Block metaindex_block(std::move(metaindex_contents),
493 kDisableGlobalSequenceNumber);
494
495 std::unique_ptr<InternalIterator> meta_iter;
11fdf7f2
TL
496 meta_iter.reset(metaindex_block.NewIterator<DataBlockIter>(
497 BytewiseComparator(), BytewiseComparator()));
7c673cae
FG
498
499 BlockHandle block_handle;
500 status = FindMetaBlock(meta_iter.get(), meta_block_name, &block_handle);
501
502 if (!status.ok()) {
503 return status;
504 }
505
506 // Reading metablock
11fdf7f2
TL
507 BlockFetcher block_fetcher2(
508 file, prefetch_buffer, footer, read_options, block_handle, contents,
494da23a
TL
509 ioptions, false /* decompress */, false /*maybe_compressed*/,
510 UncompressionDict::GetEmptyDict(), cache_options, memory_allocator);
11fdf7f2 511 return block_fetcher2.ReadBlockContents();
7c673cae
FG
512}
513
514} // namespace rocksdb