]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/table/meta_blocks.cc
import quincy beta 17.1.0
[ceph.git] / ceph / src / rocksdb / table / meta_blocks.cc
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 #include "table/meta_blocks.h"
6
7 #include <map>
8 #include <string>
9
10 #include "block_fetcher.h"
11 #include "db/table_properties_collector.h"
12 #include "file/random_access_file_reader.h"
13 #include "rocksdb/table.h"
14 #include "rocksdb/table_properties.h"
15 #include "table/block_based/block.h"
16 #include "table/format.h"
17 #include "table/internal_iterator.h"
18 #include "table/persistent_cache_helper.h"
19 #include "table/table_properties_internal.h"
20 #include "test_util/sync_point.h"
21 #include "util/coding.h"
22
23 namespace ROCKSDB_NAMESPACE {
24
25 MetaIndexBuilder::MetaIndexBuilder()
26 : meta_index_block_(new BlockBuilder(1 /* restart interval */)) {}
27
28 void MetaIndexBuilder::Add(const std::string& key,
29 const BlockHandle& handle) {
30 std::string handle_encoding;
31 handle.EncodeTo(&handle_encoding);
32 meta_block_handles_.insert({key, handle_encoding});
33 }
34
35 Slice MetaIndexBuilder::Finish() {
36 for (const auto& metablock : meta_block_handles_) {
37 meta_index_block_->Add(metablock.first, metablock.second);
38 }
39 return meta_index_block_->Finish();
40 }
41
42 // Property block will be read sequentially and cached in a heap located
43 // object, so there's no need for restart points. Thus we set the restart
44 // interval to infinity to save space.
45 PropertyBlockBuilder::PropertyBlockBuilder()
46 : properties_block_(
47 new BlockBuilder(port::kMaxInt32 /* restart interval */)) {}
48
49 void PropertyBlockBuilder::Add(const std::string& name,
50 const std::string& val) {
51 props_.insert({name, val});
52 }
53
54 void PropertyBlockBuilder::Add(const std::string& name, uint64_t val) {
55 assert(props_.find(name) == props_.end());
56
57 std::string dst;
58 PutVarint64(&dst, val);
59
60 Add(name, dst);
61 }
62
63 void PropertyBlockBuilder::Add(
64 const UserCollectedProperties& user_collected_properties) {
65 for (const auto& prop : user_collected_properties) {
66 Add(prop.first, prop.second);
67 }
68 }
69
70 void PropertyBlockBuilder::AddTableProperty(const TableProperties& props) {
71 TEST_SYNC_POINT_CALLBACK("PropertyBlockBuilder::AddTableProperty:Start",
72 const_cast<TableProperties*>(&props));
73
74 Add(TablePropertiesNames::kRawKeySize, props.raw_key_size);
75 Add(TablePropertiesNames::kRawValueSize, props.raw_value_size);
76 Add(TablePropertiesNames::kDataSize, props.data_size);
77 Add(TablePropertiesNames::kIndexSize, props.index_size);
78 if (props.index_partitions != 0) {
79 Add(TablePropertiesNames::kIndexPartitions, props.index_partitions);
80 Add(TablePropertiesNames::kTopLevelIndexSize, props.top_level_index_size);
81 }
82 Add(TablePropertiesNames::kIndexKeyIsUserKey, props.index_key_is_user_key);
83 Add(TablePropertiesNames::kIndexValueIsDeltaEncoded,
84 props.index_value_is_delta_encoded);
85 Add(TablePropertiesNames::kNumEntries, props.num_entries);
86 Add(TablePropertiesNames::kDeletedKeys, props.num_deletions);
87 Add(TablePropertiesNames::kMergeOperands, props.num_merge_operands);
88 Add(TablePropertiesNames::kNumRangeDeletions, props.num_range_deletions);
89 Add(TablePropertiesNames::kNumDataBlocks, props.num_data_blocks);
90 Add(TablePropertiesNames::kFilterSize, props.filter_size);
91 Add(TablePropertiesNames::kFormatVersion, props.format_version);
92 Add(TablePropertiesNames::kFixedKeyLen, props.fixed_key_len);
93 Add(TablePropertiesNames::kColumnFamilyId, props.column_family_id);
94 Add(TablePropertiesNames::kCreationTime, props.creation_time);
95 Add(TablePropertiesNames::kOldestKeyTime, props.oldest_key_time);
96 if (props.file_creation_time > 0) {
97 Add(TablePropertiesNames::kFileCreationTime, props.file_creation_time);
98 }
99 if (!props.db_id.empty()) {
100 Add(TablePropertiesNames::kDbId, props.db_id);
101 }
102 if (!props.db_session_id.empty()) {
103 Add(TablePropertiesNames::kDbSessionId, props.db_session_id);
104 }
105 if (!props.db_host_id.empty()) {
106 Add(TablePropertiesNames::kDbHostId, props.db_host_id);
107 }
108
109 if (!props.filter_policy_name.empty()) {
110 Add(TablePropertiesNames::kFilterPolicy, props.filter_policy_name);
111 }
112 if (!props.comparator_name.empty()) {
113 Add(TablePropertiesNames::kComparator, props.comparator_name);
114 }
115
116 if (!props.merge_operator_name.empty()) {
117 Add(TablePropertiesNames::kMergeOperator, props.merge_operator_name);
118 }
119 if (!props.prefix_extractor_name.empty()) {
120 Add(TablePropertiesNames::kPrefixExtractorName,
121 props.prefix_extractor_name);
122 }
123 if (!props.property_collectors_names.empty()) {
124 Add(TablePropertiesNames::kPropertyCollectors,
125 props.property_collectors_names);
126 }
127 if (!props.column_family_name.empty()) {
128 Add(TablePropertiesNames::kColumnFamilyName, props.column_family_name);
129 }
130
131 if (!props.compression_name.empty()) {
132 Add(TablePropertiesNames::kCompression, props.compression_name);
133 }
134 if (!props.compression_options.empty()) {
135 Add(TablePropertiesNames::kCompressionOptions, props.compression_options);
136 }
137 }
138
139 Slice PropertyBlockBuilder::Finish() {
140 for (const auto& prop : props_) {
141 properties_block_->Add(prop.first, prop.second);
142 }
143
144 return properties_block_->Finish();
145 }
146
147 void LogPropertiesCollectionError(
148 Logger* info_log, const std::string& method, const std::string& name) {
149 assert(method == "Add" || method == "Finish");
150
151 std::string msg =
152 "Encountered error when calling TablePropertiesCollector::" +
153 method + "() with collector name: " + name;
154 ROCKS_LOG_ERROR(info_log, "%s", msg.c_str());
155 }
156
157 bool NotifyCollectTableCollectorsOnAdd(
158 const Slice& key, const Slice& value, uint64_t file_size,
159 const std::vector<std::unique_ptr<IntTblPropCollector>>& collectors,
160 Logger* info_log) {
161 bool all_succeeded = true;
162 for (auto& collector : collectors) {
163 Status s = collector->InternalAdd(key, value, file_size);
164 all_succeeded = all_succeeded && s.ok();
165 if (!s.ok()) {
166 LogPropertiesCollectionError(info_log, "Add" /* method */,
167 collector->Name());
168 }
169 }
170 return all_succeeded;
171 }
172
173 void NotifyCollectTableCollectorsOnBlockAdd(
174 const std::vector<std::unique_ptr<IntTblPropCollector>>& collectors,
175 const uint64_t blockRawBytes, const uint64_t blockCompressedBytesFast,
176 const uint64_t blockCompressedBytesSlow) {
177 for (auto& collector : collectors) {
178 collector->BlockAdd(blockRawBytes, blockCompressedBytesFast,
179 blockCompressedBytesSlow);
180 }
181 }
182
183 bool NotifyCollectTableCollectorsOnFinish(
184 const std::vector<std::unique_ptr<IntTblPropCollector>>& collectors,
185 Logger* info_log, PropertyBlockBuilder* builder) {
186 bool all_succeeded = true;
187 for (auto& collector : collectors) {
188 UserCollectedProperties user_collected_properties;
189 Status s = collector->Finish(&user_collected_properties);
190
191 all_succeeded = all_succeeded && s.ok();
192 if (!s.ok()) {
193 LogPropertiesCollectionError(info_log, "Finish" /* method */,
194 collector->Name());
195 } else {
196 builder->Add(user_collected_properties);
197 }
198 }
199
200 return all_succeeded;
201 }
202
203 Status ReadProperties(const ReadOptions& read_options,
204 const Slice& handle_value, RandomAccessFileReader* file,
205 FilePrefetchBuffer* prefetch_buffer, const Footer& footer,
206 const ImmutableCFOptions& ioptions,
207 TableProperties** table_properties, bool verify_checksum,
208 BlockHandle* ret_block_handle,
209 CacheAllocationPtr* verification_buf,
210 bool /*compression_type_missing*/,
211 MemoryAllocator* memory_allocator) {
212 assert(table_properties);
213
214 Slice v = handle_value;
215 BlockHandle handle;
216 if (!handle.DecodeFrom(&v).ok()) {
217 return Status::InvalidArgument("Failed to decode properties block handle");
218 }
219
220 BlockContents block_contents;
221 Status s;
222 PersistentCacheOptions cache_options;
223 ReadOptions ro = read_options;
224 ro.verify_checksums = verify_checksum;
225
226 BlockFetcher block_fetcher(file, prefetch_buffer, footer, ro, handle,
227 &block_contents, ioptions, false /* decompress */,
228 false /*maybe_compressed*/, BlockType::kProperties,
229 UncompressionDict::GetEmptyDict(), cache_options,
230 memory_allocator);
231 s = block_fetcher.ReadBlockContents();
232 // property block is never compressed. Need to add uncompress logic if we are
233 // to compress it..
234
235 if (!s.ok()) {
236 return s;
237 }
238
239 Block properties_block(std::move(block_contents));
240 DataBlockIter iter;
241 properties_block.NewDataIterator(BytewiseComparator(),
242 kDisableGlobalSequenceNumber, &iter);
243
244 auto new_table_properties = new TableProperties();
245 // All pre-defined properties of type uint64_t
246 std::unordered_map<std::string, uint64_t*> predefined_uint64_properties = {
247 {TablePropertiesNames::kDataSize, &new_table_properties->data_size},
248 {TablePropertiesNames::kIndexSize, &new_table_properties->index_size},
249 {TablePropertiesNames::kIndexPartitions,
250 &new_table_properties->index_partitions},
251 {TablePropertiesNames::kTopLevelIndexSize,
252 &new_table_properties->top_level_index_size},
253 {TablePropertiesNames::kIndexKeyIsUserKey,
254 &new_table_properties->index_key_is_user_key},
255 {TablePropertiesNames::kIndexValueIsDeltaEncoded,
256 &new_table_properties->index_value_is_delta_encoded},
257 {TablePropertiesNames::kFilterSize, &new_table_properties->filter_size},
258 {TablePropertiesNames::kRawKeySize, &new_table_properties->raw_key_size},
259 {TablePropertiesNames::kRawValueSize,
260 &new_table_properties->raw_value_size},
261 {TablePropertiesNames::kNumDataBlocks,
262 &new_table_properties->num_data_blocks},
263 {TablePropertiesNames::kNumEntries, &new_table_properties->num_entries},
264 {TablePropertiesNames::kDeletedKeys,
265 &new_table_properties->num_deletions},
266 {TablePropertiesNames::kMergeOperands,
267 &new_table_properties->num_merge_operands},
268 {TablePropertiesNames::kNumRangeDeletions,
269 &new_table_properties->num_range_deletions},
270 {TablePropertiesNames::kFormatVersion,
271 &new_table_properties->format_version},
272 {TablePropertiesNames::kFixedKeyLen,
273 &new_table_properties->fixed_key_len},
274 {TablePropertiesNames::kColumnFamilyId,
275 &new_table_properties->column_family_id},
276 {TablePropertiesNames::kCreationTime,
277 &new_table_properties->creation_time},
278 {TablePropertiesNames::kOldestKeyTime,
279 &new_table_properties->oldest_key_time},
280 {TablePropertiesNames::kFileCreationTime,
281 &new_table_properties->file_creation_time},
282 };
283
284 std::string last_key;
285 for (iter.SeekToFirstOrReport(); iter.Valid(); iter.NextOrReport()) {
286 s = iter.status();
287 if (!s.ok()) {
288 break;
289 }
290
291 auto key = iter.key().ToString();
292 // properties block should be strictly sorted with no duplicate key.
293 if (!last_key.empty() &&
294 BytewiseComparator()->Compare(key, last_key) <= 0) {
295 s = Status::Corruption("properties unsorted");
296 break;
297 }
298 last_key = key;
299
300 auto raw_val = iter.value();
301 auto pos = predefined_uint64_properties.find(key);
302
303 new_table_properties->properties_offsets.insert(
304 {key, handle.offset() + iter.ValueOffset()});
305
306 if (pos != predefined_uint64_properties.end()) {
307 if (key == TablePropertiesNames::kDeletedKeys ||
308 key == TablePropertiesNames::kMergeOperands) {
309 // Insert in user-collected properties for API backwards compatibility
310 new_table_properties->user_collected_properties.insert(
311 {key, raw_val.ToString()});
312 }
313 // handle predefined rocksdb properties
314 uint64_t val;
315 if (!GetVarint64(&raw_val, &val)) {
316 // skip malformed value
317 auto error_msg =
318 "Detect malformed value in properties meta-block:"
319 "\tkey: " + key + "\tval: " + raw_val.ToString();
320 ROCKS_LOG_ERROR(ioptions.info_log, "%s", error_msg.c_str());
321 continue;
322 }
323 *(pos->second) = val;
324 } else if (key == TablePropertiesNames::kDbId) {
325 new_table_properties->db_id = raw_val.ToString();
326 } else if (key == TablePropertiesNames::kDbSessionId) {
327 new_table_properties->db_session_id = raw_val.ToString();
328 } else if (key == TablePropertiesNames::kDbHostId) {
329 new_table_properties->db_host_id = raw_val.ToString();
330 } else if (key == TablePropertiesNames::kFilterPolicy) {
331 new_table_properties->filter_policy_name = raw_val.ToString();
332 } else if (key == TablePropertiesNames::kColumnFamilyName) {
333 new_table_properties->column_family_name = raw_val.ToString();
334 } else if (key == TablePropertiesNames::kComparator) {
335 new_table_properties->comparator_name = raw_val.ToString();
336 } else if (key == TablePropertiesNames::kMergeOperator) {
337 new_table_properties->merge_operator_name = raw_val.ToString();
338 } else if (key == TablePropertiesNames::kPrefixExtractorName) {
339 new_table_properties->prefix_extractor_name = raw_val.ToString();
340 } else if (key == TablePropertiesNames::kPropertyCollectors) {
341 new_table_properties->property_collectors_names = raw_val.ToString();
342 } else if (key == TablePropertiesNames::kCompression) {
343 new_table_properties->compression_name = raw_val.ToString();
344 } else if (key == TablePropertiesNames::kCompressionOptions) {
345 new_table_properties->compression_options = raw_val.ToString();
346 } else {
347 // handle user-collected properties
348 new_table_properties->user_collected_properties.insert(
349 {key, raw_val.ToString()});
350 }
351 }
352 if (s.ok()) {
353 *table_properties = new_table_properties;
354 if (ret_block_handle != nullptr) {
355 *ret_block_handle = handle;
356 }
357 if (verification_buf != nullptr) {
358 size_t len = static_cast<size_t>(handle.size() + kBlockTrailerSize);
359 *verification_buf =
360 ROCKSDB_NAMESPACE::AllocateBlock(len, memory_allocator);
361 if (verification_buf->get() != nullptr) {
362 memcpy(verification_buf->get(), block_contents.data.data(), len);
363 }
364 }
365 } else {
366 delete new_table_properties;
367 }
368
369 return s;
370 }
371
372 Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size,
373 uint64_t table_magic_number,
374 const ImmutableCFOptions& ioptions,
375 TableProperties** properties,
376 bool compression_type_missing,
377 MemoryAllocator* memory_allocator,
378 FilePrefetchBuffer* prefetch_buffer) {
379 // -- Read metaindex block
380 Footer footer;
381 IOOptions opts;
382 auto s = ReadFooterFromFile(opts, file, prefetch_buffer, file_size, &footer,
383 table_magic_number);
384 if (!s.ok()) {
385 return s;
386 }
387
388 auto metaindex_handle = footer.metaindex_handle();
389 BlockContents metaindex_contents;
390 ReadOptions read_options;
391 read_options.verify_checksums = false;
392 PersistentCacheOptions cache_options;
393
394 BlockFetcher block_fetcher(
395 file, prefetch_buffer, footer, read_options, metaindex_handle,
396 &metaindex_contents, ioptions, false /* decompress */,
397 false /*maybe_compressed*/, BlockType::kMetaIndex,
398 UncompressionDict::GetEmptyDict(), cache_options, memory_allocator);
399 s = block_fetcher.ReadBlockContents();
400 if (!s.ok()) {
401 return s;
402 }
403 // property blocks are never compressed. Need to add uncompress logic if we
404 // are to compress it.
405 Block metaindex_block(std::move(metaindex_contents));
406 std::unique_ptr<InternalIterator> meta_iter(metaindex_block.NewDataIterator(
407 BytewiseComparator(), kDisableGlobalSequenceNumber));
408
409 // -- Read property block
410 bool found_properties_block = true;
411 s = SeekToPropertiesBlock(meta_iter.get(), &found_properties_block);
412 if (!s.ok()) {
413 return s;
414 }
415
416 TableProperties table_properties;
417 if (found_properties_block == true) {
418 s = ReadProperties(
419 read_options, meta_iter->value(), file, prefetch_buffer, footer,
420 ioptions, properties, false /* verify_checksum */,
421 nullptr /* ret_block_hanel */, nullptr /* ret_block_contents */,
422 compression_type_missing, memory_allocator);
423 } else {
424 s = Status::NotFound();
425 }
426
427 return s;
428 }
429
430 Status FindMetaBlock(InternalIterator* meta_index_iter,
431 const std::string& meta_block_name,
432 BlockHandle* block_handle) {
433 meta_index_iter->Seek(meta_block_name);
434 if (meta_index_iter->status().ok() && meta_index_iter->Valid() &&
435 meta_index_iter->key() == meta_block_name) {
436 Slice v = meta_index_iter->value();
437 return block_handle->DecodeFrom(&v);
438 } else {
439 return Status::Corruption("Cannot find the meta block", meta_block_name);
440 }
441 }
442
443 Status FindMetaBlock(RandomAccessFileReader* file, uint64_t file_size,
444 uint64_t table_magic_number,
445 const ImmutableCFOptions& ioptions,
446 const std::string& meta_block_name,
447 BlockHandle* block_handle,
448 bool /*compression_type_missing*/,
449 MemoryAllocator* memory_allocator) {
450 Footer footer;
451 IOOptions opts;
452 auto s = ReadFooterFromFile(opts, file, nullptr /* prefetch_buffer */,
453 file_size, &footer, table_magic_number);
454 if (!s.ok()) {
455 return s;
456 }
457
458 auto metaindex_handle = footer.metaindex_handle();
459 BlockContents metaindex_contents;
460 ReadOptions read_options;
461 read_options.verify_checksums = false;
462 PersistentCacheOptions cache_options;
463 BlockFetcher block_fetcher(
464 file, nullptr /* prefetch_buffer */, footer, read_options,
465 metaindex_handle, &metaindex_contents, ioptions,
466 false /* do decompression */, false /*maybe_compressed*/,
467 BlockType::kMetaIndex, UncompressionDict::GetEmptyDict(), cache_options,
468 memory_allocator);
469 s = block_fetcher.ReadBlockContents();
470 if (!s.ok()) {
471 return s;
472 }
473 // meta blocks are never compressed. Need to add uncompress logic if we are to
474 // compress it.
475 Block metaindex_block(std::move(metaindex_contents));
476
477 std::unique_ptr<InternalIterator> meta_iter;
478 meta_iter.reset(metaindex_block.NewDataIterator(
479 BytewiseComparator(), kDisableGlobalSequenceNumber));
480
481 return FindMetaBlock(meta_iter.get(), meta_block_name, block_handle);
482 }
483
484 Status ReadMetaBlock(RandomAccessFileReader* file,
485 FilePrefetchBuffer* prefetch_buffer, uint64_t file_size,
486 uint64_t table_magic_number,
487 const ImmutableCFOptions& ioptions,
488 const std::string& meta_block_name, BlockType block_type,
489 BlockContents* contents, bool /*compression_type_missing*/,
490 MemoryAllocator* memory_allocator) {
491 Status status;
492 Footer footer;
493 IOOptions opts;
494 status = ReadFooterFromFile(opts, file, prefetch_buffer, file_size, &footer,
495 table_magic_number);
496 if (!status.ok()) {
497 return status;
498 }
499
500 // Reading metaindex block
501 auto metaindex_handle = footer.metaindex_handle();
502 BlockContents metaindex_contents;
503 ReadOptions read_options;
504 read_options.verify_checksums = false;
505 PersistentCacheOptions cache_options;
506
507 BlockFetcher block_fetcher(
508 file, prefetch_buffer, footer, read_options, metaindex_handle,
509 &metaindex_contents, ioptions, false /* decompress */,
510 false /*maybe_compressed*/, BlockType::kMetaIndex,
511 UncompressionDict::GetEmptyDict(), cache_options, memory_allocator);
512 status = block_fetcher.ReadBlockContents();
513 if (!status.ok()) {
514 return status;
515 }
516 // meta block is never compressed. Need to add uncompress logic if we are to
517 // compress it.
518
519 // Finding metablock
520 Block metaindex_block(std::move(metaindex_contents));
521
522 std::unique_ptr<InternalIterator> meta_iter;
523 meta_iter.reset(metaindex_block.NewDataIterator(
524 BytewiseComparator(), kDisableGlobalSequenceNumber));
525
526 BlockHandle block_handle;
527 status = FindMetaBlock(meta_iter.get(), meta_block_name, &block_handle);
528
529 if (!status.ok()) {
530 return status;
531 }
532
533 // Reading metablock
534 BlockFetcher block_fetcher2(
535 file, prefetch_buffer, footer, read_options, block_handle, contents,
536 ioptions, false /* decompress */, false /*maybe_compressed*/, block_type,
537 UncompressionDict::GetEmptyDict(), cache_options, memory_allocator);
538 return block_fetcher2.ReadBlockContents();
539 }
540
541 } // namespace ROCKSDB_NAMESPACE