1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under the BSD-style license found in the
3 // LICENSE file in the root directory of this source tree. An additional grant
4 // of patent rights can be found in the PATENTS file in the same directory.
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
10 #include "table/block_based_table_builder.h"
20 #include <unordered_map>
23 #include "db/dbformat.h"
25 #include "rocksdb/cache.h"
26 #include "rocksdb/comparator.h"
27 #include "rocksdb/env.h"
28 #include "rocksdb/filter_policy.h"
29 #include "rocksdb/flush_block_policy.h"
30 #include "rocksdb/merge_operator.h"
31 #include "rocksdb/table.h"
33 #include "table/block.h"
34 #include "table/block_based_filter_block.h"
35 #include "table/block_based_table_factory.h"
36 #include "table/block_based_table_reader.h"
37 #include "table/block_builder.h"
38 #include "table/filter_block.h"
39 #include "table/format.h"
40 #include "table/full_filter_block.h"
41 #include "table/meta_blocks.h"
42 #include "table/table_builder.h"
44 #include "util/string_util.h"
45 #include "util/coding.h"
46 #include "util/compression.h"
47 #include "util/crc32c.h"
48 #include "util/stop_watch.h"
49 #include "util/xxhash.h"
51 #include "table/index_builder.h"
52 #include "table/partitioned_filter_block.h"
56 extern const std::string kHashIndexPrefixesBlock
;
57 extern const std::string kHashIndexPrefixesMetadataBlock
;
59 typedef BlockBasedTableOptions::IndexType IndexType
;
61 // Without anonymous namespace here, we fail the warning -Wmissing-prototypes
64 // Create a filter block builder based on its type.
65 FilterBlockBuilder
* CreateFilterBlockBuilder(
66 const ImmutableCFOptions
& opt
, const BlockBasedTableOptions
& table_opt
,
67 PartitionedIndexBuilder
* const p_index_builder
) {
68 if (table_opt
.filter_policy
== nullptr) return nullptr;
70 FilterBitsBuilder
* filter_bits_builder
=
71 table_opt
.filter_policy
->GetFilterBitsBuilder();
72 if (filter_bits_builder
== nullptr) {
73 return new BlockBasedFilterBlockBuilder(opt
.prefix_extractor
, table_opt
);
75 if (table_opt
.partition_filters
) {
76 assert(p_index_builder
!= nullptr);
77 return new PartitionedFilterBlockBuilder(
78 opt
.prefix_extractor
, table_opt
.whole_key_filtering
,
79 filter_bits_builder
, table_opt
.index_block_restart_interval
,
82 return new FullFilterBlockBuilder(opt
.prefix_extractor
,
83 table_opt
.whole_key_filtering
,
// Returns true when the compressed output is small enough to be worth
// keeping: it must be at least 12.5% (1/8) smaller than the raw input.
// Note raw_size - raw_size / 8u cannot underflow for unsigned raw_size.
bool GoodCompressionRatio(size_t compressed_size, size_t raw_size) {
  // Check to see if compressed less than 12.5%
  return compressed_size < raw_size - (raw_size / 8u);
}
96 // format_version is the block format as defined in include/rocksdb/table.h
97 Slice
CompressBlock(const Slice
& raw
,
98 const CompressionOptions
& compression_options
,
99 CompressionType
* type
, uint32_t format_version
,
100 const Slice
& compression_dict
,
101 std::string
* compressed_output
) {
102 if (*type
== kNoCompression
) {
106 // Will return compressed block contents if (1) the compression method is
107 // supported in this platform and (2) the compression rate is "good enough".
109 case kSnappyCompression
:
110 if (Snappy_Compress(compression_options
, raw
.data(), raw
.size(),
111 compressed_output
) &&
112 GoodCompressionRatio(compressed_output
->size(), raw
.size())) {
113 return *compressed_output
;
115 break; // fall back to no compression.
116 case kZlibCompression
:
119 GetCompressFormatForVersion(kZlibCompression
, format_version
),
120 raw
.data(), raw
.size(), compressed_output
, compression_dict
) &&
121 GoodCompressionRatio(compressed_output
->size(), raw
.size())) {
122 return *compressed_output
;
124 break; // fall back to no compression.
125 case kBZip2Compression
:
128 GetCompressFormatForVersion(kBZip2Compression
, format_version
),
129 raw
.data(), raw
.size(), compressed_output
) &&
130 GoodCompressionRatio(compressed_output
->size(), raw
.size())) {
131 return *compressed_output
;
133 break; // fall back to no compression.
134 case kLZ4Compression
:
137 GetCompressFormatForVersion(kLZ4Compression
, format_version
),
138 raw
.data(), raw
.size(), compressed_output
, compression_dict
) &&
139 GoodCompressionRatio(compressed_output
->size(), raw
.size())) {
140 return *compressed_output
;
142 break; // fall back to no compression.
143 case kLZ4HCCompression
:
146 GetCompressFormatForVersion(kLZ4HCCompression
, format_version
),
147 raw
.data(), raw
.size(), compressed_output
, compression_dict
) &&
148 GoodCompressionRatio(compressed_output
->size(), raw
.size())) {
149 return *compressed_output
;
151 break; // fall back to no compression.
152 case kXpressCompression
:
153 if (XPRESS_Compress(raw
.data(), raw
.size(),
154 compressed_output
) &&
155 GoodCompressionRatio(compressed_output
->size(), raw
.size())) {
156 return *compressed_output
;
160 case kZSTDNotFinalCompression
:
161 if (ZSTD_Compress(compression_options
, raw
.data(), raw
.size(),
162 compressed_output
, compression_dict
) &&
163 GoodCompressionRatio(compressed_output
->size(), raw
.size())) {
164 return *compressed_output
;
166 break; // fall back to no compression.
167 default: {} // Do not recognize this compression type
170 // Compression method is not supported, or not good compression ratio, so just
171 // fall back to uncompressed form.
172 *type
= kNoCompression
;
// kBlockBasedTableMagicNumber was picked by running
//    echo rocksdb.table.block_based | sha1sum
// and taking the leading 64 bits.
// Please note that kBlockBasedTableMagicNumber may also be accessed by other
// .cc files, for that reason we declare it extern in the header but to get
// the space allocated it must be not extern in one place.
const uint64_t kBlockBasedTableMagicNumber = 0x88e241b785f4cff7ull;
// We also support reading and writing legacy block based table format (for
// backwards compatibility)
const uint64_t kLegacyBlockBasedTableMagicNumber = 0xdb4775248b80fb57ull;
189 // A collector that collects properties of interest to block-based table.
190 // For now this class looks heavy-weight since we only write one additional
192 // But in the foreseeable future, we will add more and more properties that are
193 // specific to block-based table.
194 class BlockBasedTableBuilder::BlockBasedTablePropertiesCollector
195 : public IntTblPropCollector
{
197 explicit BlockBasedTablePropertiesCollector(
198 BlockBasedTableOptions::IndexType index_type
, bool whole_key_filtering
,
199 bool prefix_filtering
)
200 : index_type_(index_type
),
201 whole_key_filtering_(whole_key_filtering
),
202 prefix_filtering_(prefix_filtering
) {}
204 virtual Status
InternalAdd(const Slice
& key
, const Slice
& value
,
205 uint64_t file_size
) override
{
206 // Intentionally left blank. Have no interest in collecting stats for
207 // individual key/value pairs.
211 virtual Status
Finish(UserCollectedProperties
* properties
) override
{
213 PutFixed32(&val
, static_cast<uint32_t>(index_type_
));
214 properties
->insert({BlockBasedTablePropertyNames::kIndexType
, val
});
215 properties
->insert({BlockBasedTablePropertyNames::kWholeKeyFiltering
,
216 whole_key_filtering_
? kPropTrue
: kPropFalse
});
217 properties
->insert({BlockBasedTablePropertyNames::kPrefixFiltering
,
218 prefix_filtering_
? kPropTrue
: kPropFalse
});
222 // The name of the properties collector can be used for debugging purpose.
223 virtual const char* Name() const override
{
224 return "BlockBasedTablePropertiesCollector";
227 virtual UserCollectedProperties
GetReadableProperties() const override
{
228 // Intentionally left blank.
229 return UserCollectedProperties();
233 BlockBasedTableOptions::IndexType index_type_
;
234 bool whole_key_filtering_
;
235 bool prefix_filtering_
;
238 struct BlockBasedTableBuilder::Rep
{
239 const ImmutableCFOptions ioptions
;
240 const BlockBasedTableOptions table_options
;
241 const InternalKeyComparator
& internal_comparator
;
242 WritableFileWriter
* file
;
245 BlockBuilder data_block
;
246 BlockBuilder range_del_block
;
248 InternalKeySliceTransform internal_prefix_transform
;
249 std::unique_ptr
<IndexBuilder
> index_builder
;
251 std::string last_key
;
252 const CompressionType compression_type
;
253 const CompressionOptions compression_opts
;
254 // Data for presetting the compression library's dictionary, or nullptr.
255 const std::string
* compression_dict
;
256 TableProperties props
;
258 bool closed
= false; // Either Finish() or Abandon() has been called.
259 std::unique_ptr
<FilterBlockBuilder
> filter_builder
;
260 char compressed_cache_key_prefix
[BlockBasedTable::kMaxCacheKeyPrefixSize
];
261 size_t compressed_cache_key_prefix_size
;
263 BlockHandle pending_handle
; // Handle to add to index block
265 std::string compressed_output
;
266 std::unique_ptr
<FlushBlockPolicy
> flush_block_policy
;
267 uint32_t column_family_id
;
268 const std::string
& column_family_name
;
270 std::vector
<std::unique_ptr
<IntTblPropCollector
>> table_properties_collectors
;
272 Rep(const ImmutableCFOptions
& _ioptions
,
273 const BlockBasedTableOptions
& table_opt
,
274 const InternalKeyComparator
& icomparator
,
275 const std::vector
<std::unique_ptr
<IntTblPropCollectorFactory
>>*
276 int_tbl_prop_collector_factories
,
277 uint32_t _column_family_id
, WritableFileWriter
* f
,
278 const CompressionType _compression_type
,
279 const CompressionOptions
& _compression_opts
,
280 const std::string
* _compression_dict
, const bool skip_filters
,
281 const std::string
& _column_family_name
)
282 : ioptions(_ioptions
),
283 table_options(table_opt
),
284 internal_comparator(icomparator
),
286 data_block(table_options
.block_restart_interval
,
287 table_options
.use_delta_encoding
),
288 range_del_block(1), // TODO(andrewkr): restart_interval unnecessary
289 internal_prefix_transform(_ioptions
.prefix_extractor
),
290 compression_type(_compression_type
),
291 compression_opts(_compression_opts
),
292 compression_dict(_compression_dict
),
294 table_options
.flush_block_policy_factory
->NewFlushBlockPolicy(
295 table_options
, data_block
)),
296 column_family_id(_column_family_id
),
297 column_family_name(_column_family_name
) {
298 PartitionedIndexBuilder
* p_index_builder
= nullptr;
299 if (table_options
.index_type
==
300 BlockBasedTableOptions::kTwoLevelIndexSearch
) {
301 p_index_builder
= PartitionedIndexBuilder::CreateIndexBuilder(
302 &internal_comparator
, table_options
);
303 index_builder
.reset(p_index_builder
);
305 index_builder
.reset(IndexBuilder::CreateIndexBuilder(
306 table_options
.index_type
, &internal_comparator
,
307 &this->internal_prefix_transform
, table_options
));
310 filter_builder
= nullptr;
312 filter_builder
.reset(
313 CreateFilterBlockBuilder(_ioptions
, table_options
, p_index_builder
));
316 for (auto& collector_factories
: *int_tbl_prop_collector_factories
) {
317 table_properties_collectors
.emplace_back(
318 collector_factories
->CreateIntTblPropCollector(column_family_id
));
320 table_properties_collectors
.emplace_back(
321 new BlockBasedTablePropertiesCollector(
322 table_options
.index_type
, table_options
.whole_key_filtering
,
323 _ioptions
.prefix_extractor
!= nullptr));
327 BlockBasedTableBuilder::BlockBasedTableBuilder(
328 const ImmutableCFOptions
& ioptions
,
329 const BlockBasedTableOptions
& table_options
,
330 const InternalKeyComparator
& internal_comparator
,
331 const std::vector
<std::unique_ptr
<IntTblPropCollectorFactory
>>*
332 int_tbl_prop_collector_factories
,
333 uint32_t column_family_id
, WritableFileWriter
* file
,
334 const CompressionType compression_type
,
335 const CompressionOptions
& compression_opts
,
336 const std::string
* compression_dict
, const bool skip_filters
,
337 const std::string
& column_family_name
) {
338 BlockBasedTableOptions
sanitized_table_options(table_options
);
339 if (sanitized_table_options
.format_version
== 0 &&
340 sanitized_table_options
.checksum
!= kCRC32c
) {
343 "Silently converting format_version to 1 because checksum is "
345 // silently convert format_version to 1 to keep consistent with current
347 sanitized_table_options
.format_version
= 1;
350 rep_
= new Rep(ioptions
, sanitized_table_options
, internal_comparator
,
351 int_tbl_prop_collector_factories
, column_family_id
, file
,
352 compression_type
, compression_opts
, compression_dict
,
353 skip_filters
, column_family_name
);
355 if (rep_
->filter_builder
!= nullptr) {
356 rep_
->filter_builder
->StartBlock(0);
358 if (table_options
.block_cache_compressed
.get() != nullptr) {
359 BlockBasedTable::GenerateCachePrefix(
360 table_options
.block_cache_compressed
.get(), file
->writable_file(),
361 &rep_
->compressed_cache_key_prefix
[0],
362 &rep_
->compressed_cache_key_prefix_size
);
366 BlockBasedTableBuilder::~BlockBasedTableBuilder() {
367 assert(rep_
->closed
); // Catch errors where caller forgot to call Finish()
371 void BlockBasedTableBuilder::Add(const Slice
& key
, const Slice
& value
) {
375 ValueType value_type
= ExtractValueType(key
);
376 if (IsValueType(value_type
)) {
377 if (r
->props
.num_entries
> 0) {
378 assert(r
->internal_comparator
.Compare(key
, Slice(r
->last_key
)) > 0);
381 auto should_flush
= r
->flush_block_policy
->Update(key
, value
);
383 assert(!r
->data_block
.empty());
386 // Add item to index block.
387 // We do not emit the index entry for a block until we have seen the
388 // first key for the next data block. This allows us to use shorter
389 // keys in the index block. For example, consider a block boundary
390 // between the keys "the quick brown fox" and "the who". We can use
391 // "the r" as the key for the index block entry since it is >= all
392 // entries in the first block and < all entries in subsequent
395 r
->index_builder
->AddIndexEntry(&r
->last_key
, &key
, r
->pending_handle
);
399 // Note: PartitionedFilterBlockBuilder requires key being added to filter
400 // builder after being added to index builder.
401 if (r
->filter_builder
!= nullptr) {
402 r
->filter_builder
->Add(ExtractUserKey(key
));
405 r
->last_key
.assign(key
.data(), key
.size());
406 r
->data_block
.Add(key
, value
);
407 r
->props
.num_entries
++;
408 r
->props
.raw_key_size
+= key
.size();
409 r
->props
.raw_value_size
+= value
.size();
411 r
->index_builder
->OnKeyAdded(key
);
412 NotifyCollectTableCollectorsOnAdd(key
, value
, r
->offset
,
413 r
->table_properties_collectors
,
414 r
->ioptions
.info_log
);
416 } else if (value_type
== kTypeRangeDeletion
) {
417 // TODO(wanning&andrewkr) add num_tomestone to table properties
418 r
->range_del_block
.Add(key
, value
);
419 ++r
->props
.num_entries
;
420 r
->props
.raw_key_size
+= key
.size();
421 r
->props
.raw_value_size
+= value
.size();
422 NotifyCollectTableCollectorsOnAdd(key
, value
, r
->offset
,
423 r
->table_properties_collectors
,
424 r
->ioptions
.info_log
);
430 void BlockBasedTableBuilder::Flush() {
434 if (r
->data_block
.empty()) return;
435 WriteBlock(&r
->data_block
, &r
->pending_handle
, true /* is_data_block */);
436 if (r
->filter_builder
!= nullptr) {
437 r
->filter_builder
->StartBlock(r
->offset
);
439 r
->props
.data_size
= r
->offset
;
440 ++r
->props
.num_data_blocks
;
443 void BlockBasedTableBuilder::WriteBlock(BlockBuilder
* block
,
445 bool is_data_block
) {
446 WriteBlock(block
->Finish(), handle
, is_data_block
);
450 void BlockBasedTableBuilder::WriteBlock(const Slice
& raw_block_contents
,
452 bool is_data_block
) {
453 // File format contains a sequence of blocks where each block has:
454 // block_data: uint8[n]
460 auto type
= r
->compression_type
;
461 Slice block_contents
;
462 bool abort_compression
= false;
464 StopWatchNano
timer(r
->ioptions
.env
,
465 ShouldReportDetailedTime(r
->ioptions
.env
, r
->ioptions
.statistics
));
467 if (raw_block_contents
.size() < kCompressionSizeLimit
) {
468 Slice compression_dict
;
469 if (is_data_block
&& r
->compression_dict
&& r
->compression_dict
->size()) {
470 compression_dict
= *r
->compression_dict
;
473 block_contents
= CompressBlock(raw_block_contents
, r
->compression_opts
,
474 &type
, r
->table_options
.format_version
,
475 compression_dict
, &r
->compressed_output
);
477 // Some of the compression algorithms are known to be unreliable. If
478 // the verify_compression flag is set then try to de-compress the
479 // compressed data and compare to the input.
480 if (type
!= kNoCompression
&& r
->table_options
.verify_compression
) {
481 // Retrieve the uncompressed contents into a new buffer
482 BlockContents contents
;
483 Status stat
= UncompressBlockContentsForCompressionType(
484 block_contents
.data(), block_contents
.size(), &contents
,
485 r
->table_options
.format_version
, compression_dict
, type
,
489 bool compressed_ok
= contents
.data
.compare(raw_block_contents
) == 0;
490 if (!compressed_ok
) {
491 // The result of the compression was invalid. abort.
492 abort_compression
= true;
493 ROCKS_LOG_ERROR(r
->ioptions
.info_log
,
494 "Decompressed block did not match raw block");
496 Status::Corruption("Decompressed block did not match raw block");
499 // Decompression reported an error. abort.
500 r
->status
= Status::Corruption("Could not decompress");
501 abort_compression
= true;
505 // Block is too big to be compressed.
506 abort_compression
= true;
509 // Abort compression if the block is too big, or did not pass
511 if (abort_compression
) {
512 RecordTick(r
->ioptions
.statistics
, NUMBER_BLOCK_NOT_COMPRESSED
);
513 type
= kNoCompression
;
514 block_contents
= raw_block_contents
;
515 } else if (type
!= kNoCompression
&&
516 ShouldReportDetailedTime(r
->ioptions
.env
,
517 r
->ioptions
.statistics
)) {
518 MeasureTime(r
->ioptions
.statistics
, COMPRESSION_TIMES_NANOS
,
519 timer
.ElapsedNanos());
520 MeasureTime(r
->ioptions
.statistics
, BYTES_COMPRESSED
,
521 raw_block_contents
.size());
522 RecordTick(r
->ioptions
.statistics
, NUMBER_BLOCK_COMPRESSED
);
525 WriteRawBlock(block_contents
, type
, handle
);
526 r
->compressed_output
.clear();
529 void BlockBasedTableBuilder::WriteRawBlock(const Slice
& block_contents
,
530 CompressionType type
,
531 BlockHandle
* handle
) {
533 StopWatch
sw(r
->ioptions
.env
, r
->ioptions
.statistics
, WRITE_RAW_BLOCK_MICROS
);
534 handle
->set_offset(r
->offset
);
535 handle
->set_size(block_contents
.size());
536 r
->status
= r
->file
->Append(block_contents
);
537 if (r
->status
.ok()) {
538 char trailer
[kBlockTrailerSize
];
540 char* trailer_without_type
= trailer
+ 1;
541 switch (r
->table_options
.checksum
) {
543 // we don't support no checksum yet
545 // intentional fallthrough
547 auto crc
= crc32c::Value(block_contents
.data(), block_contents
.size());
548 crc
= crc32c::Extend(crc
, trailer
, 1); // Extend to cover block type
549 EncodeFixed32(trailer_without_type
, crc32c::Mask(crc
));
553 void* xxh
= XXH32_init(0);
554 XXH32_update(xxh
, block_contents
.data(),
555 static_cast<uint32_t>(block_contents
.size()));
556 XXH32_update(xxh
, trailer
, 1); // Extend to cover block type
557 EncodeFixed32(trailer_without_type
, XXH32_digest(xxh
));
562 r
->status
= r
->file
->Append(Slice(trailer
, kBlockTrailerSize
));
563 if (r
->status
.ok()) {
564 r
->status
= InsertBlockInCache(block_contents
, type
, handle
);
566 if (r
->status
.ok()) {
567 r
->offset
+= block_contents
.size() + kBlockTrailerSize
;
572 Status
BlockBasedTableBuilder::status() const {
576 static void DeleteCachedBlock(const Slice
& key
, void* value
) {
577 Block
* block
= reinterpret_cast<Block
*>(value
);
582 // Make a copy of the block contents and insert into compressed block cache
584 Status
BlockBasedTableBuilder::InsertBlockInCache(const Slice
& block_contents
,
585 const CompressionType type
,
586 const BlockHandle
* handle
) {
588 Cache
* block_cache_compressed
= r
->table_options
.block_cache_compressed
.get();
590 if (type
!= kNoCompression
&& block_cache_compressed
!= nullptr) {
592 size_t size
= block_contents
.size();
594 std::unique_ptr
<char[]> ubuf(new char[size
+ 1]);
595 memcpy(ubuf
.get(), block_contents
.data(), size
);
598 BlockContents
results(std::move(ubuf
), size
, true, type
);
600 Block
* block
= new Block(std::move(results
), kDisableGlobalSequenceNumber
);
602 // make cache key by appending the file offset to the cache prefix id
603 char* end
= EncodeVarint64(
604 r
->compressed_cache_key_prefix
+
605 r
->compressed_cache_key_prefix_size
,
607 Slice
key(r
->compressed_cache_key_prefix
, static_cast<size_t>
608 (end
- r
->compressed_cache_key_prefix
));
610 // Insert into compressed block cache.
611 block_cache_compressed
->Insert(key
, block
, block
->usable_size(),
614 // Invalidate OS cache.
615 r
->file
->InvalidateCache(static_cast<size_t>(r
->offset
), size
);
620 Status
BlockBasedTableBuilder::Finish() {
622 bool empty_data_block
= r
->data_block
.empty();
627 // To make sure properties block is able to keep the accurate size of index
628 // block, we will finish writing all index entries here and flush them
629 // to storage after metaindex block is written.
630 if (ok() && !empty_data_block
) {
631 r
->index_builder
->AddIndexEntry(
632 &r
->last_key
, nullptr /* no next data block */, r
->pending_handle
);
635 BlockHandle filter_block_handle
, metaindex_block_handle
, index_block_handle
,
636 compression_dict_block_handle
, range_del_block_handle
;
637 // Write filter block
638 if (ok() && r
->filter_builder
!= nullptr) {
639 Status s
= Status::Incomplete();
640 while (s
.IsIncomplete()) {
641 Slice filter_content
= r
->filter_builder
->Finish(filter_block_handle
, &s
);
642 assert(s
.ok() || s
.IsIncomplete());
643 r
->props
.filter_size
+= filter_content
.size();
644 WriteRawBlock(filter_content
, kNoCompression
, &filter_block_handle
);
648 IndexBuilder::IndexBlocks index_blocks
;
649 auto index_builder_status
= r
->index_builder
->Finish(&index_blocks
);
650 if (index_builder_status
.IsIncomplete()) {
651 // We we have more than one index partition then meta_blocks are not
652 // supported for the index. Currently meta_blocks are used only by
653 // HashIndexBuilder which is not multi-partition.
654 assert(index_blocks
.meta_blocks
.empty());
655 } else if (!index_builder_status
.ok()) {
656 return index_builder_status
;
659 // Write meta blocks and metaindex block with the following order.
660 // 1. [meta block: filter]
661 // 2. [meta block: properties]
662 // 3. [meta block: compression dictionary]
663 // 4. [meta block: range deletion tombstone]
664 // 5. [metaindex block]
666 MetaIndexBuilder meta_index_builder
;
667 for (const auto& item
: index_blocks
.meta_blocks
) {
668 BlockHandle block_handle
;
669 WriteBlock(item
.second
, &block_handle
, false /* is_data_block */);
670 meta_index_builder
.Add(item
.first
, block_handle
);
674 if (r
->filter_builder
!= nullptr) {
675 // Add mapping from "<filter_block_prefix>.Name" to location
678 if (r
->filter_builder
->IsBlockBased()) {
679 key
= BlockBasedTable::kFilterBlockPrefix
;
681 key
= r
->table_options
.partition_filters
682 ? BlockBasedTable::kPartitionedFilterBlockPrefix
683 : BlockBasedTable::kFullFilterBlockPrefix
;
685 key
.append(r
->table_options
.filter_policy
->Name());
686 meta_index_builder
.Add(key
, filter_block_handle
);
689 // Write properties and compression dictionary blocks.
691 PropertyBlockBuilder property_block_builder
;
692 r
->props
.column_family_id
= r
->column_family_id
;
693 r
->props
.column_family_name
= r
->column_family_name
;
694 r
->props
.filter_policy_name
= r
->table_options
.filter_policy
!= nullptr ?
695 r
->table_options
.filter_policy
->Name() : "";
696 r
->props
.index_size
=
697 r
->index_builder
->EstimatedSize() + kBlockTrailerSize
;
698 r
->props
.comparator_name
= r
->ioptions
.user_comparator
!= nullptr
699 ? r
->ioptions
.user_comparator
->Name()
701 r
->props
.merge_operator_name
= r
->ioptions
.merge_operator
!= nullptr
702 ? r
->ioptions
.merge_operator
->Name()
704 r
->props
.compression_name
= CompressionTypeToString(r
->compression_type
);
705 r
->props
.prefix_extractor_name
=
706 r
->ioptions
.prefix_extractor
!= nullptr
707 ? r
->ioptions
.prefix_extractor
->Name()
710 std::string property_collectors_names
= "[";
711 property_collectors_names
= "[";
713 i
< r
->ioptions
.table_properties_collector_factories
.size(); ++i
) {
715 property_collectors_names
+= ",";
717 property_collectors_names
+=
718 r
->ioptions
.table_properties_collector_factories
[i
]->Name();
720 property_collectors_names
+= "]";
721 r
->props
.property_collectors_names
= property_collectors_names
;
723 // Add basic properties
724 property_block_builder
.AddTableProperty(r
->props
);
726 // Add use collected properties
727 NotifyCollectTableCollectorsOnFinish(r
->table_properties_collectors
,
728 r
->ioptions
.info_log
,
729 &property_block_builder
);
731 BlockHandle properties_block_handle
;
733 property_block_builder
.Finish(),
735 &properties_block_handle
737 meta_index_builder
.Add(kPropertiesBlock
, properties_block_handle
);
739 // Write compression dictionary block
740 if (r
->compression_dict
&& r
->compression_dict
->size()) {
741 WriteRawBlock(*r
->compression_dict
, kNoCompression
,
742 &compression_dict_block_handle
);
743 meta_index_builder
.Add(kCompressionDictBlock
,
744 compression_dict_block_handle
);
746 } // end of properties/compression dictionary block writing
748 if (ok() && !r
->range_del_block
.empty()) {
749 WriteRawBlock(r
->range_del_block
.Finish(), kNoCompression
,
750 &range_del_block_handle
);
751 meta_index_builder
.Add(kRangeDelBlock
, range_del_block_handle
);
752 } // range deletion tombstone meta block
757 // flush the meta index block
758 WriteRawBlock(meta_index_builder
.Finish(), kNoCompression
,
759 &metaindex_block_handle
);
761 const bool is_data_block
= true;
762 WriteBlock(index_blocks
.index_block_contents
, &index_block_handle
,
764 // If there are more index partitions, finish them and write them out
765 Status
& s
= index_builder_status
;
766 while (s
.IsIncomplete()) {
767 s
= r
->index_builder
->Finish(&index_blocks
, index_block_handle
);
768 if (!s
.ok() && !s
.IsIncomplete()) {
771 WriteBlock(index_blocks
.index_block_contents
, &index_block_handle
,
773 // The last index_block_handle will be for the partition index block
779 // No need to write out new footer if we're using default checksum.
780 // We're writing legacy magic number because we want old versions of RocksDB
781 // be able to read files generated with new release (just in case if
782 // somebody wants to roll back after an upgrade)
783 // TODO(icanadi) at some point in the future, when we're absolutely sure
784 // nobody will roll back to RocksDB 2.x versions, retire the legacy magic
785 // number and always write new table files with new magic number
786 bool legacy
= (r
->table_options
.format_version
== 0);
787 // this is guaranteed by BlockBasedTableBuilder's constructor
788 assert(r
->table_options
.checksum
== kCRC32c
||
789 r
->table_options
.format_version
!= 0);
790 Footer
footer(legacy
? kLegacyBlockBasedTableMagicNumber
791 : kBlockBasedTableMagicNumber
,
792 r
->table_options
.format_version
);
793 footer
.set_metaindex_handle(metaindex_block_handle
);
794 footer
.set_index_handle(index_block_handle
);
795 footer
.set_checksum(r
->table_options
.checksum
);
796 std::string footer_encoding
;
797 footer
.EncodeTo(&footer_encoding
);
798 r
->status
= r
->file
->Append(footer_encoding
);
799 if (r
->status
.ok()) {
800 r
->offset
+= footer_encoding
.size();
807 void BlockBasedTableBuilder::Abandon() {
813 uint64_t BlockBasedTableBuilder::NumEntries() const {
814 return rep_
->props
.num_entries
;
817 uint64_t BlockBasedTableBuilder::FileSize() const {
821 bool BlockBasedTableBuilder::NeedCompact() const {
822 for (const auto& collector
: rep_
->table_properties_collectors
) {
823 if (collector
->NeedCompact()) {
830 TableProperties
BlockBasedTableBuilder::GetTableProperties() const {
831 TableProperties ret
= rep_
->props
;
832 for (const auto& collector
: rep_
->table_properties_collectors
) {
833 for (const auto& prop
: collector
->GetReadableProperties()) {
834 ret
.readable_properties
.insert(prop
);
836 collector
->Finish(&ret
.user_collected_properties
);
841 const std::string
BlockBasedTable::kFilterBlockPrefix
= "filter.";
842 const std::string
BlockBasedTable::kFullFilterBlockPrefix
= "fullfilter.";
843 const std::string
BlockBasedTable::kPartitionedFilterBlockPrefix
=
844 "partitionedfilter.";
845 } // namespace rocksdb