1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
13 #ifdef ROCKSDB_MALLOC_USABLE_SIZE
15 #include <malloc_np.h>
20 #include "rocksdb/options.h"
21 #include "rocksdb/slice.h"
22 #include "rocksdb/status.h"
23 #include "rocksdb/table.h"
25 #include "options/cf_options.h"
26 #include "port/port.h" // noexcept
27 #include "table/persistent_cache_options.h"
28 #include "util/file_reader_writer.h"
29 #include "util/memory_allocator.h"
// Forward declaration only; the class itself is defined elsewhere.
33 class RandomAccessFile
;
// Declaration only; the definition is not part of this header.
// Takes an Env and a Statistics pointer and returns a bool.
// NOTE(review): presumably true when detailed timing should be reported --
// confirm against the definition.
36 extern bool ShouldReportDetailedTime(Env
* env
, Statistics
* stats
);
38 // The length of the magic number, in bytes.
39 const int kMagicNumberLengthByte
= 8;
41 // BlockHandle is a pointer to the extent of a file that stores a data
42 // block or a meta block.
// Constructs a handle from an explicit offset and size.
46 BlockHandle(uint64_t offset
, uint64_t size
);
48 // Returns the offset of the block in the file (the stored offset_).
49 uint64_t offset() const { return offset_
; }
// Overwrites the stored offset with _offset.
50 void set_offset(uint64_t _offset
) { offset_
= _offset
; }
52 // Returns the size of the stored block (the stored size_).
53 uint64_t size() const { return size_
; }
// Overwrites the stored size with _size.
54 void set_size(uint64_t _size
) { size_
= _size
; }
// Declaration only. Const, so it does not modify the handle.
// NOTE(review): presumably appends the encoded handle to *dst, using at most
// kMaxEncodedLength bytes -- confirm against the definition.
56 void EncodeTo(std::string
* dst
) const;
// Declaration only. Reports success or failure through the returned Status.
// NOTE(review): presumably parses a handle from the front of *input and
// advances the slice -- confirm against the definition.
57 Status
DecodeFrom(Slice
* input
);
// Declaration only. Reports success or failure through the returned Status.
// NOTE(review): looks like the offset is supplied by the caller and only the
// size is decoded from *input -- confirm against the definition.
58 Status
DecodeSizeFrom(uint64_t offset
, Slice
* input
);
60 // Returns a string that contains a copy of the handle.
// `hex` defaults to true. NOTE(review): presumably it selects hexadecimal
// output -- confirm against the definition.
61 std::string
ToString(bool hex
= true) const;
63 // If the block handle's offset and size are both 0, we treat it as a
64 // null block handle that points nowhere.
65 bool IsNull() const { return offset_
== 0 && size_
== 0; }
// Returns a reference to the static kNullBlockHandle instance.
67 static const BlockHandle
& NullBlockHandle() { return kNullBlockHandle
; }
69 // Maximum encoding length of a BlockHandle: 10 + 10 = 20 bytes.
// NOTE(review): presumably 10 bytes is the worst case for each of the two
// encoded fields (offset and size) -- confirm against EncodeTo().
70 enum { kMaxEncodedLength
= 10 + 10 };
// Static instance handed out by NullBlockHandle(). Declared here; its
// definition (and therefore its initial value) lives elsewhere.
76 static const BlockHandle kNullBlockHandle
;
// Maps a format version to the compress_format_version used for compressed
// blocks: returns 2 when version >= 2, otherwise 1.
// compression_type does not influence the result; it is only examined by the
// assert below, which rejects kSnappyCompression, kXpressCompression and
// kNoCompression.
79 inline uint32_t GetCompressFormatForVersion(CompressionType compression_type
,
// Explicitly marks the parameter as used. NOTE(review): presumably this avoids
// an unused-parameter warning when assert() compiles to nothing -- confirm.
82 (void)compression_type
;
84 // snappy is not versioned
85 assert(compression_type
!= kSnappyCompression
&&
86 compression_type
!= kXpressCompression
&&
87 compression_type
!= kNoCompression
);
88 // As of version 2, we encode compressed block with
89 // compress_format_version == 2. Before that, the version is 1.
90 // DO NOT CHANGE THIS FUNCTION, it affects disk format
91 return version
>= 2 ? 2 : 1;
// Takes a format version and returns a bool.
// NOTE(review): presumably reports whether block-based tables support
// `version` -- confirm against the function body.
94 inline bool BlockBasedTableSupportedVersion(uint32_t version
) {
98 // Footer encapsulates the fixed information stored at the tail
99 // end of every table file.
102 // Constructs a footer without specifying its table magic number: it
103 // delegates to Footer(kInvalidTableMagicNumber, 0). In that case the table
104 // magic number should be initialized via @ReadFooterFromFile().
105 // Use this when you plan to load Footer with DecodeFrom(). Never use this
106 // when you plan to call EncodeTo().
107 Footer() : Footer(kInvalidTableMagicNumber
, 0) {}
109 // Use this constructor when you plan to write out the footer using
110 // EncodeTo(). Never use this constructor with DecodeFrom().
// Declaration only; takes the table magic number and the footer version.
111 Footer(uint64_t table_magic_number
, uint32_t version
);
113 // Returns the version of the footer in this file (the stored version_).
114 uint32_t version() const { return version_
; }
116 // Returns the checksum type used in this file (the stored checksum_).
117 ChecksumType
checksum() const { return checksum_
; }
// Overwrites the stored checksum type with c.
118 void set_checksum(const ChecksumType c
) { checksum_
= c
; }
120 // Returns the block handle for the metaindex block of the table.
121 const BlockHandle
& metaindex_handle() const { return metaindex_handle_
; }
// Overwrites the stored metaindex block handle with a copy of h.
122 void set_metaindex_handle(const BlockHandle
& h
) { metaindex_handle_
= h
; }
124 // Returns the block handle for the index block of the table.
125 const BlockHandle
& index_handle() const { return index_handle_
; }
// Overwrites the stored index block handle with a copy of h.
127 void set_index_handle(const BlockHandle
& h
) { index_handle_
= h
; }
// Returns the stored table magic number. The value equals
// kInvalidTableMagicNumber while no magic number has been set.
129 uint64_t table_magic_number() const { return table_magic_number_
; }
// Declaration only. Const, so it does not modify the footer.
// NOTE(review): presumably appends the serialized footer to *dst -- confirm
// against the definition.
131 void EncodeTo(std::string
* dst
) const;
133 // Set the current footer based on the input slice.
135 // REQUIRES: table_magic_number_ is not set (i.e.,
136 // HasInitializedTableMagicNumber() is false). The function will initialize the
138 Status
DecodeFrom(Slice
* input
);
140 // Encoded length of a Footer. Note that the serialization of a Footer will
141 // always occupy at least kMinEncodedLength bytes. If fields are changed
142 // the version number should be incremented and kMaxEncodedLength should be
143 // increased accordingly.
145 // Footer version 0 (legacy) will always occupy exactly this many bytes.
146 // It consists of two block handles, padding, and a magic number.
// With BlockHandle::kMaxEncodedLength == 20 this is 2 * 20 + 8 = 48 bytes.
147 kVersion0EncodedLength
= 2 * BlockHandle::kMaxEncodedLength
+ 8,
148 // Footer of versions 1 and higher will always occupy exactly this many
149 // bytes. It consists of the checksum type, two block handles, padding,
150 // a version number (at least 1), and a magic number.
// With BlockHandle::kMaxEncodedLength == 20 this is 1 + 2 * 20 + 4 + 8 = 53
// bytes.
151 kNewVersionsEncodedLength
= 1 + 2 * BlockHandle::kMaxEncodedLength
+ 4 + 8,
// Smallest and largest possible encoded footer sizes across all versions.
152 kMinEncodedLength
= kVersion0EncodedLength
,
153 kMaxEncodedLength
= kNewVersionsEncodedLength
,
// Sentinel meaning "table magic number not set"; see
// HasInitializedTableMagicNumber(), which compares against this value.
156 static const uint64_t kInvalidTableMagicNumber
= 0;
158 // Converts this object to a human-readable form. Declaration only.
159 std::string
ToString() const;
// Stores magic_number as the table magic number.
162 // REQUIRES: magic number wasn't initialized. The precondition is checked
// with an assert, so it is only enforced when asserts are enabled.
163 void set_table_magic_number(uint64_t magic_number
) {
164 assert(!HasInitializedTableMagicNumber());
165 table_magic_number_
= magic_number
;
168 // Returns true if @table_magic_number_ is set to a value different
169 // from @kInvalidTableMagicNumber, and false otherwise.
170 bool HasInitializedTableMagicNumber() const {
171 return (table_magic_number_
!= kInvalidTableMagicNumber
);
// Checksum type; exposed through checksum() / set_checksum().
175 ChecksumType checksum_
;
// Handle of the metaindex block; exposed through metaindex_handle().
176 BlockHandle metaindex_handle_
;
// Handle of the index block; exposed through index_handle().
177 BlockHandle index_handle_
;
// Table magic number. The in-class initializer 0 equals
// kInvalidTableMagicNumber, i.e. "not set".
178 uint64_t table_magic_number_
= 0;
181 // Read the footer from file into *footer. Declaration only.
182 // If enforce_table_magic_number != 0, ReadFooterFromFile() will return
183 // corruption if table_magic number is not equal to enforce_table_magic_number
// The default value 0 therefore disables the magic number check.
184 Status
ReadFooterFromFile(RandomAccessFileReader
* file
,
185 FilePrefetchBuffer
* prefetch_buffer
,
186 uint64_t file_size
, Footer
* footer
,
187 uint64_t enforce_table_magic_number
= 0);
189 // Size of the block trailer in bytes: 1-byte type + 32-bit crc (1 + 4 = 5).
190 static const size_t kBlockTrailerSize
= 5;
// Returns the byte stored at block_data[block_size], cast to CompressionType,
// i.e. the byte immediately following the first block_size bytes.
// No bounds check is performed here, so the caller must make sure that byte
// is readable.
192 inline CompressionType
get_block_compression_type(const char* block_data
,
194 return static_cast<CompressionType
>(block_data
[block_size
]);
// Holds the bytes of a block: `data` is a Slice view of the bytes and
// `allocation`, when non-null, is the buffer this object owns.
197 struct BlockContents
{
198 Slice data
; // Actual contents of data
// Owning pointer for the bytes; null when the bytes are not owned here
// (see own_bytes()).
199 CacheAllocationPtr allocation
;
202 // Whether the block is a raw block, which contains compression type
203 // byte. It is only used for assertion.
204 bool is_raw_block
= false;
// Wraps _data without touching `allocation`, which is left
// default-constructed.
209 BlockContents(const Slice
& _data
) : data(_data
) {}
// Takes ownership of _data and views its first _size bytes.
// `data` is declared before `allocation`, so _data.get() is read before
// _data is moved from.
211 BlockContents(CacheAllocationPtr
&& _data
, size_t _size
)
212 : data(_data
.get(), _size
), allocation(std::move(_data
)) {}
// Same as above for a plain char[] buffer: the pointer is released from
// _data and handed to `allocation`.
214 BlockContents(std::unique_ptr
<char[]>&& _data
, size_t _size
)
215 : data(_data
.get(), _size
) {
216 allocation
.reset(_data
.release());
// True when this object holds an allocation (allocation is non-null).
219 bool own_bytes() const { return allocation
.get() != nullptr; }
221 // It's the caller's responsibility to make sure that this is
222 // for raw block contents, which contains the compression
// Asserts is_raw_block, then returns the byte at data.data()[data.size()]
// interpreted as a CompressionType via get_block_compression_type().
224 CompressionType
get_compression_type() const {
225 assert(is_raw_block
);
226 return get_block_compression_type(data
.data(), data
.size());
229 // The additional memory space taken by the block data.
// When an allocation is held, the size can come from the allocator stored in
// the allocation's deleter (UsableSize) or, when ROCKSDB_MALLOC_USABLE_SIZE
// is defined, from malloc_usable_size(). The fallback result is 0.
230 size_t usable_size() const {
231 if (allocation
.get() != nullptr) {
232 auto allocator
= allocation
.get_deleter().allocator
;
234 return allocator
->UsableSize(allocation
.get(), data
.size());
236 #ifdef ROCKSDB_MALLOC_USABLE_SIZE
237 return malloc_usable_size(allocation
.get());
240 #endif // ROCKSDB_MALLOC_USABLE_SIZE
242 return 0; // no extra memory is occupied by the data
// usable_size() plus the size of this struct itself.
246 size_t ApproximateMemoryUsage() const {
247 return usable_size() + sizeof(*this);
// Move constructor, implemented in terms of move assignment.
250 BlockContents(BlockContents
&& other
) ROCKSDB_NOEXCEPT
{
251 *this = std::move(other
);
// Move assignment: moves `data` and `allocation` out of `other` and copies
// its is_raw_block flag.
// NOTE(review): unlike the move constructor, which delegates to it, this
// operator is not marked ROCKSDB_NOEXCEPT -- confirm whether that is
// intentional.
254 BlockContents
& operator=(BlockContents
&& other
) {
255 data
= std::move(other
.data
);
256 allocation
= std::move(other
.allocation
);
258 is_raw_block
= other
.is_raw_block
;
264 // Read the block identified by "handle" from "file". On failure
265 // return non-OK. On success fill *contents and return OK.
// Declaration only. Defaults: do_uncompress is true, compression_dict is an
// empty Slice and cache_options is a default-constructed
// PersistentCacheOptions.
266 extern Status
ReadBlockContents(
267 RandomAccessFileReader
* file
, FilePrefetchBuffer
* prefetch_buffer
,
268 const Footer
& footer
, const ReadOptions
& options
, const BlockHandle
& handle
,
269 BlockContents
* contents
, const ImmutableCFOptions
& ioptions
,
270 bool do_uncompress
= true, const Slice
& compression_dict
= Slice(),
271 const PersistentCacheOptions
& cache_options
= PersistentCacheOptions());
273 // The 'data' points to the raw block contents read in from file.
274 // This method allocates a new heap buffer and the raw block
275 // contents are uncompressed into this buffer. This buffer is
276 // returned via 'contents' and it is up to the caller to
278 // For description of compress_format_version and possible values, see
279 // util/compression.h
// Declaration only. `allocator` defaults to nullptr.
280 extern Status
UncompressBlockContents(const UncompressionInfo
& info
,
281 const char* data
, size_t n
,
282 BlockContents
* contents
,
283 uint32_t compress_format_version
,
284 const ImmutableCFOptions
& ioptions
,
285 MemoryAllocator
* allocator
= nullptr);
287 // This is an extension to UncompressBlockContents that accepts
288 // a specific compression type. This is used by un-wrapped blocks
289 // with no compression header.
// Declaration only. `allocator` defaults to nullptr.
// NOTE(review): the signature has no separate compression-type parameter;
// presumably the type is carried by `info` -- confirm.
290 extern Status
UncompressBlockContentsForCompressionType(
291 const UncompressionInfo
& info
, const char* data
, size_t n
,
292 BlockContents
* contents
, uint32_t compress_format_version
,
293 const ImmutableCFOptions
& ioptions
, MemoryAllocator
* allocator
= nullptr);
295 // Implementation details follow. Clients should ignore,
297 // TODO(andrewkr): we should prefer one way of representing a null/uninitialized
298 // BlockHandle. Currently we use zeros for null and use negation-of-zeros for
// Default constructor: delegates to the two-argument constructor with both
// offset and size set to ~0 (all bits set). Such a handle is not "null" in
// the sense of IsNull(), which tests for zeros.
300 inline BlockHandle::BlockHandle()
301 : BlockHandle(~static_cast<uint64_t>(0), ~static_cast<uint64_t>(0)) {}
// Stores _offset and _size in offset_ and size_ without any validation.
303 inline BlockHandle::BlockHandle(uint64_t _offset
, uint64_t _size
)
304 : offset_(_offset
), size_(_size
) {}
306 } // namespace rocksdb