]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | // |
6 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |
7 | // Use of this source code is governed by a BSD-style license that can be | |
8 | // found in the LICENSE file. See the AUTHORS file for names of contributors. | |
9 | ||
10 | #pragma once | |
7c673cae | 11 | #include <stdint.h> |
11fdf7f2 TL |
12 | #include <string> |
13 | #ifdef ROCKSDB_MALLOC_USABLE_SIZE | |
14 | #ifdef OS_FREEBSD | |
15 | #include <malloc_np.h> | |
16 | #else | |
17 | #include <malloc.h> | |
18 | #endif | |
19 | #endif | |
20 | #include "rocksdb/options.h" | |
7c673cae FG |
21 | #include "rocksdb/slice.h" |
22 | #include "rocksdb/status.h" | |
7c673cae FG |
23 | #include "rocksdb/table.h" |
24 | ||
25 | #include "options/cf_options.h" | |
26 | #include "port/port.h" // noexcept | |
27 | #include "table/persistent_cache_options.h" | |
11fdf7f2 | 28 | #include "util/file_reader_writer.h" |
494da23a | 29 | #include "util/memory_allocator.h" |
7c673cae FG |
30 | |
31 | namespace rocksdb { | |
32 | ||
7c673cae FG |
33 | class RandomAccessFile; |
34 | struct ReadOptions; | |
35 | ||
36 | extern bool ShouldReportDetailedTime(Env* env, Statistics* stats); | |
37 | ||
// Size of the magic number, in bytes.
constexpr int kMagicNumberLengthByte = 8;
40 | ||
41 | // BlockHandle is a pointer to the extent of a file that stores a data | |
42 | // block or a meta block. | |
43 | class BlockHandle { | |
44 | public: | |
45 | BlockHandle(); | |
46 | BlockHandle(uint64_t offset, uint64_t size); | |
47 | ||
48 | // The offset of the block in the file. | |
49 | uint64_t offset() const { return offset_; } | |
50 | void set_offset(uint64_t _offset) { offset_ = _offset; } | |
51 | ||
52 | // The size of the stored block | |
53 | uint64_t size() const { return size_; } | |
54 | void set_size(uint64_t _size) { size_ = _size; } | |
55 | ||
56 | void EncodeTo(std::string* dst) const; | |
57 | Status DecodeFrom(Slice* input); | |
11fdf7f2 | 58 | Status DecodeSizeFrom(uint64_t offset, Slice* input); |
7c673cae FG |
59 | |
60 | // Return a string that contains the copy of handle. | |
61 | std::string ToString(bool hex = true) const; | |
62 | ||
63 | // if the block handle's offset and size are both "0", we will view it | |
64 | // as a null block handle that points to no where. | |
11fdf7f2 | 65 | bool IsNull() const { return offset_ == 0 && size_ == 0; } |
7c673cae | 66 | |
11fdf7f2 | 67 | static const BlockHandle& NullBlockHandle() { return kNullBlockHandle; } |
7c673cae FG |
68 | |
69 | // Maximum encoding length of a BlockHandle | |
70 | enum { kMaxEncodedLength = 10 + 10 }; | |
71 | ||
72 | private: | |
73 | uint64_t offset_; | |
74 | uint64_t size_; | |
75 | ||
76 | static const BlockHandle kNullBlockHandle; | |
77 | }; | |
78 | ||
494da23a TL |
79 | inline uint32_t GetCompressFormatForVersion(CompressionType compression_type, |
80 | uint32_t version) { | |
11fdf7f2 TL |
81 | #ifdef NDEBUG |
82 | (void)compression_type; | |
83 | #endif | |
7c673cae FG |
84 | // snappy is not versioned |
85 | assert(compression_type != kSnappyCompression && | |
86 | compression_type != kXpressCompression && | |
87 | compression_type != kNoCompression); | |
88 | // As of version 2, we encode compressed block with | |
89 | // compress_format_version == 2. Before that, the version is 1. | |
90 | // DO NOT CHANGE THIS FUNCTION, it affects disk format | |
91 | return version >= 2 ? 2 : 1; | |
92 | } | |
93 | ||
// Returns true when 'version' is accepted as a block-based table format
// version, i.e. for every version up to and including 4.
inline bool BlockBasedTableSupportedVersion(uint32_t version) {
  const uint32_t newest_supported_version = 4;
  return !(version > newest_supported_version);
}
97 | ||
98 | // Footer encapsulates the fixed information stored at the tail | |
99 | // end of every table file. | |
100 | class Footer { | |
101 | public: | |
102 | // Constructs a footer without specifying its table magic number. | |
103 | // In such case, the table magic number of such footer should be | |
104 | // initialized via @ReadFooterFromFile(). | |
105 | // Use this when you plan to load Footer with DecodeFrom(). Never use this | |
106 | // when you plan to EncodeTo. | |
107 | Footer() : Footer(kInvalidTableMagicNumber, 0) {} | |
108 | ||
109 | // Use this constructor when you plan to write out the footer using | |
110 | // EncodeTo(). Never use this constructor with DecodeFrom(). | |
111 | Footer(uint64_t table_magic_number, uint32_t version); | |
112 | ||
113 | // The version of the footer in this file | |
114 | uint32_t version() const { return version_; } | |
115 | ||
116 | // The checksum type used in this file | |
117 | ChecksumType checksum() const { return checksum_; } | |
118 | void set_checksum(const ChecksumType c) { checksum_ = c; } | |
119 | ||
120 | // The block handle for the metaindex block of the table | |
121 | const BlockHandle& metaindex_handle() const { return metaindex_handle_; } | |
122 | void set_metaindex_handle(const BlockHandle& h) { metaindex_handle_ = h; } | |
123 | ||
124 | // The block handle for the index block of the table | |
125 | const BlockHandle& index_handle() const { return index_handle_; } | |
126 | ||
127 | void set_index_handle(const BlockHandle& h) { index_handle_ = h; } | |
128 | ||
129 | uint64_t table_magic_number() const { return table_magic_number_; } | |
130 | ||
131 | void EncodeTo(std::string* dst) const; | |
132 | ||
133 | // Set the current footer based on the input slice. | |
134 | // | |
135 | // REQUIRES: table_magic_number_ is not set (i.e., | |
136 | // HasInitializedTableMagicNumber() is true). The function will initialize the | |
137 | // magic number | |
138 | Status DecodeFrom(Slice* input); | |
139 | ||
140 | // Encoded length of a Footer. Note that the serialization of a Footer will | |
141 | // always occupy at least kMinEncodedLength bytes. If fields are changed | |
142 | // the version number should be incremented and kMaxEncodedLength should be | |
143 | // increased accordingly. | |
144 | enum { | |
145 | // Footer version 0 (legacy) will always occupy exactly this many bytes. | |
146 | // It consists of two block handles, padding, and a magic number. | |
147 | kVersion0EncodedLength = 2 * BlockHandle::kMaxEncodedLength + 8, | |
148 | // Footer of versions 1 and higher will always occupy exactly this many | |
149 | // bytes. It consists of the checksum type, two block handles, padding, | |
150 | // a version number (bigger than 1), and a magic number | |
151 | kNewVersionsEncodedLength = 1 + 2 * BlockHandle::kMaxEncodedLength + 4 + 8, | |
152 | kMinEncodedLength = kVersion0EncodedLength, | |
153 | kMaxEncodedLength = kNewVersionsEncodedLength, | |
154 | }; | |
155 | ||
156 | static const uint64_t kInvalidTableMagicNumber = 0; | |
157 | ||
158 | // convert this object to a human readable form | |
159 | std::string ToString() const; | |
160 | ||
161 | private: | |
162 | // REQUIRES: magic number wasn't initialized. | |
163 | void set_table_magic_number(uint64_t magic_number) { | |
164 | assert(!HasInitializedTableMagicNumber()); | |
165 | table_magic_number_ = magic_number; | |
166 | } | |
167 | ||
168 | // return true if @table_magic_number_ is set to a value different | |
169 | // from @kInvalidTableMagicNumber. | |
170 | bool HasInitializedTableMagicNumber() const { | |
171 | return (table_magic_number_ != kInvalidTableMagicNumber); | |
172 | } | |
173 | ||
174 | uint32_t version_; | |
175 | ChecksumType checksum_; | |
176 | BlockHandle metaindex_handle_; | |
177 | BlockHandle index_handle_; | |
178 | uint64_t table_magic_number_ = 0; | |
179 | }; | |
180 | ||
181 | // Read the footer from file | |
182 | // If enforce_table_magic_number != 0, ReadFooterFromFile() will return | |
183 | // corruption if table_magic number is not equal to enforce_table_magic_number | |
11fdf7f2 TL |
184 | Status ReadFooterFromFile(RandomAccessFileReader* file, |
185 | FilePrefetchBuffer* prefetch_buffer, | |
186 | uint64_t file_size, Footer* footer, | |
7c673cae FG |
187 | uint64_t enforce_table_magic_number = 0); |
188 | ||
// Block trailer: a 1-byte type followed by a 32-bit crc.
static constexpr size_t kBlockTrailerSize = 5;
191 | ||
494da23a TL |
192 | inline CompressionType get_block_compression_type(const char* block_data, |
193 | size_t block_size) { | |
194 | return static_cast<CompressionType>(block_data[block_size]); | |
195 | } | |
196 | ||
7c673cae | 197 | struct BlockContents { |
494da23a TL |
198 | Slice data; // Actual contents of data |
199 | CacheAllocationPtr allocation; | |
200 | ||
201 | #ifndef NDEBUG | |
202 | // Whether the block is a raw block, which contains compression type | |
203 | // byte. It is only used for assertion. | |
204 | bool is_raw_block = false; | |
205 | #endif // NDEBUG | |
7c673cae | 206 | |
494da23a | 207 | BlockContents() {} |
7c673cae | 208 | |
494da23a TL |
209 | BlockContents(const Slice& _data) : data(_data) {} |
210 | ||
211 | BlockContents(CacheAllocationPtr&& _data, size_t _size) | |
212 | : data(_data.get(), _size), allocation(std::move(_data)) {} | |
213 | ||
214 | BlockContents(std::unique_ptr<char[]>&& _data, size_t _size) | |
215 | : data(_data.get(), _size) { | |
216 | allocation.reset(_data.release()); | |
217 | } | |
7c673cae | 218 | |
494da23a TL |
219 | bool own_bytes() const { return allocation.get() != nullptr; } |
220 | ||
221 | // It's the caller's responsibility to make sure that this is | |
222 | // for raw block contents, which contains the compression | |
223 | // byte in the end. | |
224 | CompressionType get_compression_type() const { | |
225 | assert(is_raw_block); | |
226 | return get_block_compression_type(data.data(), data.size()); | |
227 | } | |
7c673cae | 228 | |
11fdf7f2 TL |
229 | // The additional memory space taken by the block data. |
230 | size_t usable_size() const { | |
231 | if (allocation.get() != nullptr) { | |
494da23a TL |
232 | auto allocator = allocation.get_deleter().allocator; |
233 | if (allocator) { | |
234 | return allocator->UsableSize(allocation.get(), data.size()); | |
235 | } | |
11fdf7f2 TL |
236 | #ifdef ROCKSDB_MALLOC_USABLE_SIZE |
237 | return malloc_usable_size(allocation.get()); | |
238 | #else | |
239 | return data.size(); | |
240 | #endif // ROCKSDB_MALLOC_USABLE_SIZE | |
241 | } else { | |
242 | return 0; // no extra memory is occupied by the data | |
243 | } | |
244 | } | |
245 | ||
494da23a TL |
246 | size_t ApproximateMemoryUsage() const { |
247 | return usable_size() + sizeof(*this); | |
248 | } | |
249 | ||
11fdf7f2 TL |
250 | BlockContents(BlockContents&& other) ROCKSDB_NOEXCEPT { |
251 | *this = std::move(other); | |
252 | } | |
7c673cae FG |
253 | |
254 | BlockContents& operator=(BlockContents&& other) { | |
255 | data = std::move(other.data); | |
7c673cae | 256 | allocation = std::move(other.allocation); |
494da23a TL |
257 | #ifndef NDEBUG |
258 | is_raw_block = other.is_raw_block; | |
259 | #endif // NDEBUG | |
7c673cae FG |
260 | return *this; |
261 | } | |
262 | }; | |
263 | ||
264 | // Read the block identified by "handle" from "file". On failure | |
265 | // return non-OK. On success fill *result and return OK. | |
266 | extern Status ReadBlockContents( | |
11fdf7f2 TL |
267 | RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer, |
268 | const Footer& footer, const ReadOptions& options, const BlockHandle& handle, | |
269 | BlockContents* contents, const ImmutableCFOptions& ioptions, | |
7c673cae FG |
270 | bool do_uncompress = true, const Slice& compression_dict = Slice(), |
271 | const PersistentCacheOptions& cache_options = PersistentCacheOptions()); | |
272 | ||
273 | // The 'data' points to the raw block contents read in from file. | |
274 | // This method allocates a new heap buffer and the raw block | |
275 | // contents are uncompresed into this buffer. This buffer is | |
276 | // returned via 'result' and it is upto the caller to | |
277 | // free this buffer. | |
278 | // For description of compress_format_version and possible values, see | |
279 | // util/compression.h | |
494da23a TL |
280 | extern Status UncompressBlockContents(const UncompressionInfo& info, |
281 | const char* data, size_t n, | |
282 | BlockContents* contents, | |
283 | uint32_t compress_format_version, | |
284 | const ImmutableCFOptions& ioptions, | |
285 | MemoryAllocator* allocator = nullptr); | |
7c673cae FG |
286 | |
287 | // This is an extension to UncompressBlockContents that accepts | |
288 | // a specific compression type. This is used by un-wrapped blocks | |
289 | // with no compression header. | |
290 | extern Status UncompressBlockContentsForCompressionType( | |
494da23a | 291 | const UncompressionInfo& info, const char* data, size_t n, |
11fdf7f2 | 292 | BlockContents* contents, uint32_t compress_format_version, |
494da23a | 293 | const ImmutableCFOptions& ioptions, MemoryAllocator* allocator = nullptr); |
7c673cae FG |
294 | |
295 | // Implementation details follow. Clients should ignore. | |
296 | ||
297 | // TODO(andrewkr): we should prefer one way of representing a null/uninitialized | |
298 | // BlockHandle. Currently we use zeros for null and use negation-of-zeros for | |
299 | // uninitialized. | |
300 | inline BlockHandle::BlockHandle() | |
11fdf7f2 | 301 | : BlockHandle(~static_cast<uint64_t>(0), ~static_cast<uint64_t>(0)) {} |
7c673cae FG |
302 | |
303 | inline BlockHandle::BlockHandle(uint64_t _offset, uint64_t _size) | |
304 | : offset_(_offset), size_(_size) {} | |
305 | ||
306 | } // namespace rocksdb |