]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/table/format.h
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / rocksdb / table / format.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under the BSD-style license found in the
3 // LICENSE file in the root directory of this source tree. An additional grant
4 // of patent rights can be found in the PATENTS file in the same directory.
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10 #pragma once
11 #include <string>
12 #include <stdint.h>
13 #include "rocksdb/slice.h"
14 #include "rocksdb/status.h"
15 #include "rocksdb/options.h"
16 #include "rocksdb/table.h"
17
18 #include "options/cf_options.h"
19 #include "port/port.h" // noexcept
20 #include "table/persistent_cache_options.h"
21
22 namespace rocksdb {
23
// Forward declarations for types that this header only names through
// pointers or references. RandomAccessFileReader is listed explicitly because
// ReadFooterFromFile() and ReadBlockContents() below take a pointer to it.
class Block;
class RandomAccessFile;
class RandomAccessFileReader;
struct ReadOptions;
27
28 extern bool ShouldReportDetailedTime(Env* env, Statistics* stats);
29
// Size, in bytes, of the magic number.
const int kMagicNumberLengthByte = 8;
32
33 // BlockHandle is a pointer to the extent of a file that stores a data
34 // block or a meta block.
35 class BlockHandle {
36 public:
37 BlockHandle();
38 BlockHandle(uint64_t offset, uint64_t size);
39
40 // The offset of the block in the file.
41 uint64_t offset() const { return offset_; }
42 void set_offset(uint64_t _offset) { offset_ = _offset; }
43
44 // The size of the stored block
45 uint64_t size() const { return size_; }
46 void set_size(uint64_t _size) { size_ = _size; }
47
48 void EncodeTo(std::string* dst) const;
49 Status DecodeFrom(Slice* input);
50
51 // Return a string that contains the copy of handle.
52 std::string ToString(bool hex = true) const;
53
54 // if the block handle's offset and size are both "0", we will view it
55 // as a null block handle that points to no where.
56 bool IsNull() const {
57 return offset_ == 0 && size_ == 0;
58 }
59
60 static const BlockHandle& NullBlockHandle() {
61 return kNullBlockHandle;
62 }
63
64 // Maximum encoding length of a BlockHandle
65 enum { kMaxEncodedLength = 10 + 10 };
66
67 private:
68 uint64_t offset_;
69 uint64_t size_;
70
71 static const BlockHandle kNullBlockHandle;
72 };
73
74 inline uint32_t GetCompressFormatForVersion(CompressionType compression_type,
75 uint32_t version) {
76 // snappy is not versioned
77 assert(compression_type != kSnappyCompression &&
78 compression_type != kXpressCompression &&
79 compression_type != kNoCompression);
80 // As of version 2, we encode compressed block with
81 // compress_format_version == 2. Before that, the version is 1.
82 // DO NOT CHANGE THIS FUNCTION, it affects disk format
83 return version >= 2 ? 2 : 1;
84 }
85
// Reports whether `version` is an accepted block-based table format version.
// Versions 0, 1 and 2 are accepted; anything larger is rejected.
inline bool BlockBasedTableSupportedVersion(uint32_t version) {
  const uint32_t kNewestSupportedVersion = 2;
  return !(version > kNewestSupportedVersion);
}
89
90 // Footer encapsulates the fixed information stored at the tail
91 // end of every table file.
92 class Footer {
93 public:
94 // Constructs a footer without specifying its table magic number.
95 // In such case, the table magic number of such footer should be
96 // initialized via @ReadFooterFromFile().
97 // Use this when you plan to load Footer with DecodeFrom(). Never use this
98 // when you plan to EncodeTo.
99 Footer() : Footer(kInvalidTableMagicNumber, 0) {}
100
101 // Use this constructor when you plan to write out the footer using
102 // EncodeTo(). Never use this constructor with DecodeFrom().
103 Footer(uint64_t table_magic_number, uint32_t version);
104
105 // The version of the footer in this file
106 uint32_t version() const { return version_; }
107
108 // The checksum type used in this file
109 ChecksumType checksum() const { return checksum_; }
110 void set_checksum(const ChecksumType c) { checksum_ = c; }
111
112 // The block handle for the metaindex block of the table
113 const BlockHandle& metaindex_handle() const { return metaindex_handle_; }
114 void set_metaindex_handle(const BlockHandle& h) { metaindex_handle_ = h; }
115
116 // The block handle for the index block of the table
117 const BlockHandle& index_handle() const { return index_handle_; }
118
119 void set_index_handle(const BlockHandle& h) { index_handle_ = h; }
120
121 uint64_t table_magic_number() const { return table_magic_number_; }
122
123 void EncodeTo(std::string* dst) const;
124
125 // Set the current footer based on the input slice.
126 //
127 // REQUIRES: table_magic_number_ is not set (i.e.,
128 // HasInitializedTableMagicNumber() is true). The function will initialize the
129 // magic number
130 Status DecodeFrom(Slice* input);
131
132 // Encoded length of a Footer. Note that the serialization of a Footer will
133 // always occupy at least kMinEncodedLength bytes. If fields are changed
134 // the version number should be incremented and kMaxEncodedLength should be
135 // increased accordingly.
136 enum {
137 // Footer version 0 (legacy) will always occupy exactly this many bytes.
138 // It consists of two block handles, padding, and a magic number.
139 kVersion0EncodedLength = 2 * BlockHandle::kMaxEncodedLength + 8,
140 // Footer of versions 1 and higher will always occupy exactly this many
141 // bytes. It consists of the checksum type, two block handles, padding,
142 // a version number (bigger than 1), and a magic number
143 kNewVersionsEncodedLength = 1 + 2 * BlockHandle::kMaxEncodedLength + 4 + 8,
144 kMinEncodedLength = kVersion0EncodedLength,
145 kMaxEncodedLength = kNewVersionsEncodedLength,
146 };
147
148 static const uint64_t kInvalidTableMagicNumber = 0;
149
150 // convert this object to a human readable form
151 std::string ToString() const;
152
153 private:
154 // REQUIRES: magic number wasn't initialized.
155 void set_table_magic_number(uint64_t magic_number) {
156 assert(!HasInitializedTableMagicNumber());
157 table_magic_number_ = magic_number;
158 }
159
160 // return true if @table_magic_number_ is set to a value different
161 // from @kInvalidTableMagicNumber.
162 bool HasInitializedTableMagicNumber() const {
163 return (table_magic_number_ != kInvalidTableMagicNumber);
164 }
165
166 uint32_t version_;
167 ChecksumType checksum_;
168 BlockHandle metaindex_handle_;
169 BlockHandle index_handle_;
170 uint64_t table_magic_number_ = 0;
171 };
172
173 // Read the footer from file
174 // If enforce_table_magic_number != 0, ReadFooterFromFile() will return
175 // corruption if table_magic number is not equal to enforce_table_magic_number
176 Status ReadFooterFromFile(RandomAccessFileReader* file, uint64_t file_size,
177 Footer* footer,
178 uint64_t enforce_table_magic_number = 0);
179
// Size of the trailer that follows each block: a 1-byte type plus a 32-bit
// crc.
static const size_t kBlockTrailerSize = 1 /* type */ + 4 /* 32-bit crc */;
182
183 struct BlockContents {
184 Slice data; // Actual contents of data
185 bool cachable; // True iff data can be cached
186 CompressionType compression_type;
187 std::unique_ptr<char[]> allocation;
188
189 BlockContents() : cachable(false), compression_type(kNoCompression) {}
190
191 BlockContents(const Slice& _data, bool _cachable,
192 CompressionType _compression_type)
193 : data(_data), cachable(_cachable), compression_type(_compression_type) {}
194
195 BlockContents(std::unique_ptr<char[]>&& _data, size_t _size, bool _cachable,
196 CompressionType _compression_type)
197 : data(_data.get(), _size),
198 cachable(_cachable),
199 compression_type(_compression_type),
200 allocation(std::move(_data)) {}
201
202 BlockContents(BlockContents&& other) ROCKSDB_NOEXCEPT { *this = std::move(other); }
203
204 BlockContents& operator=(BlockContents&& other) {
205 data = std::move(other.data);
206 cachable = other.cachable;
207 compression_type = other.compression_type;
208 allocation = std::move(other.allocation);
209 return *this;
210 }
211 };
212
213 // Read the block identified by "handle" from "file". On failure
214 // return non-OK. On success fill *result and return OK.
215 extern Status ReadBlockContents(
216 RandomAccessFileReader* file, const Footer& footer,
217 const ReadOptions& options, const BlockHandle& handle,
218 BlockContents* contents, const ImmutableCFOptions &ioptions,
219 bool do_uncompress = true, const Slice& compression_dict = Slice(),
220 const PersistentCacheOptions& cache_options = PersistentCacheOptions());
221
222 // The 'data' points to the raw block contents read in from file.
223 // This method allocates a new heap buffer and the raw block
224 // contents are uncompresed into this buffer. This buffer is
225 // returned via 'result' and it is upto the caller to
226 // free this buffer.
227 // For description of compress_format_version and possible values, see
228 // util/compression.h
229 extern Status UncompressBlockContents(const char* data, size_t n,
230 BlockContents* contents,
231 uint32_t compress_format_version,
232 const Slice& compression_dict,
233 const ImmutableCFOptions &ioptions);
234
235 // This is an extension to UncompressBlockContents that accepts
236 // a specific compression type. This is used by un-wrapped blocks
237 // with no compression header.
238 extern Status UncompressBlockContentsForCompressionType(
239 const char* data, size_t n, BlockContents* contents,
240 uint32_t compress_format_version, const Slice& compression_dict,
241 CompressionType compression_type, const ImmutableCFOptions &ioptions);
242
// Implementation details follow. Clients should ignore them.
244
245 // TODO(andrewkr): we should prefer one way of representing a null/uninitialized
246 // BlockHandle. Currently we use zeros for null and use negation-of-zeros for
247 // uninitialized.
248 inline BlockHandle::BlockHandle()
249 : BlockHandle(~static_cast<uint64_t>(0),
250 ~static_cast<uint64_t>(0)) {
251 }
252
253 inline BlockHandle::BlockHandle(uint64_t _offset, uint64_t _size)
254 : offset_(_offset), size_(_size) {}
255
256 } // namespace rocksdb