]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | // |
6 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |
7 | // Use of this source code is governed by a BSD-style license that can be | |
8 | // found in the LICENSE file. See the AUTHORS file for names of contributors. | |
9 | ||
10 | #include "table/format.h" | |
11 | ||
f67539c2 | 12 | #include <cinttypes> |
494da23a | 13 | #include <string> |
7c673cae | 14 | |
f67539c2 TL |
15 | #include "block_fetcher.h" |
16 | #include "file/random_access_file_reader.h" | |
f67539c2 | 17 | #include "memory/memory_allocator.h" |
7c673cae FG |
18 | #include "monitoring/perf_context_imp.h" |
19 | #include "monitoring/statistics.h" | |
1e59de90 | 20 | #include "options/options_helper.h" |
7c673cae | 21 | #include "rocksdb/env.h" |
20effc67 | 22 | #include "rocksdb/options.h" |
1e59de90 | 23 | #include "rocksdb/table.h" |
f67539c2 TL |
24 | #include "table/block_based/block.h" |
25 | #include "table/block_based/block_based_table_reader.h" | |
7c673cae | 26 | #include "table/persistent_cache_helper.h" |
1e59de90 | 27 | #include "util/cast_util.h" |
7c673cae FG |
28 | #include "util/coding.h" |
29 | #include "util/compression.h" | |
30 | #include "util/crc32c.h" | |
1e59de90 | 31 | #include "util/hash.h" |
7c673cae FG |
32 | #include "util/stop_watch.h" |
33 | #include "util/string_util.h" | |
1e59de90 | 34 | #include "util/xxhash.h" |
7c673cae | 35 | |
f67539c2 | 36 | namespace ROCKSDB_NAMESPACE { |
7c673cae FG |
37 | |
// Magic numbers for block-based tables; only declared in this file.
extern const uint64_t kBlockBasedTableMagicNumber;
extern const uint64_t kLegacyBlockBasedTableMagicNumber;

#ifdef ROCKSDB_LITE
// ROCKSDB_LITE doesn't have plain table, so both constants are defined as 0
const uint64_t kPlainTableMagicNumber = 0;
const uint64_t kLegacyPlainTableMagicNumber = 0;
#else
extern const uint64_t kPlainTableMagicNumber;
extern const uint64_t kLegacyPlainTableMagicNumber;
#endif

// Keyword that ReifyDbHostIdProperty() replaces with the actual hostname.
const char* kHostnameForDbHostId = "__hostname__";
7c673cae FG |
50 | |
51 | bool ShouldReportDetailedTime(Env* env, Statistics* stats) { | |
52 | return env != nullptr && stats != nullptr && | |
494da23a | 53 | stats->get_stats_level() > kExceptDetailedTimers; |
7c673cae FG |
54 | } |
55 | ||
56 | void BlockHandle::EncodeTo(std::string* dst) const { | |
57 | // Sanity check that all fields have been set | |
1e59de90 TL |
58 | assert(offset_ != ~uint64_t{0}); |
59 | assert(size_ != ~uint64_t{0}); | |
7c673cae FG |
60 | PutVarint64Varint64(dst, offset_, size_); |
61 | } | |
62 | ||
1e59de90 TL |
63 | char* BlockHandle::EncodeTo(char* dst) const { |
64 | // Sanity check that all fields have been set | |
65 | assert(offset_ != ~uint64_t{0}); | |
66 | assert(size_ != ~uint64_t{0}); | |
67 | char* cur = EncodeVarint64(dst, offset_); | |
68 | cur = EncodeVarint64(cur, size_); | |
69 | return cur; | |
70 | } | |
71 | ||
7c673cae | 72 | Status BlockHandle::DecodeFrom(Slice* input) { |
494da23a | 73 | if (GetVarint64(input, &offset_) && GetVarint64(input, &size_)) { |
7c673cae FG |
74 | return Status::OK(); |
75 | } else { | |
76 | // reset in case failure after partially decoding | |
77 | offset_ = 0; | |
78 | size_ = 0; | |
79 | return Status::Corruption("bad block handle"); | |
80 | } | |
81 | } | |
82 | ||
11fdf7f2 TL |
83 | Status BlockHandle::DecodeSizeFrom(uint64_t _offset, Slice* input) { |
84 | if (GetVarint64(input, &size_)) { | |
85 | offset_ = _offset; | |
86 | return Status::OK(); | |
87 | } else { | |
88 | // reset in case failure after partially decoding | |
89 | offset_ = 0; | |
90 | size_ = 0; | |
91 | return Status::Corruption("bad block handle"); | |
92 | } | |
93 | } | |
94 | ||
7c673cae FG |
95 | // Return a string that contains the copy of handle. |
96 | std::string BlockHandle::ToString(bool hex) const { | |
97 | std::string handle_str; | |
98 | EncodeTo(&handle_str); | |
99 | if (hex) { | |
100 | return Slice(handle_str).ToString(true); | |
101 | } else { | |
102 | return handle_str; | |
103 | } | |
104 | } | |
105 | ||
106 | const BlockHandle BlockHandle::kNullBlockHandle(0, 0); | |
107 | ||
f67539c2 TL |
108 | void IndexValue::EncodeTo(std::string* dst, bool have_first_key, |
109 | const BlockHandle* previous_handle) const { | |
110 | if (previous_handle) { | |
1e59de90 | 111 | // WART: this is specific to Block-based table |
f67539c2 | 112 | assert(handle.offset() == previous_handle->offset() + |
1e59de90 TL |
113 | previous_handle->size() + |
114 | BlockBasedTable::kBlockTrailerSize); | |
f67539c2 TL |
115 | PutVarsignedint64(dst, handle.size() - previous_handle->size()); |
116 | } else { | |
117 | handle.EncodeTo(dst); | |
118 | } | |
119 | assert(dst->size() != 0); | |
120 | ||
121 | if (have_first_key) { | |
122 | PutLengthPrefixedSlice(dst, first_internal_key); | |
123 | } | |
124 | } | |
125 | ||
126 | Status IndexValue::DecodeFrom(Slice* input, bool have_first_key, | |
127 | const BlockHandle* previous_handle) { | |
128 | if (previous_handle) { | |
129 | int64_t delta; | |
130 | if (!GetVarsignedint64(input, &delta)) { | |
131 | return Status::Corruption("bad delta-encoded index value"); | |
132 | } | |
1e59de90 TL |
133 | // WART: this is specific to Block-based table |
134 | handle = BlockHandle(previous_handle->offset() + previous_handle->size() + | |
135 | BlockBasedTable::kBlockTrailerSize, | |
136 | previous_handle->size() + delta); | |
f67539c2 TL |
137 | } else { |
138 | Status s = handle.DecodeFrom(input); | |
139 | if (!s.ok()) { | |
140 | return s; | |
141 | } | |
142 | } | |
143 | ||
144 | if (!have_first_key) { | |
145 | first_internal_key = Slice(); | |
146 | } else if (!GetLengthPrefixedSlice(input, &first_internal_key)) { | |
147 | return Status::Corruption("bad first key in block info"); | |
148 | } | |
149 | ||
150 | return Status::OK(); | |
151 | } | |
152 | ||
153 | std::string IndexValue::ToString(bool hex, bool have_first_key) const { | |
154 | std::string s; | |
155 | EncodeTo(&s, have_first_key, nullptr); | |
156 | if (hex) { | |
157 | return Slice(s).ToString(true); | |
158 | } else { | |
159 | return s; | |
160 | } | |
161 | } | |
162 | ||
7c673cae FG |
163 | namespace { |
164 | inline bool IsLegacyFooterFormat(uint64_t magic_number) { | |
165 | return magic_number == kLegacyBlockBasedTableMagicNumber || | |
166 | magic_number == kLegacyPlainTableMagicNumber; | |
167 | } | |
168 | inline uint64_t UpconvertLegacyFooterFormat(uint64_t magic_number) { | |
169 | if (magic_number == kLegacyBlockBasedTableMagicNumber) { | |
170 | return kBlockBasedTableMagicNumber; | |
171 | } | |
172 | if (magic_number == kLegacyPlainTableMagicNumber) { | |
173 | return kPlainTableMagicNumber; | |
174 | } | |
175 | assert(false); | |
1e59de90 TL |
176 | return magic_number; |
177 | } | |
178 | inline uint64_t DownconvertToLegacyFooterFormat(uint64_t magic_number) { | |
179 | if (magic_number == kBlockBasedTableMagicNumber) { | |
180 | return kLegacyBlockBasedTableMagicNumber; | |
181 | } | |
182 | if (magic_number == kPlainTableMagicNumber) { | |
183 | return kLegacyPlainTableMagicNumber; | |
184 | } | |
185 | assert(false); | |
186 | return magic_number; | |
187 | } | |
188 | inline uint8_t BlockTrailerSizeForMagicNumber(uint64_t magic_number) { | |
189 | if (magic_number == kBlockBasedTableMagicNumber || | |
190 | magic_number == kLegacyBlockBasedTableMagicNumber) { | |
191 | return static_cast<uint8_t>(BlockBasedTable::kBlockTrailerSize); | |
192 | } else { | |
193 | return 0; | |
194 | } | |
7c673cae | 195 | } |
1e59de90 TL |
196 | |
197 | // Footer format, in three parts: | |
198 | // * Part1 | |
199 | // -> format_version == 0 (inferred from legacy magic number) | |
200 | // <empty> (0 bytes) | |
201 | // -> format_version >= 1 | |
202 | // checksum type (char, 1 byte) | |
203 | // * Part2 | |
204 | // metaindex handle (varint64 offset, varint64 size) | |
205 | // index handle (varint64 offset, varint64 size) | |
206 | // <zero padding> for part2 size = 2 * BlockHandle::kMaxEncodedLength = 40 | |
207 | // * Part3 | |
208 | // -> format_version == 0 (inferred from legacy magic number) | |
209 | // legacy magic number (8 bytes) | |
210 | // -> format_version >= 1 (inferred from NOT legacy magic number) | |
211 | // format_version (uint32LE, 4 bytes), also called "footer version" | |
212 | // newer magic number (8 bytes) | |
213 | ||
214 | constexpr size_t kFooterPart2Size = 2 * BlockHandle::kMaxEncodedLength; | |
7c673cae FG |
215 | } // namespace |
216 | ||
1e59de90 TL |
217 | void FooterBuilder::Build(uint64_t magic_number, uint32_t format_version, |
218 | uint64_t footer_offset, ChecksumType checksum_type, | |
219 | const BlockHandle& metaindex_handle, | |
220 | const BlockHandle& index_handle) { | |
221 | (void)footer_offset; // Future use | |
222 | ||
223 | assert(magic_number != Footer::kNullTableMagicNumber); | |
224 | assert(IsSupportedFormatVersion(format_version)); | |
225 | ||
226 | char* part2; | |
227 | char* part3; | |
228 | if (format_version > 0) { | |
229 | slice_ = Slice(data_.data(), Footer::kNewVersionsEncodedLength); | |
230 | // Generate parts 1 and 3 | |
231 | char* cur = data_.data(); | |
232 | // Part 1 | |
233 | *(cur++) = checksum_type; | |
234 | // Part 2 | |
235 | part2 = cur; | |
236 | // Skip over part 2 for now | |
237 | cur += kFooterPart2Size; | |
238 | // Part 3 | |
239 | part3 = cur; | |
240 | EncodeFixed32(cur, format_version); | |
241 | cur += 4; | |
242 | EncodeFixed64(cur, magic_number); | |
243 | assert(cur + 8 == slice_.data() + slice_.size()); | |
7c673cae | 244 | } else { |
1e59de90 TL |
245 | slice_ = Slice(data_.data(), Footer::kVersion0EncodedLength); |
246 | // Legacy SST files use kCRC32c checksum but it's not stored in footer. | |
247 | assert(checksum_type == kNoChecksum || checksum_type == kCRC32c); | |
248 | // Generate part 3 (part 1 empty, skip part 2 for now) | |
249 | part2 = data_.data(); | |
250 | part3 = part2 + kFooterPart2Size; | |
251 | char* cur = part3; | |
252 | // Use legacy magic numbers to indicate format_version=0, for | |
253 | // compatibility. No other cases should use format_version=0. | |
254 | EncodeFixed64(cur, DownconvertToLegacyFooterFormat(magic_number)); | |
255 | assert(cur + 8 == slice_.data() + slice_.size()); | |
7c673cae | 256 | } |
7c673cae | 257 | |
1e59de90 TL |
258 | { |
259 | char* cur = part2; | |
260 | cur = metaindex_handle.EncodeTo(cur); | |
261 | cur = index_handle.EncodeTo(cur); | |
262 | // Zero pad remainder | |
263 | std::fill(cur, part3, char{0}); | |
264 | } | |
7c673cae FG |
265 | } |
266 | ||
1e59de90 TL |
267 | Status Footer::DecodeFrom(Slice input, uint64_t input_offset) { |
268 | (void)input_offset; // Future use | |
269 | ||
270 | // Only decode to unused Footer | |
271 | assert(table_magic_number_ == kNullTableMagicNumber); | |
7c673cae | 272 | assert(input != nullptr); |
1e59de90 | 273 | assert(input.size() >= kMinEncodedLength); |
7c673cae | 274 | |
1e59de90 TL |
275 | const char* magic_ptr = input.data() + input.size() - kMagicNumberLengthByte; |
276 | uint64_t magic = DecodeFixed64(magic_ptr); | |
7c673cae FG |
277 | |
278 | // We check for legacy formats here and silently upconvert them | |
279 | bool legacy = IsLegacyFooterFormat(magic); | |
280 | if (legacy) { | |
281 | magic = UpconvertLegacyFooterFormat(magic); | |
282 | } | |
1e59de90 TL |
283 | table_magic_number_ = magic; |
284 | block_trailer_size_ = BlockTrailerSizeForMagicNumber(magic); | |
7c673cae | 285 | |
1e59de90 | 286 | // Parse Part3 |
7c673cae FG |
287 | if (legacy) { |
288 | // The size is already asserted to be at least kMinEncodedLength | |
289 | // at the beginning of the function | |
1e59de90 TL |
290 | input.remove_prefix(input.size() - kVersion0EncodedLength); |
291 | format_version_ = 0 /* legacy */; | |
292 | checksum_type_ = kCRC32c; | |
7c673cae | 293 | } else { |
1e59de90 TL |
294 | const char* part3_ptr = magic_ptr - 4; |
295 | format_version_ = DecodeFixed32(part3_ptr); | |
296 | if (!IsSupportedFormatVersion(format_version_)) { | |
297 | return Status::Corruption("Corrupt or unsupported format_version: " + | |
298 | std::to_string(format_version_)); | |
7c673cae | 299 | } |
1e59de90 TL |
300 | // All known format versions >= 1 occupy exactly this many bytes. |
301 | if (input.size() < kNewVersionsEncodedLength) { | |
302 | return Status::Corruption("Input is too short to be an SST file"); | |
7c673cae | 303 | } |
1e59de90 TL |
304 | uint64_t adjustment = input.size() - kNewVersionsEncodedLength; |
305 | input.remove_prefix(adjustment); | |
306 | ||
307 | // Parse Part1 | |
308 | char chksum = input.data()[0]; | |
309 | checksum_type_ = lossless_cast<ChecksumType>(chksum); | |
310 | if (!IsSupportedChecksumType(checksum_type())) { | |
311 | return Status::Corruption("Corrupt or unsupported checksum type: " + | |
312 | std::to_string(lossless_cast<uint8_t>(chksum))); | |
313 | } | |
314 | // Consume checksum type field | |
315 | input.remove_prefix(1); | |
7c673cae FG |
316 | } |
317 | ||
1e59de90 TL |
318 | // Parse Part2 |
319 | Status result = metaindex_handle_.DecodeFrom(&input); | |
7c673cae | 320 | if (result.ok()) { |
1e59de90 | 321 | result = index_handle_.DecodeFrom(&input); |
7c673cae FG |
322 | } |
323 | return result; | |
1e59de90 | 324 | // Padding in part2 is ignored |
7c673cae FG |
325 | } |
326 | ||
327 | std::string Footer::ToString() const { | |
11fdf7f2 | 328 | std::string result; |
7c673cae FG |
329 | result.reserve(1024); |
330 | ||
331 | bool legacy = IsLegacyFooterFormat(table_magic_number_); | |
332 | if (legacy) { | |
333 | result.append("metaindex handle: " + metaindex_handle_.ToString() + "\n "); | |
334 | result.append("index handle: " + index_handle_.ToString() + "\n "); | |
1e59de90 | 335 | result.append("table_magic_number: " + std::to_string(table_magic_number_) + |
f67539c2 | 336 | "\n "); |
1e59de90 | 337 | } else { |
7c673cae FG |
338 | result.append("metaindex handle: " + metaindex_handle_.ToString() + "\n "); |
339 | result.append("index handle: " + index_handle_.ToString() + "\n "); | |
1e59de90 TL |
340 | result.append("table_magic_number: " + std::to_string(table_magic_number_) + |
341 | "\n "); | |
342 | result.append("format version: " + std::to_string(format_version_) + | |
f67539c2 | 343 | "\n "); |
7c673cae FG |
344 | } |
345 | return result; | |
346 | } | |
347 | ||
20effc67 | 348 | Status ReadFooterFromFile(const IOOptions& opts, RandomAccessFileReader* file, |
11fdf7f2 TL |
349 | FilePrefetchBuffer* prefetch_buffer, |
350 | uint64_t file_size, Footer* footer, | |
351 | uint64_t enforce_table_magic_number) { | |
7c673cae | 352 | if (file_size < Footer::kMinEncodedLength) { |
1e59de90 TL |
353 | return Status::Corruption("file is too short (" + |
354 | std::to_string(file_size) + | |
494da23a TL |
355 | " bytes) to be an " |
356 | "sstable: " + | |
357 | file->file_name()); | |
7c673cae FG |
358 | } |
359 | ||
20effc67 TL |
360 | std::string footer_buf; |
361 | AlignedBuf internal_buf; | |
7c673cae | 362 | Slice footer_input; |
1e59de90 TL |
363 | uint64_t read_offset = (file_size > Footer::kMaxEncodedLength) |
364 | ? file_size - Footer::kMaxEncodedLength | |
365 | : 0; | |
11fdf7f2 | 366 | Status s; |
20effc67 TL |
367 | // TODO: Need to pass appropriate deadline to TryReadFromCache(). Right now, |
368 | // there is no readahead for point lookups, so TryReadFromCache will fail if | |
369 | // the required data is not in the prefetch buffer. Once deadline is enabled | |
370 | // for iterator, TryReadFromCache might do a readahead. Revisit to see if we | |
371 | // need to pass a timeout at that point | |
1e59de90 | 372 | // TODO: rate limit footer reads. |
11fdf7f2 | 373 | if (prefetch_buffer == nullptr || |
20effc67 | 374 | !prefetch_buffer->TryReadFromCache( |
1e59de90 TL |
375 | opts, file, read_offset, Footer::kMaxEncodedLength, &footer_input, |
376 | nullptr, opts.rate_limiter_priority)) { | |
20effc67 TL |
377 | if (file->use_direct_io()) { |
378 | s = file->Read(opts, read_offset, Footer::kMaxEncodedLength, | |
1e59de90 TL |
379 | &footer_input, nullptr, &internal_buf, |
380 | opts.rate_limiter_priority); | |
20effc67 TL |
381 | } else { |
382 | footer_buf.reserve(Footer::kMaxEncodedLength); | |
383 | s = file->Read(opts, read_offset, Footer::kMaxEncodedLength, | |
1e59de90 TL |
384 | &footer_input, &footer_buf[0], nullptr, |
385 | opts.rate_limiter_priority); | |
20effc67 | 386 | } |
11fdf7f2 TL |
387 | if (!s.ok()) return s; |
388 | } | |
7c673cae FG |
389 | |
390 | // Check that we actually read the whole footer from the file. It may be | |
391 | // that size isn't correct. | |
392 | if (footer_input.size() < Footer::kMinEncodedLength) { | |
1e59de90 TL |
393 | // FIXME: this error message is bad. We should be checking whether the |
394 | // provided file_size matches what's on disk, at least in this case. | |
395 | // Unfortunately FileSystem/Env does not provide a way to get the size | |
396 | // of an open file, so getting file size requires a full path seek. | |
397 | return Status::Corruption("file is too short (" + | |
398 | std::to_string(file_size) + | |
494da23a TL |
399 | " bytes) to be an " |
400 | "sstable" + | |
401 | file->file_name()); | |
7c673cae FG |
402 | } |
403 | ||
1e59de90 | 404 | s = footer->DecodeFrom(footer_input, read_offset); |
7c673cae FG |
405 | if (!s.ok()) { |
406 | return s; | |
407 | } | |
408 | if (enforce_table_magic_number != 0 && | |
409 | enforce_table_magic_number != footer->table_magic_number()) { | |
1e59de90 TL |
410 | return Status::Corruption("Bad table magic number: expected " + |
411 | std::to_string(enforce_table_magic_number) + | |
412 | ", found " + | |
413 | std::to_string(footer->table_magic_number()) + | |
414 | " in " + file->file_name()); | |
7c673cae FG |
415 | } |
416 | return Status::OK(); | |
417 | } | |
418 | ||
1e59de90 TL |
419 | namespace { |
420 | // Custom handling for the last byte of a block, to avoid invoking streaming | |
421 | // API to get an effective block checksum. This function is its own inverse | |
422 | // because it uses xor. | |
423 | inline uint32_t ModifyChecksumForLastByte(uint32_t checksum, char last_byte) { | |
424 | // This strategy bears some resemblance to extending a CRC checksum by one | |
425 | // more byte, except we don't need to re-mix the input checksum as long as | |
426 | // we do this step only once (per checksum). | |
427 | const uint32_t kRandomPrime = 0x6b9083d9; | |
428 | return checksum ^ lossless_cast<uint8_t>(last_byte) * kRandomPrime; | |
429 | } | |
430 | } // namespace | |
431 | ||
432 | uint32_t ComputeBuiltinChecksum(ChecksumType type, const char* data, | |
433 | size_t data_size) { | |
434 | switch (type) { | |
435 | case kCRC32c: | |
436 | return crc32c::Mask(crc32c::Value(data, data_size)); | |
437 | case kxxHash: | |
438 | return XXH32(data, data_size, /*seed*/ 0); | |
439 | case kxxHash64: | |
440 | return Lower32of64(XXH64(data, data_size, /*seed*/ 0)); | |
441 | case kXXH3: { | |
442 | if (data_size == 0) { | |
443 | // Special case because of special handling for last byte, not | |
444 | // present in this case. Can be any value different from other | |
445 | // small input size checksums. | |
446 | return 0; | |
447 | } else { | |
448 | // See corresponding code in ComputeBuiltinChecksumWithLastByte | |
449 | uint32_t v = Lower32of64(XXH3_64bits(data, data_size - 1)); | |
450 | return ModifyChecksumForLastByte(v, data[data_size - 1]); | |
451 | } | |
452 | } | |
453 | default: // including kNoChecksum | |
454 | return 0; | |
455 | } | |
456 | } | |
457 | ||
458 | uint32_t ComputeBuiltinChecksumWithLastByte(ChecksumType type, const char* data, | |
459 | size_t data_size, char last_byte) { | |
460 | switch (type) { | |
461 | case kCRC32c: { | |
462 | uint32_t crc = crc32c::Value(data, data_size); | |
463 | // Extend to cover last byte (compression type) | |
464 | crc = crc32c::Extend(crc, &last_byte, 1); | |
465 | return crc32c::Mask(crc); | |
466 | } | |
467 | case kxxHash: { | |
468 | XXH32_state_t* const state = XXH32_createState(); | |
469 | XXH32_reset(state, 0); | |
470 | XXH32_update(state, data, data_size); | |
471 | // Extend to cover last byte (compression type) | |
472 | XXH32_update(state, &last_byte, 1); | |
473 | uint32_t v = XXH32_digest(state); | |
474 | XXH32_freeState(state); | |
475 | return v; | |
476 | } | |
477 | case kxxHash64: { | |
478 | XXH64_state_t* const state = XXH64_createState(); | |
479 | XXH64_reset(state, 0); | |
480 | XXH64_update(state, data, data_size); | |
481 | // Extend to cover last byte (compression type) | |
482 | XXH64_update(state, &last_byte, 1); | |
483 | uint32_t v = Lower32of64(XXH64_digest(state)); | |
484 | XXH64_freeState(state); | |
485 | return v; | |
486 | } | |
487 | case kXXH3: { | |
488 | // XXH3 is a complicated hash function that is extremely fast on | |
489 | // contiguous input, but that makes its streaming support rather | |
490 | // complex. It is worth custom handling of the last byte (`type`) | |
491 | // in order to avoid allocating a large state object and bringing | |
492 | // that code complexity into CPU working set. | |
493 | uint32_t v = Lower32of64(XXH3_64bits(data, data_size)); | |
494 | return ModifyChecksumForLastByte(v, last_byte); | |
495 | } | |
496 | default: // including kNoChecksum | |
497 | return 0; | |
498 | } | |
499 | } | |
500 | ||
501 | Status UncompressBlockData(const UncompressionInfo& uncompression_info, | |
502 | const char* data, size_t size, | |
503 | BlockContents* out_contents, uint32_t format_version, | |
504 | const ImmutableOptions& ioptions, | |
505 | MemoryAllocator* allocator) { | |
20effc67 | 506 | Status ret = Status::OK(); |
7c673cae | 507 | |
494da23a | 508 | assert(uncompression_info.type() != kNoCompression && |
11fdf7f2 | 509 | "Invalid compression type"); |
7c673cae | 510 | |
1e59de90 TL |
511 | StopWatchNano timer(ioptions.clock, |
512 | ShouldReportDetailedTime(ioptions.env, ioptions.stats)); | |
20effc67 TL |
513 | size_t uncompressed_size = 0; |
514 | CacheAllocationPtr ubuf = | |
1e59de90 | 515 | UncompressData(uncompression_info, data, size, &uncompressed_size, |
20effc67 TL |
516 | GetCompressFormatForVersion(format_version), allocator); |
517 | if (!ubuf) { | |
1e59de90 TL |
518 | if (!CompressionTypeSupported(uncompression_info.type())) { |
519 | return Status::NotSupported( | |
520 | "Unsupported compression method for this build", | |
521 | CompressionTypeToString(uncompression_info.type())); | |
522 | } else { | |
523 | return Status::Corruption( | |
524 | "Corrupted compressed block contents", | |
525 | CompressionTypeToString(uncompression_info.type())); | |
526 | } | |
7c673cae FG |
527 | } |
528 | ||
1e59de90 | 529 | *out_contents = BlockContents(std::move(ubuf), uncompressed_size); |
20effc67 | 530 | |
1e59de90 TL |
531 | if (ShouldReportDetailedTime(ioptions.env, ioptions.stats)) { |
532 | RecordTimeToHistogram(ioptions.stats, DECOMPRESSION_TIMES_NANOS, | |
494da23a | 533 | timer.ElapsedNanos()); |
7c673cae | 534 | } |
1e59de90 TL |
535 | RecordTimeToHistogram(ioptions.stats, BYTES_DECOMPRESSED, |
536 | out_contents->data.size()); | |
537 | RecordTick(ioptions.stats, NUMBER_BLOCK_DECOMPRESSED); | |
7c673cae | 538 | |
1e59de90 TL |
539 | TEST_SYNC_POINT_CALLBACK("UncompressBlockData:TamperWithReturnValue", |
540 | static_cast<void*>(&ret)); | |
20effc67 | 541 | TEST_SYNC_POINT_CALLBACK( |
1e59de90 | 542 | "UncompressBlockData:" |
20effc67 | 543 | "TamperWithDecompressionOutput", |
1e59de90 | 544 | static_cast<void*>(out_contents)); |
20effc67 TL |
545 | |
546 | return ret; | |
7c673cae FG |
547 | } |
548 | ||
1e59de90 TL |
549 | Status UncompressSerializedBlock(const UncompressionInfo& uncompression_info, |
550 | const char* data, size_t size, | |
551 | BlockContents* out_contents, | |
552 | uint32_t format_version, | |
553 | const ImmutableOptions& ioptions, | |
554 | MemoryAllocator* allocator) { | |
555 | assert(data[size] != kNoCompression); | |
556 | assert(data[size] == static_cast<char>(uncompression_info.type())); | |
557 | return UncompressBlockData(uncompression_info, data, size, out_contents, | |
558 | format_version, ioptions, allocator); | |
7c673cae FG |
559 | } |
560 | ||
20effc67 TL |
561 | // Replace the contents of db_host_id with the actual hostname, if db_host_id |
562 | // matches the keyword kHostnameForDbHostId | |
563 | Status ReifyDbHostIdProperty(Env* env, std::string* db_host_id) { | |
564 | assert(db_host_id); | |
565 | if (*db_host_id == kHostnameForDbHostId) { | |
566 | Status s = env->GetHostNameString(db_host_id); | |
567 | if (!s.ok()) { | |
568 | db_host_id->clear(); | |
569 | } | |
570 | return s; | |
571 | } | |
572 | ||
573 | return Status::OK(); | |
574 | } | |
f67539c2 | 575 | } // namespace ROCKSDB_NAMESPACE |