]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | // |
6 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |
7 | // Use of this source code is governed by a BSD-style license that can be | |
8 | // found in the LICENSE file. See the AUTHORS file for names of contributors. | |
9 | ||
10 | #pragma once | |
11 | #include <stddef.h> | |
12 | #include <stdint.h> | |
13 | #include <string> | |
14 | #include <vector> | |
15 | #ifdef ROCKSDB_MALLOC_USABLE_SIZE | |
16 | #ifdef OS_FREEBSD | |
17 | #include <malloc_np.h> | |
18 | #else | |
19 | #include <malloc.h> | |
20 | #endif | |
21 | #endif | |
22 | ||
23 | #include "db/dbformat.h" | |
24 | #include "db/pinned_iterators_manager.h" | |
11fdf7f2 | 25 | #include "format.h" |
7c673cae FG |
26 | #include "rocksdb/iterator.h" |
27 | #include "rocksdb/options.h" | |
28 | #include "rocksdb/statistics.h" | |
11fdf7f2 | 29 | #include "rocksdb/table.h" |
7c673cae | 30 | #include "table/block_prefix_index.h" |
11fdf7f2 | 31 | #include "table/data_block_hash_index.h" |
7c673cae | 32 | #include "table/internal_iterator.h" |
11fdf7f2 TL |
33 | #include "util/random.h" |
34 | #include "util/sync_point.h" | |
7c673cae FG |
35 | |
36 | namespace rocksdb { | |
37 | ||
38 | struct BlockContents; | |
39 | class Comparator; | |
11fdf7f2 | 40 | template <class TValue> |
7c673cae | 41 | class BlockIter; |
11fdf7f2 TL |
42 | class DataBlockIter; |
43 | class IndexBlockIter; | |
7c673cae FG |
44 | class BlockPrefixIndex; |
45 | ||
46 | // BlockReadAmpBitmap is a bitmap that map the rocksdb::Block data bytes to | |
47 | // a bitmap with ratio bytes_per_bit. Whenever we access a range of bytes in | |
48 | // the Block we update the bitmap and increment READ_AMP_ESTIMATE_USEFUL_BYTES. | |
49 | class BlockReadAmpBitmap { | |
50 | public: | |
51 | explicit BlockReadAmpBitmap(size_t block_size, size_t bytes_per_bit, | |
52 | Statistics* statistics) | |
11fdf7f2 TL |
53 | : bitmap_(nullptr), |
54 | bytes_per_bit_pow_(0), | |
55 | statistics_(statistics), | |
494da23a TL |
56 | rnd_(Random::GetTLSInstance()->Uniform( |
57 | static_cast<int>(bytes_per_bit))) { | |
11fdf7f2 | 58 | TEST_SYNC_POINT_CALLBACK("BlockReadAmpBitmap:rnd", &rnd_); |
7c673cae FG |
59 | assert(block_size > 0 && bytes_per_bit > 0); |
60 | ||
61 | // convert bytes_per_bit to be a power of 2 | |
62 | while (bytes_per_bit >>= 1) { | |
63 | bytes_per_bit_pow_++; | |
64 | } | |
65 | ||
66 | // num_bits_needed = ceil(block_size / bytes_per_bit) | |
494da23a | 67 | size_t num_bits_needed = ((block_size - 1) >> bytes_per_bit_pow_) + 1; |
11fdf7f2 | 68 | assert(num_bits_needed > 0); |
7c673cae FG |
69 | |
70 | // bitmap_size = ceil(num_bits_needed / kBitsPerEntry) | |
11fdf7f2 | 71 | size_t bitmap_size = (num_bits_needed - 1) / kBitsPerEntry + 1; |
7c673cae FG |
72 | |
73 | // Create bitmap and set all the bits to 0 | |
11fdf7f2 | 74 | bitmap_ = new std::atomic<uint32_t>[bitmap_size](); |
7c673cae | 75 | |
11fdf7f2 | 76 | RecordTick(GetStatistics(), READ_AMP_TOTAL_READ_BYTES, block_size); |
7c673cae FG |
77 | } |
78 | ||
79 | ~BlockReadAmpBitmap() { delete[] bitmap_; } | |
80 | ||
81 | void Mark(uint32_t start_offset, uint32_t end_offset) { | |
82 | assert(end_offset >= start_offset); | |
11fdf7f2 TL |
83 | // Index of first bit in mask |
84 | uint32_t start_bit = | |
85 | (start_offset + (1 << bytes_per_bit_pow_) - rnd_ - 1) >> | |
86 | bytes_per_bit_pow_; | |
87 | // Index of last bit in mask + 1 | |
88 | uint32_t exclusive_end_bit = | |
89 | (end_offset + (1 << bytes_per_bit_pow_) - rnd_) >> bytes_per_bit_pow_; | |
90 | if (start_bit >= exclusive_end_bit) { | |
91 | return; | |
7c673cae | 92 | } |
11fdf7f2 | 93 | assert(exclusive_end_bit > 0); |
7c673cae FG |
94 | |
95 | if (GetAndSet(start_bit) == 0) { | |
11fdf7f2 TL |
96 | uint32_t new_useful_bytes = (exclusive_end_bit - start_bit) |
97 | << bytes_per_bit_pow_; | |
7c673cae FG |
98 | RecordTick(GetStatistics(), READ_AMP_ESTIMATE_USEFUL_BYTES, |
99 | new_useful_bytes); | |
100 | } | |
101 | } | |
102 | ||
103 | Statistics* GetStatistics() { | |
104 | return statistics_.load(std::memory_order_relaxed); | |
105 | } | |
106 | ||
107 | void SetStatistics(Statistics* stats) { statistics_.store(stats); } | |
108 | ||
109 | uint32_t GetBytesPerBit() { return 1 << bytes_per_bit_pow_; } | |
110 | ||
11fdf7f2 TL |
111 | size_t ApproximateMemoryUsage() const { |
112 | #ifdef ROCKSDB_MALLOC_USABLE_SIZE | |
113 | return malloc_usable_size((void*)this); | |
114 | #endif // ROCKSDB_MALLOC_USABLE_SIZE | |
115 | return sizeof(*this); | |
116 | } | |
117 | ||
7c673cae FG |
118 | private: |
119 | // Get the current value of bit at `bit_idx` and set it to 1 | |
120 | inline bool GetAndSet(uint32_t bit_idx) { | |
121 | const uint32_t byte_idx = bit_idx / kBitsPerEntry; | |
122 | const uint32_t bit_mask = 1 << (bit_idx % kBitsPerEntry); | |
123 | ||
124 | return bitmap_[byte_idx].fetch_or(bit_mask, std::memory_order_relaxed) & | |
125 | bit_mask; | |
126 | } | |
127 | ||
128 | const uint32_t kBytesPersEntry = sizeof(uint32_t); // 4 bytes | |
129 | const uint32_t kBitsPerEntry = kBytesPersEntry * 8; // 32 bits | |
130 | ||
131 | // Bitmap used to record the bytes that we read, use atomic to protect | |
132 | // against multiple threads updating the same bit | |
133 | std::atomic<uint32_t>* bitmap_; | |
134 | // (1 << bytes_per_bit_pow_) is bytes_per_bit. Use power of 2 to optimize | |
135 | // muliplication and division | |
136 | uint8_t bytes_per_bit_pow_; | |
137 | // Pointer to DB Statistics object, Since this bitmap may outlive the DB | |
138 | // this pointer maybe invalid, but the DB will update it to a valid pointer | |
139 | // by using SetStatistics() before calling Mark() | |
140 | std::atomic<Statistics*> statistics_; | |
11fdf7f2 | 141 | uint32_t rnd_; |
7c673cae FG |
142 | }; |
143 | ||
// Block holds the raw contents of one block read from an SST file and hands
// out iterators over its entries. The bytes live in contents_; whether the
// Block owns them is reported by own_bytes(). Per-byte read-amplification
// accounting is optionally tracked via read_amp_bitmap_.
class Block {
 public:
  // Initialize the block with the specified contents.
  explicit Block(BlockContents&& contents, SequenceNumber _global_seqno,
                 size_t read_amp_bytes_per_bit = 0,
                 Statistics* statistics = nullptr);

  ~Block();

  size_t size() const { return size_; }
  const char* data() const { return data_; }
  // The additional memory space taken by the block data.
  size_t usable_size() const { return contents_.usable_size(); }
  // Number of restart points encoded at the tail of the block.
  uint32_t NumRestarts() const;
  // Whether contents_ owns the memory backing data().
  bool own_bytes() const { return contents_.own_bytes(); }

  // The data-block index type encoded in this block (see
  // BlockBasedTableOptions::DataBlockIndexType).
  BlockBasedTableOptions::DataBlockIndexType IndexType() const;

  // If comparator is InternalKeyComparator, user_comparator is its user
  // comparator; they are equal otherwise.
  //
  // If iter is null, return new Iterator
  // If iter is not null, update this one and return it as Iterator*
  //
  // key_includes_seq, default true, means that the keys are in internal key
  // format.
  // value_is_full, default true, means that no delta encoding is
  // applied to values.
  //
  // NewIterator<DataBlockIter>
  // Same as above but also updates read_amp_bitmap_ if it is not nullptr.
  //
  // NewIterator<IndexBlockIter>
  // If `prefix_index` is not nullptr this block will do hash lookup for the key
  // prefix. If total_order_seek is true, prefix_index_ is ignored.
  //
  // If `block_contents_pinned` is true, the caller will guarantee that when
  // the cleanup functions are transferred from the iterator to other
  // classes, e.g. PinnableSlice, the pointer to the bytes will still be
  // valid. Either the iterator holds cache handle or ownership of some resource
  // and release them in a release function, or caller is sure that the data
  // will not go away (for example, it's from mmapped file which will not be
  // closed).
  //
  // NOTE: for the hash based lookup, if a key prefix doesn't match any key,
  // the iterator will simply be set as "invalid", rather than returning
  // the key that is just pass the target key.
  template <typename TBlockIter>
  TBlockIter* NewIterator(
      const Comparator* comparator, const Comparator* user_comparator,
      TBlockIter* iter = nullptr, Statistics* stats = nullptr,
      bool total_order_seek = true, bool key_includes_seq = true,
      bool value_is_full = true, bool block_contents_pinned = false,
      BlockPrefixIndex* prefix_index = nullptr);

  // Report an approximation of how much memory has been used.
  size_t ApproximateMemoryUsage() const;

  SequenceNumber global_seqno() const { return global_seqno_; }

 private:
  BlockContents contents_;
  const char* data_;         // contents_.data.data()
  size_t size_;              // contents_.data.size()
  uint32_t restart_offset_;  // Offset in data_ of restart array
  uint32_t num_restarts_;    // cached result of NumRestarts()
  std::unique_ptr<BlockReadAmpBitmap> read_amp_bitmap_;
  // All keys in the block will have seqno = global_seqno_, regardless of
  // the encoded value (kDisableGlobalSequenceNumber means disabled)
  const SequenceNumber global_seqno_;

  // Hash index over the block's keys; consulted by
  // NewIterator<DataBlockIter> (passed through to DataBlockIter::Initialize).
  DataBlockHashIndex data_block_hash_index_;

  // No copying allowed
  Block(const Block&) = delete;
  void operator=(const Block&) = delete;
};
221 | ||
11fdf7f2 TL |
222 | template <class TValue> |
223 | class BlockIter : public InternalIteratorBase<TValue> { | |
7c673cae | 224 | public: |
11fdf7f2 TL |
225 | void InitializeBase(const Comparator* comparator, const char* data, |
226 | uint32_t restarts, uint32_t num_restarts, | |
227 | SequenceNumber global_seqno, bool block_contents_pinned) { | |
494da23a TL |
228 | assert(data_ == nullptr); // Ensure it is called only once |
229 | assert(num_restarts > 0); // Ensure the param is valid | |
7c673cae FG |
230 | |
231 | comparator_ = comparator; | |
232 | data_ = data; | |
233 | restarts_ = restarts; | |
234 | num_restarts_ = num_restarts; | |
235 | current_ = restarts_; | |
236 | restart_index_ = num_restarts_; | |
7c673cae | 237 | global_seqno_ = global_seqno; |
11fdf7f2 | 238 | block_contents_pinned_ = block_contents_pinned; |
7c673cae FG |
239 | } |
240 | ||
11fdf7f2 TL |
241 | // Makes Valid() return false, status() return `s`, and Seek()/Prev()/etc do |
242 | // nothing. Calls cleanup functions. | |
243 | void InvalidateBase(Status s) { | |
244 | // Assert that the BlockIter is never deleted while Pinning is Enabled. | |
245 | assert(!pinned_iters_mgr_ || | |
246 | (pinned_iters_mgr_ && !pinned_iters_mgr_->PinningEnabled())); | |
247 | ||
248 | data_ = nullptr; | |
249 | current_ = restarts_; | |
7c673cae | 250 | status_ = s; |
11fdf7f2 TL |
251 | |
252 | // Call cleanup callbacks. | |
253 | Cleanable::Reset(); | |
7c673cae FG |
254 | } |
255 | ||
256 | virtual bool Valid() const override { return current_ < restarts_; } | |
257 | virtual Status status() const override { return status_; } | |
258 | virtual Slice key() const override { | |
259 | assert(Valid()); | |
11fdf7f2 | 260 | return key_.GetKey(); |
7c673cae | 261 | } |
7c673cae FG |
262 | |
263 | #ifndef NDEBUG | |
11fdf7f2 | 264 | virtual ~BlockIter() { |
7c673cae FG |
265 | // Assert that the BlockIter is never deleted while Pinning is Enabled. |
266 | assert(!pinned_iters_mgr_ || | |
267 | (pinned_iters_mgr_ && !pinned_iters_mgr_->PinningEnabled())); | |
268 | } | |
269 | virtual void SetPinnedItersMgr( | |
270 | PinnedIteratorsManager* pinned_iters_mgr) override { | |
271 | pinned_iters_mgr_ = pinned_iters_mgr; | |
272 | } | |
273 | PinnedIteratorsManager* pinned_iters_mgr_ = nullptr; | |
274 | #endif | |
275 | ||
11fdf7f2 TL |
276 | virtual bool IsKeyPinned() const override { |
277 | return block_contents_pinned_ && key_pinned_; | |
278 | } | |
7c673cae | 279 | |
11fdf7f2 | 280 | virtual bool IsValuePinned() const override { return block_contents_pinned_; } |
7c673cae FG |
281 | |
282 | size_t TEST_CurrentEntrySize() { return NextEntryOffset() - current_; } | |
283 | ||
284 | uint32_t ValueOffset() const { | |
285 | return static_cast<uint32_t>(value_.data() - data_); | |
286 | } | |
287 | ||
11fdf7f2 TL |
288 | protected: |
289 | // Note: The type could be changed to InternalKeyComparator but we see a weird | |
290 | // performance drop by that. | |
7c673cae FG |
291 | const Comparator* comparator_; |
292 | const char* data_; // underlying block contents | |
7c673cae FG |
293 | uint32_t num_restarts_; // Number of uint32_t entries in restart array |
294 | ||
11fdf7f2 TL |
295 | // Index of restart block in which current_ or current_-1 falls |
296 | uint32_t restart_index_; | |
494da23a | 297 | uint32_t restarts_; // Offset of restart array (list of fixed32) |
7c673cae FG |
298 | // current_ is offset in data_ of current entry. >= restarts_ if !Valid |
299 | uint32_t current_; | |
7c673cae FG |
300 | IterKey key_; |
301 | Slice value_; | |
302 | Status status_; | |
7c673cae | 303 | bool key_pinned_; |
494da23a TL |
304 | // Whether the block data is guaranteed to outlive this iterator, and |
305 | // as long as the cleanup functions are transferred to another class, | |
306 | // e.g. PinnableSlice, the pointer to the bytes will still be valid. | |
11fdf7f2 | 307 | bool block_contents_pinned_; |
7c673cae FG |
308 | SequenceNumber global_seqno_; |
309 | ||
11fdf7f2 TL |
310 | public: |
311 | // Return the offset in data_ just past the end of the current entry. | |
312 | inline uint32_t NextEntryOffset() const { | |
313 | // NOTE: We don't support blocks bigger than 2GB | |
314 | return static_cast<uint32_t>((value_.data() + value_.size()) - data_); | |
315 | } | |
316 | ||
317 | uint32_t GetRestartPoint(uint32_t index) { | |
318 | assert(index < num_restarts_); | |
319 | return DecodeFixed32(data_ + restarts_ + index * sizeof(uint32_t)); | |
320 | } | |
321 | ||
322 | void SeekToRestartPoint(uint32_t index) { | |
323 | key_.Clear(); | |
324 | restart_index_ = index; | |
325 | // current_ will be fixed by ParseNextKey(); | |
326 | ||
327 | // ParseNextKey() starts at the end of value_, so set value_ accordingly | |
328 | uint32_t offset = GetRestartPoint(index); | |
329 | value_ = Slice(data_ + offset, 0); | |
330 | } | |
331 | ||
332 | void CorruptionError(); | |
333 | ||
334 | template <typename DecodeKeyFunc> | |
335 | inline bool BinarySeek(const Slice& target, uint32_t left, uint32_t right, | |
336 | uint32_t* index, const Comparator* comp); | |
337 | }; | |
338 | ||
339 | class DataBlockIter final : public BlockIter<Slice> { | |
340 | public: | |
341 | DataBlockIter() | |
342 | : BlockIter(), read_amp_bitmap_(nullptr), last_bitmap_offset_(0) {} | |
343 | DataBlockIter(const Comparator* comparator, const Comparator* user_comparator, | |
344 | const char* data, uint32_t restarts, uint32_t num_restarts, | |
345 | SequenceNumber global_seqno, | |
346 | BlockReadAmpBitmap* read_amp_bitmap, bool block_contents_pinned, | |
347 | DataBlockHashIndex* data_block_hash_index) | |
348 | : DataBlockIter() { | |
349 | Initialize(comparator, user_comparator, data, restarts, num_restarts, | |
350 | global_seqno, read_amp_bitmap, block_contents_pinned, | |
351 | data_block_hash_index); | |
352 | } | |
353 | void Initialize(const Comparator* comparator, | |
354 | const Comparator* user_comparator, const char* data, | |
355 | uint32_t restarts, uint32_t num_restarts, | |
356 | SequenceNumber global_seqno, | |
357 | BlockReadAmpBitmap* read_amp_bitmap, | |
358 | bool block_contents_pinned, | |
359 | DataBlockHashIndex* data_block_hash_index) { | |
360 | InitializeBase(comparator, data, restarts, num_restarts, global_seqno, | |
361 | block_contents_pinned); | |
362 | user_comparator_ = user_comparator; | |
363 | key_.SetIsUserKey(false); | |
364 | read_amp_bitmap_ = read_amp_bitmap; | |
365 | last_bitmap_offset_ = current_ + 1; | |
366 | data_block_hash_index_ = data_block_hash_index; | |
367 | } | |
368 | ||
369 | virtual Slice value() const override { | |
370 | assert(Valid()); | |
371 | if (read_amp_bitmap_ && current_ < restarts_ && | |
372 | current_ != last_bitmap_offset_) { | |
373 | read_amp_bitmap_->Mark(current_ /* current entry offset */, | |
374 | NextEntryOffset() - 1); | |
375 | last_bitmap_offset_ = current_; | |
376 | } | |
377 | return value_; | |
378 | } | |
379 | ||
380 | virtual void Seek(const Slice& target) override; | |
381 | ||
382 | inline bool SeekForGet(const Slice& target) { | |
383 | if (!data_block_hash_index_) { | |
384 | Seek(target); | |
385 | return true; | |
386 | } | |
387 | ||
388 | return SeekForGetImpl(target); | |
389 | } | |
390 | ||
391 | virtual void SeekForPrev(const Slice& target) override; | |
392 | ||
393 | virtual void Prev() override; | |
394 | ||
395 | virtual void Next() override; | |
396 | ||
494da23a TL |
397 | // Try to advance to the next entry in the block. If there is data corruption |
398 | // or error, report it to the caller instead of aborting the process. May | |
399 | // incur higher CPU overhead because we need to perform check on every entry. | |
400 | void NextOrReport(); | |
401 | ||
11fdf7f2 TL |
402 | virtual void SeekToFirst() override; |
403 | ||
494da23a TL |
404 | // Try to seek to the first entry in the block. If there is data corruption |
405 | // or error, report it to caller instead of aborting the process. May incur | |
406 | // higher CPU overhead because we need to perform check on every entry. | |
407 | void SeekToFirstOrReport(); | |
408 | ||
11fdf7f2 TL |
409 | virtual void SeekToLast() override; |
410 | ||
411 | void Invalidate(Status s) { | |
412 | InvalidateBase(s); | |
413 | // Clear prev entries cache. | |
414 | prev_entries_keys_buff_.clear(); | |
415 | prev_entries_.clear(); | |
416 | prev_entries_idx_ = -1; | |
417 | } | |
418 | ||
419 | private: | |
7c673cae FG |
420 | // read-amp bitmap |
421 | BlockReadAmpBitmap* read_amp_bitmap_; | |
422 | // last `current_` value we report to read-amp bitmp | |
423 | mutable uint32_t last_bitmap_offset_; | |
7c673cae FG |
424 | struct CachedPrevEntry { |
425 | explicit CachedPrevEntry(uint32_t _offset, const char* _key_ptr, | |
426 | size_t _key_offset, size_t _key_size, Slice _value) | |
427 | : offset(_offset), | |
428 | key_ptr(_key_ptr), | |
429 | key_offset(_key_offset), | |
430 | key_size(_key_size), | |
431 | value(_value) {} | |
432 | ||
433 | // offset of entry in block | |
434 | uint32_t offset; | |
435 | // Pointer to key data in block (nullptr if key is delta-encoded) | |
436 | const char* key_ptr; | |
437 | // offset of key in prev_entries_keys_buff_ (0 if key_ptr is not nullptr) | |
438 | size_t key_offset; | |
439 | // size of key | |
440 | size_t key_size; | |
441 | // value slice pointing to data in block | |
442 | Slice value; | |
443 | }; | |
444 | std::string prev_entries_keys_buff_; | |
445 | std::vector<CachedPrevEntry> prev_entries_; | |
446 | int32_t prev_entries_idx_ = -1; | |
447 | ||
11fdf7f2 TL |
448 | DataBlockHashIndex* data_block_hash_index_; |
449 | const Comparator* user_comparator_; | |
450 | ||
494da23a | 451 | template <typename DecodeEntryFunc> |
11fdf7f2 TL |
452 | inline bool ParseNextDataKey(const char* limit = nullptr); |
453 | ||
454 | inline int Compare(const IterKey& ikey, const Slice& b) const { | |
455 | return comparator_->Compare(ikey.GetInternalKey(), b); | |
7c673cae FG |
456 | } |
457 | ||
11fdf7f2 TL |
458 | bool SeekForGetImpl(const Slice& target); |
459 | }; | |
460 | ||
461 | class IndexBlockIter final : public BlockIter<BlockHandle> { | |
462 | public: | |
463 | IndexBlockIter() : BlockIter(), prefix_index_(nullptr) {} | |
464 | ||
465 | virtual Slice key() const override { | |
466 | assert(Valid()); | |
467 | return key_.GetKey(); | |
468 | } | |
469 | // key_includes_seq, default true, means that the keys are in internal key | |
470 | // format. | |
494da23a | 471 | // value_is_full, default true, means that no delta encoding is |
11fdf7f2 TL |
472 | // applied to values. |
473 | IndexBlockIter(const Comparator* comparator, | |
474 | const Comparator* user_comparator, const char* data, | |
475 | uint32_t restarts, uint32_t num_restarts, | |
476 | BlockPrefixIndex* prefix_index, bool key_includes_seq, | |
477 | bool value_is_full, bool block_contents_pinned) | |
478 | : IndexBlockIter() { | |
479 | Initialize(comparator, user_comparator, data, restarts, num_restarts, | |
480 | prefix_index, key_includes_seq, block_contents_pinned, | |
481 | value_is_full, nullptr /* data_block_hash_index */); | |
7c673cae FG |
482 | } |
483 | ||
11fdf7f2 TL |
484 | void Initialize(const Comparator* comparator, |
485 | const Comparator* user_comparator, const char* data, | |
486 | uint32_t restarts, uint32_t num_restarts, | |
487 | BlockPrefixIndex* prefix_index, bool key_includes_seq, | |
488 | bool value_is_full, bool block_contents_pinned, | |
489 | DataBlockHashIndex* /*data_block_hash_index*/) { | |
490 | InitializeBase(key_includes_seq ? comparator : user_comparator, data, | |
491 | restarts, num_restarts, kDisableGlobalSequenceNumber, | |
492 | block_contents_pinned); | |
493 | key_includes_seq_ = key_includes_seq; | |
494 | key_.SetIsUserKey(!key_includes_seq_); | |
495 | prefix_index_ = prefix_index; | |
496 | value_delta_encoded_ = !value_is_full; | |
7c673cae FG |
497 | } |
498 | ||
11fdf7f2 TL |
499 | virtual BlockHandle value() const override { |
500 | assert(Valid()); | |
501 | if (value_delta_encoded_) { | |
502 | return decoded_value_; | |
503 | } else { | |
504 | BlockHandle handle; | |
505 | Slice v = value_; | |
506 | Status decode_s __attribute__((__unused__)) = handle.DecodeFrom(&v); | |
507 | assert(decode_s.ok()); | |
508 | return handle; | |
509 | } | |
510 | } | |
7c673cae | 511 | |
11fdf7f2 TL |
512 | virtual void Seek(const Slice& target) override; |
513 | ||
514 | virtual void SeekForPrev(const Slice&) override { | |
515 | assert(false); | |
516 | current_ = restarts_; | |
517 | restart_index_ = num_restarts_; | |
518 | status_ = Status::InvalidArgument( | |
519 | "RocksDB internal error: should never call SeekForPrev() on index " | |
520 | "blocks"); | |
521 | key_.Clear(); | |
522 | value_.clear(); | |
7c673cae FG |
523 | } |
524 | ||
11fdf7f2 TL |
525 | virtual void Prev() override; |
526 | ||
527 | virtual void Next() override; | |
528 | ||
529 | virtual void SeekToFirst() override; | |
7c673cae | 530 | |
11fdf7f2 | 531 | virtual void SeekToLast() override; |
7c673cae | 532 | |
11fdf7f2 | 533 | void Invalidate(Status s) { InvalidateBase(s); } |
7c673cae | 534 | |
11fdf7f2 TL |
535 | private: |
536 | // Key is in InternalKey format | |
537 | bool key_includes_seq_; | |
538 | bool value_delta_encoded_; | |
539 | BlockPrefixIndex* prefix_index_; | |
540 | // Whether the value is delta encoded. In that case the value is assumed to be | |
541 | // BlockHandle. The first value in each restart interval is the full encoded | |
542 | // BlockHandle; the restart of encoded size part of the BlockHandle. The | |
543 | // offset of delta encoded BlockHandles is computed by adding the size of | |
544 | // previous delta encoded values in the same restart interval to the offset of | |
545 | // the first value in that restart interval. | |
546 | BlockHandle decoded_value_; | |
7c673cae | 547 | |
11fdf7f2 | 548 | bool PrefixSeek(const Slice& target, uint32_t* index); |
7c673cae | 549 | bool BinaryBlockIndexSeek(const Slice& target, uint32_t* block_ids, |
494da23a | 550 | uint32_t left, uint32_t right, uint32_t* index); |
11fdf7f2 | 551 | inline int CompareBlockKey(uint32_t block_index, const Slice& target); |
7c673cae | 552 | |
11fdf7f2 TL |
553 | inline int Compare(const Slice& a, const Slice& b) const { |
554 | return comparator_->Compare(a, b); | |
555 | } | |
556 | ||
557 | inline int Compare(const IterKey& ikey, const Slice& b) const { | |
558 | return comparator_->Compare(ikey.GetKey(), b); | |
559 | } | |
560 | ||
561 | inline bool ParseNextIndexKey(); | |
7c673cae | 562 | |
11fdf7f2 TL |
563 | // When value_delta_encoded_ is enabled it decodes the value which is assumed |
564 | // to be BlockHandle and put it to decoded_value_ | |
565 | inline void DecodeCurrentValue(uint32_t shared); | |
7c673cae FG |
566 | }; |
567 | ||
568 | } // namespace rocksdb |