// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#pragma once
#include <atomic>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_set>
#include <vector>

#include "db/dbformat.h"
#include "db/kv_checksum.h"
#include "db/range_tombstone_fragmenter.h"
#include "db/read_callback.h"
#include "db/version_edit.h"
#include "memory/allocator.h"
#include "memory/concurrent_arena.h"
#include "monitoring/instrumented_mutex.h"
#include "options/cf_options.h"
#include "rocksdb/db.h"
#include "rocksdb/memtablerep.h"
#include "table/multiget_context.h"
#include "util/dynamic_bloom.h"
#include "util/hash.h"
#include "util/hash_containers.h"

namespace ROCKSDB_NAMESPACE {

class MemTableIterator;

struct ImmutableMemTableOptions {
  explicit ImmutableMemTableOptions(const ImmutableOptions& ioptions,
                                    const MutableCFOptions& mutable_cf_options);
  size_t arena_block_size;
  uint32_t memtable_prefix_bloom_bits;
  size_t memtable_huge_page_size;
  bool memtable_whole_key_filtering;
  bool inplace_update_support;
  size_t inplace_update_num_locks;
  UpdateStatus (*inplace_callback)(char* existing_value,
                                   uint32_t* existing_value_size,
                                   Slice delta_value,
                                   std::string* merged_value);
  size_t max_successive_merges;
  Statistics* statistics;
  MergeOperator* merge_operator;
  bool allow_data_in_errors;
  uint32_t protection_bytes_per_key;
};

// Batched counters to be updated when inserting keys in one write batch.
// In post process of the write batch, these can be updated together.
// Only used in concurrent memtable insert case.
struct MemTablePostProcessInfo {
  uint64_t data_size = 0;
  uint64_t num_entries = 0;
  uint64_t num_deletes = 0;
};

using MultiGetRange = MultiGetContext::Range;

// Note: Many of the methods in this class have comments indicating that
// external synchronization is required as these methods are not thread-safe.
// It is up to higher layers of code to decide how to prevent concurrent
// invocation of these methods. This is usually done by acquiring either
// the db mutex or the single writer thread.
//
// Some of these methods are documented to only require external
// synchronization if this memtable is immutable. Calling MarkImmutable() is
// not sufficient to guarantee immutability. It is up to higher layers of
// code to determine if this MemTable can still be modified by other threads.
// Eg: The Superversion stores a pointer to the current MemTable (that can
// be modified) and a separate list of the MemTables that can no longer be
// written to (aka the 'immutable memtables').
class MemTable {
 public:
  struct KeyComparator : public MemTableRep::KeyComparator {
    const InternalKeyComparator comparator;
    explicit KeyComparator(const InternalKeyComparator& c) : comparator(c) {}
    virtual int operator()(const char* prefix_len_key1,
                           const char* prefix_len_key2) const override;
    virtual int operator()(const char* prefix_len_key,
                           const DecodedType& key) const override;
  };

  // MemTables are reference counted. The initial reference count
  // is zero and the caller must call Ref() at least once.
  //
  // earliest_seq should be the current SequenceNumber in the db such that any
  // key inserted into this memtable will have an equal or larger seq number.
  // (When a db is first created, the earliest sequence number will be 0.)
  // If the earliest sequence number is not known, kMaxSequenceNumber may be
  // used, but this may prevent some transactions from succeeding until the
  // first key is inserted into the memtable.
  explicit MemTable(const InternalKeyComparator& comparator,
                    const ImmutableOptions& ioptions,
                    const MutableCFOptions& mutable_cf_options,
                    WriteBufferManager* write_buffer_manager,
                    SequenceNumber earliest_seq, uint32_t column_family_id);

  // No copying allowed
  MemTable(const MemTable&) = delete;
  MemTable& operator=(const MemTable&) = delete;

  // Do not delete this MemTable unless Unref() indicates it is not in use.
  ~MemTable();

  // Increase reference count.
  // REQUIRES: external synchronization to prevent simultaneous
  // operations on the same MemTable.
  void Ref() { ++refs_; }

  // Drop reference count.
  // If the refcount goes to zero return this memtable, otherwise return null.
  // REQUIRES: external synchronization to prevent simultaneous
  // operations on the same MemTable.
  MemTable* Unref() {
    --refs_;
    assert(refs_ >= 0);
    if (refs_ <= 0) {
      return this;
    }
    return nullptr;
  }

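  // Illustrative usage sketch (not part of this header): the ref-counting
  // protocol described above, assuming external synchronization is held:
  //
  //   memtable->Ref();                          // initial refcount is zero
  //   ...                                       // use the memtable
  //   MemTable* to_delete = memtable->Unref();  // non-null when count hits 0
  //   if (to_delete != nullptr) {
  //     delete to_delete;                       // last reference dropped
  //   }
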
  // Returns an estimate of the number of bytes of data in use by this
  // data structure.
  //
  // REQUIRES: external synchronization to prevent simultaneous
  // operations on the same MemTable (unless this Memtable is immutable).
  size_t ApproximateMemoryUsage();

  // As a cheap version of `ApproximateMemoryUsage()`, this function doesn't
  // require external synchronization. The value may be less accurate though.
  size_t ApproximateMemoryUsageFast() const {
    return approximate_memory_usage_.load(std::memory_order_relaxed);
  }

  // used by MemTableListVersion::MemoryAllocatedBytesExcludingLast
  size_t MemoryAllocatedBytes() const {
    return table_->ApproximateMemoryUsage() +
           range_del_table_->ApproximateMemoryUsage() +
           arena_.MemoryAllocatedBytes();
  }

  // Fills 'entries' with unique random memtable entries.
  //
  // Note: the entries are stored in the unordered_set as length-prefixed keys,
  // hence their representation in the set as "const char*".
  // Note2: the size of the output set 'entries' is not enforced to be strictly
  // equal to 'target_sample_size'. Its final size might be slightly
  // greater or slightly less than 'target_sample_size'.
  //
  // REQUIRES: external synchronization to prevent simultaneous
  // operations on the same MemTable (unless this Memtable is immutable).
  // REQUIRES: SkipList memtable representation. This function is not
  // implemented for any other type of memtable representation (vectorrep,
  // hashskiplist,...).
  void UniqueRandomSample(const uint64_t& target_sample_size,
                          std::unordered_set<const char*>* entries) {
    // TODO(bjlemaire): at the moment, only supported by skiplistrep.
    // Extend it to all other memtable representations.
    table_->UniqueRandomSample(num_entries(), target_sample_size, entries);
  }

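  // Illustrative usage sketch (assumes a skiplist-backed memtable, as noted
  // above); `mem` is a hypothetical MemTable*:
  //
  //   std::unordered_set<const char*> entries;
  //   mem->UniqueRandomSample(/*target_sample_size=*/100, &entries);
  //   // `entries` now holds roughly 100 length-prefixed internal keys.
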
  // This method heuristically determines if the memtable should continue to
  // host more data.
  bool ShouldScheduleFlush() const {
    return flush_state_.load(std::memory_order_relaxed) == FLUSH_REQUESTED;
  }

  // Returns true if a flush should be scheduled and the caller should
  // be the one to schedule it.
  bool MarkFlushScheduled() {
    auto before = FLUSH_REQUESTED;
    return flush_state_.compare_exchange_strong(before, FLUSH_SCHEDULED,
                                                std::memory_order_relaxed,
                                                std::memory_order_relaxed);
  }

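  // Illustrative usage sketch: the intended flush-scheduling handshake.
  // ShouldScheduleFlush() is a cheap relaxed load; MarkFlushScheduled() is a
  // compare-and-swap, so exactly one caller wins the right to schedule:
  //
  //   if (mem->ShouldScheduleFlush() && mem->MarkFlushScheduled()) {
  //     // this thread (and only this thread) enqueues the flush
  //   }
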
  // Return an iterator that yields the contents of the memtable.
  //
  // The caller must ensure that the underlying MemTable remains live
  // while the returned iterator is live. The keys returned by this
  // iterator are internal keys encoded by AppendInternalKey in the
  // db/dbformat.{h,cc} module.
  //
  // By default, it returns an iterator for prefix seek if prefix_extractor
  // is configured in Options.
  // arena: If not null, the arena needs to be used to allocate the Iterator.
  //        Calling ~Iterator of the iterator will destroy all the states but
  //        those allocated in arena.
  InternalIterator* NewIterator(const ReadOptions& read_options, Arena* arena);

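  // Illustrative usage sketch: with a non-null arena the iterator's memory
  // belongs to the arena, so the destructor is invoked directly instead of
  // calling delete; `mem` is a hypothetical MemTable*:
  //
  //   Arena arena;
  //   InternalIterator* iter = mem->NewIterator(ReadOptions(), &arena);
  //   for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
  //     // iter->key() is an internal key (see AppendInternalKey)
  //   }
  //   iter->~InternalIterator();  // storage itself is reclaimed with `arena`
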
  // Returns an iterator that yields the range tombstones of the memtable.
  // The caller must ensure that the underlying MemTable remains live
  // while the returned iterator is live.
  // @param immutable_memtable Whether this memtable is an immutable memtable.
  // This information is not stored in the memtable itself, so it needs to be
  // specified by the caller. This flag is used internally to decide whether a
  // cached fragmented range tombstone list can be returned. This cached
  // version is constructed when a memtable becomes immutable. Setting the
  // flag to false will always yield correct results, but may incur a
  // performance penalty as it always creates a new fragmented range
  // tombstone list.
  FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator(
      const ReadOptions& read_options, SequenceNumber read_seq,
      bool immutable_memtable);

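  // Illustrative usage sketch: scanning tombstones visible at `read_seq`;
  // the caller owns the returned iterator, which may be null when the
  // memtable has no range tombstones:
  //
  //   std::unique_ptr<FragmentedRangeTombstoneIterator> del_iter(
  //       mem->NewRangeTombstoneIterator(read_opts, read_seq,
  //                                      /*immutable_memtable=*/false));
  //   if (del_iter) {
  //     for (del_iter->SeekToFirst(); del_iter->Valid(); del_iter->Next()) {
  //       // tombstone covers [start_key(), end_key()) at seq()
  //     }
  //   }
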
  Status VerifyEncodedEntry(Slice encoded,
                            const ProtectionInfoKVOS64& kv_prot_info);

  // Add an entry into memtable that maps key to value at the
  // specified sequence number and with the specified type.
  // Typically value will be empty if type==kTypeDeletion.
  //
  // REQUIRES: if allow_concurrent = false, external synchronization to prevent
  // simultaneous operations on the same MemTable.
  //
  // Returns `Status::TryAgain` if the `seq`, `key` combination already exists
  // in the memtable and `MemTableRepFactory::CanHandleDuplicatedKey()` is true.
  // The next attempt should try a larger value for `seq`.
  Status Add(SequenceNumber seq, ValueType type, const Slice& key,
             const Slice& value, const ProtectionInfoKVOS64* kv_prot_info,
             bool allow_concurrent = false,
             MemTablePostProcessInfo* post_process_info = nullptr,
             void** hint = nullptr);

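  // Illustrative usage sketch: a single-threaded insert that retries with a
  // larger sequence number when the rep reports a duplicate (seq, key):
  //
  //   Status s = mem->Add(seq, kTypeValue, key, value,
  //                       /*kv_prot_info=*/nullptr);
  //   if (s.IsTryAgain()) {
  //     s = mem->Add(seq + 1, kTypeValue, key, value,
  //                  /*kv_prot_info=*/nullptr);
  //   }
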
  // Used to Get value associated with key or Get Merge Operands associated
  // with key.
  // If do_merge = true the default behavior, which is Get value for key, is
  // executed. Expected behavior is described right below.
  // If memtable contains a value for key, store it in *value and return true.
  // If memtable contains a deletion for key, store a NotFound() error
  // in *status and return true.
  // If memtable contains a Merge operation as the most recent entry for a key,
  // and the merge process does not stop (not reaching a value or delete),
  // prepend the current merge operand to *operands,
  // store MergeInProgress in *s, and return false.
  // Else, return false.
  // If any operation was found, its most recent sequence number
  // will be stored in *seq on success (regardless of whether true/false is
  // returned). Otherwise, *seq will be set to kMaxSequenceNumber.
  // On success, *s may be set to OK, NotFound, or MergeInProgress. Any other
  // status returned indicates a corruption or other unexpected error.
  // If do_merge = false then any Merge Operands encountered for key are simply
  // stored in merge_context.operands_list and never actually merged to get a
  // final value. The raw Merge Operands are eventually returned to the user.
  // @param immutable_memtable Whether this memtable is immutable. Used
  // internally by NewRangeTombstoneIterator(). See comment above
  // NewRangeTombstoneIterator() for more detail.
  bool Get(const LookupKey& key, std::string* value,
           PinnableWideColumns* columns, std::string* timestamp, Status* s,
           MergeContext* merge_context,
           SequenceNumber* max_covering_tombstone_seq, SequenceNumber* seq,
           const ReadOptions& read_opts, bool immutable_memtable,
           ReadCallback* callback = nullptr, bool* is_blob_index = nullptr,
           bool do_merge = true);

  bool Get(const LookupKey& key, std::string* value,
           PinnableWideColumns* columns, std::string* timestamp, Status* s,
           MergeContext* merge_context,
           SequenceNumber* max_covering_tombstone_seq,
           const ReadOptions& read_opts, bool immutable_memtable,
           ReadCallback* callback = nullptr, bool* is_blob_index = nullptr,
           bool do_merge = true) {
    SequenceNumber seq;
    return Get(key, value, columns, timestamp, s, merge_context,
               max_covering_tombstone_seq, &seq, read_opts, immutable_memtable,
               callback, is_blob_index, do_merge);
  }

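  // Illustrative usage sketch: a point lookup. A true return means the
  // search can stop at this memtable (value found, deletion, or error);
  // false means the caller should consult older memtables and SST files.
  // `user_key` and `snapshot_seq` are hypothetical inputs:
  //
  //   std::string value;
  //   Status s;
  //   MergeContext merge_context;
  //   SequenceNumber max_covering_tombstone_seq = 0;
  //   LookupKey lkey(user_key, snapshot_seq);
  //   bool found = mem->Get(lkey, &value, /*columns=*/nullptr,
  //                         /*timestamp=*/nullptr, &s, &merge_context,
  //                         &max_covering_tombstone_seq, ReadOptions(),
  //                         /*immutable_memtable=*/false);
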
  // @param immutable_memtable Whether this memtable is immutable. Used
  // internally by NewRangeTombstoneIterator(). See comment above
  // NewRangeTombstoneIterator() for more detail.
  void MultiGet(const ReadOptions& read_options, MultiGetRange* range,
                ReadCallback* callback, bool immutable_memtable);

  // If `key` exists in current memtable with type value_type and the existing
  // value is at least as large as the new value, updates it in-place.
  // Otherwise adds the new value to the memtable out-of-place.
  //
  // Returns `Status::TryAgain` if the `seq`, `key` combination already exists
  // in the memtable and `MemTableRepFactory::CanHandleDuplicatedKey()` is true.
  // The next attempt should try a larger value for `seq`.
  //
  // REQUIRES: external synchronization to prevent simultaneous
  // operations on the same MemTable.
  Status Update(SequenceNumber seq, ValueType value_type, const Slice& key,
                const Slice& value, const ProtectionInfoKVOS64* kv_prot_info);

  // If `key` exists in current memtable with type `kTypeValue` and the
  // existing value is at least as large as the new value, updates it
  // in-place. Otherwise if `key` exists in current memtable with type
  // `kTypeValue`, adds the new value to the memtable out-of-place.
  //
  // Returns `Status::NotFound` if `key` does not exist in current memtable or
  // the latest version of `key` does not have `kTypeValue`.
  //
  // Returns `Status::TryAgain` if the `seq`, `key` combination already exists
  // in the memtable and `MemTableRepFactory::CanHandleDuplicatedKey()` is true.
  // The next attempt should try a larger value for `seq`.
  //
  // REQUIRES: external synchronization to prevent simultaneous
  // operations on the same MemTable.
  Status UpdateCallback(SequenceNumber seq, const Slice& key,
                        const Slice& delta,
                        const ProtectionInfoKVOS64* kv_prot_info);

  // Returns the number of successive merge entries starting from the newest
  // entry for the key up to the last non-merge entry or last entry for the
  // key in the memtable.
  size_t CountSuccessiveMergeEntries(const LookupKey& key);

  // Update counters and flush status after inserting a whole write batch.
  // Used in concurrent memtable inserts.
  void BatchPostProcess(const MemTablePostProcessInfo& update_counters) {
    num_entries_.fetch_add(update_counters.num_entries,
                           std::memory_order_relaxed);
    data_size_.fetch_add(update_counters.data_size, std::memory_order_relaxed);
    if (update_counters.num_deletes != 0) {
      num_deletes_.fetch_add(update_counters.num_deletes,
                             std::memory_order_relaxed);
    }
    UpdateFlushState();
  }

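  // Illustrative usage sketch: the concurrent-insert protocol. Each writer
  // accumulates counters locally, then publishes them once per batch:
  //
  //   MemTablePostProcessInfo info;
  //   mem->Add(seq, kTypeValue, key, value, /*kv_prot_info=*/nullptr,
  //            /*allow_concurrent=*/true, &info);
  //   ...  // further Add() calls accumulate into `info`
  //   mem->BatchPostProcess(info);  // one atomic fetch_add per counter
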
  // Get total number of entries in the mem table.
  // REQUIRES: external synchronization to prevent simultaneous
  // operations on the same MemTable (unless this Memtable is immutable).
  uint64_t num_entries() const {
    return num_entries_.load(std::memory_order_relaxed);
  }

  // Get total number of deletes in the mem table.
  // REQUIRES: external synchronization to prevent simultaneous
  // operations on the same MemTable (unless this Memtable is immutable).
  uint64_t num_deletes() const {
    return num_deletes_.load(std::memory_order_relaxed);
  }

  uint64_t get_data_size() const {
    return data_size_.load(std::memory_order_relaxed);
  }

  // Dynamically change the memtable's capacity. If set below the current
  // usage, the next key added will trigger a flush. Can only increase size
  // when memtable prefix bloom is disabled, since we can't easily allocate
  // more blooms.
  void UpdateWriteBufferSize(size_t new_write_buffer_size) {
    if (bloom_filter_ == nullptr ||
        new_write_buffer_size < write_buffer_size_) {
      write_buffer_size_.store(new_write_buffer_size,
                               std::memory_order_relaxed);
    }
  }

  // Returns the edits area that is needed for flushing the memtable.
  VersionEdit* GetEdits() { return &edit_; }

  // Returns true if no entry has been inserted into the mem table.
  // REQUIRES: external synchronization to prevent simultaneous
  // operations on the same MemTable (unless this Memtable is immutable).
  bool IsEmpty() const { return first_seqno_ == 0; }

  // Returns the sequence number of the first element that was inserted
  // into the memtable.
  // REQUIRES: external synchronization to prevent simultaneous
  // operations on the same MemTable (unless this Memtable is immutable).
  SequenceNumber GetFirstSequenceNumber() {
    return first_seqno_.load(std::memory_order_relaxed);
  }

  // Sets the sequence number of the first element that was inserted
  // into the memtable.
  // REQUIRES: external synchronization to prevent simultaneous
  // operations on the same MemTable (unless this Memtable is immutable).
  void SetFirstSequenceNumber(SequenceNumber first_seqno) {
    first_seqno_.store(first_seqno, std::memory_order_relaxed);
  }

  // Returns the sequence number that is guaranteed to be smaller than or equal
  // to the sequence number of any key that could be inserted into this
  // memtable. It can then be assumed that any write with a larger (or equal)
  // sequence number will be present in this memtable or a later memtable.
  //
  // If the earliest sequence number could not be determined,
  // kMaxSequenceNumber will be returned.
  SequenceNumber GetEarliestSequenceNumber() {
    return earliest_seqno_.load(std::memory_order_relaxed);
  }

  // Sets the sequence number that is guaranteed to be smaller than or equal
  // to the sequence number of any key that could be inserted into this
  // memtable. It can then be assumed that any write with a larger (or equal)
  // sequence number will be present in this memtable or a later memtable.
  // Used only for the MemPurge operation.
  void SetEarliestSequenceNumber(SequenceNumber earliest_seqno) {
    earliest_seqno_.store(earliest_seqno, std::memory_order_relaxed);
  }

  // DB's latest sequence ID when the memtable is created. This number
  // may be updated to a more recent one before any key is inserted.
  SequenceNumber GetCreationSeq() const { return creation_seq_; }

  void SetCreationSeq(SequenceNumber sn) { creation_seq_ = sn; }

  // Returns the next active logfile number when this memtable is about to
  // be flushed to storage.
  // REQUIRES: external synchronization to prevent simultaneous
  // operations on the same MemTable.
  uint64_t GetNextLogNumber() { return mem_next_logfile_number_; }

  // Sets the next active logfile number when this memtable is about to
  // be flushed to storage.
  // REQUIRES: external synchronization to prevent simultaneous
  // operations on the same MemTable.
  void SetNextLogNumber(uint64_t num) { mem_next_logfile_number_ = num; }

  // If this memtable contains data from a committed two phase transaction,
  // we must take note of the log which contains that data so we can know
  // when to release that log.
  void RefLogContainingPrepSection(uint64_t log);
  uint64_t GetMinLogContainingPrepSection();

  // Notify the underlying storage that no more items will be added.
  // REQUIRES: external synchronization to prevent simultaneous
  // operations on the same MemTable.
  // After MarkImmutable() is called, you should not attempt to
  // write anything to this MemTable (i.e., do not call Add() or Update()).
  void MarkImmutable() {
    table_->MarkReadOnly();
    mem_tracker_.DoneAllocating();
  }

  // Notify the underlying storage that all data it contained has been
  // persisted.
  // REQUIRES: external synchronization to prevent simultaneous
  // operations on the same MemTable.
  void MarkFlushed() { table_->MarkFlushed(); }

  // Return true if the current MemTableRep supports merge operator.
  bool IsMergeOperatorSupported() const {
    return table_->IsMergeOperatorSupported();
  }

459 // inplace update prevents snapshots,
460 bool IsSnapshotSupported() const {
461 return table_
->IsSnapshotSupported() && !moptions_
.inplace_update_support
;
  struct MemTableStats {
    uint64_t size;
    uint64_t count;
  };

  MemTableStats ApproximateStats(const Slice& start_ikey,
                                 const Slice& end_ikey);

  // Get the lock associated with the key.
  port::RWMutex* GetLock(const Slice& key);

  const InternalKeyComparator& GetInternalKeyComparator() const {
    return comparator_.comparator;
  }

  const ImmutableMemTableOptions* GetImmutableMemTableOptions() const {
    return &moptions_;
  }

  uint64_t ApproximateOldestKeyTime() const {
    return oldest_key_time_.load(std::memory_order_relaxed);
  }

  // REQUIRES: db_mutex held.
  void SetID(uint64_t id) { id_ = id; }

  uint64_t GetID() const { return id_; }

  void SetFlushCompleted(bool completed) { flush_completed_ = completed; }

  uint64_t GetFileNumber() const { return file_number_; }

  void SetFileNumber(uint64_t file_num) { file_number_ = file_num; }

  void SetFlushInProgress(bool in_progress) {
    flush_in_progress_ = in_progress;
  }

#ifndef ROCKSDB_LITE
  void SetFlushJobInfo(std::unique_ptr<FlushJobInfo>&& info) {
    flush_job_info_ = std::move(info);
  }

  std::unique_ptr<FlushJobInfo> ReleaseFlushJobInfo() {
    return std::move(flush_job_info_);
  }
#endif  // !ROCKSDB_LITE

  // Returns a heuristic flush decision.
  bool ShouldFlushNow();

  void ConstructFragmentedRangeTombstones();

  // Returns whether a fragmented range tombstone list is already constructed
  // for this memtable. It should be constructed right before a memtable is
  // added to an immutable memtable list. Note that if a memtable does not have
  // any range tombstone, then no range tombstone list will ever be constructed.
  // @param allow_empty Specifies whether a memtable with no range tombstone is
  // considered to have its fragmented range tombstone list constructed.
  bool IsFragmentedRangeTombstonesConstructed(bool allow_empty = true) const {
    if (allow_empty) {
      return fragmented_range_tombstone_list_.get() != nullptr ||
             is_range_del_table_empty_;
    } else {
      return fragmented_range_tombstone_list_.get() != nullptr;
    }
  }

  // Returns Corruption status if verification fails.
  static Status VerifyEntryChecksum(const char* entry,
                                    size_t protection_bytes_per_key,
                                    bool allow_data_in_errors = false);

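  // Illustrative usage sketch: defensive re-verification of an encoded
  // entry; `entry` (a pointer to a length-prefixed memtable entry) and
  // `bytes_per_key` are hypothetical inputs:
  //
  //   Status s = MemTable::VerifyEntryChecksum(entry, bytes_per_key);
  //   if (!s.ok()) {
  //     // Corruption: do not trust the entry's contents.
  //   }
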
 private:
  enum FlushStateEnum { FLUSH_NOT_REQUESTED, FLUSH_REQUESTED, FLUSH_SCHEDULED };

  friend class MemTableIterator;
  friend class MemTableBackwardIterator;
  friend class MemTableList;

  KeyComparator comparator_;
  const ImmutableMemTableOptions moptions_;
  int refs_;
  const size_t kArenaBlockSize;
  AllocTracker mem_tracker_;
  ConcurrentArena arena_;
  std::unique_ptr<MemTableRep> table_;
  std::unique_ptr<MemTableRep> range_del_table_;
  std::atomic_bool is_range_del_table_empty_;

  // Total data size of all data inserted.
  std::atomic<uint64_t> data_size_;
  std::atomic<uint64_t> num_entries_;
  std::atomic<uint64_t> num_deletes_;

  // Dynamically changeable memtable option.
  std::atomic<size_t> write_buffer_size_;

  // These are used to manage memtable flushes to storage.
  bool flush_in_progress_;  // started the flush
  bool flush_completed_;    // finished the flush
  uint64_t file_number_;    // filled up after flush is complete

  // The updates to be applied to the transaction log when this
  // memtable is flushed to storage.
  VersionEdit edit_;

  // The sequence number of the kv that was inserted first.
  std::atomic<SequenceNumber> first_seqno_;

  // The db sequence number at the time of creation, or kMaxSequenceNumber
  // if not set.
  std::atomic<SequenceNumber> earliest_seqno_;

  SequenceNumber creation_seq_;

  // The log files earlier than this number can be deleted.
  uint64_t mem_next_logfile_number_;

  // The earliest log containing a prepared section
  // which has been inserted into this memtable.
  std::atomic<uint64_t> min_prep_log_referenced_;

  // rw locks for inplace updates
  std::vector<port::RWMutex> locks_;

  const SliceTransform* const prefix_extractor_;
  std::unique_ptr<DynamicBloom> bloom_filter_;

  std::atomic<FlushStateEnum> flush_state_;

  // Extract sequential insert prefixes.
  const SliceTransform* insert_with_hint_prefix_extractor_;

  // Insert hints for each prefix.
  UnorderedMapH<Slice, void*, SliceHasher> insert_hints_;

  // Timestamp of oldest key.
  std::atomic<uint64_t> oldest_key_time_;

  // Memtable id to track flush.
  uint64_t id_ = 0;

  // Sequence number of the atomic flush that is responsible for this memtable.
  // The sequence number of atomic flush is a seq, such that no writes with
  // sequence numbers greater than or equal to seq are flushed, while all
  // writes with sequence numbers smaller than seq are flushed.
  SequenceNumber atomic_flush_seqno_;

  // Keeps track of memory usage in table_, arena_, and range_del_table_.
  // Gets refreshed inside `ApproximateMemoryUsage()` or `ShouldFlushNow()`.
  std::atomic<uint64_t> approximate_memory_usage_;

#ifndef ROCKSDB_LITE
  // Flush job info of the current memtable.
  std::unique_ptr<FlushJobInfo> flush_job_info_;
#endif  // !ROCKSDB_LITE

  // Updates flush_state_ using ShouldFlushNow().
  void UpdateFlushState();

  void UpdateOldestKeyTime();

  void GetFromTable(const LookupKey& key,
                    SequenceNumber max_covering_tombstone_seq, bool do_merge,
                    ReadCallback* callback, bool* is_blob_index,
                    std::string* value, PinnableWideColumns* columns,
                    std::string* timestamp, Status* s,
                    MergeContext* merge_context, SequenceNumber* seq,
                    bool* found_final_value, bool* merge_in_progress);

  // Always returns non-null and assumes certain pre-checks (e.g.,
  // is_range_del_table_empty_) are done. This is only valid during the
  // lifetime of the underlying memtable.
  // read_seq and read_options.timestamp will be used as the upper bound
  // for range tombstones.
  FragmentedRangeTombstoneIterator* NewRangeTombstoneIteratorInternal(
      const ReadOptions& read_options, SequenceNumber read_seq,
      bool immutable_memtable);

  // The fragmented range tombstones of this memtable.
  // This is constructed when this memtable becomes immutable
  // if !is_range_del_table_empty_.
  std::unique_ptr<FragmentedRangeTombstoneList>
      fragmented_range_tombstone_list_;

  // Makes sure there is a single range tombstone writer to invalidate cache.
  std::mutex range_del_mutex_;
  CoreLocalArray<std::shared_ptr<FragmentedRangeTombstoneListCache>>
      cached_range_tombstone_;

  void UpdateEntryChecksum(const ProtectionInfoKVOS64* kv_prot_info,
                           const Slice& key, const Slice& value, ValueType type,
                           SequenceNumber s, char* checksum_ptr);
};

extern const char* EncodeKey(std::string* scratch, const Slice& target);

}  // namespace ROCKSDB_NAMESPACE