X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=ceph%2Fsrc%2Frocksdb%2Fdb%2Fmemtable.h;h=709e2061e5b5ab5d42cc816d56e046fab6543c69;hb=494da23a05e25ed98f5539f3b89e6af3cafe3fec;hp=e4594100781c520d2b46baf6afc6591ecb9055d4;hpb=931c18d5142274109ebfaecd012da8e5a3ebc67e;p=ceph.git

diff --git a/ceph/src/rocksdb/db/memtable.h b/ceph/src/rocksdb/db/memtable.h
index e45941007..709e2061e 100644
--- a/ceph/src/rocksdb/db/memtable.h
+++ b/ceph/src/rocksdb/db/memtable.h
@@ -16,7 +16,7 @@
 #include <unordered_map>
 #include <vector>
 #include "db/dbformat.h"
-#include "db/range_del_aggregator.h"
+#include "db/range_tombstone_fragmenter.h"
 #include "db/read_callback.h"
 #include "db/version_edit.h"
 #include "monitoring/instrumented_mutex.h"
@@ -41,6 +41,7 @@ struct ImmutableMemTableOptions {
   size_t arena_block_size;
   uint32_t memtable_prefix_bloom_bits;
   size_t memtable_huge_page_size;
+  bool memtable_whole_key_filtering;
   bool inplace_update_support;
   size_t inplace_update_num_locks;
   UpdateStatus (*inplace_callback)(char* existing_value,
@@ -158,7 +159,8 @@ class MemTable {
   // those allocated in arena.
   InternalIterator* NewIterator(const ReadOptions& read_options, Arena* arena);
 
-  InternalIterator* NewRangeTombstoneIterator(const ReadOptions& read_options);
+  FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator(
+      const ReadOptions& read_options, SequenceNumber read_seq);
 
   // Add an entry into memtable that maps key to value at the
   // specified sequence number and with the specified type.
@@ -187,17 +189,19 @@ class MemTable {
   // On success, *s may be set to OK, NotFound, or MergeInProgress. Any other
   // status returned indicates a corruption or other unexpected error.
   bool Get(const LookupKey& key, std::string* value, Status* s,
-           MergeContext* merge_context, RangeDelAggregator* range_del_agg,
-           SequenceNumber* seq, const ReadOptions& read_opts,
-           ReadCallback* callback = nullptr, bool* is_blob_index = nullptr);
+           MergeContext* merge_context,
+           SequenceNumber* max_covering_tombstone_seq, SequenceNumber* seq,
+           const ReadOptions& read_opts, ReadCallback* callback = nullptr,
+           bool* is_blob_index = nullptr);
 
   bool Get(const LookupKey& key, std::string* value, Status* s,
-           MergeContext* merge_context, RangeDelAggregator* range_del_agg,
+           MergeContext* merge_context,
+           SequenceNumber* max_covering_tombstone_seq,
            const ReadOptions& read_opts, ReadCallback* callback = nullptr,
            bool* is_blob_index = nullptr) {
     SequenceNumber seq;
-    return Get(key, value, s, merge_context, range_del_agg, &seq, read_opts,
-               callback, is_blob_index);
+    return Get(key, value, s, merge_context, max_covering_tombstone_seq, &seq,
+               read_opts, callback, is_blob_index);
   }
 
   // Attempts to update the new_value inplace, else does normal Add
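Note: the two API changes above replace the memtable's RangeDelAggregator-based range-deletion plumbing. NewRangeTombstoneIterator now returns a FragmentedRangeTombstoneIterator bound to a read snapshot, and MemTable::Get reports the newest covering range tombstone through a SequenceNumber out-parameter. The sketch below is not part of the diff; it is a minimal illustration of how a call site inside the RocksDB tree would adapt, assuming in-tree headers, with mem, user_key, and snapshot_seq as hypothetical placeholders.

// Illustrative only; compiles against the in-tree headers this diff touches.
#include <memory>
#include <string>

#include "db/memtable.h"
#include "db/merge_context.h"

namespace rocksdb {

bool LookupInMemTable(MemTable* mem, const Slice& user_key,
                      SequenceNumber snapshot_seq,
                      const ReadOptions& read_opts, std::string* value) {
  LookupKey lkey(user_key, snapshot_seq);
  Status s;
  MergeContext merge_context;
  // Replaces the old RangeDelAggregator* argument: on return it holds the
  // sequence number of the newest range tombstone covering the key, if any.
  SequenceNumber max_covering_tombstone_seq = 0;
  bool found = mem->Get(lkey, value, &s, &merge_context,
                        &max_covering_tombstone_seq, read_opts);

  // The tombstone iterator is now pre-fragmented and snapshot-aware; it may
  // be nullptr when the memtable holds no range deletions.
  std::unique_ptr<FragmentedRangeTombstoneIterator> range_del_iter(
      mem->NewRangeTombstoneIterator(read_opts, snapshot_seq));
  if (range_del_iter != nullptr) {
    for (range_del_iter->SeekToFirst(); range_del_iter->Valid();
         range_del_iter->Next()) {
      // Each fragment exposes start_key(), end_key(), and seq().
    }
  }
  return found;
}

}  // namespace rocksdb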
@@ -262,12 +266,16 @@ class MemTable {
     return num_deletes_.load(std::memory_order_relaxed);
   }
 
+  uint64_t get_data_size() const {
+    return data_size_.load(std::memory_order_relaxed);
+  }
+
   // Dynamically change the memtable's capacity. If set below the current usage,
   // the next key added will trigger a flush. Can only increase size when
   // memtable prefix bloom is disabled, since we can't easily allocate more
   // space.
   void UpdateWriteBufferSize(size_t new_write_buffer_size) {
-    if (prefix_bloom_ == nullptr ||
+    if (bloom_filter_ == nullptr ||
         new_write_buffer_size < write_buffer_size_) {
       write_buffer_size_.store(new_write_buffer_size,
                                std::memory_order_relaxed);
@@ -383,6 +391,16 @@ class MemTable {
 
   uint64_t GetID() const { return id_; }
 
+  void SetFlushCompleted(bool completed) { flush_completed_ = completed; }
+
+  uint64_t GetFileNumber() const { return file_number_; }
+
+  void SetFileNumber(uint64_t file_num) { file_number_ = file_num; }
+
+  void SetFlushInProgress(bool in_progress) {
+    flush_in_progress_ = in_progress;
+  }
+
  private:
   enum FlushStateEnum { FLUSH_NOT_REQUESTED, FLUSH_REQUESTED, FLUSH_SCHEDULED };
 
@@ -396,9 +414,9 @@ class MemTable {
   const size_t kArenaBlockSize;
   AllocTracker mem_tracker_;
   ConcurrentArena arena_;
-  unique_ptr<MemTableRep> table_;
-  unique_ptr<MemTableRep> range_del_table_;
-  bool is_range_del_table_empty_;
+  std::unique_ptr<MemTableRep> table_;
+  std::unique_ptr<MemTableRep> range_del_table_;
+  std::atomic_bool is_range_del_table_empty_;
 
   // Total data size of all data inserted
   std::atomic<uint64_t> data_size_;
@@ -437,7 +455,7 @@ class MemTable {
   std::vector<port::RWMutex> locks_;
 
   const SliceTransform* const prefix_extractor_;
-  std::unique_ptr<DynamicBloom> prefix_bloom_;
+  std::unique_ptr<DynamicBloom> bloom_filter_;
 
   std::atomic<FlushStateEnum> flush_state_;
 
@@ -455,6 +473,12 @@ class MemTable {
   // Memtable id to track flush.
   uint64_t id_ = 0;
 
+  // Sequence number of the atomic flush that is responsible for this memtable.
+  // The sequence number of atomic flush is a seq, such that no writes with
+  // sequence numbers greater than or equal to seq are flushed, while all
+  // writes with sequence number smaller than seq are flushed.
+  SequenceNumber atomic_flush_seqno_;
+
   // Returns a heuristic flush decision
   bool ShouldFlushNow() const;
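Note: the prefix_bloom_ to bloom_filter_ rename and the new memtable_whole_key_filtering field reflect that the memtable's bloom filter can now index whole keys in addition to prefixes. A minimal configuration sketch follows, assuming this field is surfaced through the public option of the same name on AdvancedColumnFamilyOptions (an assumption; the public API is not shown in this diff), and using a hypothetical database path.

// Illustrative only; enables whole-key memtable filtering via public options.
#include "rocksdb/db.h"
#include "rocksdb/options.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  // The memtable bloom is sized by this ratio; it must be non-zero for
  // either prefix or whole-key filtering to take effect.
  options.memtable_prefix_bloom_size_ratio = 0.1;
  options.memtable_whole_key_filtering = true;  // assumed public counterpart

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/memtable_wkf_demo", &db);
  delete db;  // safe no-op if Open failed and db is still nullptr
  return s.ok() ? 0 : 1;
}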