#include <string>
#include <vector>
-#include "db/dbformat.h"
#include "db/logs_with_prep_tracker.h"
#include "db/memtable.h"
#include "db/range_del_aggregator.h"
// If any operation was found for this key, its most recent sequence number
// will be stored in *seq on success (regardless of whether true/false is
// returned). Otherwise, *seq will be set to kMaxSequenceNumber.
- bool Get(const LookupKey& key, std::string* value, std::string* timestamp,
- Status* s, MergeContext* merge_context,
+ bool Get(const LookupKey& key, std::string* value,
+ PinnableWideColumns* columns, std::string* timestamp, Status* s,
+ MergeContext* merge_context,
SequenceNumber* max_covering_tombstone_seq, SequenceNumber* seq,
const ReadOptions& read_opts, ReadCallback* callback = nullptr,
bool* is_blob_index = nullptr);
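  //
  // A minimal caller sketch (editorial illustration; `mem_list_version` and
  // `lookup_key` are hypothetical names, not part of this header):
  //
  //   SequenceNumber seq = kMaxSequenceNumber;
  //   SequenceNumber max_covering_tombstone_seq = 0;
  //   MergeContext merge_context;
  //   PinnableWideColumns columns;
  //   std::string value;
  //   Status s;
  //   bool found = mem_list_version->Get(
  //       lookup_key, &value, &columns, /*timestamp=*/nullptr, &s,
  //       &merge_context, &max_covering_tombstone_seq, &seq, ReadOptions());
  //   // Whether or not `found` is true, `seq` now holds the most recent
  //   // sequence number for the key, or kMaxSequenceNumber if no operation
  //   // for the key was found.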
- bool Get(const LookupKey& key, std::string* value, std::string* timestamp,
- Status* s, MergeContext* merge_context,
+ bool Get(const LookupKey& key, std::string* value,
+ PinnableWideColumns* columns, std::string* timestamp, Status* s,
+ MergeContext* merge_context,
SequenceNumber* max_covering_tombstone_seq,
const ReadOptions& read_opts, ReadCallback* callback = nullptr,
bool* is_blob_index = nullptr) {
SequenceNumber seq;
- return Get(key, value, timestamp, s, merge_context,
+ return Get(key, value, columns, timestamp, s, merge_context,
max_covering_tombstone_seq, &seq, read_opts, callback,
is_blob_index);
}
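  // Note on the overload above: it simply discards the reported sequence
  // number through a local; call the first variant when the most recent
  // sequence number for the key is needed.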
void MultiGet(const ReadOptions& read_options, MultiGetRange* range,
- ReadCallback* callback, bool* is_blob);
+ ReadCallback* callback);
  // Returns all the merge operands corresponding to the key by searching all
  // memtables starting from the most recent one.
  bool GetMergeOperands(const LookupKey& key, Status* s,
                        MergeContext* merge_context,
                        SequenceNumber* max_covering_tombstone_seq,
                        const ReadOptions& read_opts);

  // Similar to Get(), but searches the Memtable history of memtables that
  // have already been flushed. Should only be used from in-memory only
  // queries (such as Transaction validation) as the history may contain
  // writes that are also present in the SST files.
bool GetFromHistory(const LookupKey& key, std::string* value,
- std::string* timestamp, Status* s,
- MergeContext* merge_context,
+ PinnableWideColumns* columns, std::string* timestamp,
+ Status* s, MergeContext* merge_context,
SequenceNumber* max_covering_tombstone_seq,
SequenceNumber* seq, const ReadOptions& read_opts,
bool* is_blob_index = nullptr);
bool GetFromHistory(const LookupKey& key, std::string* value,
- std::string* timestamp, Status* s,
- MergeContext* merge_context,
+ PinnableWideColumns* columns, std::string* timestamp,
+ Status* s, MergeContext* merge_context,
SequenceNumber* max_covering_tombstone_seq,
const ReadOptions& read_opts,
bool* is_blob_index = nullptr) {
SequenceNumber seq;
- return GetFromHistory(key, value, timestamp, s, merge_context,
+ return GetFromHistory(key, value, columns, timestamp, s, merge_context,
max_covering_tombstone_seq, &seq, read_opts,
is_blob_index);
}
  void AddIterators(const ReadOptions& options,
                    std::vector<InternalIterator*>* iterator_list,
                    Arena* arena);
void AddIterators(const ReadOptions& options,
- MergeIteratorBuilder* merge_iter_builder);
+ MergeIteratorBuilder* merge_iter_builder,
+ bool add_range_tombstone_iter);
uint64_t GetTotalNumEntries() const;
  // Returns the value of MemTable::GetEarliestSequenceNumber() on the most
  // recent MemTable in this list, or kMaxSequenceNumber if the list is empty.
  // If include_history=true, will also search Memtables in MemTableList
  // History.
SequenceNumber GetEarliestSequenceNumber(bool include_history = false) const;
+ // Returns the first sequence number from the memtable list, i.e. the
+ // smallest of the first sequence numbers of all memtables in the list.
+ // Returns kMaxSequenceNumber if the list is empty.
+ SequenceNumber GetFirstSequenceNumber() const;
+
private:
friend class MemTableList;
const autovector<ColumnFamilyData*>& cfds,
const autovector<const MutableCFOptions*>& mutable_cf_options_list,
const autovector<const autovector<MemTable*>*>& mems_list,
- VersionSet* vset, InstrumentedMutex* mu,
- const autovector<FileMetaData*>& file_meta,
+ VersionSet* vset, LogsWithPrepTracker* prep_tracker,
+ InstrumentedMutex* mu, const autovector<FileMetaData*>& file_meta,
+ const autovector<std::list<std::unique_ptr<FlushJobInfo>>*>&
+ committed_flush_jobs_info,
autovector<MemTable*>* to_delete, FSDirectory* db_directory,
LogBuffer* log_buffer);
bool TrimHistory(autovector<MemTable*>* to_delete, size_t usage);
bool GetFromList(std::list<MemTable*>* list, const LookupKey& key,
- std::string* value, std::string* timestamp, Status* s,
+ std::string* value, PinnableWideColumns* columns,
+ std::string* timestamp, Status* s,
MergeContext* merge_context,
SequenceNumber* max_covering_tombstone_seq,
                   SequenceNumber* seq, const ReadOptions& read_opts,
                   ReadCallback* callback = nullptr,
                   bool* is_blob_index = nullptr);

  // Calculate the total amount of memory used by memlist_ and memlist_history_
// excluding the last MemTable in memlist_history_. The reason for excluding
// the last MemTable is to see if dropping the last MemTable will keep total
// memory usage above or equal to max_write_buffer_size_to_maintain_
- size_t ApproximateMemoryUsageExcludingLast() const;
+ size_t MemoryAllocatedBytesExcludingLast() const;
// Whether this version contains flushed memtables that are only kept around
// for transaction conflict checking.
commit_in_progress_(false),
flush_requested_(false),
current_memory_usage_(0),
- current_memory_usage_excluding_last_(0),
+ current_memory_allocated_bytes_excluding_last_(0),
current_has_history_(false) {
current_->Ref();
}
  // Returns true if there is at least one memtable on which flush has
  // not yet started.
bool IsFlushPending() const;
+ // Returns true if there is at least one memtable that is pending flush or
+ // flushing.
+ bool IsFlushPendingOrRunning() const;
+
  // Returns the earliest memtables that need to be flushed. The returned
// memtables are guaranteed to be in the ascending order of created time.
- void PickMemtablesToFlush(const uint64_t* max_memtable_id,
- autovector<MemTable*>* mems);
+ void PickMemtablesToFlush(uint64_t max_memtable_id,
+ autovector<MemTable*>* mems,
+ uint64_t* max_next_log_number = nullptr);
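  // (Editorial note: judging by the parameter names, `max_memtable_id` is an
  // inclusive upper bound on the IDs of memtables eligible to be picked, and
  // `max_next_log_number`, when non-null, is an out-parameter derived from
  // the picked memtables; neither reading is stated in this header.)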
  // Reset the status of the given memtables back to the pending state so
  // that they can get picked up again on the next round of flush.
autovector<MemTable*>* to_delete, FSDirectory* db_directory,
LogBuffer* log_buffer,
std::list<std::unique_ptr<FlushJobInfo>>* committed_flush_jobs_info,
- IOStatus* io_s);
+ bool write_edits = true);
// New memtables are inserted at the front of the list.
  // Takes ownership of the reference held on *m by the caller of Add().
+ // By default, adding memtables will flag that the memtable list needs to be
+ // flushed, but in certain situations, like after a mempurge, we may want to
+ // avoid flushing the memtable list upon addition of a memtable.
void Add(MemTable* m, autovector<MemTable*>* to_delete);
// Returns an estimate of the number of bytes of data in use.
size_t ApproximateMemoryUsage();
- // Returns the cached current_memory_usage_excluding_last_ value.
- size_t ApproximateMemoryUsageExcludingLast() const;
+ // Returns the cached current_memory_allocated_bytes_excluding_last_ value.
+ size_t MemoryAllocatedBytesExcludingLast() const;
// Returns the cached current_has_history_ value.
bool HasHistory() const;
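  // Design note (editorial): the two getters above return values cached in
  // std::atomic members (current_memory_allocated_bytes_excluding_last_ and
  // current_has_history_ below), presumably so callers can read them without
  // holding the DB mutex; the cache is refreshed by
  // UpdateCachedValuesFromMemTableListVersion().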
- // Updates current_memory_usage_excluding_last_ and current_has_history_
- // from MemTableListVersion. Must be called whenever InstallNewVersion is
- // called.
+ // Updates current_memory_allocated_bytes_excluding_last_ and
+ // current_has_history_ from MemTableListVersion. Must be called whenever
+ // InstallNewVersion is called.
void UpdateCachedValuesFromMemTableListVersion();
  // `usage` is the current size of the mutable Memtable. When
  // max_write_buffer_size_to_maintain is used, the total size of mutable and
  // immutable memtables is checked against it to decide whether to trim the
  // memtable list.
  bool TrimHistory(autovector<MemTable*>* to_delete, size_t usage);

  // Request a flush of all existing memtables to storage. This will cause
  // future calls to IsFlushPending() to return true if this list is
  // non-empty (regardless of the min_write_buffer_number_to_merge
  // parameter). This flush request will persist until the next time
  // PickMemtablesToFlush() is called.
- void FlushRequested() { flush_requested_ = true; }
+ void FlushRequested() {
+   flush_requested_ = true;
+   // If some memtables stored in imm() do not trigger a flush (e.g. a
+   // mempurge output memtable), update imm_flush_needed.
+   // Note: if a race causes imm_flush_needed to be set to true while
+   // num_flush_not_started_ == 0, there is no impact whatsoever:
+   // imm_flush_needed is only used in an assert in IsFlushPending().
+   if (num_flush_not_started_ > 0) {
+     imm_flush_needed.store(true, std::memory_order_release);
+   }
+ }
bool HasFlushRequested() { return flush_requested_; }
  // Returns the min log containing the prep section after memtables listed in
// `memtables_to_flush` are flushed and their status is persisted in manifest.
uint64_t PrecomputeMinLogContainingPrepSection(
- const autovector<MemTable*>& memtables_to_flush);
+ const std::unordered_set<MemTable*>* memtables_to_flush = nullptr);
  uint64_t GetEarliestMemTableID() const {
    auto& memlist = current_->memlist_;
    if (memlist.empty()) {
      return std::numeric_limits<uint64_t>::max();
    }
    return memlist.back()->GetID();
  }
const autovector<ColumnFamilyData*>& cfds,
const autovector<const MutableCFOptions*>& mutable_cf_options_list,
const autovector<const autovector<MemTable*>*>& mems_list,
- VersionSet* vset, InstrumentedMutex* mu,
- const autovector<FileMetaData*>& file_meta,
+ VersionSet* vset, LogsWithPrepTracker* prep_tracker,
+ InstrumentedMutex* mu, const autovector<FileMetaData*>& file_meta,
+ const autovector<std::list<std::unique_ptr<FlushJobInfo>>*>&
+ committed_flush_jobs_info,
autovector<MemTable*>* to_delete, FSDirectory* db_directory,
LogBuffer* log_buffer);
// The current memory usage.
size_t current_memory_usage_;
- // Cached value of current_->ApproximateMemoryUsageExcludingLast().
- std::atomic<size_t> current_memory_usage_excluding_last_;
+ // Cached value of current_->MemoryAllocatedBytesExcludingLast().
+ std::atomic<size_t> current_memory_allocated_bytes_excluding_last_;
// Cached value of current_->HasHistory().
std::atomic<bool> current_has_history_;
const autovector<ColumnFamilyData*>& cfds,
const autovector<const MutableCFOptions*>& mutable_cf_options_list,
const autovector<const autovector<MemTable*>*>& mems_list, VersionSet* vset,
- InstrumentedMutex* mu, const autovector<FileMetaData*>& file_meta,
+ LogsWithPrepTracker* prep_tracker, InstrumentedMutex* mu,
+ const autovector<FileMetaData*>& file_meta,
+ const autovector<std::list<std::unique_ptr<FlushJobInfo>>*>&
+ committed_flush_jobs_info,
autovector<MemTable*>* to_delete, FSDirectory* db_directory,
LogBuffer* log_buffer);
} // namespace ROCKSDB_NAMESPACE