#include "db/logs_with_prep_tracker.h"
#include "db/memtable.h"
#include "db/range_del_aggregator.h"
+#include "file/filename.h"
+#include "logging/log_buffer.h"
#include "monitoring/instrumented_mutex.h"
#include "rocksdb/db.h"
#include "rocksdb/iterator.h"
#include "rocksdb/options.h"
#include "rocksdb/types.h"
#include "util/autovector.h"
-#include "util/filename.h"
-#include "util/log_buffer.h"
-namespace rocksdb {
+namespace ROCKSDB_NAMESPACE {
class ColumnFamilyData;
class InternalKeyComparator;
class MergeIteratorBuilder;
class MemTableList;
+struct FlushJobInfo;
+
// keeps a list of immutable memtables in a vector. the list is immutable
// if refcount is bigger than one. It is used as a state for Get() and
// Iterator code paths
explicit MemTableListVersion(size_t* parent_memtable_list_memory_usage,
MemTableListVersion* old = nullptr);
explicit MemTableListVersion(size_t* parent_memtable_list_memory_usage,
- int max_write_buffer_number_to_maintain);
+ int max_write_buffer_number_to_maintain,
+ int64_t max_write_buffer_size_to_maintain);
void Ref();
void Unref(autovector<MemTable*>* to_delete = nullptr);
read_opts, callback, is_blob_index);
}
+ void MultiGet(const ReadOptions& read_options, MultiGetRange* range,
+ ReadCallback* callback, bool* is_blob);
+
+ // Returns all the merge operands corresponding to the key by searching all
+ // memtables starting from the most recent one.
+ bool GetMergeOperands(const LookupKey& key, Status* s,
+ MergeContext* merge_context,
+ SequenceNumber* max_covering_tombstone_seq,
+ const ReadOptions& read_opts);
+
// Similar to Get(), but searches the Memtable history of memtables that
// have already been flushed. Should only be used from in-memory only
// queries (such as Transaction validation) as the history may contain
// REQUIRE: m is an immutable memtable
void Remove(MemTable* m, autovector<MemTable*>* to_delete);
- void TrimHistory(autovector<MemTable*>* to_delete);
+ void TrimHistory(autovector<MemTable*>* to_delete, size_t usage);
bool GetFromList(std::list<MemTable*>* list, const LookupKey& key,
std::string* value, Status* s, MergeContext* merge_context,
void UnrefMemTable(autovector<MemTable*>* to_delete, MemTable* m);
+ // Calculate the total amount of memory used by memlist_ and memlist_history_
+ // excluding the last MemTable in memlist_history_. The reason for excluding
+ // the last MemTable is to see if dropping the last MemTable will keep total
+ // memory usage above or equal to max_write_buffer_size_to_maintain_
+ size_t ApproximateMemoryUsageExcludingLast() const;
+
+ // Whether this version contains flushed memtables that are only kept around
+ // for transaction conflict checking.
+ bool HasHistory() const { return !memlist_history_.empty(); }
+
+ // Returns whether the memtables (at `usage` bytes of mutable-memtable size,
+ // per the TrimHistory() contract) exceed the configured maintain limit —
+ // presumably checked against max_write_buffer_size_to_maintain_ /
+ // max_write_buffer_number_to_maintain_; TODO(review): confirm against the
+ // definition in the .cc file.
+ bool MemtableLimitExceeded(size_t usage);
+
// Immutable MemTables that have not yet been flushed.
std::list<MemTable*> memlist_;
std::list<MemTable*> memlist_history_;
// Maximum number of MemTables to keep in memory (including both flushed
- // and not-yet-flushed tables).
const int max_write_buffer_number_to_maintain_;
+ // Maximum size of MemTables to keep in memory (including both flushed
+ // and not-yet-flushed tables).
+ const int64_t max_write_buffer_size_to_maintain_;
int refs_ = 0;
// recoverability from a crash.
//
//
-// Other than imm_flush_needed, this class is not thread-safe and requires
-// external synchronization (such as holding the db mutex or being on the
-// write thread.)
+// Other than imm_flush_needed and imm_trim_needed, this class is not
+// thread-safe and requires external synchronization (such as holding the db
+// mutex or being on the write thread.)
class MemTableList {
public:
// A list of memtables.
+ // Constructs an empty MemTableList and installs an initial (empty)
+ // MemTableListVersion. `max_write_buffer_size_to_maintain` bounds the total
+ // bytes of memtables (flushed and unflushed) retained in memory; the cached
+ // usage/history fields start at zero/false and are updated as versions are
+ // installed.
explicit MemTableList(int min_write_buffer_number_to_merge,
- int max_write_buffer_number_to_maintain)
+ int max_write_buffer_number_to_maintain,
+ int64_t max_write_buffer_size_to_maintain)
: imm_flush_needed(false),
+ imm_trim_needed(false),
min_write_buffer_number_to_merge_(min_write_buffer_number_to_merge),
current_(new MemTableListVersion(&current_memory_usage_,
- max_write_buffer_number_to_maintain)),
+ max_write_buffer_number_to_maintain,
+ max_write_buffer_size_to_maintain)),
num_flush_not_started_(0),
commit_in_progress_(false),
- flush_requested_(false) {
+ flush_requested_(false),
+ current_memory_usage_(0),
+ current_memory_usage_excluding_last_(0),
+ current_has_history_(false) {
current_->Ref();
- current_memory_usage_ = 0;
}
// Should not delete MemTableList without making sure MemTableList::current()
// is Unref()'d.
~MemTableList() {}
+ // Returns the current (latest) MemTableListVersion. Const-qualified: it
+ // does not modify the list. Per the class contract above, callers still
+ // need external synchronization (e.g. the db mutex) around use of the
+ // returned version.
- MemTableListVersion* current() { return current_; }
+ MemTableListVersion* current() const { return current_; }
// so that background threads can detect non-nullptr pointer to
// determine whether there is anything more to start flushing.
std::atomic<bool> imm_flush_needed;
+ std::atomic<bool> imm_trim_needed;
+
// Returns the total number of memtables in the list that haven't yet
// been flushed and logged.
int NumNotFlushed() const;
const autovector<MemTable*>& m, LogsWithPrepTracker* prep_tracker,
VersionSet* vset, InstrumentedMutex* mu, uint64_t file_number,
autovector<MemTable*>* to_delete, Directory* db_directory,
- LogBuffer* log_buffer);
+ LogBuffer* log_buffer,
+ std::list<std::unique_ptr<FlushJobInfo>>* committed_flush_jobs_info);
// New memtables are inserted at the front of the list.
// Takes ownership of the referenced held on *m by the caller of Add().
// Returns an estimate of the number of bytes of data in use.
size_t ApproximateMemoryUsage();
+ // Returns the cached current_memory_usage_excluding_last_ value.
+ size_t ApproximateMemoryUsageExcludingLast() const;
+
+ // Returns the cached current_has_history_ value.
+ bool HasHistory() const;
+
+ // Updates current_memory_usage_excluding_last_ and current_has_history_
+ // from MemTableListVersion. Must be called whenever InstallNewVersion is
+ // called.
+ void UpdateCachedValuesFromMemTableListVersion();
+
+ // `usage` is the current size of the mutable Memtable. When
+ // max_write_buffer_size_to_maintain is used, total size of mutable and
+ // immutable memtables is checked against it to decide whether to trim
+ // memtable list.
+ void TrimHistory(autovector<MemTable*>* to_delete, size_t usage);
+
// Returns an estimate of the number of bytes of data used by
// the unflushed mem-tables.
size_t ApproximateUnflushedMemTablesMemoryUsage();
bool HasFlushRequested() { return flush_requested_; }
+ // Returns true if a trim history should be scheduled and the caller should
+ // be the one to schedule it
+ bool MarkTrimHistoryNeeded() {
+ auto expected = false;
+ // CAS false -> true: exactly one concurrent caller observes the transition
+ // and wins the right to schedule the trim. Relaxed ordering on both the
+ // success and failure paths — the flag itself carries no data dependency.
+ return imm_trim_needed.compare_exchange_strong(
+ expected, true, std::memory_order_relaxed, std::memory_order_relaxed);
+ }
+
+ // Resets imm_trim_needed from true back to false so that a later
+ // MarkTrimHistoryNeeded() can request scheduling again. The CAS form makes
+ // a concurrent reset harmless: if the flag is already false, this is a
+ // no-op; the return value is intentionally ignored.
+ void ResetTrimHistoryNeeded() {
+ auto expected = true;
+ imm_trim_needed.compare_exchange_strong(
+ expected, false, std::memory_order_relaxed, std::memory_order_relaxed);
+ }
+
// Copying allowed
// MemTableList(const MemTableList&);
// void operator=(const MemTableList&);
}
}
+ // Used only by DBImplSecondary during log replay.
+ // Remove memtables whose data were written before the WAL with log_number
+ // was created, i.e. mem->GetNextLogNumber() <= log_number. The memtables are
+ // not freed, but put into a vector for future deref and reclamation.
+ void RemoveOldMemTables(uint64_t log_number,
+ autovector<MemTable*>* to_delete);
+
private:
friend Status InstallMemtableAtomicFlushResults(
const autovector<MemTableList*>* imm_lists,
// The current memory usage.
size_t current_memory_usage_;
+
+ // Cached value of current_->ApproximateMemoryUsageExcludingLast().
+ std::atomic<size_t> current_memory_usage_excluding_last_;
+
+ // Cached value of current_->HasHistory().
+ std::atomic<bool> current_has_history_;
};
// Installs memtable atomic flush results.
InstrumentedMutex* mu, const autovector<FileMetaData*>& file_meta,
autovector<MemTable*>* to_delete, Directory* db_directory,
LogBuffer* log_buffer);
-} // namespace rocksdb
+} // namespace ROCKSDB_NAMESPACE