#include "db/dbformat.h"
#include "db/wal_edit.h"
#include "memory/arena.h"
+#include "port/malloc.h"
+#include "rocksdb/advanced_options.h"
#include "rocksdb/cache.h"
#include "table/table_reader.h"
+#include "table/unique_id_impl.h"
#include "util/autovector.h"
namespace ROCKSDB_NAMESPACE {
kLogNumber = 2,
kNextFileNumber = 3,
kLastSequence = 4,
- kCompactPointer = 5,
+ kCompactCursor = 5,
kDeletedFile = 6,
kNewFile = 7,
// 8 was used for large value refs
kInAtomicGroup = 300,
+ kBlobFileAddition = 400,
+ kBlobFileGarbage,
+
// Mask for an unidentified tag from the future which can be safely ignored.
kTagSafeIgnoreMask = 1 << 13,
// Forward compatible (aka ignorable) records
kDbId,
- kBlobFileAddition,
- kBlobFileGarbage,
+ kBlobFileAddition_DEPRECATED,
+ kBlobFileGarbage_DEPRECATED,
kWalAddition,
kWalDeletion,
+ kFullHistoryTsLow,
+ kWalAddition2,
+ kWalDeletion2,
};
enum NewFileCustomTag : uint32_t {
kNeedCompaction = 2,
// Since Manifest is not entirely forward-compatible, we currently encode
// kMinLogNumberToKeep as part of NewFile as a hack. This should be removed
- // when manifest becomes forward-comptabile.
+ // when manifest becomes forward-compatible.
kMinLogNumberToKeepHack = 3,
kOldestBlobFileNumber = 4,
kOldestAncesterTime = 5,
kFileCreationTime = 6,
kFileChecksum = 7,
kFileChecksumFuncName = 8,
+ kTemperature = 9,
+ kMinTimestamp = 10,
+ kMaxTimestamp = 11,
+ kUniqueId = 12,
// If this bit for the custom tag is set, opening DB should fail if
// we don't know this field.
// Table reader in table_reader_handle
TableReader* table_reader;
uint64_t packed_number_and_path_id;
- uint64_t file_size; // File size in bytes
+ uint64_t file_size; // File size in bytes
SequenceNumber smallest_seqno; // The smallest seqno in this file
SequenceNumber largest_seqno; // The largest seqno in this file
return packed_number_and_path_id & kFileNumberMask;
}
// Returns packed_number_and_path_id divided by (kFileNumberMask + 1),
// narrowed to 32 bits.
// NOTE(review): presumably this is the path id stored in the bits above the
// file number -- confirm against the code that packs the value.
uint32_t GetPathId() const {
- return static_cast<uint32_t>(
- packed_number_and_path_id / (kFileNumberMask + 1));
+ return static_cast<uint32_t>(packed_number_and_path_id /
+ (kFileNumberMask + 1));
}
// Returns file_size, i.e. the file size in bytes.
uint64_t GetFileSize() const { return file_size; }
};
struct FileMetaData {
FileDescriptor fd;
- InternalKey smallest; // Smallest internal key served by table
- InternalKey largest; // Largest internal key served by table
+ InternalKey smallest; // Smallest internal key served by table
+ InternalKey largest; // Largest internal key served by table
// Needs to be disposed when refs becomes 0.
Cache::Handle* table_reader_handle = nullptr;
bool marked_for_compaction = false; // True if client asked us nicely to
// compact this file.
+ Temperature temperature = Temperature::kUnknown;
// Used only in BlobDB. The file number of the oldest blob file this SST file
// refers to. 0 is an invalid value; BlobDB numbers the files starting from 1.
// The file could be the compaction output from other SST files, which could
// in turn be outputs for compact older SST files. We track the memtable
- // flush timestamp for the oldest SST file that eventaully contribute data
+ // flush timestamp for the oldest SST file that eventually contributes data
// to this file. 0 means the information is not available.
uint64_t oldest_ancester_time = kUnknownOldestAncesterTime;
// File checksum function name
std::string file_checksum_func_name = kUnknownFileChecksumFuncName;
+ // SST unique id
+ UniqueId64x2 unique_id{};
+
FileMetaData() = default;
// Builds a FileMetaData from explicit values. The file number, path id, file
// size and the two sequence numbers are forwarded to the FileDescriptor `fd`;
// each remaining argument initializes the member of the corresponding name.
// The body only fires the "FileMetaData::FileMetaData" test sync point,
// passing `this`.
FileMetaData(uint64_t file, uint32_t file_path_id, uint64_t file_size,
const InternalKey& smallest_key, const InternalKey& largest_key,
const SequenceNumber& smallest_seq,
const SequenceNumber& largest_seq, bool marked_for_compact,
- uint64_t oldest_blob_file, uint64_t _oldest_ancester_time,
- uint64_t _file_creation_time, const std::string& _file_checksum,
- const std::string& _file_checksum_func_name)
+ Temperature _temperature, uint64_t oldest_blob_file,
+ uint64_t _oldest_ancester_time, uint64_t _file_creation_time,
+ const std::string& _file_checksum,
+ const std::string& _file_checksum_func_name,
+ UniqueId64x2 _unique_id)
: fd(file, file_path_id, file_size, smallest_seq, largest_seq),
smallest(smallest_key),
largest(largest_key),
marked_for_compaction(marked_for_compact),
+ temperature(_temperature),
oldest_blob_file_number(oldest_blob_file),
oldest_ancester_time(_oldest_ancester_time),
file_creation_time(_file_creation_time),
file_checksum(_file_checksum),
- file_checksum_func_name(_file_checksum_func_name) {
+ file_checksum_func_name(_file_checksum_func_name),
+ unique_id(std::move(_unique_id)) {
TEST_SYNC_POINT_CALLBACK("FileMetaData::FileMetaData", this);
}
// REQUIRED: Keys must be given to the function in sorted order (it expects
// the last key to be the largest).
- void UpdateBoundaries(const Slice& key, const Slice& value,
- SequenceNumber seqno, ValueType value_type);
+ Status UpdateBoundaries(const Slice& key, const Slice& value,
+ SequenceNumber seqno, ValueType value_type);
// Unlike UpdateBoundaries, ranges do not need to be presented in any
// particular order.
}
return kUnknownFileCreationTime;
}
+
+ // WARNING: manual update to this function is needed
+ // whenever a new string property is added to FileMetaData
+ // to reduce approximation error.
+ //
+ // TODO: eliminate the need of manually updating this function
+ // for new string properties
// Returns an estimate, in bytes, of the memory used by this object: the
// object's own footprint (malloc_usable_size(this) when
// ROCKSDB_MALLOC_USABLE_SIZE is defined, sizeof(*this) otherwise) plus the
// size() of smallest, largest, file_checksum and file_checksum_func_name.
+ size_t ApproximateMemoryUsage() const {
+ size_t usage = 0;
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
// NOTE(review): malloc_usable_size(this) presumably requires that this object
// is itself the start of a heap allocation -- confirm for all callers.
+ usage += malloc_usable_size(const_cast<FileMetaData*>(this));
+#else
+ usage += sizeof(*this);
+#endif // ROCKSDB_MALLOC_USABLE_SIZE
// Only size() of each of these members is added, not any spare capacity.
+ usage += smallest.size() + largest.size() + file_checksum.size() +
+ file_checksum_func_name.size();
+ return usage;
+ }
};
// A compressed copy of file meta data that just contains minimum data needed
-// to server read operations, while still keeping the pointer to full metadata
+// to serve read operations, while still keeping the pointer to full metadata
// of the file in case it is needed.
struct FdWithKeyRange {
FileDescriptor fd;
FileMetaData* file_metadata; // Points to all metadata
- Slice smallest_key; // slice that contain smallest key
- Slice largest_key; // slice that contain largest key
+ Slice smallest_key; // slice that contains smallest key
+ Slice largest_key; // slice that contains largest key
// Default constructor: value-initializes fd and both key slices, and sets
// file_metadata to nullptr.
FdWithKeyRange()
- : fd(),
- file_metadata(nullptr),
- smallest_key(),
- largest_key() {
- }
+ : fd(), file_metadata(nullptr), smallest_key(), largest_key() {}
FdWithKeyRange(FileDescriptor _fd, Slice _smallest_key, Slice _largest_key,
FileMetaData* _file_metadata)
const DeletedFiles& GetDeletedFiles() const { return deleted_files_; }
// Add the specified table file at the specified level.
- // REQUIRES: This version has not been saved (see VersionSet::SaveTo)
// REQUIRES: "smallest" and "largest" are smallest and largest keys in file
// REQUIRES: "oldest_blob_file_number" is the number of the oldest blob file
// referred to by this file if any, kInvalidBlobFileNumber otherwise.
uint64_t file_size, const InternalKey& smallest,
const InternalKey& largest, const SequenceNumber& smallest_seqno,
const SequenceNumber& largest_seqno, bool marked_for_compaction,
- uint64_t oldest_blob_file_number, uint64_t oldest_ancester_time,
- uint64_t file_creation_time, const std::string& file_checksum,
- const std::string& file_checksum_func_name) {
+ Temperature temperature, uint64_t oldest_blob_file_number,
+ uint64_t oldest_ancester_time, uint64_t file_creation_time,
+ const std::string& file_checksum,
+ const std::string& file_checksum_func_name,
+ const UniqueId64x2& unique_id) {
assert(smallest_seqno <= largest_seqno);
new_files_.emplace_back(
- level, FileMetaData(file, file_path_id, file_size, smallest, largest,
- smallest_seqno, largest_seqno,
- marked_for_compaction, oldest_blob_file_number,
- oldest_ancester_time, file_creation_time,
- file_checksum, file_checksum_func_name));
+ level,
+ FileMetaData(file, file_path_id, file_size, smallest, largest,
+ smallest_seqno, largest_seqno, marked_for_compaction,
+ temperature, oldest_blob_file_number, oldest_ancester_time,
+ file_creation_time, file_checksum, file_checksum_func_name,
+ unique_id));
+ if (!HasLastSequence() || largest_seqno > GetLastSequence()) {
+ SetLastSequence(largest_seqno);
+ }
}
// Records a copy of the already-built FileMetaData `f` as a new file at
// `level`. Asserts that f's smallest seqno does not exceed its largest seqno.
// The edit's last sequence is then set to f.fd.largest_seqno when no last
// sequence has been set yet or the current one is smaller.
void AddFile(int level, const FileMetaData& f) {
assert(f.fd.smallest_seqno <= f.fd.largest_seqno);
new_files_.emplace_back(level, f);
+ if (!HasLastSequence() || f.fd.largest_seqno > GetLastSequence()) {
+ SetLastSequence(f.fd.largest_seqno);
+ }
}
// Retrieve the table files added as well as their associated levels.
using NewFiles = std::vector<std::pair<int, FileMetaData>>;
const NewFiles& GetNewFiles() const { return new_files_; }
+ // Retrieve all the compact cursors
+ using CompactCursors = std::vector<std::pair<int, InternalKey>>;
+ const CompactCursors& GetCompactCursors() const { return compact_cursors_; }
// Appends one (level, cursor) pair to compact_cursors_; entries already
// stored are left untouched.
+ void AddCompactCursor(int level, const InternalKey& cursor) {
+ compact_cursors_.push_back(std::make_pair(level, cursor));
+ }
// Replaces every stored cursor with the contents of compact_cursors_by_level.
// The index into the input vector is used as the level, and entries whose
// Valid() returns false are skipped.
+ void SetCompactCursors(
+ const std::vector<InternalKey>& compact_cursors_by_level) {
+ compact_cursors_.clear();
// Reserving the input length is an upper bound: skipped entries add nothing.
+ compact_cursors_.reserve(compact_cursors_by_level.size());
+ for (int i = 0; i < (int)compact_cursors_by_level.size(); i++) {
+ if (compact_cursors_by_level[i].Valid()) {
+ compact_cursors_.push_back(
+ std::make_pair(i, compact_cursors_by_level[i]));
+ }
+ }
+ }
+
// Add a new blob file.
void AddBlobFile(uint64_t blob_file_number, uint64_t total_blob_count,
uint64_t total_blob_bytes, std::string checksum_method,
}
// Installs a whole list of blob file additions at once, moving from the
// by-value argument. Asserts that no additions were recorded beforehand.
void SetBlobFileAdditions(BlobFileAdditions blob_file_additions) {
+ assert(blob_file_additions_.empty());
blob_file_additions_ = std::move(blob_file_additions);
}
}
// Installs a whole list of blob file garbage records at once, moving from the
// by-value argument. Asserts that none were recorded beforehand.
void SetBlobFileGarbages(BlobFileGarbages blob_file_garbages) {
+ assert(blob_file_garbages_.empty());
blob_file_garbages_ = std::move(blob_file_garbages);
}
bool IsInAtomicGroup() const { return is_in_atomic_group_; }
uint32_t GetRemainingEntries() const { return remaining_entries_; }
// True when full_history_ts_low_ is non-empty; an empty string means "unset".
+ bool HasFullHistoryTsLow() const { return !full_history_ts_low_.empty(); }
// Returns the stored value by const reference. Asserts that
// HasFullHistoryTsLow() is true.
+ const std::string& GetFullHistoryTsLow() const {
+ assert(HasFullHistoryTsLow());
+ return full_history_ts_low_;
+ }
// Stores the given value by moving it into full_history_ts_low_. Asserts that
// the argument is non-empty.
+ void SetFullHistoryTsLow(std::string full_history_ts_low) {
+ assert(!full_history_ts_low.empty());
+ full_history_ts_low_ = std::move(full_history_ts_low);
+ }
+
// return true on success.
bool EncodeTo(std::string* dst) const;
Status DecodeFrom(const Slice& src);
bool has_min_log_number_to_keep_ = false;
bool has_last_sequence_ = false;
+ // Compaction cursors for round-robin compaction policy
+ CompactCursors compact_cursors_;
+
DeletedFiles deleted_files_;
NewFiles new_files_;
bool is_in_atomic_group_ = false;
uint32_t remaining_entries_ = 0;
+
+ std::string full_history_ts_low_;
};
} // namespace ROCKSDB_NAMESPACE