#pragma once
#include <stdio.h>
+
#include <memory>
#include <string>
#include <utility>
-#include "db/lookup_key.h"
-#include "db/merge_context.h"
-#include "logging/logging.h"
-#include "monitoring/perf_context_imp.h"
+
#include "rocksdb/comparator.h"
-#include "rocksdb/db.h"
-#include "rocksdb/filter_policy.h"
#include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h"
-#include "rocksdb/table.h"
#include "rocksdb/types.h"
#include "util/coding.h"
#include "util/user_comparator_wrapper.h"
// another.
kTypeBeginUnprepareXID = 0x13, // WAL only.
kTypeDeletionWithTimestamp = 0x14,
+ kTypeCommitXIDAndTimestamp = 0x15, // WAL only
+ kTypeWideColumnEntity = 0x16,
+ kTypeColumnFamilyWideColumnEntity = 0x17, // WAL only
+ kTypeMaxValid, // Should be after the last valid type, only used for
+ // validation
kMaxValue = 0x7F // Not used for storing records.
};
// Checks whether a type is an inline value type
// (i.e. a type used in memtable skiplist and sst file datablock).
inline bool IsValueType(ValueType t) {
- return t <= kTypeMerge || t == kTypeSingleDeletion || t == kTypeBlobIndex ||
- kTypeDeletionWithTimestamp == t;
+ return t <= kTypeMerge || kTypeSingleDeletion == t || kTypeBlobIndex == t ||
+ kTypeDeletionWithTimestamp == t || kTypeWideColumnEntity == t;
}
// Checks whether a type is from user operation
// can be packed together into 64-bits.
static const SequenceNumber kMaxSequenceNumber = ((0x1ull << 56) - 1);
-static const SequenceNumber kDisableGlobalSequenceNumber = port::kMaxUint64;
+static const SequenceNumber kDisableGlobalSequenceNumber =
+ std::numeric_limits<uint64_t>::max();
constexpr uint64_t kNumInternalBytes = 8;
+// Defined in dbformat.cc
+extern const std::string kDisableUserTimestamp;
+
// The data structure that represents an internal key in the way that user_key,
// sequence number and type are stored in separated forms.
struct ParsedInternalKey {
void SetTimestamp(const Slice& ts) {
assert(ts.size() <= user_key.size());
- const char* addr = user_key.data() - ts.size();
+ const char* addr = user_key.data() + user_key.size() - ts.size();
memcpy(const_cast<char*>(addr), ts.data(), ts.size());
}
+
+ Slice GetTimestamp(size_t ts_sz) {
+ assert(ts_sz <= user_key.size());
+ const char* addr = user_key.data() + user_key.size() - ts_sz;
+ return Slice(const_cast<char*>(addr), ts_sz);
+ }
};
// Return the length of the encoding of "key".
// Pack a sequence number and a ValueType into a uint64_t
inline uint64_t PackSequenceAndType(uint64_t seq, ValueType t) {
assert(seq <= kMaxSequenceNumber);
- assert(IsExtendedValueType(t));
+ // kTypeMaxValid is used in TruncatedRangeDelIterator, see its constructor.
+ assert(IsExtendedValueType(t) || t == kTypeMaxValid);
return (seq << 8) | t;
}
*seq = packed >> 8;
*t = static_cast<ValueType>(packed & 0xff);
- assert(*seq <= kMaxSequenceNumber);
- assert(IsExtendedValueType(*t));
+ // Commented the following two assertions in order to test key-value checksum
+ // on corrupted keys without crashing ("DbKvChecksumTest").
+ // assert(*seq <= kMaxSequenceNumber);
+ // assert(IsExtendedValueType(*t));
}
EntryType GetEntryType(ValueType value_type);
extern void AppendInternalKeyFooter(std::string* result, SequenceNumber s,
ValueType t);
+// Append the key and a minimal timestamp to *result
+extern void AppendKeyWithMinTimestamp(std::string* result, const Slice& key,
+ size_t ts_sz);
+
+// Append the key and a maximal timestamp to *result
+extern void AppendKeyWithMaxTimestamp(std::string* result, const Slice& key,
+ size_t ts_sz);
+
+// `key` is a user key with timestamp. Append the user key without timestamp
+// and the maximal timestamp to *result.
+extern void AppendUserKeyWithMaxTimestamp(std::string* result, const Slice& key,
+ size_t ts_sz);
+
// Attempt to parse an internal key from "internal_key". On success,
// stores the parsed data in "*result", and returns true.
//
inline Slice ExtractUserKeyAndStripTimestamp(const Slice& internal_key,
size_t ts_sz) {
- assert(internal_key.size() >= kNumInternalBytes + ts_sz);
- return Slice(internal_key.data(),
- internal_key.size() - kNumInternalBytes - ts_sz);
+ Slice ret = internal_key;
+ ret.remove_suffix(kNumInternalBytes + ts_sz);
+ return ret;
}
inline Slice StripTimestampFromUserKey(const Slice& user_key, size_t ts_sz) {
- assert(user_key.size() >= ts_sz);
- return Slice(user_key.data(), user_key.size() - ts_sz);
+ Slice ret = user_key;
+ ret.remove_suffix(ts_sz);
+ return ret;
}
inline Slice ExtractTimestampFromUserKey(const Slice& user_key, size_t ts_sz) {
return Slice(user_key.data() + user_key.size() - ts_sz, ts_sz);
}
+inline Slice ExtractTimestampFromKey(const Slice& internal_key, size_t ts_sz) {
+ const size_t key_size = internal_key.size();
+ assert(key_size >= kNumInternalBytes + ts_sz);
+ return Slice(internal_key.data() + key_size - ts_sz - kNumInternalBytes,
+ ts_sz);
+}
+
inline uint64_t ExtractInternalKeyFooter(const Slice& internal_key) {
assert(internal_key.size() >= kNumInternalBytes);
const size_t n = internal_key.size();
#ifdef NDEBUG
final
#endif
- : public Comparator {
+ : public CompareInterface {
private:
UserComparatorWrapper user_comparator_;
- std::string name_;
public:
// `InternalKeyComparator`s constructed with the default constructor are not
// this constructor to precompute the result of `Name()`. To avoid this
// overhead, set `named` to false. In that case, `Name()` will return a
// generic name that is non-specific to the underlying comparator.
- explicit InternalKeyComparator(const Comparator* c, bool named = true)
- : Comparator(c->timestamp_size()), user_comparator_(c) {
- if (named) {
- name_ = "rocksdb.InternalKeyComparator:" +
- std::string(user_comparator_.Name());
- }
- }
+ explicit InternalKeyComparator(const Comparator* c) : user_comparator_(c) {}
virtual ~InternalKeyComparator() {}
- virtual const char* Name() const override;
- virtual int Compare(const Slice& a, const Slice& b) const override;
+ int Compare(const Slice& a, const Slice& b) const override;
+
+ bool Equal(const Slice& a, const Slice& b) const {
+ // TODO Use user_comparator_.Equal(). Perhaps compare seqno before
+ // comparing the user key too.
+ return Compare(a, b) == 0;
+ }
+
// Same as Compare except that it excludes the value type from comparison
- virtual int CompareKeySeq(const Slice& a, const Slice& b) const;
- virtual void FindShortestSeparator(std::string* start,
- const Slice& limit) const override;
- virtual void FindShortSuccessor(std::string* key) const override;
+ int CompareKeySeq(const Slice& a, const Slice& b) const;
const Comparator* user_comparator() const {
return user_comparator_.user_comparator();
// value `kDisableGlobalSequenceNumber`.
int Compare(const Slice& a, SequenceNumber a_global_seqno, const Slice& b,
SequenceNumber b_global_seqno) const;
- virtual const Comparator* GetRootComparator() const override {
- return user_comparator_.GetRootComparator();
- }
};
// The class represent the internal key in encoded form.
InternalKey(const Slice& _user_key, SequenceNumber s, ValueType t) {
AppendInternalKey(&rep_, ParsedInternalKey(_user_key, s, t));
}
+ InternalKey(const Slice& _user_key, SequenceNumber s, ValueType t, Slice ts) {
+ AppendInternalKeyWithDifferentTimestamp(
+ &rep_, ParsedInternalKey(_user_key, s, t), ts);
+ }
// sets the internal key to be bigger or equal to all internal keys with this
// user key
}
Slice user_key() const { return ExtractUserKey(rep_); }
- size_t size() { return rep_.size(); }
+ size_t size() const { return rep_.size(); }
void Set(const Slice& _user_key, SequenceNumber s, ValueType t) {
SetFrom(ParsedInternalKey(_user_key, s, t));
}
+ void Set(const Slice& _user_key_with_ts, SequenceNumber s, ValueType t,
+ const Slice& ts) {
+ ParsedInternalKey pik = ParsedInternalKey(_user_key_with_ts, s, t);
+ // Should not call pik.SetTimestamp() directly as it overwrites the buffer
+ // containing _user_key.
+ SetFrom(pik, ts);
+ }
+
void SetFrom(const ParsedInternalKey& p) {
rep_.clear();
AppendInternalKey(&rep_, p);
}
+ void SetFrom(const ParsedInternalKey& p, const Slice& ts) {
+ rep_.clear();
+ AppendInternalKeyWithDifferentTimestamp(&rep_, p, ts);
+ }
+
void Clear() { rep_.clear(); }
// The underlying representation.
void SetIsUserKey(bool is_user_key) { is_user_key_ = is_user_key; }
// Returns the key in whichever format that was provided to KeyIter
+ // If user-defined timestamp is enabled, then timestamp is included in the
+ // return result.
Slice GetKey() const { return Slice(key_, key_size_); }
Slice GetInternalKey() const {
return Slice(key_, key_size_);
}
+ // If user-defined timestamp is enabled, then timestamp is included in the
+ // return result of GetUserKey();
Slice GetUserKey() const {
if (IsUserKey()) {
return Slice(key_, key_size_);
return SetKeyImpl(key, copy);
}
+ // If user-defined timestamp is enabled, then `key` includes timestamp.
+ // TODO(yanqin) this is also used to set prefix, which do not include
+ // timestamp. Should be handled.
Slice SetUserKey(const Slice& key, bool copy = true) {
is_user_key_ = true;
return SetKeyImpl(key, copy);
bool IsKeyPinned() const { return (key_ != buf_); }
+ // If `ts` is provided, user_key should not contain timestamp,
+ // and `ts` is appended after user_key.
+ // TODO: more efficient storage for timestamp.
void SetInternalKey(const Slice& key_prefix, const Slice& user_key,
SequenceNumber s,
ValueType value_type = kValueTypeForSeek,
void EnlargeBuffer(size_t key_size);
};
-// Convert from a SliceTranform of user keys, to a SliceTransform of
-// user keys.
+// Convert from a SliceTransform of user keys, to a SliceTransform of
+// internal keys.
class InternalKeySliceTransform : public SliceTransform {
public:
explicit InternalKeySliceTransform(const SliceTransform* transform)
// Read record from a write batch piece from input.
// tag, column_family, key, value and blob are return values. Callers own the
-// Slice they point to.
+// slice they point to.
// Tag is defined as ValueType.
// input will be advanced to after the record.
+// If user-defined timestamp is enabled for a column family, then the `key`
+// resulting from this call will include timestamp.
extern Status ReadRecordFromWriteBatch(Slice* input, char* tag,
uint32_t* column_family, Slice* key,
Slice* value, Slice* blob, Slice* xid);
// When user call DeleteRange() to delete a range of keys,
// we will store a serialized RangeTombstone in MemTable and SST.
-// the struct here is a easy-understood form
+// the struct here is an easy-understood form
// start/end_key_ is the start/end user key of the range to be deleted
struct RangeTombstone {
Slice start_key_;
Slice end_key_;
SequenceNumber seq_;
+ // TODO: we should optimize the storage here when user-defined timestamp
+ // is NOT enabled: they currently take up (16 + 32 + 32) bytes per tombstone.
+ Slice ts_;
+ std::string pinned_start_key_;
+ std::string pinned_end_key_;
+
RangeTombstone() = default;
RangeTombstone(Slice sk, Slice ek, SequenceNumber sn)
: start_key_(sk), end_key_(ek), seq_(sn) {}
+ // User-defined timestamp is enabled, `sk` and `ek` should be user key
+ // with timestamp, `ts` will replace the timestamps in `sk` and
+ // `ek`.
+ RangeTombstone(Slice sk, Slice ek, SequenceNumber sn, Slice ts)
+ : seq_(sn), ts_(ts) {
+ assert(!ts.empty());
+ pinned_start_key_.reserve(sk.size());
+ pinned_start_key_.append(sk.data(), sk.size() - ts.size());
+ pinned_start_key_.append(ts.data(), ts.size());
+ pinned_end_key_.reserve(ek.size());
+ pinned_end_key_.append(ek.data(), ek.size() - ts.size());
+ pinned_end_key_.append(ts.data(), ts.size());
+ start_key_ = pinned_start_key_;
+ end_key_ = pinned_end_key_;
+ }
+
RangeTombstone(ParsedInternalKey parsed_key, Slice value) {
start_key_ = parsed_key.user_key;
seq_ = parsed_key.sequence;
// be careful to use Serialize(), allocates new memory
std::pair<InternalKey, Slice> Serialize() const {
auto key = InternalKey(start_key_, seq_, kTypeRangeDeletion);
- Slice value = end_key_;
- return std::make_pair(std::move(key), std::move(value));
+ return std::make_pair(std::move(key), end_key_);
}
// be careful to use SerializeKey(), allocates new memory
//
// be careful to use SerializeEndKey(), allocates new memory
InternalKey SerializeEndKey() const {
+ if (!ts_.empty()) {
+ static constexpr char kTsMax[] = "\xff\xff\xff\xff\xff\xff\xff\xff\xff";
+ if (ts_.size() <= strlen(kTsMax)) {
+ return InternalKey(end_key_, kMaxSequenceNumber, kTypeRangeDeletion,
+ Slice(kTsMax, ts_.size()));
+ } else {
+ return InternalKey(end_key_, kMaxSequenceNumber, kTypeRangeDeletion,
+ std::string(ts_.size(), '\xff'));
+ }
+ }
return InternalKey(end_key_, kMaxSequenceNumber, kTypeRangeDeletion);
}
};