]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/db/dbformat.h
import 14.2.4 nautilus point release
[ceph.git] / ceph / src / rocksdb / db / dbformat.h
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5//
6// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7// Use of this source code is governed by a BSD-style license that can be
8// found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10#pragma once
11#include <stdio.h>
12#include <string>
13#include <utility>
11fdf7f2 14#include "monitoring/perf_context_imp.h"
7c673cae
FG
15#include "rocksdb/comparator.h"
16#include "rocksdb/db.h"
17#include "rocksdb/filter_policy.h"
18#include "rocksdb/slice.h"
19#include "rocksdb/slice_transform.h"
20#include "rocksdb/table.h"
21#include "rocksdb/types.h"
22#include "util/coding.h"
23#include "util/logging.h"
494da23a 24#include "util/user_comparator_wrapper.h"
7c673cae
FG
25
26namespace rocksdb {
27
28class InternalKey;
29
30// Value types encoded as the last component of internal keys.
31// DO NOT CHANGE THESE ENUM VALUES: they are embedded in the on-disk
32// data structures.
33// The highest bit of the value type needs to be reserved to SST tables
34// for them to do more flexible encoding.
35enum ValueType : unsigned char {
36 kTypeDeletion = 0x0,
37 kTypeValue = 0x1,
38 kTypeMerge = 0x2,
39 kTypeLogData = 0x3, // WAL only.
40 kTypeColumnFamilyDeletion = 0x4, // WAL only.
41 kTypeColumnFamilyValue = 0x5, // WAL only.
42 kTypeColumnFamilyMerge = 0x6, // WAL only.
43 kTypeSingleDeletion = 0x7,
44 kTypeColumnFamilySingleDeletion = 0x8, // WAL only.
45 kTypeBeginPrepareXID = 0x9, // WAL only.
46 kTypeEndPrepareXID = 0xA, // WAL only.
47 kTypeCommitXID = 0xB, // WAL only.
48 kTypeRollbackXID = 0xC, // WAL only.
49 kTypeNoop = 0xD, // WAL only.
50 kTypeColumnFamilyRangeDeletion = 0xE, // WAL only.
51 kTypeRangeDeletion = 0xF, // meta block
11fdf7f2
TL
52 kTypeColumnFamilyBlobIndex = 0x10, // Blob DB only
53 kTypeBlobIndex = 0x11, // Blob DB only
54 // When the prepared record is also persisted in db, we use a different
55 // record. This is to ensure that the WAL that is generated by a WritePolicy
56 // is not mistakenly read by another, which would result into data
57 // inconsistency.
58 kTypeBeginPersistedPrepareXID = 0x12, // WAL only.
59 // Similar to kTypeBeginPersistedPrepareXID, this is to ensure that WAL
60 // generated by WriteUnprepared write policy is not mistakenly read by
61 // another.
62 kTypeBeginUnprepareXID = 0x13, // WAL only.
63 kMaxValue = 0x7F // Not used for storing records.
7c673cae
FG
64};
65
66// Defined in dbformat.cc
67extern const ValueType kValueTypeForSeek;
68extern const ValueType kValueTypeForSeekForPrev;
69
70// Checks whether a type is an inline value type
71// (i.e. a type used in memtable skiplist and sst file datablock).
72inline bool IsValueType(ValueType t) {
11fdf7f2 73 return t <= kTypeMerge || t == kTypeSingleDeletion || t == kTypeBlobIndex;
7c673cae
FG
74}
75
76// Checks whether a type is from user operation
77// kTypeRangeDeletion is in meta block so this API is separated from above
78inline bool IsExtendedValueType(ValueType t) {
79 return IsValueType(t) || t == kTypeRangeDeletion;
80}
81
82// We leave eight bits empty at the bottom so a type and sequence#
83// can be packed together into 64-bits.
494da23a 84static const SequenceNumber kMaxSequenceNumber = ((0x1ull << 56) - 1);
7c673cae
FG
85
86static const SequenceNumber kDisableGlobalSequenceNumber = port::kMaxUint64;
87
88struct ParsedInternalKey {
89 Slice user_key;
90 SequenceNumber sequence;
91 ValueType type;
92
11fdf7f2
TL
93 ParsedInternalKey()
94 : sequence(kMaxSequenceNumber) // Make code analyzer happy
494da23a 95 {} // Intentionally left uninitialized (for speed)
7c673cae 96 ParsedInternalKey(const Slice& u, const SequenceNumber& seq, ValueType t)
494da23a 97 : user_key(u), sequence(seq), type(t) {}
7c673cae 98 std::string DebugString(bool hex = false) const;
11fdf7f2
TL
99
100 void clear() {
101 user_key.clear();
102 sequence = 0;
103 type = kTypeDeletion;
104 }
7c673cae
FG
105};
106
107// Return the length of the encoding of "key".
108inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) {
109 return key.user_key.size() + 8;
110}
111
112// Pack a sequence number and a ValueType into a uint64_t
113extern uint64_t PackSequenceAndType(uint64_t seq, ValueType t);
114
115// Given the result of PackSequenceAndType, store the sequence number in *seq
116// and the ValueType in *t.
117extern void UnPackSequenceAndType(uint64_t packed, uint64_t* seq, ValueType* t);
118
11fdf7f2
TL
119EntryType GetEntryType(ValueType value_type);
120
7c673cae
FG
121// Append the serialization of "key" to *result.
122extern void AppendInternalKey(std::string* result,
123 const ParsedInternalKey& key);
124// Serialized internal key consists of user key followed by footer.
125// This function appends the footer to *result, assuming that *result already
126// contains the user key at the end.
127extern void AppendInternalKeyFooter(std::string* result, SequenceNumber s,
128 ValueType t);
129
130// Attempt to parse an internal key from "internal_key". On success,
131// stores the parsed data in "*result", and returns true.
132//
133// On error, returns false, leaves "*result" in an undefined state.
134extern bool ParseInternalKey(const Slice& internal_key,
135 ParsedInternalKey* result);
136
137// Returns the user key portion of an internal key.
138inline Slice ExtractUserKey(const Slice& internal_key) {
139 assert(internal_key.size() >= 8);
140 return Slice(internal_key.data(), internal_key.size() - 8);
141}
142
11fdf7f2 143inline uint64_t ExtractInternalKeyFooter(const Slice& internal_key) {
7c673cae
FG
144 assert(internal_key.size() >= 8);
145 const size_t n = internal_key.size();
11fdf7f2
TL
146 return DecodeFixed64(internal_key.data() + n - 8);
147}
148
149inline ValueType ExtractValueType(const Slice& internal_key) {
150 uint64_t num = ExtractInternalKeyFooter(internal_key);
7c673cae
FG
151 unsigned char c = num & 0xff;
152 return static_cast<ValueType>(c);
153}
154
155// A comparator for internal keys that uses a specified comparator for
156// the user key portion and breaks ties by decreasing sequence number.
11fdf7f2
TL
157class InternalKeyComparator
158#ifdef NDEBUG
159 final
160#endif
161 : public Comparator {
7c673cae 162 private:
494da23a 163 UserComparatorWrapper user_comparator_;
7c673cae 164 std::string name_;
494da23a 165
7c673cae 166 public:
494da23a
TL
167 explicit InternalKeyComparator(const Comparator* c)
168 : user_comparator_(c),
169 name_("rocksdb.InternalKeyComparator:" +
170 std::string(user_comparator_.Name())) {}
7c673cae
FG
171 virtual ~InternalKeyComparator() {}
172
173 virtual const char* Name() const override;
174 virtual int Compare(const Slice& a, const Slice& b) const override;
11fdf7f2
TL
175 // Same as Compare except that it excludes the value type from comparison
176 virtual int CompareKeySeq(const Slice& a, const Slice& b) const;
7c673cae
FG
177 virtual void FindShortestSeparator(std::string* start,
178 const Slice& limit) const override;
179 virtual void FindShortSuccessor(std::string* key) const override;
180
494da23a
TL
181 const Comparator* user_comparator() const {
182 return user_comparator_.user_comparator();
183 }
7c673cae
FG
184
185 int Compare(const InternalKey& a, const InternalKey& b) const;
186 int Compare(const ParsedInternalKey& a, const ParsedInternalKey& b) const;
11fdf7f2 187 virtual const Comparator* GetRootComparator() const override {
494da23a 188 return user_comparator_.GetRootComparator();
11fdf7f2 189 }
7c673cae
FG
190};
191
192// Modules in this directory should keep internal keys wrapped inside
193// the following class instead of plain strings so that we do not
194// incorrectly use string comparisons instead of an InternalKeyComparator.
195class InternalKey {
196 private:
197 std::string rep_;
494da23a 198
7c673cae 199 public:
494da23a 200 InternalKey() {} // Leave rep_ as empty to indicate it is invalid
7c673cae
FG
201 InternalKey(const Slice& _user_key, SequenceNumber s, ValueType t) {
202 AppendInternalKey(&rep_, ParsedInternalKey(_user_key, s, t));
203 }
204
205 // sets the internal key to be bigger or equal to all internal keys with this
206 // user key
207 void SetMaxPossibleForUserKey(const Slice& _user_key) {
11fdf7f2
TL
208 AppendInternalKey(
209 &rep_, ParsedInternalKey(_user_key, 0, static_cast<ValueType>(0)));
7c673cae
FG
210 }
211
212 // sets the internal key to be smaller or equal to all internal keys with this
213 // user key
214 void SetMinPossibleForUserKey(const Slice& _user_key) {
11fdf7f2
TL
215 AppendInternalKey(&rep_, ParsedInternalKey(_user_key, kMaxSequenceNumber,
216 kValueTypeForSeek));
7c673cae
FG
217 }
218
219 bool Valid() const {
220 ParsedInternalKey parsed;
221 return ParseInternalKey(Slice(rep_), &parsed);
222 }
223
224 void DecodeFrom(const Slice& s) { rep_.assign(s.data(), s.size()); }
225 Slice Encode() const {
226 assert(!rep_.empty());
227 return rep_;
228 }
229
230 Slice user_key() const { return ExtractUserKey(rep_); }
231 size_t size() { return rep_.size(); }
232
233 void Set(const Slice& _user_key, SequenceNumber s, ValueType t) {
234 SetFrom(ParsedInternalKey(_user_key, s, t));
235 }
236
237 void SetFrom(const ParsedInternalKey& p) {
238 rep_.clear();
239 AppendInternalKey(&rep_, p);
240 }
241
242 void Clear() { rep_.clear(); }
243
244 // The underlying representation.
245 // Intended only to be used together with ConvertFromUserKey().
246 std::string* rep() { return &rep_; }
247
248 // Assuming that *rep() contains a user key, this method makes internal key
249 // out of it in-place. This saves a memcpy compared to Set()/SetFrom().
250 void ConvertFromUserKey(SequenceNumber s, ValueType t) {
251 AppendInternalKeyFooter(&rep_, s, t);
252 }
253
254 std::string DebugString(bool hex = false) const;
255};
256
494da23a
TL
257inline int InternalKeyComparator::Compare(const InternalKey& a,
258 const InternalKey& b) const {
7c673cae
FG
259 return Compare(a.Encode(), b.Encode());
260}
261
262inline bool ParseInternalKey(const Slice& internal_key,
263 ParsedInternalKey* result) {
264 const size_t n = internal_key.size();
265 if (n < 8) return false;
266 uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
267 unsigned char c = num & 0xff;
268 result->sequence = num >> 8;
269 result->type = static_cast<ValueType>(c);
270 assert(result->type <= ValueType::kMaxValue);
271 result->user_key = Slice(internal_key.data(), n - 8);
272 return IsExtendedValueType(result->type);
273}
274
275// Update the sequence number in the internal key.
276// Guarantees not to invalidate ikey.data().
277inline void UpdateInternalKey(std::string* ikey, uint64_t seq, ValueType t) {
278 size_t ikey_sz = ikey->size();
279 assert(ikey_sz >= 8);
280 uint64_t newval = (seq << 8) | t;
281
282 // Note: Since C++11, strings are guaranteed to be stored contiguously and
283 // string::operator[]() is guaranteed not to change ikey.data().
284 EncodeFixed64(&(*ikey)[ikey_sz - 8], newval);
285}
286
287// Get the sequence number from the internal key
288inline uint64_t GetInternalKeySeqno(const Slice& internal_key) {
289 const size_t n = internal_key.size();
290 assert(n >= 8);
291 uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
292 return num >> 8;
293}
294
7c673cae
FG
295// A helper class useful for DBImpl::Get()
296class LookupKey {
297 public:
298 // Initialize *this for looking up user_key at a snapshot with
299 // the specified sequence number.
300 LookupKey(const Slice& _user_key, SequenceNumber sequence);
301
302 ~LookupKey();
303
304 // Return a key suitable for lookup in a MemTable.
305 Slice memtable_key() const {
306 return Slice(start_, static_cast<size_t>(end_ - start_));
307 }
308
309 // Return an internal key (suitable for passing to an internal iterator)
310 Slice internal_key() const {
311 return Slice(kstart_, static_cast<size_t>(end_ - kstart_));
312 }
313
314 // Return the user key
315 Slice user_key() const {
316 return Slice(kstart_, static_cast<size_t>(end_ - kstart_ - 8));
317 }
318
319 private:
320 // We construct a char array of the form:
321 // klength varint32 <-- start_
322 // userkey char[klength] <-- kstart_
323 // tag uint64
324 // <-- end_
325 // The array is a suitable MemTable key.
326 // The suffix starting with "userkey" can be used as an InternalKey.
327 const char* start_;
328 const char* kstart_;
329 const char* end_;
494da23a 330 char space_[200]; // Avoid allocation for short keys
7c673cae
FG
331
332 // No copying allowed
333 LookupKey(const LookupKey&);
334 void operator=(const LookupKey&);
335};
336
337inline LookupKey::~LookupKey() {
338 if (start_ != space_) delete[] start_;
339}
340
341class IterKey {
342 public:
343 IterKey()
344 : buf_(space_),
7c673cae
FG
345 key_(buf_),
346 key_size_(0),
494da23a 347 buf_size_(sizeof(space_)),
7c673cae
FG
348 is_user_key_(true) {}
349
350 ~IterKey() { ResetBuffer(); }
351
11fdf7f2
TL
352 // The bool will be picked up by the next calls to SetKey
353 void SetIsUserKey(bool is_user_key) { is_user_key_ = is_user_key; }
354
355 // Returns the key in whichever format that was provided to KeyIter
356 Slice GetKey() const { return Slice(key_, key_size_); }
357
7c673cae
FG
358 Slice GetInternalKey() const {
359 assert(!IsUserKey());
360 return Slice(key_, key_size_);
361 }
362
363 Slice GetUserKey() const {
364 if (IsUserKey()) {
365 return Slice(key_, key_size_);
366 } else {
367 assert(key_size_ >= 8);
368 return Slice(key_, key_size_ - 8);
369 }
370 }
371
372 size_t Size() const { return key_size_; }
373
374 void Clear() { key_size_ = 0; }
375
376 // Append "non_shared_data" to its back, from "shared_len"
377 // This function is used in Block::Iter::ParseNextKey
378 // shared_len: bytes in [0, shard_len-1] would be remained
379 // non_shared_data: data to be append, its length must be >= non_shared_len
380 void TrimAppend(const size_t shared_len, const char* non_shared_data,
381 const size_t non_shared_len) {
382 assert(shared_len <= key_size_);
383 size_t total_size = shared_len + non_shared_len;
384
385 if (IsKeyPinned() /* key is not in buf_ */) {
386 // Copy the key from external memory to buf_ (copy shared_len bytes)
387 EnlargeBufferIfNeeded(total_size);
388 memcpy(buf_, key_, shared_len);
389 } else if (total_size > buf_size_) {
390 // Need to allocate space, delete previous space
391 char* p = new char[total_size];
392 memcpy(p, key_, shared_len);
393
394 if (buf_ != space_) {
395 delete[] buf_;
396 }
397
398 buf_ = p;
399 buf_size_ = total_size;
400 }
401
402 memcpy(buf_ + shared_len, non_shared_data, non_shared_len);
403 key_ = buf_;
404 key_size_ = total_size;
405 }
406
11fdf7f2
TL
407 Slice SetKey(const Slice& key, bool copy = true) {
408 // is_user_key_ expected to be set already via SetIsUserKey
409 return SetKeyImpl(key, copy);
410 }
411
7c673cae
FG
412 Slice SetUserKey(const Slice& key, bool copy = true) {
413 is_user_key_ = true;
414 return SetKeyImpl(key, copy);
415 }
416
417 Slice SetInternalKey(const Slice& key, bool copy = true) {
418 is_user_key_ = false;
419 return SetKeyImpl(key, copy);
420 }
421
422 // Copies the content of key, updates the reference to the user key in ikey
423 // and returns a Slice referencing the new copy.
424 Slice SetInternalKey(const Slice& key, ParsedInternalKey* ikey) {
425 size_t key_n = key.size();
426 assert(key_n >= 8);
427 SetInternalKey(key);
428 ikey->user_key = Slice(key_, key_n - 8);
429 return Slice(key_, key_n);
430 }
431
432 // Copy the key into IterKey own buf_
433 void OwnKey() {
434 assert(IsKeyPinned() == true);
435
436 Reserve(key_size_);
437 memcpy(buf_, key_, key_size_);
438 key_ = buf_;
439 }
440
441 // Update the sequence number in the internal key. Guarantees not to
442 // invalidate slices to the key (and the user key).
443 void UpdateInternalKey(uint64_t seq, ValueType t) {
444 assert(!IsKeyPinned());
445 assert(key_size_ >= 8);
446 uint64_t newval = (seq << 8) | t;
447 EncodeFixed64(&buf_[key_size_ - 8], newval);
448 }
449
450 bool IsKeyPinned() const { return (key_ != buf_); }
451
452 void SetInternalKey(const Slice& key_prefix, const Slice& user_key,
453 SequenceNumber s,
454 ValueType value_type = kValueTypeForSeek) {
455 size_t psize = key_prefix.size();
456 size_t usize = user_key.size();
457 EnlargeBufferIfNeeded(psize + usize + sizeof(uint64_t));
458 if (psize > 0) {
459 memcpy(buf_, key_prefix.data(), psize);
460 }
461 memcpy(buf_ + psize, user_key.data(), usize);
462 EncodeFixed64(buf_ + usize + psize, PackSequenceAndType(s, value_type));
463
464 key_ = buf_;
465 key_size_ = psize + usize + sizeof(uint64_t);
466 is_user_key_ = false;
467 }
468
469 void SetInternalKey(const Slice& user_key, SequenceNumber s,
470 ValueType value_type = kValueTypeForSeek) {
471 SetInternalKey(Slice(), user_key, s, value_type);
472 }
473
474 void Reserve(size_t size) {
475 EnlargeBufferIfNeeded(size);
476 key_size_ = size;
477 }
478
479 void SetInternalKey(const ParsedInternalKey& parsed_key) {
480 SetInternalKey(Slice(), parsed_key);
481 }
482
483 void SetInternalKey(const Slice& key_prefix,
484 const ParsedInternalKey& parsed_key_suffix) {
485 SetInternalKey(key_prefix, parsed_key_suffix.user_key,
486 parsed_key_suffix.sequence, parsed_key_suffix.type);
487 }
488
489 void EncodeLengthPrefixedKey(const Slice& key) {
490 auto size = key.size();
491 EnlargeBufferIfNeeded(size + static_cast<size_t>(VarintLength(size)));
492 char* ptr = EncodeVarint32(buf_, static_cast<uint32_t>(size));
493 memcpy(ptr, key.data(), size);
494 key_ = buf_;
495 is_user_key_ = true;
496 }
497
498 bool IsUserKey() const { return is_user_key_; }
499
500 private:
501 char* buf_;
7c673cae
FG
502 const char* key_;
503 size_t key_size_;
494da23a 504 size_t buf_size_;
7c673cae
FG
505 char space_[32]; // Avoid allocation for short keys
506 bool is_user_key_;
507
508 Slice SetKeyImpl(const Slice& key, bool copy) {
509 size_t size = key.size();
510 if (copy) {
511 // Copy key to buf_
512 EnlargeBufferIfNeeded(size);
513 memcpy(buf_, key.data(), size);
514 key_ = buf_;
515 } else {
516 // Update key_ to point to external memory
517 key_ = key.data();
518 }
519 key_size_ = size;
520 return Slice(key_, key_size_);
521 }
522
523 void ResetBuffer() {
524 if (buf_ != space_) {
525 delete[] buf_;
526 buf_ = space_;
527 }
528 buf_size_ = sizeof(space_);
529 key_size_ = 0;
530 }
531
532 // Enlarge the buffer size if needed based on key_size.
533 // By default, static allocated buffer is used. Once there is a key
534 // larger than the static allocated buffer, another buffer is dynamically
535 // allocated, until a larger key buffer is requested. In that case, we
536 // reallocate buffer and delete the old one.
537 void EnlargeBufferIfNeeded(size_t key_size) {
538 // If size is smaller than buffer size, continue using current buffer,
539 // or the static allocated one, as default
540 if (key_size > buf_size_) {
11fdf7f2 541 EnlargeBuffer(key_size);
7c673cae
FG
542 }
543 }
544
11fdf7f2
TL
545 void EnlargeBuffer(size_t key_size);
546
7c673cae
FG
547 // No copying allowed
548 IterKey(const IterKey&) = delete;
549 void operator=(const IterKey&) = delete;
550};
551
552class InternalKeySliceTransform : public SliceTransform {
553 public:
554 explicit InternalKeySliceTransform(const SliceTransform* transform)
555 : transform_(transform) {}
556
557 virtual const char* Name() const override { return transform_->Name(); }
558
559 virtual Slice Transform(const Slice& src) const override {
560 auto user_key = ExtractUserKey(src);
561 return transform_->Transform(user_key);
562 }
563
564 virtual bool InDomain(const Slice& src) const override {
565 auto user_key = ExtractUserKey(src);
566 return transform_->InDomain(user_key);
567 }
568
569 virtual bool InRange(const Slice& dst) const override {
570 auto user_key = ExtractUserKey(dst);
571 return transform_->InRange(user_key);
572 }
573
574 const SliceTransform* user_prefix_extractor() const { return transform_; }
575
576 private:
577 // Like comparator, InternalKeySliceTransform will not take care of the
578 // deletion of transform_
579 const SliceTransform* const transform_;
580};
581
582// Read the key of a record from a write batch.
583// if this record represent the default column family then cf_record
584// must be passed as false, otherwise it must be passed as true.
585extern bool ReadKeyFromWriteBatchEntry(Slice* input, Slice* key,
586 bool cf_record);
587
588// Read record from a write batch piece from input.
589// tag, column_family, key, value and blob are return values. Callers own the
590// Slice they point to.
591// Tag is defined as ValueType.
592// input will be advanced to after the record.
593extern Status ReadRecordFromWriteBatch(Slice* input, char* tag,
594 uint32_t* column_family, Slice* key,
595 Slice* value, Slice* blob, Slice* xid);
596
597// When user call DeleteRange() to delete a range of keys,
598// we will store a serialized RangeTombstone in MemTable and SST.
599// the struct here is a easy-understood form
600// start/end_key_ is the start/end user key of the range to be deleted
601struct RangeTombstone {
602 Slice start_key_;
603 Slice end_key_;
604 SequenceNumber seq_;
605 RangeTombstone() = default;
606 RangeTombstone(Slice sk, Slice ek, SequenceNumber sn)
607 : start_key_(sk), end_key_(ek), seq_(sn) {}
608
609 RangeTombstone(ParsedInternalKey parsed_key, Slice value) {
610 start_key_ = parsed_key.user_key;
611 seq_ = parsed_key.sequence;
612 end_key_ = value;
613 }
614
615 // be careful to use Serialize(), allocates new memory
616 std::pair<InternalKey, Slice> Serialize() const {
617 auto key = InternalKey(start_key_, seq_, kTypeRangeDeletion);
618 Slice value = end_key_;
619 return std::make_pair(std::move(key), std::move(value));
620 }
621
622 // be careful to use SerializeKey(), allocates new memory
623 InternalKey SerializeKey() const {
624 return InternalKey(start_key_, seq_, kTypeRangeDeletion);
625 }
626
11fdf7f2
TL
627 // The tombstone end-key is exclusive, so we generate an internal-key here
628 // which has a similar property. Using kMaxSequenceNumber guarantees that
629 // the returned internal-key will compare less than any other internal-key
630 // with the same user-key. This in turn guarantees that the serialized
631 // end-key for a tombstone such as [a-b] will compare less than the key "b".
632 //
7c673cae
FG
633 // be careful to use SerializeEndKey(), allocates new memory
634 InternalKey SerializeEndKey() const {
11fdf7f2 635 return InternalKey(end_key_, kMaxSequenceNumber, kTypeRangeDeletion);
7c673cae
FG
636 }
637};
638
494da23a
TL
639inline int InternalKeyComparator::Compare(const Slice& akey,
640 const Slice& bkey) const {
11fdf7f2
TL
641 // Order by:
642 // increasing user key (according to user-supplied comparator)
643 // decreasing sequence number
644 // decreasing type (though sequence# should be enough to disambiguate)
494da23a 645 int r = user_comparator_.Compare(ExtractUserKey(akey), ExtractUserKey(bkey));
11fdf7f2
TL
646 if (r == 0) {
647 const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8);
648 const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8);
649 if (anum > bnum) {
650 r = -1;
651 } else if (anum < bnum) {
652 r = +1;
653 }
654 }
655 return r;
656}
657
494da23a
TL
658inline int InternalKeyComparator::CompareKeySeq(const Slice& akey,
659 const Slice& bkey) const {
11fdf7f2
TL
660 // Order by:
661 // increasing user key (according to user-supplied comparator)
662 // decreasing sequence number
494da23a 663 int r = user_comparator_.Compare(ExtractUserKey(akey), ExtractUserKey(bkey));
11fdf7f2
TL
664 if (r == 0) {
665 // Shift the number to exclude the last byte which contains the value type
666 const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8) >> 8;
667 const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8) >> 8;
668 if (anum > bnum) {
669 r = -1;
670 } else if (anum < bnum) {
671 r = +1;
672 }
673 }
674 return r;
675}
676
494da23a
TL
677struct ParsedInternalKeyComparator {
678 explicit ParsedInternalKeyComparator(const InternalKeyComparator* c)
679 : cmp(c) {}
680
681 bool operator()(const ParsedInternalKey& a,
682 const ParsedInternalKey& b) const {
683 return cmp->Compare(a, b) < 0;
684 }
685
686 const InternalKeyComparator* cmp;
687};
11fdf7f2 688
7c673cae 689} // namespace rocksdb