// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#pragma once
#include <stdio.h>
#include <string>
#include <utility>
#include "monitoring/perf_context_imp.h"
#include "rocksdb/comparator.h"
#include "rocksdb/db.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/table.h"
#include "rocksdb/types.h"
#include "util/coding.h"
#include "util/logging.h"

namespace rocksdb {

class InternalKey;

// Value types encoded as the last component of internal keys.
// DO NOT CHANGE THESE ENUM VALUES: they are embedded in the on-disk
// data structures.
// The highest bit of the value type needs to be reserved to SST tables
// for them to do more flexible encoding.
enum ValueType : unsigned char {
  kTypeDeletion = 0x0,
  kTypeValue = 0x1,
  kTypeMerge = 0x2,
  kTypeLogData = 0x3,                     // WAL only.
  kTypeColumnFamilyDeletion = 0x4,        // WAL only.
  kTypeColumnFamilyValue = 0x5,           // WAL only.
  kTypeColumnFamilyMerge = 0x6,           // WAL only.
  kTypeSingleDeletion = 0x7,
  kTypeColumnFamilySingleDeletion = 0x8,  // WAL only.
  kTypeBeginPrepareXID = 0x9,             // WAL only.
  kTypeEndPrepareXID = 0xA,               // WAL only.
  kTypeCommitXID = 0xB,                   // WAL only.
  kTypeRollbackXID = 0xC,                 // WAL only.
  kTypeNoop = 0xD,                        // WAL only.
  kTypeColumnFamilyRangeDeletion = 0xE,   // WAL only.
  kTypeRangeDeletion = 0xF,               // meta block
  kTypeColumnFamilyBlobIndex = 0x10,      // Blob DB only
  kTypeBlobIndex = 0x11,                  // Blob DB only
  // When the prepared record is also persisted in db, we use a different
  // record. This is to ensure that the WAL that is generated by a WritePolicy
  // is not mistakenly read by another, which would result in data
  // inconsistency.
  kTypeBeginPersistedPrepareXID = 0x12,  // WAL only.
  // Similar to kTypeBeginPersistedPrepareXID, this is to ensure that WAL
  // generated by WriteUnprepared write policy is not mistakenly read by
  // another.
  kTypeBeginUnprepareXID = 0x13,  // WAL only.
  kMaxValue = 0x7F                // Not used for storing records.
};

// Defined in dbformat.cc
extern const ValueType kValueTypeForSeek;
extern const ValueType kValueTypeForSeekForPrev;

// Checks whether a type is an inline value type
// (i.e. a type used in the memtable skiplist and sst file datablocks).
inline bool IsValueType(ValueType t) {
  return t <= kTypeMerge || t == kTypeSingleDeletion || t == kTypeBlobIndex;
}

// Checks whether a type is from a user operation.
// kTypeRangeDeletion is in the meta block, so this API is separate from the
// one above.
inline bool IsExtendedValueType(ValueType t) {
  return IsValueType(t) || t == kTypeRangeDeletion;
}

// We leave eight bits empty at the bottom so a type and sequence#
// can be packed together into 64 bits.
static const SequenceNumber kMaxSequenceNumber = ((0x1ull << 56) - 1);

static const SequenceNumber kDisableGlobalSequenceNumber = port::kMaxUint64;

struct ParsedInternalKey {
  Slice user_key;
  SequenceNumber sequence;
  ValueType type;

  ParsedInternalKey()
      : sequence(kMaxSequenceNumber)  // Make code analyzer happy
  {}  // Intentionally left uninitialized (for speed)
  ParsedInternalKey(const Slice& u, const SequenceNumber& seq, ValueType t)
      : user_key(u), sequence(seq), type(t) {}
  std::string DebugString(bool hex = false) const;

  void clear() {
    user_key.clear();
    sequence = 0;
    type = kTypeDeletion;
  }
};

// Return the length of the encoding of "key".
inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) {
  return key.user_key.size() + 8;
}

// Pack a sequence number and a ValueType into a uint64_t
extern uint64_t PackSequenceAndType(uint64_t seq, ValueType t);

// Given the result of PackSequenceAndType, store the sequence number in *seq
// and the ValueType in *t.
extern void UnPackSequenceAndType(uint64_t packed, uint64_t* seq, ValueType* t);

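// Illustrative sketch (hypothetical values): the packed footer stores the
// sequence number in the upper 56 bits and the ValueType in the low byte, so
// a round trip looks like:
//
//   uint64_t packed = PackSequenceAndType(/*seq=*/42, kTypeValue);
//   uint64_t seq;
//   ValueType type;
//   UnPackSequenceAndType(packed, &seq, &type);  // seq == 42, type == kTypeValue
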
EntryType GetEntryType(ValueType value_type);

// Append the serialization of "key" to *result.
extern void AppendInternalKey(std::string* result,
                              const ParsedInternalKey& key);
// A serialized internal key consists of a user key followed by a footer.
// This function appends the footer to *result, assuming that *result already
// contains the user key at the end.
extern void AppendInternalKeyFooter(std::string* result, SequenceNumber s,
                                    ValueType t);

// Attempt to parse an internal key from "internal_key". On success,
// stores the parsed data in "*result", and returns true.
//
// On error, returns false and leaves "*result" in an undefined state.
extern bool ParseInternalKey(const Slice& internal_key,
                             ParsedInternalKey* result);

// Returns the user key portion of an internal key.
inline Slice ExtractUserKey(const Slice& internal_key) {
  assert(internal_key.size() >= 8);
  return Slice(internal_key.data(), internal_key.size() - 8);
}

inline uint64_t ExtractInternalKeyFooter(const Slice& internal_key) {
  assert(internal_key.size() >= 8);
  const size_t n = internal_key.size();
  return DecodeFixed64(internal_key.data() + n - 8);
}

inline ValueType ExtractValueType(const Slice& internal_key) {
  uint64_t num = ExtractInternalKeyFooter(internal_key);
  unsigned char c = num & 0xff;
  return static_cast<ValueType>(c);
}

// A comparator for internal keys that uses a specified comparator for
// the user key portion and breaks ties by decreasing sequence number.
class InternalKeyComparator
#ifdef NDEBUG
    final
#endif
    : public Comparator {
 private:
  const Comparator* user_comparator_;
  std::string name_;

 public:
  explicit InternalKeyComparator(const Comparator* c)
      : user_comparator_(c),
        name_("rocksdb.InternalKeyComparator:" +
              std::string(user_comparator_->Name())) {}
  virtual ~InternalKeyComparator() {}

  virtual const char* Name() const override;
  virtual int Compare(const Slice& a, const Slice& b) const override;
  // Same as Compare except that it excludes the value type from comparison
  virtual int CompareKeySeq(const Slice& a, const Slice& b) const;
  virtual void FindShortestSeparator(std::string* start,
                                     const Slice& limit) const override;
  virtual void FindShortSuccessor(std::string* key) const override;

  const Comparator* user_comparator() const { return user_comparator_; }

  int Compare(const InternalKey& a, const InternalKey& b) const;
  int Compare(const ParsedInternalKey& a, const ParsedInternalKey& b) const;
  virtual const Comparator* GetRootComparator() const override {
    return user_comparator_->GetRootComparator();
  }
};
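
// Illustrative sketch (hypothetical keys): for the same user key, the
// internal-key ordering puts higher sequence numbers (newer entries) first:
//
//   InternalKeyComparator icmp(BytewiseComparator());
//   std::string a, b;
//   AppendInternalKey(&a, ParsedInternalKey("k", /*seq=*/9, kTypeValue));
//   AppendInternalKey(&b, ParsedInternalKey("k", /*seq=*/5, kTypeValue));
//   assert(icmp.Compare(Slice(a), Slice(b)) < 0);  // newer entry sorts first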

// Modules in this directory should keep internal keys wrapped inside
// the following class instead of plain strings so that we do not
// incorrectly use string comparisons instead of an InternalKeyComparator.
class InternalKey {
 private:
  std::string rep_;

 public:
  InternalKey() {}  // Leave rep_ as empty to indicate it is invalid
  InternalKey(const Slice& _user_key, SequenceNumber s, ValueType t) {
    AppendInternalKey(&rep_, ParsedInternalKey(_user_key, s, t));
  }

  // Sets the internal key to be greater than or equal to all internal keys
  // with this user key.
  void SetMaxPossibleForUserKey(const Slice& _user_key) {
    AppendInternalKey(
        &rep_, ParsedInternalKey(_user_key, 0, static_cast<ValueType>(0)));
  }

  // Sets the internal key to be less than or equal to all internal keys with
  // this user key.
  void SetMinPossibleForUserKey(const Slice& _user_key) {
    AppendInternalKey(&rep_, ParsedInternalKey(_user_key, kMaxSequenceNumber,
                                               kValueTypeForSeek));
  }

  bool Valid() const {
    ParsedInternalKey parsed;
    return ParseInternalKey(Slice(rep_), &parsed);
  }

  void DecodeFrom(const Slice& s) { rep_.assign(s.data(), s.size()); }
  Slice Encode() const {
    assert(!rep_.empty());
    return rep_;
  }

  Slice user_key() const { return ExtractUserKey(rep_); }
  size_t size() { return rep_.size(); }

  void Set(const Slice& _user_key, SequenceNumber s, ValueType t) {
    SetFrom(ParsedInternalKey(_user_key, s, t));
  }

  void SetFrom(const ParsedInternalKey& p) {
    rep_.clear();
    AppendInternalKey(&rep_, p);
  }

  void Clear() { rep_.clear(); }

  // The underlying representation.
  // Intended only to be used together with ConvertFromUserKey().
  std::string* rep() { return &rep_; }

  // Assuming that *rep() contains a user key, this method makes an internal
  // key out of it in place. This saves a memcpy compared to Set()/SetFrom().
  void ConvertFromUserKey(SequenceNumber s, ValueType t) {
    AppendInternalKeyFooter(&rep_, s, t);
  }

  std::string DebugString(bool hex = false) const;
};
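
// Illustrative sketch (hypothetical user key and sequence number): building
// an InternalKey and inspecting its parts.
//
//   InternalKey ikey("foo", /*s=*/7, kTypeValue);
//   Slice encoded = ikey.Encode();                // "foo" + 8-byte footer
//   assert(ikey.user_key() == Slice("foo"));
//   assert(ExtractValueType(encoded) == kTypeValue);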

inline int InternalKeyComparator::Compare(const InternalKey& a,
                                          const InternalKey& b) const {
  return Compare(a.Encode(), b.Encode());
}

inline bool ParseInternalKey(const Slice& internal_key,
                             ParsedInternalKey* result) {
  const size_t n = internal_key.size();
  if (n < 8) return false;
  uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
  unsigned char c = num & 0xff;
  result->sequence = num >> 8;
  result->type = static_cast<ValueType>(c);
  assert(result->type <= ValueType::kMaxValue);
  result->user_key = Slice(internal_key.data(), n - 8);
  return IsExtendedValueType(result->type);
}

// Update the sequence number in the internal key.
// Guarantees not to invalidate ikey.data().
inline void UpdateInternalKey(std::string* ikey, uint64_t seq, ValueType t) {
  size_t ikey_sz = ikey->size();
  assert(ikey_sz >= 8);
  uint64_t newval = (seq << 8) | t;

  // Note: Since C++11, strings are guaranteed to be stored contiguously and
  // string::operator[]() is guaranteed not to change ikey.data().
  EncodeFixed64(&(*ikey)[ikey_sz - 8], newval);
}

// Get the sequence number from the internal key
inline uint64_t GetInternalKeySeqno(const Slice& internal_key) {
  const size_t n = internal_key.size();
  assert(n >= 8);
  uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
  return num >> 8;
}
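
// Illustrative sketch (hypothetical values): UpdateInternalKey rewrites the
// 8-byte footer in place, so the sequence number read back changes while the
// user key bytes stay put.
//
//   std::string ikey;
//   AppendInternalKey(&ikey, ParsedInternalKey("foo", /*seq=*/1, kTypeValue));
//   UpdateInternalKey(&ikey, /*seq=*/2, kTypeValue);
//   assert(GetInternalKeySeqno(Slice(ikey)) == 2);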

// A helper class useful for DBImpl::Get()
class LookupKey {
 public:
  // Initialize *this for looking up user_key at a snapshot with
  // the specified sequence number.
  LookupKey(const Slice& _user_key, SequenceNumber sequence);

  ~LookupKey();

  // Return a key suitable for lookup in a MemTable.
  Slice memtable_key() const {
    return Slice(start_, static_cast<size_t>(end_ - start_));
  }

  // Return an internal key (suitable for passing to an internal iterator)
  Slice internal_key() const {
    return Slice(kstart_, static_cast<size_t>(end_ - kstart_));
  }

  // Return the user key
  Slice user_key() const {
    return Slice(kstart_, static_cast<size_t>(end_ - kstart_ - 8));
  }

 private:
  // We construct a char array of the form:
  //    klength  varint32               <-- start_
  //    userkey  char[klength]          <-- kstart_
  //    tag      uint64
  //                                    <-- end_
  // The array is a suitable MemTable key.
  // The suffix starting with "userkey" can be used as an InternalKey.
  const char* start_;
  const char* kstart_;
  const char* end_;
  char space_[200];  // Avoid allocation for short keys

  // No copying allowed
  LookupKey(const LookupKey&);
  void operator=(const LookupKey&);
};

inline LookupKey::~LookupKey() {
  if (start_ != space_) delete[] start_;
}
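
// Illustrative sketch (hypothetical key and snapshot): the three accessors
// return nested views of the same buffer.
//
//   LookupKey lkey("foo", /*sequence=*/100);
//   assert(lkey.user_key() == Slice("foo"));
//   assert(lkey.internal_key().size() == 3 + 8);  // user key + 8-byte tag
//   assert(lkey.memtable_key().size() > lkey.internal_key().size());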

class IterKey {
 public:
  IterKey()
      : buf_(space_),
        buf_size_(sizeof(space_)),
        key_(buf_),
        key_size_(0),
        is_user_key_(true) {}

  ~IterKey() { ResetBuffer(); }

  // The bool will be picked up by the next calls to SetKey
  void SetIsUserKey(bool is_user_key) { is_user_key_ = is_user_key; }

  // Returns the key in whichever format was provided to IterKey
  Slice GetKey() const { return Slice(key_, key_size_); }

  Slice GetInternalKey() const {
    assert(!IsUserKey());
    return Slice(key_, key_size_);
  }

  Slice GetUserKey() const {
    if (IsUserKey()) {
      return Slice(key_, key_size_);
    } else {
      assert(key_size_ >= 8);
      return Slice(key_, key_size_ - 8);
    }
  }

  size_t Size() const { return key_size_; }

  void Clear() { key_size_ = 0; }

  // Trim the key to its first "shared_len" bytes, then append "non_shared_data".
  // This function is used in Block::Iter::ParseNextKey.
  // shared_len: bytes in [0, shared_len-1] are retained.
  // non_shared_data: data to be appended; its length must be >= non_shared_len.
  void TrimAppend(const size_t shared_len, const char* non_shared_data,
                  const size_t non_shared_len) {
    assert(shared_len <= key_size_);
    size_t total_size = shared_len + non_shared_len;

    if (IsKeyPinned() /* key is not in buf_ */) {
      // Copy the key from external memory to buf_ (copy shared_len bytes)
      EnlargeBufferIfNeeded(total_size);
      memcpy(buf_, key_, shared_len);
    } else if (total_size > buf_size_) {
      // Need to allocate space, delete previous space
      char* p = new char[total_size];
      memcpy(p, key_, shared_len);

      if (buf_ != space_) {
        delete[] buf_;
      }

      buf_ = p;
      buf_size_ = total_size;
    }

    memcpy(buf_ + shared_len, non_shared_data, non_shared_len);
    key_ = buf_;
    key_size_ = total_size;
  }

  Slice SetKey(const Slice& key, bool copy = true) {
    // is_user_key_ expected to be set already via SetIsUserKey
    return SetKeyImpl(key, copy);
  }

  Slice SetUserKey(const Slice& key, bool copy = true) {
    is_user_key_ = true;
    return SetKeyImpl(key, copy);
  }

  Slice SetInternalKey(const Slice& key, bool copy = true) {
    is_user_key_ = false;
    return SetKeyImpl(key, copy);
  }

  // Copies the content of key, updates the reference to the user key in ikey
  // and returns a Slice referencing the new copy.
  Slice SetInternalKey(const Slice& key, ParsedInternalKey* ikey) {
    size_t key_n = key.size();
    assert(key_n >= 8);
    SetInternalKey(key);
    ikey->user_key = Slice(key_, key_n - 8);
    return Slice(key_, key_n);
  }

  // Copy the key into IterKey's own buf_
  void OwnKey() {
    assert(IsKeyPinned() == true);

    Reserve(key_size_);
    memcpy(buf_, key_, key_size_);
    key_ = buf_;
  }

  // Update the sequence number in the internal key. Guarantees not to
  // invalidate slices to the key (and the user key).
  void UpdateInternalKey(uint64_t seq, ValueType t) {
    assert(!IsKeyPinned());
    assert(key_size_ >= 8);
    uint64_t newval = (seq << 8) | t;
    EncodeFixed64(&buf_[key_size_ - 8], newval);
  }

  bool IsKeyPinned() const { return (key_ != buf_); }

  void SetInternalKey(const Slice& key_prefix, const Slice& user_key,
                      SequenceNumber s,
                      ValueType value_type = kValueTypeForSeek) {
    size_t psize = key_prefix.size();
    size_t usize = user_key.size();
    EnlargeBufferIfNeeded(psize + usize + sizeof(uint64_t));
    if (psize > 0) {
      memcpy(buf_, key_prefix.data(), psize);
    }
    memcpy(buf_ + psize, user_key.data(), usize);
    EncodeFixed64(buf_ + usize + psize, PackSequenceAndType(s, value_type));

    key_ = buf_;
    key_size_ = psize + usize + sizeof(uint64_t);
    is_user_key_ = false;
  }

  void SetInternalKey(const Slice& user_key, SequenceNumber s,
                      ValueType value_type = kValueTypeForSeek) {
    SetInternalKey(Slice(), user_key, s, value_type);
  }

  void Reserve(size_t size) {
    EnlargeBufferIfNeeded(size);
    key_size_ = size;
  }

  void SetInternalKey(const ParsedInternalKey& parsed_key) {
    SetInternalKey(Slice(), parsed_key);
  }

  void SetInternalKey(const Slice& key_prefix,
                      const ParsedInternalKey& parsed_key_suffix) {
    SetInternalKey(key_prefix, parsed_key_suffix.user_key,
                   parsed_key_suffix.sequence, parsed_key_suffix.type);
  }

  void EncodeLengthPrefixedKey(const Slice& key) {
    auto size = key.size();
    EnlargeBufferIfNeeded(size + static_cast<size_t>(VarintLength(size)));
    char* ptr = EncodeVarint32(buf_, static_cast<uint32_t>(size));
    memcpy(ptr, key.data(), size);
    key_ = buf_;
    is_user_key_ = true;
  }

  bool IsUserKey() const { return is_user_key_; }

 private:
  char* buf_;
  size_t buf_size_;
  const char* key_;
  size_t key_size_;
  char space_[32];  // Avoid allocation for short keys
  bool is_user_key_;

  Slice SetKeyImpl(const Slice& key, bool copy) {
    size_t size = key.size();
    if (copy) {
      // Copy key to buf_
      EnlargeBufferIfNeeded(size);
      memcpy(buf_, key.data(), size);
      key_ = buf_;
    } else {
      // Update key_ to point to external memory
      key_ = key.data();
    }
    key_size_ = size;
    return Slice(key_, key_size_);
  }

  void ResetBuffer() {
    if (buf_ != space_) {
      delete[] buf_;
      buf_ = space_;
    }
    buf_size_ = sizeof(space_);
    key_size_ = 0;
  }

  // Enlarge the buffer size if needed based on key_size.
  // By default, the statically allocated buffer is used. Once a key is
  // larger than the statically allocated buffer, another buffer is
  // dynamically allocated, until an even larger key buffer is requested.
  // In that case, we reallocate the buffer and delete the old one.
  void EnlargeBufferIfNeeded(size_t key_size) {
    // If the size is smaller than the buffer size, continue using the current
    // buffer, or the statically allocated one, as the default.
    if (key_size > buf_size_) {
      EnlargeBuffer(key_size);
    }
  }

  void EnlargeBuffer(size_t key_size);

  // No copying allowed
  IterKey(const IterKey&) = delete;
  void operator=(const IterKey&) = delete;
};
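
// Illustrative sketch (hypothetical user key and sequence number): setting an
// internal key from its parts and reading the user key back.
//
//   IterKey iter_key;
//   iter_key.SetInternalKey("bar", /*s=*/3, kTypeValue);
//   assert(!iter_key.IsUserKey());
//   assert(iter_key.GetUserKey() == Slice("bar"));
//   assert(iter_key.GetInternalKey().size() == 3 + 8);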

class InternalKeySliceTransform : public SliceTransform {
 public:
  explicit InternalKeySliceTransform(const SliceTransform* transform)
      : transform_(transform) {}

  virtual const char* Name() const override { return transform_->Name(); }

  virtual Slice Transform(const Slice& src) const override {
    auto user_key = ExtractUserKey(src);
    return transform_->Transform(user_key);
  }

  virtual bool InDomain(const Slice& src) const override {
    auto user_key = ExtractUserKey(src);
    return transform_->InDomain(user_key);
  }

  virtual bool InRange(const Slice& dst) const override {
    auto user_key = ExtractUserKey(dst);
    return transform_->InRange(user_key);
  }

  const SliceTransform* user_prefix_extractor() const { return transform_; }

 private:
  // Like the comparator, InternalKeySliceTransform does not take care of the
  // deletion of transform_
  const SliceTransform* const transform_;
};
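
// Illustrative sketch (hypothetical prefix length and key): the transform
// strips the 8-byte footer before delegating to the user prefix extractor.
//
//   InternalKeySliceTransform t(NewFixedPrefixTransform(/*prefix_len=*/3));
//   std::string ikey;
//   AppendInternalKey(&ikey, ParsedInternalKey("foobar", /*seq=*/1, kTypeValue));
//   assert(t.Transform(Slice(ikey)) == Slice("foo"));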

// Read the key of a record from a write batch.
// If this record represents the default column family then cf_record
// must be passed as false, otherwise it must be passed as true.
extern bool ReadKeyFromWriteBatchEntry(Slice* input, Slice* key,
                                       bool cf_record);

// Read a record from a write batch piece from input.
// tag, column_family, key, value and blob are return values. Callers own the
// Slices they point to.
// Tag is defined as ValueType.
// input will be advanced to after the record.
extern Status ReadRecordFromWriteBatch(Slice* input, char* tag,
                                       uint32_t* column_family, Slice* key,
                                       Slice* value, Slice* blob, Slice* xid);

// When the user calls DeleteRange() to delete a range of keys, we store a
// serialized RangeTombstone in the MemTable and in SST files. The struct here
// is the easy-to-understand, in-memory form:
// start_key_/end_key_ are the start/end user keys of the range to be deleted.
struct RangeTombstone {
  Slice start_key_;
  Slice end_key_;
  SequenceNumber seq_;
  RangeTombstone() = default;
  RangeTombstone(Slice sk, Slice ek, SequenceNumber sn)
      : start_key_(sk), end_key_(ek), seq_(sn) {}

  RangeTombstone(ParsedInternalKey parsed_key, Slice value) {
    start_key_ = parsed_key.user_key;
    seq_ = parsed_key.sequence;
    end_key_ = value;
  }

  // Be careful when using Serialize(); it allocates new memory.
  std::pair<InternalKey, Slice> Serialize() const {
    auto key = InternalKey(start_key_, seq_, kTypeRangeDeletion);
    Slice value = end_key_;
    return std::make_pair(std::move(key), std::move(value));
  }

  // Be careful when using SerializeKey(); it allocates new memory.
  InternalKey SerializeKey() const {
    return InternalKey(start_key_, seq_, kTypeRangeDeletion);
  }

  // The tombstone end-key is exclusive, so we generate an internal-key here
  // which has a similar property. Using kMaxSequenceNumber guarantees that
  // the returned internal-key will compare less than any other internal-key
  // with the same user-key. This in turn guarantees that the serialized
  // end-key for a tombstone such as [a-b] will compare less than the key "b".
  //
  // Be careful when using SerializeEndKey(); it allocates new memory.
  InternalKey SerializeEndKey() const {
    return InternalKey(end_key_, kMaxSequenceNumber, kTypeRangeDeletion);
  }
};
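
// Illustrative sketch (hypothetical range and sequence number): a tombstone
// covering the user-key range ["a", "b") written at sequence number 5
// serializes to an internal start key tagged kTypeRangeDeletion, with the end
// user key carried as the value.
//
//   RangeTombstone tombstone("a", "b", /*sn=*/5);
//   auto kv = tombstone.Serialize();
//   assert(kv.first.user_key() == Slice("a"));
//   assert(kv.second == Slice("b"));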

inline int InternalKeyComparator::Compare(const Slice& akey,
                                          const Slice& bkey) const {
  // Order by:
  //    increasing user key (according to user-supplied comparator)
  //    decreasing sequence number
  //    decreasing type (though sequence# should be enough to disambiguate)
  int r = user_comparator_->Compare(ExtractUserKey(akey), ExtractUserKey(bkey));
  PERF_COUNTER_ADD(user_key_comparison_count, 1);
  if (r == 0) {
    const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8);
    const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8);
    if (anum > bnum) {
      r = -1;
    } else if (anum < bnum) {
      r = +1;
    }
  }
  return r;
}

inline int InternalKeyComparator::CompareKeySeq(const Slice& akey,
                                                const Slice& bkey) const {
  // Order by:
  //    increasing user key (according to user-supplied comparator)
  //    decreasing sequence number
  int r = user_comparator_->Compare(ExtractUserKey(akey), ExtractUserKey(bkey));
  PERF_COUNTER_ADD(user_key_comparison_count, 1);
  if (r == 0) {
    // Shift the number to exclude the last byte which contains the value type
    const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8) >> 8;
    const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8) >> 8;
    if (anum > bnum) {
      r = -1;
    } else if (anum < bnum) {
      r = +1;
    }
  }
  return r;
}

}  // namespace rocksdb