]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | // |
6 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |
7 | // Use of this source code is governed by a BSD-style license that can be | |
8 | // found in the LICENSE file. See the AUTHORS file for names of contributors. | |
9 | ||
10 | #pragma once | |
11 | #include <stdio.h> | |
12 | #include <string> | |
13 | #include <utility> | |
11fdf7f2 | 14 | #include "monitoring/perf_context_imp.h" |
7c673cae FG |
15 | #include "rocksdb/comparator.h" |
16 | #include "rocksdb/db.h" | |
17 | #include "rocksdb/filter_policy.h" | |
18 | #include "rocksdb/slice.h" | |
19 | #include "rocksdb/slice_transform.h" | |
20 | #include "rocksdb/table.h" | |
21 | #include "rocksdb/types.h" | |
22 | #include "util/coding.h" | |
23 | #include "util/logging.h" | |
494da23a | 24 | #include "util/user_comparator_wrapper.h" |
7c673cae FG |
25 | |
26 | namespace rocksdb { | |
27 | ||
28 | class InternalKey; | |
29 | ||
30 | // Value types encoded as the last component of internal keys. | |
31 | // DO NOT CHANGE THESE ENUM VALUES: they are embedded in the on-disk | |
32 | // data structures. | |
33 | // The highest bit of the value type needs to be reserved to SST tables | |
34 | // for them to do more flexible encoding. | |
35 | enum ValueType : unsigned char { | |
36 | kTypeDeletion = 0x0, | |
37 | kTypeValue = 0x1, | |
38 | kTypeMerge = 0x2, | |
39 | kTypeLogData = 0x3, // WAL only. | |
40 | kTypeColumnFamilyDeletion = 0x4, // WAL only. | |
41 | kTypeColumnFamilyValue = 0x5, // WAL only. | |
42 | kTypeColumnFamilyMerge = 0x6, // WAL only. | |
43 | kTypeSingleDeletion = 0x7, | |
44 | kTypeColumnFamilySingleDeletion = 0x8, // WAL only. | |
45 | kTypeBeginPrepareXID = 0x9, // WAL only. | |
46 | kTypeEndPrepareXID = 0xA, // WAL only. | |
47 | kTypeCommitXID = 0xB, // WAL only. | |
48 | kTypeRollbackXID = 0xC, // WAL only. | |
49 | kTypeNoop = 0xD, // WAL only. | |
50 | kTypeColumnFamilyRangeDeletion = 0xE, // WAL only. | |
51 | kTypeRangeDeletion = 0xF, // meta block | |
11fdf7f2 TL |
52 | kTypeColumnFamilyBlobIndex = 0x10, // Blob DB only |
53 | kTypeBlobIndex = 0x11, // Blob DB only | |
54 | // When the prepared record is also persisted in db, we use a different | |
55 | // record. This is to ensure that the WAL that is generated by a WritePolicy | |
56 | // is not mistakenly read by another, which would result into data | |
57 | // inconsistency. | |
58 | kTypeBeginPersistedPrepareXID = 0x12, // WAL only. | |
59 | // Similar to kTypeBeginPersistedPrepareXID, this is to ensure that WAL | |
60 | // generated by WriteUnprepared write policy is not mistakenly read by | |
61 | // another. | |
62 | kTypeBeginUnprepareXID = 0x13, // WAL only. | |
63 | kMaxValue = 0x7F // Not used for storing records. | |
7c673cae FG |
64 | }; |
65 | ||
66 | // Defined in dbformat.cc | |
67 | extern const ValueType kValueTypeForSeek; | |
68 | extern const ValueType kValueTypeForSeekForPrev; | |
69 | ||
70 | // Checks whether a type is an inline value type | |
71 | // (i.e. a type used in memtable skiplist and sst file datablock). | |
72 | inline bool IsValueType(ValueType t) { | |
11fdf7f2 | 73 | return t <= kTypeMerge || t == kTypeSingleDeletion || t == kTypeBlobIndex; |
7c673cae FG |
74 | } |
75 | ||
76 | // Checks whether a type is from user operation | |
77 | // kTypeRangeDeletion is in meta block so this API is separated from above | |
78 | inline bool IsExtendedValueType(ValueType t) { | |
79 | return IsValueType(t) || t == kTypeRangeDeletion; | |
80 | } | |
81 | ||
82 | // We leave eight bits empty at the bottom so a type and sequence# | |
83 | // can be packed together into 64-bits. | |
494da23a | 84 | static const SequenceNumber kMaxSequenceNumber = ((0x1ull << 56) - 1); |
7c673cae FG |
85 | |
86 | static const SequenceNumber kDisableGlobalSequenceNumber = port::kMaxUint64; | |
87 | ||
88 | struct ParsedInternalKey { | |
89 | Slice user_key; | |
90 | SequenceNumber sequence; | |
91 | ValueType type; | |
92 | ||
11fdf7f2 TL |
93 | ParsedInternalKey() |
94 | : sequence(kMaxSequenceNumber) // Make code analyzer happy | |
494da23a | 95 | {} // Intentionally left uninitialized (for speed) |
7c673cae | 96 | ParsedInternalKey(const Slice& u, const SequenceNumber& seq, ValueType t) |
494da23a | 97 | : user_key(u), sequence(seq), type(t) {} |
7c673cae | 98 | std::string DebugString(bool hex = false) const; |
11fdf7f2 TL |
99 | |
100 | void clear() { | |
101 | user_key.clear(); | |
102 | sequence = 0; | |
103 | type = kTypeDeletion; | |
104 | } | |
7c673cae FG |
105 | }; |
106 | ||
107 | // Return the length of the encoding of "key". | |
108 | inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) { | |
109 | return key.user_key.size() + 8; | |
110 | } | |
111 | ||
112 | // Pack a sequence number and a ValueType into a uint64_t | |
113 | extern uint64_t PackSequenceAndType(uint64_t seq, ValueType t); | |
114 | ||
115 | // Given the result of PackSequenceAndType, store the sequence number in *seq | |
116 | // and the ValueType in *t. | |
117 | extern void UnPackSequenceAndType(uint64_t packed, uint64_t* seq, ValueType* t); | |
118 | ||
11fdf7f2 TL |
119 | EntryType GetEntryType(ValueType value_type); |
120 | ||
7c673cae FG |
121 | // Append the serialization of "key" to *result. |
122 | extern void AppendInternalKey(std::string* result, | |
123 | const ParsedInternalKey& key); | |
124 | // Serialized internal key consists of user key followed by footer. | |
125 | // This function appends the footer to *result, assuming that *result already | |
126 | // contains the user key at the end. | |
127 | extern void AppendInternalKeyFooter(std::string* result, SequenceNumber s, | |
128 | ValueType t); | |
129 | ||
130 | // Attempt to parse an internal key from "internal_key". On success, | |
131 | // stores the parsed data in "*result", and returns true. | |
132 | // | |
133 | // On error, returns false, leaves "*result" in an undefined state. | |
134 | extern bool ParseInternalKey(const Slice& internal_key, | |
135 | ParsedInternalKey* result); | |
136 | ||
137 | // Returns the user key portion of an internal key. | |
138 | inline Slice ExtractUserKey(const Slice& internal_key) { | |
139 | assert(internal_key.size() >= 8); | |
140 | return Slice(internal_key.data(), internal_key.size() - 8); | |
141 | } | |
142 | ||
11fdf7f2 | 143 | inline uint64_t ExtractInternalKeyFooter(const Slice& internal_key) { |
7c673cae FG |
144 | assert(internal_key.size() >= 8); |
145 | const size_t n = internal_key.size(); | |
11fdf7f2 TL |
146 | return DecodeFixed64(internal_key.data() + n - 8); |
147 | } | |
148 | ||
149 | inline ValueType ExtractValueType(const Slice& internal_key) { | |
150 | uint64_t num = ExtractInternalKeyFooter(internal_key); | |
7c673cae FG |
151 | unsigned char c = num & 0xff; |
152 | return static_cast<ValueType>(c); | |
153 | } | |
154 | ||
155 | // A comparator for internal keys that uses a specified comparator for | |
156 | // the user key portion and breaks ties by decreasing sequence number. | |
11fdf7f2 TL |
157 | class InternalKeyComparator |
158 | #ifdef NDEBUG | |
159 | final | |
160 | #endif | |
161 | : public Comparator { | |
7c673cae | 162 | private: |
494da23a | 163 | UserComparatorWrapper user_comparator_; |
7c673cae | 164 | std::string name_; |
494da23a | 165 | |
7c673cae | 166 | public: |
494da23a TL |
167 | explicit InternalKeyComparator(const Comparator* c) |
168 | : user_comparator_(c), | |
169 | name_("rocksdb.InternalKeyComparator:" + | |
170 | std::string(user_comparator_.Name())) {} | |
7c673cae FG |
171 | virtual ~InternalKeyComparator() {} |
172 | ||
173 | virtual const char* Name() const override; | |
174 | virtual int Compare(const Slice& a, const Slice& b) const override; | |
11fdf7f2 TL |
175 | // Same as Compare except that it excludes the value type from comparison |
176 | virtual int CompareKeySeq(const Slice& a, const Slice& b) const; | |
7c673cae FG |
177 | virtual void FindShortestSeparator(std::string* start, |
178 | const Slice& limit) const override; | |
179 | virtual void FindShortSuccessor(std::string* key) const override; | |
180 | ||
494da23a TL |
181 | const Comparator* user_comparator() const { |
182 | return user_comparator_.user_comparator(); | |
183 | } | |
7c673cae FG |
184 | |
185 | int Compare(const InternalKey& a, const InternalKey& b) const; | |
186 | int Compare(const ParsedInternalKey& a, const ParsedInternalKey& b) const; | |
11fdf7f2 | 187 | virtual const Comparator* GetRootComparator() const override { |
494da23a | 188 | return user_comparator_.GetRootComparator(); |
11fdf7f2 | 189 | } |
7c673cae FG |
190 | }; |
191 | ||
192 | // Modules in this directory should keep internal keys wrapped inside | |
193 | // the following class instead of plain strings so that we do not | |
194 | // incorrectly use string comparisons instead of an InternalKeyComparator. | |
195 | class InternalKey { | |
196 | private: | |
197 | std::string rep_; | |
494da23a | 198 | |
7c673cae | 199 | public: |
494da23a | 200 | InternalKey() {} // Leave rep_ as empty to indicate it is invalid |
7c673cae FG |
201 | InternalKey(const Slice& _user_key, SequenceNumber s, ValueType t) { |
202 | AppendInternalKey(&rep_, ParsedInternalKey(_user_key, s, t)); | |
203 | } | |
204 | ||
205 | // sets the internal key to be bigger or equal to all internal keys with this | |
206 | // user key | |
207 | void SetMaxPossibleForUserKey(const Slice& _user_key) { | |
11fdf7f2 TL |
208 | AppendInternalKey( |
209 | &rep_, ParsedInternalKey(_user_key, 0, static_cast<ValueType>(0))); | |
7c673cae FG |
210 | } |
211 | ||
212 | // sets the internal key to be smaller or equal to all internal keys with this | |
213 | // user key | |
214 | void SetMinPossibleForUserKey(const Slice& _user_key) { | |
11fdf7f2 TL |
215 | AppendInternalKey(&rep_, ParsedInternalKey(_user_key, kMaxSequenceNumber, |
216 | kValueTypeForSeek)); | |
7c673cae FG |
217 | } |
218 | ||
219 | bool Valid() const { | |
220 | ParsedInternalKey parsed; | |
221 | return ParseInternalKey(Slice(rep_), &parsed); | |
222 | } | |
223 | ||
224 | void DecodeFrom(const Slice& s) { rep_.assign(s.data(), s.size()); } | |
225 | Slice Encode() const { | |
226 | assert(!rep_.empty()); | |
227 | return rep_; | |
228 | } | |
229 | ||
230 | Slice user_key() const { return ExtractUserKey(rep_); } | |
231 | size_t size() { return rep_.size(); } | |
232 | ||
233 | void Set(const Slice& _user_key, SequenceNumber s, ValueType t) { | |
234 | SetFrom(ParsedInternalKey(_user_key, s, t)); | |
235 | } | |
236 | ||
237 | void SetFrom(const ParsedInternalKey& p) { | |
238 | rep_.clear(); | |
239 | AppendInternalKey(&rep_, p); | |
240 | } | |
241 | ||
242 | void Clear() { rep_.clear(); } | |
243 | ||
244 | // The underlying representation. | |
245 | // Intended only to be used together with ConvertFromUserKey(). | |
246 | std::string* rep() { return &rep_; } | |
247 | ||
248 | // Assuming that *rep() contains a user key, this method makes internal key | |
249 | // out of it in-place. This saves a memcpy compared to Set()/SetFrom(). | |
250 | void ConvertFromUserKey(SequenceNumber s, ValueType t) { | |
251 | AppendInternalKeyFooter(&rep_, s, t); | |
252 | } | |
253 | ||
254 | std::string DebugString(bool hex = false) const; | |
255 | }; | |
256 | ||
494da23a TL |
257 | inline int InternalKeyComparator::Compare(const InternalKey& a, |
258 | const InternalKey& b) const { | |
7c673cae FG |
259 | return Compare(a.Encode(), b.Encode()); |
260 | } | |
261 | ||
262 | inline bool ParseInternalKey(const Slice& internal_key, | |
263 | ParsedInternalKey* result) { | |
264 | const size_t n = internal_key.size(); | |
265 | if (n < 8) return false; | |
266 | uint64_t num = DecodeFixed64(internal_key.data() + n - 8); | |
267 | unsigned char c = num & 0xff; | |
268 | result->sequence = num >> 8; | |
269 | result->type = static_cast<ValueType>(c); | |
270 | assert(result->type <= ValueType::kMaxValue); | |
271 | result->user_key = Slice(internal_key.data(), n - 8); | |
272 | return IsExtendedValueType(result->type); | |
273 | } | |
274 | ||
275 | // Update the sequence number in the internal key. | |
276 | // Guarantees not to invalidate ikey.data(). | |
277 | inline void UpdateInternalKey(std::string* ikey, uint64_t seq, ValueType t) { | |
278 | size_t ikey_sz = ikey->size(); | |
279 | assert(ikey_sz >= 8); | |
280 | uint64_t newval = (seq << 8) | t; | |
281 | ||
282 | // Note: Since C++11, strings are guaranteed to be stored contiguously and | |
283 | // string::operator[]() is guaranteed not to change ikey.data(). | |
284 | EncodeFixed64(&(*ikey)[ikey_sz - 8], newval); | |
285 | } | |
286 | ||
287 | // Get the sequence number from the internal key | |
288 | inline uint64_t GetInternalKeySeqno(const Slice& internal_key) { | |
289 | const size_t n = internal_key.size(); | |
290 | assert(n >= 8); | |
291 | uint64_t num = DecodeFixed64(internal_key.data() + n - 8); | |
292 | return num >> 8; | |
293 | } | |
294 | ||
7c673cae FG |
295 | // A helper class useful for DBImpl::Get() |
296 | class LookupKey { | |
297 | public: | |
298 | // Initialize *this for looking up user_key at a snapshot with | |
299 | // the specified sequence number. | |
300 | LookupKey(const Slice& _user_key, SequenceNumber sequence); | |
301 | ||
302 | ~LookupKey(); | |
303 | ||
304 | // Return a key suitable for lookup in a MemTable. | |
305 | Slice memtable_key() const { | |
306 | return Slice(start_, static_cast<size_t>(end_ - start_)); | |
307 | } | |
308 | ||
309 | // Return an internal key (suitable for passing to an internal iterator) | |
310 | Slice internal_key() const { | |
311 | return Slice(kstart_, static_cast<size_t>(end_ - kstart_)); | |
312 | } | |
313 | ||
314 | // Return the user key | |
315 | Slice user_key() const { | |
316 | return Slice(kstart_, static_cast<size_t>(end_ - kstart_ - 8)); | |
317 | } | |
318 | ||
319 | private: | |
320 | // We construct a char array of the form: | |
321 | // klength varint32 <-- start_ | |
322 | // userkey char[klength] <-- kstart_ | |
323 | // tag uint64 | |
324 | // <-- end_ | |
325 | // The array is a suitable MemTable key. | |
326 | // The suffix starting with "userkey" can be used as an InternalKey. | |
327 | const char* start_; | |
328 | const char* kstart_; | |
329 | const char* end_; | |
494da23a | 330 | char space_[200]; // Avoid allocation for short keys |
7c673cae FG |
331 | |
332 | // No copying allowed | |
333 | LookupKey(const LookupKey&); | |
334 | void operator=(const LookupKey&); | |
335 | }; | |
336 | ||
337 | inline LookupKey::~LookupKey() { | |
338 | if (start_ != space_) delete[] start_; | |
339 | } | |
340 | ||
341 | class IterKey { | |
342 | public: | |
343 | IterKey() | |
344 | : buf_(space_), | |
7c673cae FG |
345 | key_(buf_), |
346 | key_size_(0), | |
494da23a | 347 | buf_size_(sizeof(space_)), |
7c673cae FG |
348 | is_user_key_(true) {} |
349 | ||
350 | ~IterKey() { ResetBuffer(); } | |
351 | ||
11fdf7f2 TL |
352 | // The bool will be picked up by the next calls to SetKey |
353 | void SetIsUserKey(bool is_user_key) { is_user_key_ = is_user_key; } | |
354 | ||
355 | // Returns the key in whichever format that was provided to KeyIter | |
356 | Slice GetKey() const { return Slice(key_, key_size_); } | |
357 | ||
7c673cae FG |
358 | Slice GetInternalKey() const { |
359 | assert(!IsUserKey()); | |
360 | return Slice(key_, key_size_); | |
361 | } | |
362 | ||
363 | Slice GetUserKey() const { | |
364 | if (IsUserKey()) { | |
365 | return Slice(key_, key_size_); | |
366 | } else { | |
367 | assert(key_size_ >= 8); | |
368 | return Slice(key_, key_size_ - 8); | |
369 | } | |
370 | } | |
371 | ||
372 | size_t Size() const { return key_size_; } | |
373 | ||
374 | void Clear() { key_size_ = 0; } | |
375 | ||
376 | // Append "non_shared_data" to its back, from "shared_len" | |
377 | // This function is used in Block::Iter::ParseNextKey | |
378 | // shared_len: bytes in [0, shard_len-1] would be remained | |
379 | // non_shared_data: data to be append, its length must be >= non_shared_len | |
380 | void TrimAppend(const size_t shared_len, const char* non_shared_data, | |
381 | const size_t non_shared_len) { | |
382 | assert(shared_len <= key_size_); | |
383 | size_t total_size = shared_len + non_shared_len; | |
384 | ||
385 | if (IsKeyPinned() /* key is not in buf_ */) { | |
386 | // Copy the key from external memory to buf_ (copy shared_len bytes) | |
387 | EnlargeBufferIfNeeded(total_size); | |
388 | memcpy(buf_, key_, shared_len); | |
389 | } else if (total_size > buf_size_) { | |
390 | // Need to allocate space, delete previous space | |
391 | char* p = new char[total_size]; | |
392 | memcpy(p, key_, shared_len); | |
393 | ||
394 | if (buf_ != space_) { | |
395 | delete[] buf_; | |
396 | } | |
397 | ||
398 | buf_ = p; | |
399 | buf_size_ = total_size; | |
400 | } | |
401 | ||
402 | memcpy(buf_ + shared_len, non_shared_data, non_shared_len); | |
403 | key_ = buf_; | |
404 | key_size_ = total_size; | |
405 | } | |
406 | ||
11fdf7f2 TL |
407 | Slice SetKey(const Slice& key, bool copy = true) { |
408 | // is_user_key_ expected to be set already via SetIsUserKey | |
409 | return SetKeyImpl(key, copy); | |
410 | } | |
411 | ||
7c673cae FG |
412 | Slice SetUserKey(const Slice& key, bool copy = true) { |
413 | is_user_key_ = true; | |
414 | return SetKeyImpl(key, copy); | |
415 | } | |
416 | ||
417 | Slice SetInternalKey(const Slice& key, bool copy = true) { | |
418 | is_user_key_ = false; | |
419 | return SetKeyImpl(key, copy); | |
420 | } | |
421 | ||
422 | // Copies the content of key, updates the reference to the user key in ikey | |
423 | // and returns a Slice referencing the new copy. | |
424 | Slice SetInternalKey(const Slice& key, ParsedInternalKey* ikey) { | |
425 | size_t key_n = key.size(); | |
426 | assert(key_n >= 8); | |
427 | SetInternalKey(key); | |
428 | ikey->user_key = Slice(key_, key_n - 8); | |
429 | return Slice(key_, key_n); | |
430 | } | |
431 | ||
432 | // Copy the key into IterKey own buf_ | |
433 | void OwnKey() { | |
434 | assert(IsKeyPinned() == true); | |
435 | ||
436 | Reserve(key_size_); | |
437 | memcpy(buf_, key_, key_size_); | |
438 | key_ = buf_; | |
439 | } | |
440 | ||
441 | // Update the sequence number in the internal key. Guarantees not to | |
442 | // invalidate slices to the key (and the user key). | |
443 | void UpdateInternalKey(uint64_t seq, ValueType t) { | |
444 | assert(!IsKeyPinned()); | |
445 | assert(key_size_ >= 8); | |
446 | uint64_t newval = (seq << 8) | t; | |
447 | EncodeFixed64(&buf_[key_size_ - 8], newval); | |
448 | } | |
449 | ||
450 | bool IsKeyPinned() const { return (key_ != buf_); } | |
451 | ||
452 | void SetInternalKey(const Slice& key_prefix, const Slice& user_key, | |
453 | SequenceNumber s, | |
454 | ValueType value_type = kValueTypeForSeek) { | |
455 | size_t psize = key_prefix.size(); | |
456 | size_t usize = user_key.size(); | |
457 | EnlargeBufferIfNeeded(psize + usize + sizeof(uint64_t)); | |
458 | if (psize > 0) { | |
459 | memcpy(buf_, key_prefix.data(), psize); | |
460 | } | |
461 | memcpy(buf_ + psize, user_key.data(), usize); | |
462 | EncodeFixed64(buf_ + usize + psize, PackSequenceAndType(s, value_type)); | |
463 | ||
464 | key_ = buf_; | |
465 | key_size_ = psize + usize + sizeof(uint64_t); | |
466 | is_user_key_ = false; | |
467 | } | |
468 | ||
469 | void SetInternalKey(const Slice& user_key, SequenceNumber s, | |
470 | ValueType value_type = kValueTypeForSeek) { | |
471 | SetInternalKey(Slice(), user_key, s, value_type); | |
472 | } | |
473 | ||
474 | void Reserve(size_t size) { | |
475 | EnlargeBufferIfNeeded(size); | |
476 | key_size_ = size; | |
477 | } | |
478 | ||
479 | void SetInternalKey(const ParsedInternalKey& parsed_key) { | |
480 | SetInternalKey(Slice(), parsed_key); | |
481 | } | |
482 | ||
483 | void SetInternalKey(const Slice& key_prefix, | |
484 | const ParsedInternalKey& parsed_key_suffix) { | |
485 | SetInternalKey(key_prefix, parsed_key_suffix.user_key, | |
486 | parsed_key_suffix.sequence, parsed_key_suffix.type); | |
487 | } | |
488 | ||
489 | void EncodeLengthPrefixedKey(const Slice& key) { | |
490 | auto size = key.size(); | |
491 | EnlargeBufferIfNeeded(size + static_cast<size_t>(VarintLength(size))); | |
492 | char* ptr = EncodeVarint32(buf_, static_cast<uint32_t>(size)); | |
493 | memcpy(ptr, key.data(), size); | |
494 | key_ = buf_; | |
495 | is_user_key_ = true; | |
496 | } | |
497 | ||
498 | bool IsUserKey() const { return is_user_key_; } | |
499 | ||
500 | private: | |
501 | char* buf_; | |
7c673cae FG |
502 | const char* key_; |
503 | size_t key_size_; | |
494da23a | 504 | size_t buf_size_; |
7c673cae FG |
505 | char space_[32]; // Avoid allocation for short keys |
506 | bool is_user_key_; | |
507 | ||
508 | Slice SetKeyImpl(const Slice& key, bool copy) { | |
509 | size_t size = key.size(); | |
510 | if (copy) { | |
511 | // Copy key to buf_ | |
512 | EnlargeBufferIfNeeded(size); | |
513 | memcpy(buf_, key.data(), size); | |
514 | key_ = buf_; | |
515 | } else { | |
516 | // Update key_ to point to external memory | |
517 | key_ = key.data(); | |
518 | } | |
519 | key_size_ = size; | |
520 | return Slice(key_, key_size_); | |
521 | } | |
522 | ||
523 | void ResetBuffer() { | |
524 | if (buf_ != space_) { | |
525 | delete[] buf_; | |
526 | buf_ = space_; | |
527 | } | |
528 | buf_size_ = sizeof(space_); | |
529 | key_size_ = 0; | |
530 | } | |
531 | ||
532 | // Enlarge the buffer size if needed based on key_size. | |
533 | // By default, static allocated buffer is used. Once there is a key | |
534 | // larger than the static allocated buffer, another buffer is dynamically | |
535 | // allocated, until a larger key buffer is requested. In that case, we | |
536 | // reallocate buffer and delete the old one. | |
537 | void EnlargeBufferIfNeeded(size_t key_size) { | |
538 | // If size is smaller than buffer size, continue using current buffer, | |
539 | // or the static allocated one, as default | |
540 | if (key_size > buf_size_) { | |
11fdf7f2 | 541 | EnlargeBuffer(key_size); |
7c673cae FG |
542 | } |
543 | } | |
544 | ||
11fdf7f2 TL |
545 | void EnlargeBuffer(size_t key_size); |
546 | ||
7c673cae FG |
547 | // No copying allowed |
548 | IterKey(const IterKey&) = delete; | |
549 | void operator=(const IterKey&) = delete; | |
550 | }; | |
551 | ||
552 | class InternalKeySliceTransform : public SliceTransform { | |
553 | public: | |
554 | explicit InternalKeySliceTransform(const SliceTransform* transform) | |
555 | : transform_(transform) {} | |
556 | ||
557 | virtual const char* Name() const override { return transform_->Name(); } | |
558 | ||
559 | virtual Slice Transform(const Slice& src) const override { | |
560 | auto user_key = ExtractUserKey(src); | |
561 | return transform_->Transform(user_key); | |
562 | } | |
563 | ||
564 | virtual bool InDomain(const Slice& src) const override { | |
565 | auto user_key = ExtractUserKey(src); | |
566 | return transform_->InDomain(user_key); | |
567 | } | |
568 | ||
569 | virtual bool InRange(const Slice& dst) const override { | |
570 | auto user_key = ExtractUserKey(dst); | |
571 | return transform_->InRange(user_key); | |
572 | } | |
573 | ||
574 | const SliceTransform* user_prefix_extractor() const { return transform_; } | |
575 | ||
576 | private: | |
577 | // Like comparator, InternalKeySliceTransform will not take care of the | |
578 | // deletion of transform_ | |
579 | const SliceTransform* const transform_; | |
580 | }; | |
581 | ||
582 | // Read the key of a record from a write batch. | |
583 | // if this record represent the default column family then cf_record | |
584 | // must be passed as false, otherwise it must be passed as true. | |
585 | extern bool ReadKeyFromWriteBatchEntry(Slice* input, Slice* key, | |
586 | bool cf_record); | |
587 | ||
588 | // Read record from a write batch piece from input. | |
589 | // tag, column_family, key, value and blob are return values. Callers own the | |
590 | // Slice they point to. | |
591 | // Tag is defined as ValueType. | |
592 | // input will be advanced to after the record. | |
593 | extern Status ReadRecordFromWriteBatch(Slice* input, char* tag, | |
594 | uint32_t* column_family, Slice* key, | |
595 | Slice* value, Slice* blob, Slice* xid); | |
596 | ||
597 | // When user call DeleteRange() to delete a range of keys, | |
598 | // we will store a serialized RangeTombstone in MemTable and SST. | |
599 | // the struct here is a easy-understood form | |
600 | // start/end_key_ is the start/end user key of the range to be deleted | |
601 | struct RangeTombstone { | |
602 | Slice start_key_; | |
603 | Slice end_key_; | |
604 | SequenceNumber seq_; | |
605 | RangeTombstone() = default; | |
606 | RangeTombstone(Slice sk, Slice ek, SequenceNumber sn) | |
607 | : start_key_(sk), end_key_(ek), seq_(sn) {} | |
608 | ||
609 | RangeTombstone(ParsedInternalKey parsed_key, Slice value) { | |
610 | start_key_ = parsed_key.user_key; | |
611 | seq_ = parsed_key.sequence; | |
612 | end_key_ = value; | |
613 | } | |
614 | ||
615 | // be careful to use Serialize(), allocates new memory | |
616 | std::pair<InternalKey, Slice> Serialize() const { | |
617 | auto key = InternalKey(start_key_, seq_, kTypeRangeDeletion); | |
618 | Slice value = end_key_; | |
619 | return std::make_pair(std::move(key), std::move(value)); | |
620 | } | |
621 | ||
622 | // be careful to use SerializeKey(), allocates new memory | |
623 | InternalKey SerializeKey() const { | |
624 | return InternalKey(start_key_, seq_, kTypeRangeDeletion); | |
625 | } | |
626 | ||
11fdf7f2 TL |
627 | // The tombstone end-key is exclusive, so we generate an internal-key here |
628 | // which has a similar property. Using kMaxSequenceNumber guarantees that | |
629 | // the returned internal-key will compare less than any other internal-key | |
630 | // with the same user-key. This in turn guarantees that the serialized | |
631 | // end-key for a tombstone such as [a-b] will compare less than the key "b". | |
632 | // | |
7c673cae FG |
633 | // be careful to use SerializeEndKey(), allocates new memory |
634 | InternalKey SerializeEndKey() const { | |
11fdf7f2 | 635 | return InternalKey(end_key_, kMaxSequenceNumber, kTypeRangeDeletion); |
7c673cae FG |
636 | } |
637 | }; | |
638 | ||
494da23a TL |
639 | inline int InternalKeyComparator::Compare(const Slice& akey, |
640 | const Slice& bkey) const { | |
11fdf7f2 TL |
641 | // Order by: |
642 | // increasing user key (according to user-supplied comparator) | |
643 | // decreasing sequence number | |
644 | // decreasing type (though sequence# should be enough to disambiguate) | |
494da23a | 645 | int r = user_comparator_.Compare(ExtractUserKey(akey), ExtractUserKey(bkey)); |
11fdf7f2 TL |
646 | if (r == 0) { |
647 | const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8); | |
648 | const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8); | |
649 | if (anum > bnum) { | |
650 | r = -1; | |
651 | } else if (anum < bnum) { | |
652 | r = +1; | |
653 | } | |
654 | } | |
655 | return r; | |
656 | } | |
657 | ||
494da23a TL |
658 | inline int InternalKeyComparator::CompareKeySeq(const Slice& akey, |
659 | const Slice& bkey) const { | |
11fdf7f2 TL |
660 | // Order by: |
661 | // increasing user key (according to user-supplied comparator) | |
662 | // decreasing sequence number | |
494da23a | 663 | int r = user_comparator_.Compare(ExtractUserKey(akey), ExtractUserKey(bkey)); |
11fdf7f2 TL |
664 | if (r == 0) { |
665 | // Shift the number to exclude the last byte which contains the value type | |
666 | const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8) >> 8; | |
667 | const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8) >> 8; | |
668 | if (anum > bnum) { | |
669 | r = -1; | |
670 | } else if (anum < bnum) { | |
671 | r = +1; | |
672 | } | |
673 | } | |
674 | return r; | |
675 | } | |
676 | ||
494da23a TL |
677 | struct ParsedInternalKeyComparator { |
678 | explicit ParsedInternalKeyComparator(const InternalKeyComparator* c) | |
679 | : cmp(c) {} | |
680 | ||
681 | bool operator()(const ParsedInternalKey& a, | |
682 | const ParsedInternalKey& b) const { | |
683 | return cmp->Compare(a, b) < 0; | |
684 | } | |
685 | ||
686 | const InternalKeyComparator* cmp; | |
687 | }; | |
11fdf7f2 | 688 | |
7c673cae | 689 | } // namespace rocksdb |