]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | #pragma once |
6 | ||
7 | #ifndef ROCKSDB_LITE | |
8 | ||
9 | #include <limits> | |
10 | #include <string> | |
11 | #include <vector> | |
12 | ||
13 | #include "options/db_options.h" | |
14 | #include "port/port.h" | |
15 | #include "rocksdb/comparator.h" | |
16 | #include "rocksdb/iterator.h" | |
17 | #include "rocksdb/slice.h" | |
18 | #include "rocksdb/status.h" | |
19 | #include "rocksdb/utilities/write_batch_with_index.h" | |
20 | ||
f67539c2 | 21 | namespace ROCKSDB_NAMESPACE { |
7c673cae FG |
22 | |
23 | class MergeContext; | |
24 | struct Options; | |
25 | ||
26 | // Key used by skip list, as the binary searchable index of WriteBatchWithIndex. | |
27 | struct WriteBatchIndexEntry { | |
28 | WriteBatchIndexEntry(size_t o, uint32_t c, size_t ko, size_t ksz) | |
29 | : offset(o), | |
30 | column_family(c), | |
31 | key_offset(ko), | |
32 | key_size(ksz), | |
33 | search_key(nullptr) {} | |
11fdf7f2 TL |
34 | // Create a dummy entry as the search key. This index entry won't be backed |
35 | // by an entry from the write batch, but a pointer to the search key. Or a | |
36 | // special flag of offset can indicate we are seek to first. | |
37 | // @_search_key: the search key | |
38 | // @_column_family: column family | |
39 | // @is_forward_direction: true for Seek(). False for SeekForPrev() | |
40 | // @is_seek_to_first: true if we seek to the beginning of the column family | |
41 | // _search_key should be null in this case. | |
42 | WriteBatchIndexEntry(const Slice* _search_key, uint32_t _column_family, | |
43 | bool is_forward_direction, bool is_seek_to_first) | |
44 | // For SeekForPrev(), we need to make the dummy entry larger than any | |
45 | // entry who has the same search key. Otherwise, we'll miss those entries. | |
46 | : offset(is_forward_direction ? 0 : port::kMaxSizet), | |
47 | column_family(_column_family), | |
7c673cae | 48 | key_offset(0), |
11fdf7f2 TL |
49 | key_size(is_seek_to_first ? kFlagMinInCf : 0), |
50 | search_key(_search_key) { | |
51 | assert(_search_key != nullptr || is_seek_to_first); | |
52 | } | |
53 | ||
54 | // If this flag appears in the key_size, it indicates a | |
55 | // key that is smaller than any other entry for the same column family. | |
56 | static const size_t kFlagMinInCf = port::kMaxSizet; | |
7c673cae | 57 | |
11fdf7f2 TL |
58 | bool is_min_in_cf() const { |
59 | assert(key_size != kFlagMinInCf || | |
60 | (key_offset == 0 && search_key == nullptr)); | |
61 | return key_size == kFlagMinInCf; | |
62 | } | |
7c673cae | 63 | |
11fdf7f2 TL |
64 | // offset of an entry in write batch's string buffer. If this is a dummy |
65 | // lookup key, in which case search_key != nullptr, offset is set to either | |
66 | // 0 or max, only for comparison purpose. Because when entries have the same | |
67 | // key, the entry with larger offset is larger, offset = 0 will make a seek | |
68 | // key small or equal than all the entries with the seek key, so that Seek() | |
69 | // will find all the entries of the same key. Similarly, offset = MAX will | |
70 | // make the entry just larger than all entries with the search key so | |
71 | // SeekForPrev() will see all the keys with the same key. | |
72 | size_t offset; | |
73 | uint32_t column_family; // c1olumn family of the entry. | |
7c673cae | 74 | size_t key_offset; // offset of the key in write batch's string buffer. |
11fdf7f2 TL |
75 | size_t key_size; // size of the key. kFlagMinInCf indicates |
76 | // that this is a dummy look up entry for | |
77 | // SeekToFirst() to the beginning of the column | |
78 | // family. We use the flag here to save a boolean | |
79 | // in the struct. | |
7c673cae FG |
80 | |
81 | const Slice* search_key; // if not null, instead of reading keys from | |
82 | // write batch, use it to compare. This is used | |
83 | // for lookup key. | |
84 | }; | |
85 | ||
86 | class ReadableWriteBatch : public WriteBatch { | |
87 | public: | |
88 | explicit ReadableWriteBatch(size_t reserved_bytes = 0, size_t max_bytes = 0) | |
89 | : WriteBatch(reserved_bytes, max_bytes) {} | |
90 | // Retrieve some information from a write entry in the write batch, given | |
91 | // the start offset of the write entry. | |
92 | Status GetEntryFromDataOffset(size_t data_offset, WriteType* type, Slice* Key, | |
93 | Slice* value, Slice* blob, Slice* xid) const; | |
94 | }; | |
95 | ||
96 | class WriteBatchEntryComparator { | |
97 | public: | |
98 | WriteBatchEntryComparator(const Comparator* _default_comparator, | |
99 | const ReadableWriteBatch* write_batch) | |
100 | : default_comparator_(_default_comparator), write_batch_(write_batch) {} | |
101 | // Compare a and b. Return a negative value if a is less than b, 0 if they | |
102 | // are equal, and a positive value if a is greater than b | |
103 | int operator()(const WriteBatchIndexEntry* entry1, | |
104 | const WriteBatchIndexEntry* entry2) const; | |
105 | ||
106 | int CompareKey(uint32_t column_family, const Slice& key1, | |
107 | const Slice& key2) const; | |
108 | ||
109 | void SetComparatorForCF(uint32_t column_family_id, | |
110 | const Comparator* comparator) { | |
111 | if (column_family_id >= cf_comparators_.size()) { | |
112 | cf_comparators_.resize(column_family_id + 1, nullptr); | |
113 | } | |
114 | cf_comparators_[column_family_id] = comparator; | |
115 | } | |
116 | ||
117 | const Comparator* default_comparator() { return default_comparator_; } | |
118 | ||
119 | private: | |
120 | const Comparator* default_comparator_; | |
121 | std::vector<const Comparator*> cf_comparators_; | |
122 | const ReadableWriteBatch* write_batch_; | |
123 | }; | |
124 | ||
125 | class WriteBatchWithIndexInternal { | |
126 | public: | |
127 | enum Result { kFound, kDeleted, kNotFound, kMergeInProgress, kError }; | |
128 | ||
129 | // If batch contains a value for key, store it in *value and return kFound. | |
130 | // If batch contains a deletion for key, return Deleted. | |
131 | // If batch contains Merge operations as the most recent entry for a key, | |
132 | // and the merge process does not stop (not reaching a value or delete), | |
133 | // prepend the current merge operands to *operands, | |
134 | // and return kMergeInProgress | |
135 | // If batch does not contain this key, return kNotFound | |
136 | // Else, return kError on error with error Status stored in *s. | |
137 | static WriteBatchWithIndexInternal::Result GetFromBatch( | |
138 | const ImmutableDBOptions& ioptions, WriteBatchWithIndex* batch, | |
139 | ColumnFamilyHandle* column_family, const Slice& key, | |
140 | MergeContext* merge_context, WriteBatchEntryComparator* cmp, | |
141 | std::string* value, bool overwrite_key, Status* s); | |
142 | }; | |
143 | ||
f67539c2 | 144 | } // namespace ROCKSDB_NAMESPACE |
7c673cae | 145 | #endif // !ROCKSDB_LITE |