]>
Commit | Line | Data |
---|---|---|
494da23a TL |
1 | // Copyright (c) 2018-present, Facebook, Inc. All rights reserved. |
2 | // This source code is licensed under both the GPLv2 (found in the | |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
5 | ||
6 | #pragma once | |
7 | ||
8 | #include <list> | |
9 | #include <memory> | |
10 | #include <set> | |
11 | #include <string> | |
12 | #include <vector> | |
13 | ||
14 | #include "db/dbformat.h" | |
15 | #include "db/pinned_iterators_manager.h" | |
16 | #include "rocksdb/status.h" | |
17 | #include "table/internal_iterator.h" | |
18 | ||
f67539c2 | 19 | namespace ROCKSDB_NAMESPACE { |
494da23a TL |
20 | |
21 | struct FragmentedRangeTombstoneList { | |
22 | public: | |
23 | // A compact representation of a "stack" of range tombstone fragments, which | |
24 | // start and end at the same user keys but have different sequence numbers. | |
25 | // The members seq_start_idx and seq_end_idx are intended to be parameters to | |
26 | // seq_iter(). | |
27 | struct RangeTombstoneStack { | |
28 | RangeTombstoneStack(const Slice& start, const Slice& end, size_t start_idx, | |
29 | size_t end_idx) | |
30 | : start_key(start), | |
31 | end_key(end), | |
32 | seq_start_idx(start_idx), | |
33 | seq_end_idx(end_idx) {} | |
34 | ||
35 | Slice start_key; | |
36 | Slice end_key; | |
37 | size_t seq_start_idx; | |
38 | size_t seq_end_idx; | |
39 | }; | |
40 | FragmentedRangeTombstoneList( | |
41 | std::unique_ptr<InternalIterator> unfragmented_tombstones, | |
42 | const InternalKeyComparator& icmp, bool for_compaction = false, | |
43 | const std::vector<SequenceNumber>& snapshots = {}); | |
44 | ||
45 | std::vector<RangeTombstoneStack>::const_iterator begin() const { | |
46 | return tombstones_.begin(); | |
47 | } | |
48 | ||
49 | std::vector<RangeTombstoneStack>::const_iterator end() const { | |
50 | return tombstones_.end(); | |
51 | } | |
52 | ||
53 | std::vector<SequenceNumber>::const_iterator seq_iter(size_t idx) const { | |
54 | return std::next(tombstone_seqs_.begin(), idx); | |
55 | } | |
56 | ||
57 | std::vector<SequenceNumber>::const_iterator seq_begin() const { | |
58 | return tombstone_seqs_.begin(); | |
59 | } | |
60 | ||
61 | std::vector<SequenceNumber>::const_iterator seq_end() const { | |
62 | return tombstone_seqs_.end(); | |
63 | } | |
64 | ||
65 | bool empty() const { return tombstones_.empty(); } | |
66 | ||
67 | // Returns true if the stored tombstones contain with one with a sequence | |
68 | // number in [lower, upper]. | |
69 | bool ContainsRange(SequenceNumber lower, SequenceNumber upper) const; | |
70 | ||
71 | private: | |
72 | // Given an ordered range tombstone iterator unfragmented_tombstones, | |
73 | // "fragment" the tombstones into non-overlapping pieces, and store them in | |
74 | // tombstones_ and tombstone_seqs_. | |
75 | void FragmentTombstones( | |
76 | std::unique_ptr<InternalIterator> unfragmented_tombstones, | |
77 | const InternalKeyComparator& icmp, bool for_compaction, | |
78 | const std::vector<SequenceNumber>& snapshots); | |
79 | ||
80 | std::vector<RangeTombstoneStack> tombstones_; | |
81 | std::vector<SequenceNumber> tombstone_seqs_; | |
82 | std::set<SequenceNumber> seq_set_; | |
83 | std::list<std::string> pinned_slices_; | |
84 | PinnedIteratorsManager pinned_iters_mgr_; | |
85 | }; | |
86 | ||
87 | // FragmentedRangeTombstoneIterator converts an InternalIterator of a range-del | |
88 | // meta block into an iterator over non-overlapping tombstone fragments. The | |
89 | // tombstone fragmentation process should be more efficient than the range | |
90 | // tombstone collapsing algorithm in RangeDelAggregator because this leverages | |
91 | // the internal key ordering already provided by the input iterator, if | |
92 | // applicable (when the iterator is unsorted, a new sorted iterator is created | |
93 | // before proceeding). If there are few overlaps, creating a | |
94 | // FragmentedRangeTombstoneIterator should be O(n), while the RangeDelAggregator | |
95 | // tombstone collapsing is always O(n log n). | |
96 | class FragmentedRangeTombstoneIterator : public InternalIterator { | |
97 | public: | |
98 | FragmentedRangeTombstoneIterator( | |
99 | const FragmentedRangeTombstoneList* tombstones, | |
100 | const InternalKeyComparator& icmp, SequenceNumber upper_bound, | |
101 | SequenceNumber lower_bound = 0); | |
102 | FragmentedRangeTombstoneIterator( | |
103 | const std::shared_ptr<const FragmentedRangeTombstoneList>& tombstones, | |
104 | const InternalKeyComparator& icmp, SequenceNumber upper_bound, | |
105 | SequenceNumber lower_bound = 0); | |
106 | ||
107 | void SeekToFirst() override; | |
108 | void SeekToLast() override; | |
109 | ||
110 | void SeekToTopFirst(); | |
111 | void SeekToTopLast(); | |
112 | ||
113 | // NOTE: Seek and SeekForPrev do not behave in the way InternalIterator | |
114 | // seeking should behave. This is OK because they are not currently used, but | |
115 | // eventually FragmentedRangeTombstoneIterator should no longer implement | |
116 | // InternalIterator. | |
117 | // | |
118 | // Seeks to the range tombstone that covers target at a seqnum in the | |
119 | // snapshot. If no such tombstone exists, seek to the earliest tombstone in | |
120 | // the snapshot that ends after target. | |
121 | void Seek(const Slice& target) override; | |
122 | // Seeks to the range tombstone that covers target at a seqnum in the | |
123 | // snapshot. If no such tombstone exists, seek to the latest tombstone in the | |
124 | // snapshot that starts before target. | |
125 | void SeekForPrev(const Slice& target) override; | |
126 | ||
127 | void Next() override; | |
128 | void Prev() override; | |
129 | ||
130 | void TopNext(); | |
131 | void TopPrev(); | |
132 | ||
133 | bool Valid() const override; | |
134 | Slice key() const override { | |
135 | MaybePinKey(); | |
136 | return current_start_key_.Encode(); | |
137 | } | |
138 | Slice value() const override { return pos_->end_key; } | |
139 | bool IsKeyPinned() const override { return false; } | |
140 | bool IsValuePinned() const override { return true; } | |
141 | Status status() const override { return Status::OK(); } | |
142 | ||
143 | bool empty() const { return tombstones_->empty(); } | |
144 | void Invalidate() { | |
145 | pos_ = tombstones_->end(); | |
146 | seq_pos_ = tombstones_->seq_end(); | |
f67539c2 TL |
147 | pinned_pos_ = tombstones_->end(); |
148 | pinned_seq_pos_ = tombstones_->seq_end(); | |
494da23a TL |
149 | } |
150 | ||
151 | RangeTombstone Tombstone() const { | |
152 | return RangeTombstone(start_key(), end_key(), seq()); | |
153 | } | |
154 | Slice start_key() const { return pos_->start_key; } | |
155 | Slice end_key() const { return pos_->end_key; } | |
156 | SequenceNumber seq() const { return *seq_pos_; } | |
157 | ParsedInternalKey parsed_start_key() const { | |
158 | return ParsedInternalKey(pos_->start_key, kMaxSequenceNumber, | |
159 | kTypeRangeDeletion); | |
160 | } | |
161 | ParsedInternalKey parsed_end_key() const { | |
162 | return ParsedInternalKey(pos_->end_key, kMaxSequenceNumber, | |
163 | kTypeRangeDeletion); | |
164 | } | |
165 | ||
166 | SequenceNumber MaxCoveringTombstoneSeqnum(const Slice& user_key); | |
167 | ||
168 | // Splits the iterator into n+1 iterators (where n is the number of | |
169 | // snapshots), each providing a view over a "stripe" of sequence numbers. The | |
170 | // iterators are keyed by the upper bound of their ranges (the provided | |
171 | // snapshots + kMaxSequenceNumber). | |
172 | // | |
173 | // NOTE: the iterators in the returned map are no longer valid if their | |
174 | // parent iterator is deleted, since they do not modify the refcount of the | |
175 | // underlying tombstone list. Therefore, this map should be deleted before | |
176 | // the parent iterator. | |
177 | std::map<SequenceNumber, std::unique_ptr<FragmentedRangeTombstoneIterator>> | |
178 | SplitBySnapshot(const std::vector<SequenceNumber>& snapshots); | |
179 | ||
180 | SequenceNumber upper_bound() const { return upper_bound_; } | |
181 | SequenceNumber lower_bound() const { return lower_bound_; } | |
182 | ||
183 | private: | |
184 | using RangeTombstoneStack = FragmentedRangeTombstoneList::RangeTombstoneStack; | |
185 | ||
186 | struct RangeTombstoneStackStartComparator { | |
187 | explicit RangeTombstoneStackStartComparator(const Comparator* c) : cmp(c) {} | |
188 | ||
189 | bool operator()(const RangeTombstoneStack& a, | |
190 | const RangeTombstoneStack& b) const { | |
191 | return cmp->Compare(a.start_key, b.start_key) < 0; | |
192 | } | |
193 | ||
194 | bool operator()(const RangeTombstoneStack& a, const Slice& b) const { | |
195 | return cmp->Compare(a.start_key, b) < 0; | |
196 | } | |
197 | ||
198 | bool operator()(const Slice& a, const RangeTombstoneStack& b) const { | |
199 | return cmp->Compare(a, b.start_key) < 0; | |
200 | } | |
201 | ||
202 | const Comparator* cmp; | |
203 | }; | |
204 | ||
205 | struct RangeTombstoneStackEndComparator { | |
206 | explicit RangeTombstoneStackEndComparator(const Comparator* c) : cmp(c) {} | |
207 | ||
208 | bool operator()(const RangeTombstoneStack& a, | |
209 | const RangeTombstoneStack& b) const { | |
210 | return cmp->Compare(a.end_key, b.end_key) < 0; | |
211 | } | |
212 | ||
213 | bool operator()(const RangeTombstoneStack& a, const Slice& b) const { | |
214 | return cmp->Compare(a.end_key, b) < 0; | |
215 | } | |
216 | ||
217 | bool operator()(const Slice& a, const RangeTombstoneStack& b) const { | |
218 | return cmp->Compare(a, b.end_key) < 0; | |
219 | } | |
220 | ||
221 | const Comparator* cmp; | |
222 | }; | |
223 | ||
224 | void MaybePinKey() const { | |
225 | if (pos_ != tombstones_->end() && seq_pos_ != tombstones_->seq_end() && | |
226 | (pinned_pos_ != pos_ || pinned_seq_pos_ != seq_pos_)) { | |
227 | current_start_key_.Set(pos_->start_key, *seq_pos_, kTypeRangeDeletion); | |
228 | pinned_pos_ = pos_; | |
229 | pinned_seq_pos_ = seq_pos_; | |
230 | } | |
231 | } | |
232 | ||
233 | void SeekToCoveringTombstone(const Slice& key); | |
234 | void SeekForPrevToCoveringTombstone(const Slice& key); | |
235 | void ScanForwardToVisibleTombstone(); | |
236 | void ScanBackwardToVisibleTombstone(); | |
237 | bool ValidPos() const { | |
238 | return Valid() && seq_pos_ != tombstones_->seq_iter(pos_->seq_end_idx); | |
239 | } | |
240 | ||
241 | const RangeTombstoneStackStartComparator tombstone_start_cmp_; | |
242 | const RangeTombstoneStackEndComparator tombstone_end_cmp_; | |
243 | const InternalKeyComparator* icmp_; | |
244 | const Comparator* ucmp_; | |
245 | std::shared_ptr<const FragmentedRangeTombstoneList> tombstones_ref_; | |
246 | const FragmentedRangeTombstoneList* tombstones_; | |
247 | SequenceNumber upper_bound_; | |
248 | SequenceNumber lower_bound_; | |
249 | std::vector<RangeTombstoneStack>::const_iterator pos_; | |
250 | std::vector<SequenceNumber>::const_iterator seq_pos_; | |
251 | mutable std::vector<RangeTombstoneStack>::const_iterator pinned_pos_; | |
252 | mutable std::vector<SequenceNumber>::const_iterator pinned_seq_pos_; | |
253 | mutable InternalKey current_start_key_; | |
254 | }; | |
255 | ||
f67539c2 | 256 | } // namespace ROCKSDB_NAMESPACE |