]>
Commit | Line | Data |
---|---|---|
494da23a | 1 | // Copyright (c) 2018-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | |
6 | #pragma once | |
7 | ||
494da23a TL |
8 | #include <algorithm> |
9 | #include <iterator> | |
11fdf7f2 | 10 | #include <list> |
7c673cae | 11 | #include <map> |
11fdf7f2 | 12 | #include <set> |
7c673cae FG |
13 | #include <string> |
14 | #include <vector> | |
15 | ||
16 | #include "db/compaction_iteration_stats.h" | |
17 | #include "db/dbformat.h" | |
18 | #include "db/pinned_iterators_manager.h" | |
494da23a TL |
19 | #include "db/range_del_aggregator.h" |
20 | #include "db/range_tombstone_fragmenter.h" | |
7c673cae FG |
21 | #include "db/version_edit.h" |
22 | #include "include/rocksdb/comparator.h" | |
23 | #include "include/rocksdb/types.h" | |
24 | #include "table/internal_iterator.h" | |
25 | #include "table/scoped_arena_iterator.h" | |
26 | #include "table/table_builder.h" | |
494da23a | 27 | #include "util/heap.h" |
7c673cae FG |
28 | #include "util/kv_map.h" |
29 | ||
30 | namespace rocksdb { | |
31 | ||
494da23a TL |
32 | class TruncatedRangeDelIterator { |
33 | public: | |
34 | TruncatedRangeDelIterator( | |
35 | std::unique_ptr<FragmentedRangeTombstoneIterator> iter, | |
36 | const InternalKeyComparator* icmp, const InternalKey* smallest, | |
37 | const InternalKey* largest); | |
38 | ||
39 | bool Valid() const; | |
40 | ||
41 | void Next(); | |
42 | void Prev(); | |
43 | ||
44 | void InternalNext(); | |
45 | ||
46 | // Seeks to the tombstone with the highest viisble sequence number that covers | |
47 | // target (a user key). If no such tombstone exists, the position will be at | |
48 | // the earliest tombstone that ends after target. | |
49 | void Seek(const Slice& target); | |
50 | ||
51 | // Seeks to the tombstone with the highest viisble sequence number that covers | |
52 | // target (a user key). If no such tombstone exists, the position will be at | |
53 | // the latest tombstone that starts before target. | |
54 | void SeekForPrev(const Slice& target); | |
55 | ||
56 | void SeekToFirst(); | |
57 | void SeekToLast(); | |
58 | ||
59 | ParsedInternalKey start_key() const { | |
60 | return (smallest_ == nullptr || | |
61 | icmp_->Compare(*smallest_, iter_->parsed_start_key()) <= 0) | |
62 | ? iter_->parsed_start_key() | |
63 | : *smallest_; | |
64 | } | |
65 | ||
66 | ParsedInternalKey end_key() const { | |
67 | return (largest_ == nullptr || | |
68 | icmp_->Compare(iter_->parsed_end_key(), *largest_) <= 0) | |
69 | ? iter_->parsed_end_key() | |
70 | : *largest_; | |
71 | } | |
72 | ||
73 | SequenceNumber seq() const { return iter_->seq(); } | |
74 | ||
75 | std::map<SequenceNumber, std::unique_ptr<TruncatedRangeDelIterator>> | |
76 | SplitBySnapshot(const std::vector<SequenceNumber>& snapshots); | |
77 | ||
78 | SequenceNumber upper_bound() const { return iter_->upper_bound(); } | |
79 | ||
80 | SequenceNumber lower_bound() const { return iter_->lower_bound(); } | |
81 | ||
82 | private: | |
83 | std::unique_ptr<FragmentedRangeTombstoneIterator> iter_; | |
84 | const InternalKeyComparator* icmp_; | |
85 | const ParsedInternalKey* smallest_ = nullptr; | |
86 | const ParsedInternalKey* largest_ = nullptr; | |
87 | std::list<ParsedInternalKey> pinned_bounds_; | |
88 | ||
89 | const InternalKey* smallest_ikey_; | |
90 | const InternalKey* largest_ikey_; | |
91 | }; | |
92 | ||
93 | struct SeqMaxComparator { | |
94 | bool operator()(const TruncatedRangeDelIterator* a, | |
95 | const TruncatedRangeDelIterator* b) const { | |
96 | return a->seq() > b->seq(); | |
97 | } | |
98 | }; | |
99 | ||
100 | struct StartKeyMinComparator { | |
101 | explicit StartKeyMinComparator(const InternalKeyComparator* c) : icmp(c) {} | |
102 | ||
103 | bool operator()(const TruncatedRangeDelIterator* a, | |
104 | const TruncatedRangeDelIterator* b) const { | |
105 | return icmp->Compare(a->start_key(), b->start_key()) > 0; | |
106 | } | |
107 | ||
108 | const InternalKeyComparator* icmp; | |
11fdf7f2 TL |
109 | }; |
110 | ||
494da23a | 111 | class ForwardRangeDelIterator { |
11fdf7f2 | 112 | public: |
494da23a TL |
113 | explicit ForwardRangeDelIterator(const InternalKeyComparator* icmp); |
114 | ||
115 | bool ShouldDelete(const ParsedInternalKey& parsed); | |
116 | void Invalidate(); | |
117 | ||
118 | void AddNewIter(TruncatedRangeDelIterator* iter, | |
119 | const ParsedInternalKey& parsed) { | |
120 | iter->Seek(parsed.user_key); | |
121 | PushIter(iter, parsed); | |
122 | assert(active_iters_.size() == active_seqnums_.size()); | |
123 | } | |
124 | ||
125 | size_t UnusedIdx() const { return unused_idx_; } | |
126 | void IncUnusedIdx() { unused_idx_++; } | |
127 | ||
128 | private: | |
129 | using ActiveSeqSet = | |
130 | std::multiset<TruncatedRangeDelIterator*, SeqMaxComparator>; | |
11fdf7f2 | 131 | |
494da23a TL |
132 | struct EndKeyMinComparator { |
133 | explicit EndKeyMinComparator(const InternalKeyComparator* c) : icmp(c) {} | |
134 | ||
135 | bool operator()(const ActiveSeqSet::const_iterator& a, | |
136 | const ActiveSeqSet::const_iterator& b) const { | |
137 | return icmp->Compare((*a)->end_key(), (*b)->end_key()) > 0; | |
138 | } | |
139 | ||
140 | const InternalKeyComparator* icmp; | |
141 | }; | |
142 | ||
143 | void PushIter(TruncatedRangeDelIterator* iter, | |
144 | const ParsedInternalKey& parsed) { | |
145 | if (!iter->Valid()) { | |
146 | // The iterator has been fully consumed, so we don't need to add it to | |
147 | // either of the heaps. | |
148 | return; | |
149 | } | |
150 | int cmp = icmp_->Compare(parsed, iter->start_key()); | |
151 | if (cmp < 0) { | |
152 | PushInactiveIter(iter); | |
153 | } else { | |
154 | PushActiveIter(iter); | |
155 | } | |
156 | } | |
157 | ||
158 | void PushActiveIter(TruncatedRangeDelIterator* iter) { | |
159 | auto seq_pos = active_seqnums_.insert(iter); | |
160 | active_iters_.push(seq_pos); | |
161 | } | |
162 | ||
163 | TruncatedRangeDelIterator* PopActiveIter() { | |
164 | auto active_top = active_iters_.top(); | |
165 | auto iter = *active_top; | |
166 | active_iters_.pop(); | |
167 | active_seqnums_.erase(active_top); | |
168 | return iter; | |
169 | } | |
170 | ||
171 | void PushInactiveIter(TruncatedRangeDelIterator* iter) { | |
172 | inactive_iters_.push(iter); | |
173 | } | |
174 | ||
175 | TruncatedRangeDelIterator* PopInactiveIter() { | |
176 | auto* iter = inactive_iters_.top(); | |
177 | inactive_iters_.pop(); | |
178 | return iter; | |
179 | } | |
180 | ||
181 | const InternalKeyComparator* icmp_; | |
182 | size_t unused_idx_; | |
183 | ActiveSeqSet active_seqnums_; | |
184 | BinaryHeap<ActiveSeqSet::const_iterator, EndKeyMinComparator> active_iters_; | |
185 | BinaryHeap<TruncatedRangeDelIterator*, StartKeyMinComparator> inactive_iters_; | |
11fdf7f2 TL |
186 | }; |
187 | ||
494da23a | 188 | class ReverseRangeDelIterator { |
11fdf7f2 | 189 | public: |
494da23a | 190 | explicit ReverseRangeDelIterator(const InternalKeyComparator* icmp); |
11fdf7f2 | 191 | |
494da23a TL |
192 | bool ShouldDelete(const ParsedInternalKey& parsed); |
193 | void Invalidate(); | |
194 | ||
195 | void AddNewIter(TruncatedRangeDelIterator* iter, | |
196 | const ParsedInternalKey& parsed) { | |
197 | iter->SeekForPrev(parsed.user_key); | |
198 | PushIter(iter, parsed); | |
199 | assert(active_iters_.size() == active_seqnums_.size()); | |
200 | } | |
201 | ||
202 | size_t UnusedIdx() const { return unused_idx_; } | |
203 | void IncUnusedIdx() { unused_idx_++; } | |
204 | ||
205 | private: | |
206 | using ActiveSeqSet = | |
207 | std::multiset<TruncatedRangeDelIterator*, SeqMaxComparator>; | |
11fdf7f2 | 208 | |
494da23a TL |
209 | struct EndKeyMaxComparator { |
210 | explicit EndKeyMaxComparator(const InternalKeyComparator* c) : icmp(c) {} | |
11fdf7f2 | 211 | |
494da23a TL |
212 | bool operator()(const TruncatedRangeDelIterator* a, |
213 | const TruncatedRangeDelIterator* b) const { | |
214 | return icmp->Compare(a->end_key(), b->end_key()) < 0; | |
215 | } | |
216 | ||
217 | const InternalKeyComparator* icmp; | |
218 | }; | |
219 | struct StartKeyMaxComparator { | |
220 | explicit StartKeyMaxComparator(const InternalKeyComparator* c) : icmp(c) {} | |
221 | ||
222 | bool operator()(const ActiveSeqSet::const_iterator& a, | |
223 | const ActiveSeqSet::const_iterator& b) const { | |
224 | return icmp->Compare((*a)->start_key(), (*b)->start_key()) < 0; | |
225 | } | |
226 | ||
227 | const InternalKeyComparator* icmp; | |
228 | }; | |
229 | ||
230 | void PushIter(TruncatedRangeDelIterator* iter, | |
231 | const ParsedInternalKey& parsed) { | |
232 | if (!iter->Valid()) { | |
233 | // The iterator has been fully consumed, so we don't need to add it to | |
234 | // either of the heaps. | |
235 | } else if (icmp_->Compare(iter->end_key(), parsed) <= 0) { | |
236 | PushInactiveIter(iter); | |
237 | } else { | |
238 | PushActiveIter(iter); | |
239 | } | |
240 | } | |
241 | ||
242 | void PushActiveIter(TruncatedRangeDelIterator* iter) { | |
243 | auto seq_pos = active_seqnums_.insert(iter); | |
244 | active_iters_.push(seq_pos); | |
245 | } | |
246 | ||
247 | TruncatedRangeDelIterator* PopActiveIter() { | |
248 | auto active_top = active_iters_.top(); | |
249 | auto iter = *active_top; | |
250 | active_iters_.pop(); | |
251 | active_seqnums_.erase(active_top); | |
252 | return iter; | |
253 | } | |
254 | ||
255 | void PushInactiveIter(TruncatedRangeDelIterator* iter) { | |
256 | inactive_iters_.push(iter); | |
257 | } | |
258 | ||
259 | TruncatedRangeDelIterator* PopInactiveIter() { | |
260 | auto* iter = inactive_iters_.top(); | |
261 | inactive_iters_.pop(); | |
262 | return iter; | |
263 | } | |
264 | ||
265 | const InternalKeyComparator* icmp_; | |
266 | size_t unused_idx_; | |
267 | ActiveSeqSet active_seqnums_; | |
268 | BinaryHeap<ActiveSeqSet::const_iterator, StartKeyMaxComparator> active_iters_; | |
269 | BinaryHeap<TruncatedRangeDelIterator*, EndKeyMaxComparator> inactive_iters_; | |
11fdf7f2 TL |
270 | }; |
271 | ||
494da23a | 272 | enum class RangeDelPositioningMode { kForwardTraversal, kBackwardTraversal }; |
7c673cae FG |
273 | class RangeDelAggregator { |
274 | public: | |
494da23a TL |
275 | explicit RangeDelAggregator(const InternalKeyComparator* icmp) |
276 | : icmp_(icmp) {} | |
277 | virtual ~RangeDelAggregator() {} | |
278 | ||
279 | virtual void AddTombstones( | |
280 | std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter, | |
281 | const InternalKey* smallest = nullptr, | |
282 | const InternalKey* largest = nullptr) = 0; | |
283 | ||
284 | bool ShouldDelete(const Slice& key, RangeDelPositioningMode mode) { | |
285 | ParsedInternalKey parsed; | |
286 | if (!ParseInternalKey(key, &parsed)) { | |
11fdf7f2 TL |
287 | return false; |
288 | } | |
494da23a | 289 | return ShouldDelete(parsed, mode); |
11fdf7f2 | 290 | } |
494da23a TL |
291 | virtual bool ShouldDelete(const ParsedInternalKey& parsed, |
292 | RangeDelPositioningMode mode) = 0; | |
293 | ||
294 | virtual void InvalidateRangeDelMapPositions() = 0; | |
295 | ||
296 | virtual bool IsEmpty() const = 0; | |
297 | ||
298 | bool AddFile(uint64_t file_number) { | |
299 | return files_seen_.insert(file_number).second; | |
300 | } | |
301 | ||
302 | protected: | |
303 | class StripeRep { | |
304 | public: | |
305 | StripeRep(const InternalKeyComparator* icmp, SequenceNumber upper_bound, | |
306 | SequenceNumber lower_bound) | |
307 | : icmp_(icmp), | |
308 | forward_iter_(icmp), | |
309 | reverse_iter_(icmp), | |
310 | upper_bound_(upper_bound), | |
311 | lower_bound_(lower_bound) {} | |
312 | ||
313 | void AddTombstones(std::unique_ptr<TruncatedRangeDelIterator> input_iter) { | |
314 | iters_.push_back(std::move(input_iter)); | |
11fdf7f2 | 315 | } |
494da23a TL |
316 | |
317 | bool IsEmpty() const { return iters_.empty(); } | |
318 | ||
319 | bool ShouldDelete(const ParsedInternalKey& parsed, | |
320 | RangeDelPositioningMode mode); | |
321 | ||
322 | void Invalidate() { | |
323 | InvalidateForwardIter(); | |
324 | InvalidateReverseIter(); | |
325 | } | |
326 | ||
327 | bool IsRangeOverlapped(const Slice& start, const Slice& end); | |
328 | ||
329 | private: | |
330 | bool InStripe(SequenceNumber seq) const { | |
331 | return lower_bound_ <= seq && seq <= upper_bound_; | |
332 | } | |
333 | ||
334 | void InvalidateForwardIter() { forward_iter_.Invalidate(); } | |
335 | ||
336 | void InvalidateReverseIter() { reverse_iter_.Invalidate(); } | |
337 | ||
338 | const InternalKeyComparator* icmp_; | |
339 | std::vector<std::unique_ptr<TruncatedRangeDelIterator>> iters_; | |
340 | ForwardRangeDelIterator forward_iter_; | |
341 | ReverseRangeDelIterator reverse_iter_; | |
342 | SequenceNumber upper_bound_; | |
343 | SequenceNumber lower_bound_; | |
344 | }; | |
345 | ||
346 | const InternalKeyComparator* icmp_; | |
347 | ||
348 | private: | |
349 | std::set<uint64_t> files_seen_; | |
350 | }; | |
351 | ||
352 | class ReadRangeDelAggregator : public RangeDelAggregator { | |
353 | public: | |
354 | ReadRangeDelAggregator(const InternalKeyComparator* icmp, | |
355 | SequenceNumber upper_bound) | |
356 | : RangeDelAggregator(icmp), | |
357 | rep_(icmp, upper_bound, 0 /* lower_bound */) {} | |
358 | ~ReadRangeDelAggregator() override {} | |
359 | ||
360 | using RangeDelAggregator::ShouldDelete; | |
361 | void AddTombstones( | |
362 | std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter, | |
363 | const InternalKey* smallest = nullptr, | |
364 | const InternalKey* largest = nullptr) override; | |
365 | ||
366 | bool ShouldDelete(const ParsedInternalKey& parsed, | |
367 | RangeDelPositioningMode mode) override; | |
368 | ||
11fdf7f2 | 369 | bool IsRangeOverlapped(const Slice& start, const Slice& end); |
7c673cae | 370 | |
494da23a TL |
371 | void InvalidateRangeDelMapPositions() override { rep_.Invalidate(); } |
372 | ||
373 | bool IsEmpty() const override { return rep_.IsEmpty(); } | |
7c673cae | 374 | |
11fdf7f2 | 375 | private: |
494da23a TL |
376 | StripeRep rep_; |
377 | }; | |
378 | ||
379 | class CompactionRangeDelAggregator : public RangeDelAggregator { | |
380 | public: | |
381 | CompactionRangeDelAggregator(const InternalKeyComparator* icmp, | |
382 | const std::vector<SequenceNumber>& snapshots) | |
383 | : RangeDelAggregator(icmp), snapshots_(&snapshots) {} | |
384 | ~CompactionRangeDelAggregator() override {} | |
385 | ||
386 | void AddTombstones( | |
387 | std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter, | |
388 | const InternalKey* smallest = nullptr, | |
389 | const InternalKey* largest = nullptr) override; | |
390 | ||
391 | using RangeDelAggregator::ShouldDelete; | |
392 | bool ShouldDelete(const ParsedInternalKey& parsed, | |
393 | RangeDelPositioningMode mode) override; | |
394 | ||
395 | bool IsRangeOverlapped(const Slice& start, const Slice& end); | |
396 | ||
397 | void InvalidateRangeDelMapPositions() override { | |
398 | for (auto& rep : reps_) { | |
399 | rep.second.Invalidate(); | |
400 | } | |
401 | } | |
402 | ||
403 | bool IsEmpty() const override { | |
404 | for (const auto& rep : reps_) { | |
405 | if (!rep.second.IsEmpty()) { | |
406 | return false; | |
407 | } | |
408 | } | |
409 | return true; | |
410 | } | |
411 | ||
412 | // Creates an iterator over all the range tombstones in the aggregator, for | |
413 | // use in compaction. Nullptr arguments indicate that the iterator range is | |
414 | // unbounded. | |
415 | // NOTE: the boundaries are used for optimization purposes to reduce the | |
416 | // number of tombstones that are passed to the fragmenter; they do not | |
417 | // guarantee that the resulting iterator only contains range tombstones that | |
418 | // cover keys in the provided range. If required, these bounds must be | |
419 | // enforced during iteration. | |
420 | std::unique_ptr<FragmentedRangeTombstoneIterator> NewIterator( | |
421 | const Slice* lower_bound = nullptr, const Slice* upper_bound = nullptr, | |
422 | bool upper_bound_inclusive = false); | |
423 | ||
424 | private: | |
425 | std::vector<std::unique_ptr<TruncatedRangeDelIterator>> parent_iters_; | |
426 | std::map<SequenceNumber, StripeRep> reps_; | |
427 | ||
428 | const std::vector<SequenceNumber>* snapshots_; | |
7c673cae FG |
429 | }; |
430 | ||
431 | } // namespace rocksdb |