// Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <algorithm>
#include <iterator>
#include <list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>

#include "db/compaction_iteration_stats.h"
#include "db/dbformat.h"
#include "db/pinned_iterators_manager.h"
#include "db/range_del_aggregator.h"
#include "db/range_tombstone_fragmenter.h"
#include "db/version_edit.h"
#include "include/rocksdb/comparator.h"
#include "include/rocksdb/types.h"
#include "table/internal_iterator.h"
#include "table/scoped_arena_iterator.h"
#include "table/table_builder.h"
#include "util/heap.h"
#include "util/kv_map.h"

namespace rocksdb {

class TruncatedRangeDelIterator {
 public:
  TruncatedRangeDelIterator(
      std::unique_ptr<FragmentedRangeTombstoneIterator> iter,
      const InternalKeyComparator* icmp, const InternalKey* smallest,
      const InternalKey* largest);

  bool Valid() const;

  void Next();
  void Prev();

  void InternalNext();

  // Seeks to the tombstone with the highest visible sequence number that
  // covers target (a user key). If no such tombstone exists, the position
  // will be at the earliest tombstone that ends after target.
  void Seek(const Slice& target);

  // Seeks to the tombstone with the highest visible sequence number that
  // covers target (a user key). If no such tombstone exists, the position
  // will be at the latest tombstone that starts before target.
  void SeekForPrev(const Slice& target);
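
  // An illustrative example (the fragment layout is assumed): given
  // fragments [a, c)@5 and [a, c)@3, Seek("b") positions the iterator at
  // [a, c)@5, the covering tombstone with the highest visible sequence
  // number, and InternalNext() would then advance to [a, c)@3.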

  void SeekToFirst();
  void SeekToLast();

  ParsedInternalKey start_key() const {
    return (smallest_ == nullptr ||
            icmp_->Compare(*smallest_, iter_->parsed_start_key()) <= 0)
               ? iter_->parsed_start_key()
               : *smallest_;
  }

  ParsedInternalKey end_key() const {
    return (largest_ == nullptr ||
            icmp_->Compare(iter_->parsed_end_key(), *largest_) <= 0)
               ? iter_->parsed_end_key()
               : *largest_;
  }
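
  // Note that start_key() and end_key() truncate the underlying tombstone to
  // the [smallest, largest] boundaries of the file it came from, so a
  // tombstone never appears to cover keys outside its file. As an
  // illustrative example, a tombstone [a, z)@7 read from a file with
  // boundaries [c, m] is reported with start_key() == c and an end_key()
  // derived from the file's largest key m.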

  SequenceNumber seq() const { return iter_->seq(); }

  std::map<SequenceNumber, std::unique_ptr<TruncatedRangeDelIterator>>
  SplitBySnapshot(const std::vector<SequenceNumber>& snapshots);
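
  // SplitBySnapshot partitions the tombstones by snapshot stripe: the
  // returned map is keyed by the upper bound of each stripe, and each value
  // iterates over only the tombstone fragments visible within that stripe.
  // An illustrative sketch (names hypothetical):
  //
  //   std::vector<SequenceNumber> snapshots = {10, 20};
  //   auto split = tombstone_iter->SplitBySnapshot(snapshots);
  //   // split[10] covers seqnums in [0, 10], split[20] covers (10, 20],
  //   // and split[kMaxSequenceNumber] covers everything newer.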

  SequenceNumber upper_bound() const { return iter_->upper_bound(); }

  SequenceNumber lower_bound() const { return iter_->lower_bound(); }

 private:
  std::unique_ptr<FragmentedRangeTombstoneIterator> iter_;
  const InternalKeyComparator* icmp_;
  const ParsedInternalKey* smallest_ = nullptr;
  const ParsedInternalKey* largest_ = nullptr;
  std::list<ParsedInternalKey> pinned_bounds_;

  const InternalKey* smallest_ikey_;
  const InternalKey* largest_ikey_;
};

struct SeqMaxComparator {
  bool operator()(const TruncatedRangeDelIterator* a,
                  const TruncatedRangeDelIterator* b) const {
    return a->seq() > b->seq();
  }
};

struct StartKeyMinComparator {
  explicit StartKeyMinComparator(const InternalKeyComparator* c) : icmp(c) {}

  bool operator()(const TruncatedRangeDelIterator* a,
                  const TruncatedRangeDelIterator* b) const {
    return icmp->Compare(a->start_key(), b->start_key()) > 0;
  }

  const InternalKeyComparator* icmp;
};
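
// Both comparators above invert the natural ordering: BinaryHeap (from
// util/heap.h) surfaces the element that compares greatest, so a ">"
// comparison yields min-heap behavior and the iterator with the smallest
// start key ends up on top; SeqMaxComparator likewise orders a multiset so
// that its first element is the tombstone with the highest sequence number.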

class ForwardRangeDelIterator {
 public:
  explicit ForwardRangeDelIterator(const InternalKeyComparator* icmp);

  bool ShouldDelete(const ParsedInternalKey& parsed);
  void Invalidate();

  void AddNewIter(TruncatedRangeDelIterator* iter,
                  const ParsedInternalKey& parsed) {
    iter->Seek(parsed.user_key);
    PushIter(iter, parsed);
    assert(active_iters_.size() == active_seqnums_.size());
  }

  size_t UnusedIdx() const { return unused_idx_; }
  void IncUnusedIdx() { unused_idx_++; }

 private:
  using ActiveSeqSet =
      std::multiset<TruncatedRangeDelIterator*, SeqMaxComparator>;

  struct EndKeyMinComparator {
    explicit EndKeyMinComparator(const InternalKeyComparator* c) : icmp(c) {}

    bool operator()(const ActiveSeqSet::const_iterator& a,
                    const ActiveSeqSet::const_iterator& b) const {
      return icmp->Compare((*a)->end_key(), (*b)->end_key()) > 0;
    }

    const InternalKeyComparator* icmp;
  };

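  // Adds the iterator to one of the two heaps: "active" if its tombstone
  // already covers keys at or before the current position `parsed` (so it
  // must be consulted by ShouldDelete()), or "inactive" if its tombstone
  // starts after `parsed`, in which case it is parked, ordered by start key,
  // until the traversal reaches it.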
  void PushIter(TruncatedRangeDelIterator* iter,
                const ParsedInternalKey& parsed) {
    if (!iter->Valid()) {
      // The iterator has been fully consumed, so we don't need to add it to
      // either of the heaps.
      return;
    }
    int cmp = icmp_->Compare(parsed, iter->start_key());
    if (cmp < 0) {
      PushInactiveIter(iter);
    } else {
      PushActiveIter(iter);
    }
  }

  void PushActiveIter(TruncatedRangeDelIterator* iter) {
    auto seq_pos = active_seqnums_.insert(iter);
    active_iters_.push(seq_pos);
  }

  TruncatedRangeDelIterator* PopActiveIter() {
    auto active_top = active_iters_.top();
    auto iter = *active_top;
    active_iters_.pop();
    active_seqnums_.erase(active_top);
    return iter;
  }

  void PushInactiveIter(TruncatedRangeDelIterator* iter) {
    inactive_iters_.push(iter);
  }

  TruncatedRangeDelIterator* PopInactiveIter() {
    auto* iter = inactive_iters_.top();
    inactive_iters_.pop();
    return iter;
  }

  const InternalKeyComparator* icmp_;
  size_t unused_idx_;
  ActiveSeqSet active_seqnums_;
  BinaryHeap<ActiveSeqSet::const_iterator, EndKeyMinComparator> active_iters_;
  BinaryHeap<TruncatedRangeDelIterator*, StartKeyMinComparator> inactive_iters_;
};

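// ReverseRangeDelIterator mirrors ForwardRangeDelIterator for backward
// traversal: iterators are seeked with SeekForPrev(), "active" means the
// tombstone's end key lies strictly after the current position, and the heap
// orderings are flipped so the iterators with the largest keys surface first.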
class ReverseRangeDelIterator {
 public:
  explicit ReverseRangeDelIterator(const InternalKeyComparator* icmp);

  bool ShouldDelete(const ParsedInternalKey& parsed);
  void Invalidate();

  void AddNewIter(TruncatedRangeDelIterator* iter,
                  const ParsedInternalKey& parsed) {
    iter->SeekForPrev(parsed.user_key);
    PushIter(iter, parsed);
    assert(active_iters_.size() == active_seqnums_.size());
  }

  size_t UnusedIdx() const { return unused_idx_; }
  void IncUnusedIdx() { unused_idx_++; }

 private:
  using ActiveSeqSet =
      std::multiset<TruncatedRangeDelIterator*, SeqMaxComparator>;

  struct EndKeyMaxComparator {
    explicit EndKeyMaxComparator(const InternalKeyComparator* c) : icmp(c) {}

    bool operator()(const TruncatedRangeDelIterator* a,
                    const TruncatedRangeDelIterator* b) const {
      return icmp->Compare(a->end_key(), b->end_key()) < 0;
    }

    const InternalKeyComparator* icmp;
  };
  struct StartKeyMaxComparator {
    explicit StartKeyMaxComparator(const InternalKeyComparator* c) : icmp(c) {}

    bool operator()(const ActiveSeqSet::const_iterator& a,
                    const ActiveSeqSet::const_iterator& b) const {
      return icmp->Compare((*a)->start_key(), (*b)->start_key()) < 0;
    }

    const InternalKeyComparator* icmp;
  };

  void PushIter(TruncatedRangeDelIterator* iter,
                const ParsedInternalKey& parsed) {
    if (!iter->Valid()) {
      // The iterator has been fully consumed, so we don't need to add it to
      // either of the heaps.
    } else if (icmp_->Compare(iter->end_key(), parsed) <= 0) {
      PushInactiveIter(iter);
    } else {
      PushActiveIter(iter);
    }
  }

  void PushActiveIter(TruncatedRangeDelIterator* iter) {
    auto seq_pos = active_seqnums_.insert(iter);
    active_iters_.push(seq_pos);
  }

  TruncatedRangeDelIterator* PopActiveIter() {
    auto active_top = active_iters_.top();
    auto iter = *active_top;
    active_iters_.pop();
    active_seqnums_.erase(active_top);
    return iter;
  }

  void PushInactiveIter(TruncatedRangeDelIterator* iter) {
    inactive_iters_.push(iter);
  }

  TruncatedRangeDelIterator* PopInactiveIter() {
    auto* iter = inactive_iters_.top();
    inactive_iters_.pop();
    return iter;
  }

  const InternalKeyComparator* icmp_;
  size_t unused_idx_;
  ActiveSeqSet active_seqnums_;
  BinaryHeap<ActiveSeqSet::const_iterator, StartKeyMaxComparator> active_iters_;
  BinaryHeap<TruncatedRangeDelIterator*, EndKeyMaxComparator> inactive_iters_;
};

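// Indicates the direction in which point keys are fed to ShouldDelete():
// kForwardTraversal routes queries through a ForwardRangeDelIterator and
// kBackwardTraversal through a ReverseRangeDelIterator, so repeated queries
// in a consistent direction stay incremental rather than re-seeking.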
enum class RangeDelPositioningMode { kForwardTraversal, kBackwardTraversal };
class RangeDelAggregator {
 public:
  explicit RangeDelAggregator(const InternalKeyComparator* icmp)
      : icmp_(icmp) {}
  virtual ~RangeDelAggregator() {}

  virtual void AddTombstones(
      std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter,
      const InternalKey* smallest = nullptr,
      const InternalKey* largest = nullptr) = 0;

  bool ShouldDelete(const Slice& key, RangeDelPositioningMode mode) {
    ParsedInternalKey parsed;
    if (!ParseInternalKey(key, &parsed)) {
      return false;
    }
    return ShouldDelete(parsed, mode);
  }
  virtual bool ShouldDelete(const ParsedInternalKey& parsed,
                            RangeDelPositioningMode mode) = 0;

  virtual void InvalidateRangeDelMapPositions() = 0;

  virtual bool IsEmpty() const = 0;

  bool AddFile(uint64_t file_number) {
    return files_seen_.insert(file_number).second;
  }

 protected:
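  // A StripeRep aggregates the tombstones that fall within one snapshot
  // stripe, i.e. the range of sequence numbers [lower_bound, upper_bound]
  // between two adjacent snapshots, and answers ShouldDelete() queries for
  // keys whose sequence numbers land in that stripe.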
  class StripeRep {
   public:
    StripeRep(const InternalKeyComparator* icmp, SequenceNumber upper_bound,
              SequenceNumber lower_bound)
        : icmp_(icmp),
          forward_iter_(icmp),
          reverse_iter_(icmp),
          upper_bound_(upper_bound),
          lower_bound_(lower_bound) {}

    void AddTombstones(std::unique_ptr<TruncatedRangeDelIterator> input_iter) {
      iters_.push_back(std::move(input_iter));
    }

    bool IsEmpty() const { return iters_.empty(); }

    bool ShouldDelete(const ParsedInternalKey& parsed,
                      RangeDelPositioningMode mode);

    void Invalidate() {
      InvalidateForwardIter();
      InvalidateReverseIter();
    }

    bool IsRangeOverlapped(const Slice& start, const Slice& end);

   private:
    bool InStripe(SequenceNumber seq) const {
      return lower_bound_ <= seq && seq <= upper_bound_;
    }

    void InvalidateForwardIter() { forward_iter_.Invalidate(); }

    void InvalidateReverseIter() { reverse_iter_.Invalidate(); }

    const InternalKeyComparator* icmp_;
    std::vector<std::unique_ptr<TruncatedRangeDelIterator>> iters_;
    ForwardRangeDelIterator forward_iter_;
    ReverseRangeDelIterator reverse_iter_;
    SequenceNumber upper_bound_;
    SequenceNumber lower_bound_;
  };

  const InternalKeyComparator* icmp_;

 private:
  std::set<uint64_t> files_seen_;
};

class ReadRangeDelAggregator : public RangeDelAggregator {
 public:
  ReadRangeDelAggregator(const InternalKeyComparator* icmp,
                         SequenceNumber upper_bound)
      : RangeDelAggregator(icmp),
        rep_(icmp, upper_bound, 0 /* lower_bound */) {}
  ~ReadRangeDelAggregator() override {}

  using RangeDelAggregator::ShouldDelete;
  void AddTombstones(
      std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter,
      const InternalKey* smallest = nullptr,
      const InternalKey* largest = nullptr) override;

  bool ShouldDelete(const ParsedInternalKey& parsed,
                    RangeDelPositioningMode mode) override;

  bool IsRangeOverlapped(const Slice& start, const Slice& end);

  void InvalidateRangeDelMapPositions() override { rep_.Invalidate(); }

  bool IsEmpty() const override { return rep_.IsEmpty(); }

 private:
  StripeRep rep_;
};
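
// An illustrative sketch of read-path usage (variable names hypothetical;
// reads use a single stripe bounded above by the read sequence number):
//
//   ReadRangeDelAggregator agg(&icmp, read_seq);
//   agg.AddTombstones(std::move(fragmented_tombstone_iter));
//   if (agg.ShouldDelete(parsed_key,
//                        RangeDelPositioningMode::kForwardTraversal)) {
//     // parsed_key is covered by a newer range tombstone; skip it.
//   }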

class CompactionRangeDelAggregator : public RangeDelAggregator {
 public:
  CompactionRangeDelAggregator(const InternalKeyComparator* icmp,
                               const std::vector<SequenceNumber>& snapshots)
      : RangeDelAggregator(icmp), snapshots_(&snapshots) {}
  ~CompactionRangeDelAggregator() override {}

  void AddTombstones(
      std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter,
      const InternalKey* smallest = nullptr,
      const InternalKey* largest = nullptr) override;

  using RangeDelAggregator::ShouldDelete;
  bool ShouldDelete(const ParsedInternalKey& parsed,
                    RangeDelPositioningMode mode) override;

  bool IsRangeOverlapped(const Slice& start, const Slice& end);

  void InvalidateRangeDelMapPositions() override {
    for (auto& rep : reps_) {
      rep.second.Invalidate();
    }
  }

  bool IsEmpty() const override {
    for (const auto& rep : reps_) {
      if (!rep.second.IsEmpty()) {
        return false;
      }
    }
    return true;
  }

  // Creates an iterator over all the range tombstones in the aggregator, for
  // use in compaction. Nullptr arguments indicate that the iterator range is
  // unbounded.
  // NOTE: the boundaries are used for optimization purposes to reduce the
  // number of tombstones that are passed to the fragmenter; they do not
  // guarantee that the resulting iterator only contains range tombstones that
  // cover keys in the provided range. If required, these bounds must be
  // enforced during iteration.
  std::unique_ptr<FragmentedRangeTombstoneIterator> NewIterator(
      const Slice* lower_bound = nullptr, const Slice* upper_bound = nullptr,
      bool upper_bound_inclusive = false);

 private:
  std::vector<std::unique_ptr<TruncatedRangeDelIterator>> parent_iters_;
  std::map<SequenceNumber, StripeRep> reps_;

  const std::vector<SequenceNumber>* snapshots_;
};
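
// An illustrative sketch of compaction usage (variable names hypothetical;
// one stripe is maintained per snapshot interval, and the snapshots vector
// must outlive the aggregator since only a pointer to it is stored):
//
//   CompactionRangeDelAggregator agg(&icmp, existing_snapshots);
//   for (auto& tombstone_iter : input_tombstone_iters) {
//     agg.AddTombstones(std::move(tombstone_iter), &file_smallest,
//                       &file_largest);
//   }
//   // During the merge, drop point keys covered in their own stripe:
//   bool drop = agg.ShouldDelete(
//       parsed_key, RangeDelPositioningMode::kForwardTraversal);
//   // Re-emit the surviving tombstones into the output file, enforcing the
//   // output bounds during iteration as the NewIterator() NOTE requires:
//   auto range_del_it = agg.NewIterator(&output_lower, &output_upper,
//                                       /* upper_bound_inclusive */ true);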

}  // namespace rocksdb