// Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>

#include "db/dbformat.h"
#include "db/pinned_iterators_manager.h"
#include "rocksdb/status.h"
#include "table/internal_iterator.h"

namespace ROCKSDB_NAMESPACE {

struct FragmentedRangeTombstoneList {
 public:
  // A compact representation of a "stack" of range tombstone fragments, which
  // start and end at the same user keys but have different sequence numbers.
  // The members seq_start_idx and seq_end_idx are intended to be parameters to
  // seq_iter().
  struct RangeTombstoneStack {
    RangeTombstoneStack(const Slice& start, const Slice& end, size_t start_idx,
                        size_t end_idx)
        : start_key(start),
          end_key(end),
          seq_start_idx(start_idx),
          seq_end_idx(end_idx) {}

    Slice start_key;
    Slice end_key;
    size_t seq_start_idx;
    size_t seq_end_idx;
  };
  FragmentedRangeTombstoneList(
      std::unique_ptr<InternalIterator> unfragmented_tombstones,
      const InternalKeyComparator& icmp, bool for_compaction = false,
      const std::vector<SequenceNumber>& snapshots = {});

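  // Example usage (an illustrative sketch, not part of this API;
  // `range_del_iter` and `icmp` are assumed to be supplied by the caller):
  //
  //   FragmentedRangeTombstoneList fragmented(std::move(range_del_iter),
  //                                           icmp);
  //   for (auto frag = fragmented.begin(); frag != fragmented.end();
  //        ++frag) {
  //     for (auto seq = fragmented.seq_iter(frag->seq_start_idx);
  //          seq != fragmented.seq_iter(frag->seq_end_idx); ++seq) {
  //       // [frag->start_key, frag->end_key) is deleted at seqnum *seq.
  //     }
  //   }
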
  std::vector<RangeTombstoneStack>::const_iterator begin() const {
    return tombstones_.begin();
  }

  std::vector<RangeTombstoneStack>::const_iterator end() const {
    return tombstones_.end();
  }

  std::vector<SequenceNumber>::const_iterator seq_iter(size_t idx) const {
    return std::next(tombstone_seqs_.begin(), idx);
  }

  std::vector<SequenceNumber>::const_iterator seq_begin() const {
    return tombstone_seqs_.begin();
  }

  std::vector<SequenceNumber>::const_iterator seq_end() const {
    return tombstone_seqs_.end();
  }

  bool empty() const { return tombstones_.empty(); }

  // Returns true if the stored tombstones contain one with a sequence
  // number in [lower, upper].
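  //
  // Example (illustrative values): if the stored sequence numbers are
  // {4, 6}, then ContainsRange(5, 7) returns true (6 lies in [5, 7]) while
  // ContainsRange(1, 3) returns false.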
  bool ContainsRange(SequenceNumber lower, SequenceNumber upper) const;

 private:
  // Given an ordered range tombstone iterator unfragmented_tombstones,
  // "fragment" the tombstones into non-overlapping pieces, and store them in
  // tombstones_ and tombstone_seqs_.
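  //
  // For example (illustrative keys and seqnums): fragmenting the overlapping
  // tombstones [a, e)@4 and [c, g)@6 yields the non-overlapping fragments
  // [a, c) -> {4}, [c, e) -> {6, 4}, and [e, g) -> {6}, where each fragment
  // records the sequence numbers of every input tombstone that covers it.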
  void FragmentTombstones(
      std::unique_ptr<InternalIterator> unfragmented_tombstones,
      const InternalKeyComparator& icmp, bool for_compaction,
      const std::vector<SequenceNumber>& snapshots);

  std::vector<RangeTombstoneStack> tombstones_;
  std::vector<SequenceNumber> tombstone_seqs_;
  std::set<SequenceNumber> seq_set_;
  std::list<std::string> pinned_slices_;
  PinnedIteratorsManager pinned_iters_mgr_;
};

// FragmentedRangeTombstoneIterator converts an InternalIterator of a range-del
// meta block into an iterator over non-overlapping tombstone fragments. The
// tombstone fragmentation process should be more efficient than the range
// tombstone collapsing algorithm in RangeDelAggregator because it leverages
// the internal key ordering already provided by the input iterator, if
// applicable (when the iterator is unsorted, a new sorted iterator is created
// before proceeding). If there are few overlaps, creating a
// FragmentedRangeTombstoneIterator should be O(n), while the RangeDelAggregator
// tombstone collapsing is always O(n log n).
class FragmentedRangeTombstoneIterator : public InternalIterator {
 public:
  FragmentedRangeTombstoneIterator(
      const FragmentedRangeTombstoneList* tombstones,
      const InternalKeyComparator& icmp, SequenceNumber upper_bound,
      SequenceNumber lower_bound = 0);
  FragmentedRangeTombstoneIterator(
      const std::shared_ptr<const FragmentedRangeTombstoneList>& tombstones,
      const InternalKeyComparator& icmp, SequenceNumber upper_bound,
      SequenceNumber lower_bound = 0);

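  // Example (illustrative): with snapshots taken at seqnums 10 and 20, an
  // iterator constructed with upper_bound = 20 and lower_bound = 11 exposes
  // only the tombstone fragments visible within that snapshot stripe, i.e.
  // those with a sequence number in [11, 20].
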
  void SeekToFirst() override;
  void SeekToLast() override;

  void SeekToTopFirst();
  void SeekToTopLast();

  // NOTE: Seek and SeekForPrev do not behave in the way InternalIterator
  // seeking should behave. This is OK because they are not currently used, but
  // eventually FragmentedRangeTombstoneIterator should no longer implement
  // InternalIterator.
  //
  // Seeks to the range tombstone that covers target at a seqnum in the
  // snapshot. If no such tombstone exists, seek to the earliest tombstone in
  // the snapshot that ends after target.
  void Seek(const Slice& target) override;
  // Seeks to the range tombstone that covers target at a seqnum in the
  // snapshot. If no such tombstone exists, seek to the latest tombstone in the
  // snapshot that starts before target.
  void SeekForPrev(const Slice& target) override;

  void Next() override;
  void Prev() override;

  void TopNext();
  void TopPrev();

  bool Valid() const override;
  Slice key() const override {
    MaybePinKey();
    return current_start_key_.Encode();
  }
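  // A fragment's value is its end key. The key is re-encoded into
  // current_start_key_ whenever the position changes, so it is not pinned,
  // while end keys point into the fragment list's pinned storage and remain
  // valid as long as the list is alive; hence IsValuePinned() returns true.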
  Slice value() const override { return pos_->end_key; }
  bool IsKeyPinned() const override { return false; }
  bool IsValuePinned() const override { return true; }
  Status status() const override { return Status::OK(); }

  bool empty() const { return tombstones_->empty(); }
  void Invalidate() {
    pos_ = tombstones_->end();
    seq_pos_ = tombstones_->seq_end();
    pinned_pos_ = tombstones_->end();
    pinned_seq_pos_ = tombstones_->seq_end();
  }

  RangeTombstone Tombstone() const {
    return RangeTombstone(start_key(), end_key(), seq());
  }
  Slice start_key() const { return pos_->start_key; }
  Slice end_key() const { return pos_->end_key; }
  SequenceNumber seq() const { return *seq_pos_; }
  ParsedInternalKey parsed_start_key() const {
    return ParsedInternalKey(pos_->start_key, kMaxSequenceNumber,
                             kTypeRangeDeletion);
  }
  ParsedInternalKey parsed_end_key() const {
    return ParsedInternalKey(pos_->end_key, kMaxSequenceNumber,
                             kTypeRangeDeletion);
  }

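  // Returns the sequence number of the newest visible tombstone covering
  // user_key, or 0 if no visible tombstone covers it. For example
  // (illustrative): with visible fragments [a, c) -> {6, 4} and
  // [c, e) -> {6}, MaxCoveringTombstoneSeqnum("b") returns 6 and
  // MaxCoveringTombstoneSeqnum("f") returns 0.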
  SequenceNumber MaxCoveringTombstoneSeqnum(const Slice& user_key);

  // Splits the iterator into n+1 iterators (where n is the number of
  // snapshots), each providing a view over a "stripe" of sequence numbers. The
  // iterators are keyed by the upper bound of their ranges (the provided
  // snapshots + kMaxSequenceNumber).
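  //
  // For example (illustrative): with snapshots {10, 20} and an iterator
  // whose upper bound is kMaxSequenceNumber, the result contains iterators
  // keyed by 10, 20, and kMaxSequenceNumber, covering the seqnum stripes
  // [0, 10], [11, 20], and [21, kMaxSequenceNumber] respectively.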
  //
  // NOTE: the iterators in the returned map are no longer valid if their
  // parent iterator is deleted, since they do not modify the refcount of the
  // underlying tombstone list. Therefore, this map should be deleted before
  // the parent iterator.
  std::map<SequenceNumber, std::unique_ptr<FragmentedRangeTombstoneIterator>>
  SplitBySnapshot(const std::vector<SequenceNumber>& snapshots);

  SequenceNumber upper_bound() const { return upper_bound_; }
  SequenceNumber lower_bound() const { return lower_bound_; }

 private:
  using RangeTombstoneStack = FragmentedRangeTombstoneList::RangeTombstoneStack;

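  // Comparators ordering fragments by start key and end key, respectively.
  // They also accept a bare Slice operand so that binary searches
  // (std::lower_bound / std::upper_bound) over the fragment vector can be
  // keyed directly by a user key.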
  struct RangeTombstoneStackStartComparator {
    explicit RangeTombstoneStackStartComparator(const Comparator* c) : cmp(c) {}

    bool operator()(const RangeTombstoneStack& a,
                    const RangeTombstoneStack& b) const {
      return cmp->Compare(a.start_key, b.start_key) < 0;
    }

    bool operator()(const RangeTombstoneStack& a, const Slice& b) const {
      return cmp->Compare(a.start_key, b) < 0;
    }

    bool operator()(const Slice& a, const RangeTombstoneStack& b) const {
      return cmp->Compare(a, b.start_key) < 0;
    }

    const Comparator* cmp;
  };

  struct RangeTombstoneStackEndComparator {
    explicit RangeTombstoneStackEndComparator(const Comparator* c) : cmp(c) {}

    bool operator()(const RangeTombstoneStack& a,
                    const RangeTombstoneStack& b) const {
      return cmp->Compare(a.end_key, b.end_key) < 0;
    }

    bool operator()(const RangeTombstoneStack& a, const Slice& b) const {
      return cmp->Compare(a.end_key, b) < 0;
    }

    bool operator()(const Slice& a, const RangeTombstoneStack& b) const {
      return cmp->Compare(a, b.end_key) < 0;
    }

    const Comparator* cmp;
  };
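  // Lazily re-encodes the current fragment's start key and seqnum into
  // current_start_key_ so that key() can return a stable internal key;
  // pinned_pos_ and pinned_seq_pos_ track which position the cached key was
  // built for, avoiding redundant re-encoding.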
  void MaybePinKey() const {
    if (pos_ != tombstones_->end() && seq_pos_ != tombstones_->seq_end() &&
        (pinned_pos_ != pos_ || pinned_seq_pos_ != seq_pos_)) {
      current_start_key_.Set(pos_->start_key, *seq_pos_, kTypeRangeDeletion);
      pinned_pos_ = pos_;
      pinned_seq_pos_ = seq_pos_;
    }
  }

  void SeekToCoveringTombstone(const Slice& key);
  void SeekForPrevToCoveringTombstone(const Slice& key);
  void ScanForwardToVisibleTombstone();
  void ScanBackwardToVisibleTombstone();
  bool ValidPos() const {
    return Valid() && seq_pos_ != tombstones_->seq_iter(pos_->seq_end_idx);
  }

  const RangeTombstoneStackStartComparator tombstone_start_cmp_;
  const RangeTombstoneStackEndComparator tombstone_end_cmp_;
  const InternalKeyComparator* icmp_;
  const Comparator* ucmp_;
  std::shared_ptr<const FragmentedRangeTombstoneList> tombstones_ref_;
  const FragmentedRangeTombstoneList* tombstones_;
  SequenceNumber upper_bound_;
  SequenceNumber lower_bound_;
  std::vector<RangeTombstoneStack>::const_iterator pos_;
  std::vector<SequenceNumber>::const_iterator seq_pos_;
  mutable std::vector<RangeTombstoneStack>::const_iterator pinned_pos_;
  mutable std::vector<SequenceNumber>::const_iterator pinned_seq_pos_;
  mutable InternalKey current_start_key_;
};

}  // namespace ROCKSDB_NAMESPACE