1 // Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
14 #include "db/dbformat.h"
15 #include "db/pinned_iterators_manager.h"
16 #include "rocksdb/status.h"
17 #include "table/internal_iterator.h"
21 struct FragmentedRangeTombstoneList
{
23 // A compact representation of a "stack" of range tombstone fragments, which
24 // start and end at the same user keys but have different sequence numbers.
25 // The members seq_start_idx and seq_end_idx are intended to be parameters to
27 struct RangeTombstoneStack
{
28 RangeTombstoneStack(const Slice
& start
, const Slice
& end
, size_t start_idx
,
32 seq_start_idx(start_idx
),
33 seq_end_idx(end_idx
) {}
40 FragmentedRangeTombstoneList(
41 std::unique_ptr
<InternalIterator
> unfragmented_tombstones
,
42 const InternalKeyComparator
& icmp
, bool for_compaction
= false,
43 const std::vector
<SequenceNumber
>& snapshots
= {});
45 std::vector
<RangeTombstoneStack
>::const_iterator
begin() const {
46 return tombstones_
.begin();
49 std::vector
<RangeTombstoneStack
>::const_iterator
end() const {
50 return tombstones_
.end();
53 std::vector
<SequenceNumber
>::const_iterator
seq_iter(size_t idx
) const {
54 return std::next(tombstone_seqs_
.begin(), idx
);
57 std::vector
<SequenceNumber
>::const_iterator
seq_begin() const {
58 return tombstone_seqs_
.begin();
61 std::vector
<SequenceNumber
>::const_iterator
seq_end() const {
62 return tombstone_seqs_
.end();
65 bool empty() const { return tombstones_
.empty(); }
67 // Returns true if the stored tombstones contain with one with a sequence
68 // number in [lower, upper].
69 bool ContainsRange(SequenceNumber lower
, SequenceNumber upper
) const;
72 // Given an ordered range tombstone iterator unfragmented_tombstones,
73 // "fragment" the tombstones into non-overlapping pieces, and store them in
74 // tombstones_ and tombstone_seqs_.
75 void FragmentTombstones(
76 std::unique_ptr
<InternalIterator
> unfragmented_tombstones
,
77 const InternalKeyComparator
& icmp
, bool for_compaction
,
78 const std::vector
<SequenceNumber
>& snapshots
);
80 std::vector
<RangeTombstoneStack
> tombstones_
;
81 std::vector
<SequenceNumber
> tombstone_seqs_
;
82 std::set
<SequenceNumber
> seq_set_
;
83 std::list
<std::string
> pinned_slices_
;
84 PinnedIteratorsManager pinned_iters_mgr_
;
87 // FragmentedRangeTombstoneIterator converts an InternalIterator of a range-del
88 // meta block into an iterator over non-overlapping tombstone fragments. The
89 // tombstone fragmentation process should be more efficient than the range
90 // tombstone collapsing algorithm in RangeDelAggregator because this leverages
91 // the internal key ordering already provided by the input iterator, if
92 // applicable (when the iterator is unsorted, a new sorted iterator is created
93 // before proceeding). If there are few overlaps, creating a
94 // FragmentedRangeTombstoneIterator should be O(n), while the RangeDelAggregator
95 // tombstone collapsing is always O(n log n).
96 class FragmentedRangeTombstoneIterator
: public InternalIterator
{
98 FragmentedRangeTombstoneIterator(
99 const FragmentedRangeTombstoneList
* tombstones
,
100 const InternalKeyComparator
& icmp
, SequenceNumber upper_bound
,
101 SequenceNumber lower_bound
= 0);
102 FragmentedRangeTombstoneIterator(
103 const std::shared_ptr
<const FragmentedRangeTombstoneList
>& tombstones
,
104 const InternalKeyComparator
& icmp
, SequenceNumber upper_bound
,
105 SequenceNumber lower_bound
= 0);
107 void SeekToFirst() override
;
108 void SeekToLast() override
;
110 void SeekToTopFirst();
111 void SeekToTopLast();
113 // NOTE: Seek and SeekForPrev do not behave in the way InternalIterator
114 // seeking should behave. This is OK because they are not currently used, but
115 // eventually FragmentedRangeTombstoneIterator should no longer implement
118 // Seeks to the range tombstone that covers target at a seqnum in the
119 // snapshot. If no such tombstone exists, seek to the earliest tombstone in
120 // the snapshot that ends after target.
121 void Seek(const Slice
& target
) override
;
122 // Seeks to the range tombstone that covers target at a seqnum in the
123 // snapshot. If no such tombstone exists, seek to the latest tombstone in the
124 // snapshot that starts before target.
125 void SeekForPrev(const Slice
& target
) override
;
127 void Next() override
;
128 void Prev() override
;
133 bool Valid() const override
;
134 Slice
key() const override
{
136 return current_start_key_
.Encode();
138 Slice
value() const override
{ return pos_
->end_key
; }
139 bool IsKeyPinned() const override
{ return false; }
140 bool IsValuePinned() const override
{ return true; }
141 Status
status() const override
{ return Status::OK(); }
143 bool empty() const { return tombstones_
->empty(); }
145 pos_
= tombstones_
->end();
146 seq_pos_
= tombstones_
->seq_end();
149 RangeTombstone
Tombstone() const {
150 return RangeTombstone(start_key(), end_key(), seq());
152 Slice
start_key() const { return pos_
->start_key
; }
153 Slice
end_key() const { return pos_
->end_key
; }
154 SequenceNumber
seq() const { return *seq_pos_
; }
155 ParsedInternalKey
parsed_start_key() const {
156 return ParsedInternalKey(pos_
->start_key
, kMaxSequenceNumber
,
159 ParsedInternalKey
parsed_end_key() const {
160 return ParsedInternalKey(pos_
->end_key
, kMaxSequenceNumber
,
164 SequenceNumber
MaxCoveringTombstoneSeqnum(const Slice
& user_key
);
166 // Splits the iterator into n+1 iterators (where n is the number of
167 // snapshots), each providing a view over a "stripe" of sequence numbers. The
168 // iterators are keyed by the upper bound of their ranges (the provided
169 // snapshots + kMaxSequenceNumber).
171 // NOTE: the iterators in the returned map are no longer valid if their
172 // parent iterator is deleted, since they do not modify the refcount of the
173 // underlying tombstone list. Therefore, this map should be deleted before
174 // the parent iterator.
175 std::map
<SequenceNumber
, std::unique_ptr
<FragmentedRangeTombstoneIterator
>>
176 SplitBySnapshot(const std::vector
<SequenceNumber
>& snapshots
);
178 SequenceNumber
upper_bound() const { return upper_bound_
; }
179 SequenceNumber
lower_bound() const { return lower_bound_
; }
182 using RangeTombstoneStack
= FragmentedRangeTombstoneList::RangeTombstoneStack
;
184 struct RangeTombstoneStackStartComparator
{
185 explicit RangeTombstoneStackStartComparator(const Comparator
* c
) : cmp(c
) {}
187 bool operator()(const RangeTombstoneStack
& a
,
188 const RangeTombstoneStack
& b
) const {
189 return cmp
->Compare(a
.start_key
, b
.start_key
) < 0;
192 bool operator()(const RangeTombstoneStack
& a
, const Slice
& b
) const {
193 return cmp
->Compare(a
.start_key
, b
) < 0;
196 bool operator()(const Slice
& a
, const RangeTombstoneStack
& b
) const {
197 return cmp
->Compare(a
, b
.start_key
) < 0;
200 const Comparator
* cmp
;
203 struct RangeTombstoneStackEndComparator
{
204 explicit RangeTombstoneStackEndComparator(const Comparator
* c
) : cmp(c
) {}
206 bool operator()(const RangeTombstoneStack
& a
,
207 const RangeTombstoneStack
& b
) const {
208 return cmp
->Compare(a
.end_key
, b
.end_key
) < 0;
211 bool operator()(const RangeTombstoneStack
& a
, const Slice
& b
) const {
212 return cmp
->Compare(a
.end_key
, b
) < 0;
215 bool operator()(const Slice
& a
, const RangeTombstoneStack
& b
) const {
216 return cmp
->Compare(a
, b
.end_key
) < 0;
219 const Comparator
* cmp
;
222 void MaybePinKey() const {
223 if (pos_
!= tombstones_
->end() && seq_pos_
!= tombstones_
->seq_end() &&
224 (pinned_pos_
!= pos_
|| pinned_seq_pos_
!= seq_pos_
)) {
225 current_start_key_
.Set(pos_
->start_key
, *seq_pos_
, kTypeRangeDeletion
);
227 pinned_seq_pos_
= seq_pos_
;
231 void SeekToCoveringTombstone(const Slice
& key
);
232 void SeekForPrevToCoveringTombstone(const Slice
& key
);
233 void ScanForwardToVisibleTombstone();
234 void ScanBackwardToVisibleTombstone();
235 bool ValidPos() const {
236 return Valid() && seq_pos_
!= tombstones_
->seq_iter(pos_
->seq_end_idx
);
239 const RangeTombstoneStackStartComparator tombstone_start_cmp_
;
240 const RangeTombstoneStackEndComparator tombstone_end_cmp_
;
241 const InternalKeyComparator
* icmp_
;
242 const Comparator
* ucmp_
;
243 std::shared_ptr
<const FragmentedRangeTombstoneList
> tombstones_ref_
;
244 const FragmentedRangeTombstoneList
* tombstones_
;
245 SequenceNumber upper_bound_
;
246 SequenceNumber lower_bound_
;
247 std::vector
<RangeTombstoneStack
>::const_iterator pos_
;
248 std::vector
<SequenceNumber
>::const_iterator seq_pos_
;
249 mutable std::vector
<RangeTombstoneStack
>::const_iterator pinned_pos_
;
250 mutable std::vector
<SequenceNumber
>::const_iterator pinned_seq_pos_
;
251 mutable InternalKey current_start_key_
;
254 } // namespace rocksdb