// Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once

#include <list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>

#include "db/compaction_iteration_stats.h"
#include "db/dbformat.h"
#include "db/pinned_iterators_manager.h"
#include "db/version_edit.h"
#include "include/rocksdb/comparator.h"
#include "include/rocksdb/types.h"
#include "table/internal_iterator.h"
#include "table/scoped_arena_iterator.h"
#include "table/table_builder.h"
#include "util/kv_map.h"
namespace rocksdb {

// RangeDelMaps maintain position across calls to ShouldDelete. The caller may
// wish to specify a mode to optimize positioning the iterator during the next
// call to ShouldDelete. The non-kFullScan modes are only available when
// deletion collapsing is enabled.
//
// For example, if we invoke Next() on an iterator, kForwardTraversal should be
// specified to advance one-by-one through deletions until one is found whose
// interval contains the key. This will typically be faster than doing a full
// binary search (kBinarySearch).
enum class RangeDelPositioningMode {
  kFullScan,  // used iff collapse_deletions_ == false
  kForwardTraversal,
  kBackwardTraversal,
  kBinarySearch,
};
// A RangeDelIterator iterates over range deletion tombstones.
class RangeDelIterator {
 public:
  virtual ~RangeDelIterator() = default;

  virtual bool Valid() const = 0;
  virtual void Next() = 0;
  virtual void Seek(const Slice& target) = 0;
  virtual RangeTombstone Tombstone() const = 0;
};
// A RangeDelMap keeps track of range deletion tombstones within a snapshot
// stripe.
//
// RangeDelMaps are used internally by RangeDelAggregator. They are not
// intended to be used directly.
class RangeDelMap {
 public:
  virtual ~RangeDelMap() = default;

  virtual bool ShouldDelete(const ParsedInternalKey& parsed,
                            RangeDelPositioningMode mode) = 0;
  virtual bool IsRangeOverlapped(const Slice& start, const Slice& end) = 0;
  virtual void InvalidatePosition() = 0;

  virtual size_t Size() const = 0;
  bool IsEmpty() const { return Size() == 0; }

  virtual void AddTombstone(RangeTombstone tombstone) = 0;
  virtual std::unique_ptr<RangeDelIterator> NewIterator() = 0;
};
// A RangeDelAggregator aggregates range deletion tombstones as they are
// encountered in memtables/SST files. It provides methods that check whether a
// key is covered by range tombstones or write the relevant tombstones to a new
// SST file.
class RangeDelAggregator {
 public:
  // @param snapshots These are used to organize the tombstones into snapshot
  //    stripes, which is the seqnum range between consecutive snapshots,
  //    including the higher snapshot and excluding the lower one. Currently,
  //    this is used by ShouldDelete() to prevent deletion of keys that are
  //    covered by range tombstones in other snapshot stripes. This constructor
  //    is used for writes (flush/compaction). All DB snapshots are provided
  //    such that no keys are removed that are uncovered according to any DB
  //    snapshot.
  // Note this overload does not lazily initialize Rep.
  RangeDelAggregator(const InternalKeyComparator& icmp,
                     const std::vector<SequenceNumber>& snapshots,
                     bool collapse_deletions = true);
  // @param upper_bound Similar to snapshots above, except with a single
  //    snapshot, which allows us to store the snapshot on the stack and defer
  //    initialization of heap-allocating members (in Rep) until the first
  //    range deletion is encountered. This constructor is used in case of
  //    reads (get/iterator), for which only the user snapshot (upper_bound)
  //    is provided such that the seqnum space is divided into two stripes.
  //    Only the older stripe will be used by ShouldDelete().
  RangeDelAggregator(const InternalKeyComparator& icmp,
                     SequenceNumber upper_bound,
                     bool collapse_deletions = false);
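
  // Illustrative construction sketch (not part of this header; `icmp`,
  // `existing_snapshots`, and `read_seq` are assumed to be supplied by the
  // caller):
  //
  //   // Write path (flush/compaction): pass all DB snapshots so no key that
  //   // is visible to any snapshot gets dropped; collapsing is on by default.
  //   RangeDelAggregator compaction_agg(icmp, existing_snapshots);
  //
  //   // Read path (Get/iterator): only the read snapshot is needed, and Rep
  //   // is initialized lazily when the first range tombstone is added.
  //   RangeDelAggregator read_agg(icmp, read_seq /* upper_bound */);
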
  // Returns whether the key should be deleted, which is the case when it is
  // covered by a range tombstone residing in the same snapshot stripe.
  // @param mode If collapse_deletions_ is true, this dictates how we will find
  //    the deletion whose interval contains this key. Otherwise, its value
  //    must be kFullScan indicating a linear scan from the beginning.
  bool ShouldDelete(
      const ParsedInternalKey& parsed,
      RangeDelPositioningMode mode = RangeDelPositioningMode::kFullScan) {
    if (rep_ == nullptr) {
      return false;
    }
    return ShouldDeleteImpl(parsed, mode);
  }
  bool ShouldDelete(
      const Slice& internal_key,
      RangeDelPositioningMode mode = RangeDelPositioningMode::kFullScan) {
    if (rep_ == nullptr) {
      return false;
    }
    return ShouldDeleteImpl(internal_key, mode);
  }
  bool ShouldDeleteImpl(const ParsedInternalKey& parsed,
                        RangeDelPositioningMode mode);
  bool ShouldDeleteImpl(const Slice& internal_key,
                        RangeDelPositioningMode mode);
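
  // Illustrative usage sketch (not part of this header): a forward scan over
  // internal keys can pass kForwardTraversal so the aggregator advances its
  // tombstone position incrementally instead of re-searching on every key.
  // `iter` and `range_del_agg` are assumed to be set up by the caller.
  //
  //   for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
  //     ParsedInternalKey ikey;
  //     if (!ParseInternalKey(iter->key(), &ikey)) {
  //       continue;  // corrupt key; real callers surface an error instead
  //     }
  //     if (range_del_agg.ShouldDelete(
  //             ikey, RangeDelPositioningMode::kForwardTraversal)) {
  //       continue;  // covered by a range tombstone in its snapshot stripe
  //     }
  //     // ... output the key ...
  //   }
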
  // Checks whether range deletions cover any keys between `start` and `end`,
  // inclusive.
  //
  // @param start User key representing the beginning of the range to check
  //    for overlap.
  // @param end User key representing the end of the range to check for
  //    overlap. This argument is inclusive, so the existence of a range
  //    deletion covering `end` causes this to return true.
  bool IsRangeOverlapped(const Slice& start, const Slice& end);
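
  // Illustrative check sketch (not part of this header; `file_smallest` and
  // `file_largest` are assumed user keys from some file's metadata):
  //
  //   if (range_del_agg.IsRangeOverlapped(file_smallest, file_largest)) {
  //     // At least one key in [file_smallest, file_largest] is covered by a
  //     // range tombstone in this aggregator.
  //   }
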
  // Adds tombstones to the tombstone aggregation structure maintained by this
  // object. Tombstones are truncated to smallest and largest. If smallest (or
  // largest) is null, it is not used for truncation. When adding range
  // tombstones present in an sstable, smallest and largest should be set to
  // the smallest and largest keys from the sstable file metadata. Note that
  // tombstone end keys are exclusive while largest is inclusive.
  // @return non-OK status if any of the tombstone keys are corrupted.
  Status AddTombstones(std::unique_ptr<InternalIterator> input,
                       const InternalKey* smallest = nullptr,
                       const InternalKey* largest = nullptr);
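
  // Illustrative sketch (not part of this header): adding an SST file's range
  // tombstones, truncated to the file's key range. `range_del_iter` is an
  // assumed InternalIterator over that file's range tombstone entries, and
  // `meta` is assumed to be its FileMetaData.
  //
  //   Status s = range_del_agg.AddTombstones(std::move(range_del_iter),
  //                                          &meta.smallest, &meta.largest);
  //   if (!s.ok()) {
  //     // a tombstone key failed to parse; propagate the error
  //   }
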
  // Resets iterators maintained across calls to ShouldDelete(). This may be
  // called when the tombstones change, or the owner may call explicitly, e.g.,
  // if it's an iterator that just seeked to an arbitrary position. The effect
  // of invalidation is that the following call to ShouldDelete() will binary
  // search for its tombstone.
  void InvalidateRangeDelMapPositions();
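
  // Illustrative sketch (not part of this header): after repositioning a scan
  // to an arbitrary key, the owner invalidates cached tombstone positions so
  // the next ShouldDelete() re-searches instead of assuming forward progress.
  //
  //   iter->Seek(target);
  //   range_del_agg.InvalidateRangeDelMapPositions();
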
  bool AddFile(uint64_t file_number);
  // Creates a new iterator over the range deletion tombstones in all of the
  // snapshot stripes in this aggregator. Tombstones are presented in start key
  // order. Tombstones with the same start key are presented in arbitrary
  // order.
  //
  // The iterator is invalidated after any call to AddTombstones. It is the
  // caller's responsibility to avoid using invalid iterators.
  std::unique_ptr<RangeDelIterator> NewIterator();
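
  // Illustrative sketch (not part of this header): draining all aggregated
  // tombstones in start-key order, e.g., to persist them when building an
  // output file.
  //
  //   std::unique_ptr<RangeDelIterator> tombstone_iter =
  //       range_del_agg.NewIterator();
  //   for (; tombstone_iter->Valid(); tombstone_iter->Next()) {
  //     RangeTombstone t = tombstone_iter->Tombstone();
  //     // t.start_key_ (inclusive), t.end_key_ (exclusive), t.seq_
  //   }
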
 private:
  // Maps snapshot seqnum -> map of tombstones that fall in that stripe, i.e.,
  // their seqnums are greater than the next smaller snapshot's seqnum.
  typedef std::map<SequenceNumber, std::unique_ptr<RangeDelMap>> StripeMap;

  struct Rep {
    StripeMap stripe_map_;
    PinnedIteratorsManager pinned_iters_mgr_;
    std::list<std::string> pinned_slices_;
    std::set<uint64_t> added_files_;
  };
  // Initializes rep_ lazily. This aggregator object is constructed for every
  // read, so expensive members should only be created when necessary, i.e.,
  // once the first range deletion is encountered.
  void InitRep(const std::vector<SequenceNumber>& snapshots);

  std::unique_ptr<RangeDelMap> NewRangeDelMap();
  RangeDelMap& GetRangeDelMap(SequenceNumber seq);

  SequenceNumber upper_bound_;
  std::unique_ptr<Rep> rep_;
  const InternalKeyComparator& icmp_;
  // collapse range deletions so they're binary searchable
  const bool collapse_deletions_;
};
}  // namespace rocksdb