]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/db/range_del_aggregator.h
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / rocksdb / db / range_del_aggregator.h
1 // Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5
6 #pragma once
7
8 #include <list>
9 #include <map>
10 #include <set>
11 #include <string>
12 #include <vector>
13
14 #include "db/compaction_iteration_stats.h"
15 #include "db/dbformat.h"
16 #include "db/pinned_iterators_manager.h"
17 #include "db/version_edit.h"
18 #include "include/rocksdb/comparator.h"
19 #include "include/rocksdb/types.h"
20 #include "table/internal_iterator.h"
21 #include "table/scoped_arena_iterator.h"
22 #include "table/table_builder.h"
23 #include "util/kv_map.h"
24
25 namespace rocksdb {
26
27 // RangeDelMaps maintain position across calls to ShouldDelete. The caller may
28 // wish to specify a mode to optimize positioning the iterator during the next
29 // call to ShouldDelete. The non-kFullScan modes are only available when
30 // deletion collapsing is enabled.
31 //
32 // For example, if we invoke Next() on an iterator, kForwardTraversal should be
33 // specified to advance one-by-one through deletions until one is found with its
34 // interval containing the key. This will typically be faster than doing a full
35 // binary search (kBinarySearch).
// Hint passed to ShouldDelete() describing how the caller is moving through
// the key space, so the tombstone map can reposition its internal iterator
// cheaply instead of always doing a full binary search. The non-kFullScan
// modes are only valid when collapse_deletions_ == true (see comment above).
enum class RangeDelPositioningMode {
  kFullScan,  // linear scan from the beginning; used iff collapse_deletions_ == false
  kForwardTraversal,   // caller is stepping forward key-by-key (e.g. Next())
  kBackwardTraversal,  // caller is stepping backward key-by-key (e.g. Prev())
  kBinarySearch,       // caller seeked to an arbitrary key; do a fresh binary search
};
42
43 // A RangeDelIterator iterates over range deletion tombstones.
// A RangeDelIterator iterates over range deletion tombstones.
//
// Abstract interface; concrete iterators are produced by
// RangeDelMap::NewIterator() / RangeDelAggregator::NewIterator(). Tombstones
// are yielded in start-key order (see NewIterator() comment below).
class RangeDelIterator {
 public:
  virtual ~RangeDelIterator() = default;

  // Returns true while the iterator is positioned on a tombstone; Tombstone()
  // and Next() may only be called when Valid() is true.
  virtual bool Valid() const = 0;
  // Advances to the next tombstone.
  virtual void Next() = 0;
  // Positions the iterator at the first tombstone relevant to `target`.
  // NOTE(review): whether `target` is a user key or an internal key is not
  // visible from this header — confirm against the implementation.
  virtual void Seek(const Slice& target) = 0;
  // Returns the tombstone at the current position.
  virtual RangeTombstone Tombstone() const = 0;
};
53
54 // A RangeDelMap keeps track of range deletion tombstones within a snapshot
55 // stripe.
56 //
57 // RangeDelMaps are used internally by RangeDelAggregator. They are not intended
58 // to be used directly.
// A RangeDelMap keeps track of range deletion tombstones within a snapshot
// stripe.
//
// RangeDelMaps are used internally by RangeDelAggregator. They are not intended
// to be used directly.
class RangeDelMap {
 public:
  virtual ~RangeDelMap() = default;

  // Returns true if `parsed` is covered by a tombstone in this map. `mode`
  // hints how to reposition the map's internal iterator (see
  // RangeDelPositioningMode above).
  virtual bool ShouldDelete(const ParsedInternalKey& parsed,
                            RangeDelPositioningMode mode) = 0;
  // Returns true if any tombstone overlaps the user-key range
  // [`start`, `end`] (end inclusive — see RangeDelAggregator::IsRangeOverlapped).
  virtual bool IsRangeOverlapped(const Slice& start, const Slice& end) = 0;
  // Resets any iterator position maintained across ShouldDelete() calls, so
  // the next call re-positions from scratch.
  virtual void InvalidatePosition() = 0;

  // Number of tombstones currently stored in the map.
  virtual size_t Size() const = 0;
  bool IsEmpty() const { return Size() == 0; }

  // Adds one tombstone to the map. Takes the tombstone by value; the caller
  // is responsible for keeping the underlying key memory alive (see
  // RangeDelAggregator's pinned_slices_).
  virtual void AddTombstone(RangeTombstone tombstone) = 0;
  // Returns an iterator over this map's tombstones in start-key order.
  virtual std::unique_ptr<RangeDelIterator> NewIterator() = 0;
};
74
75 // A RangeDelAggregator aggregates range deletion tombstones as they are
76 // encountered in memtables/SST files. It provides methods that check whether a
77 // key is covered by range tombstones or write the relevant tombstones to a new
78 // SST file.
// A RangeDelAggregator aggregates range deletion tombstones as they are
// encountered in memtables/SST files. It provides methods that check whether a
// key is covered by range tombstones or write the relevant tombstones to a new
// SST file.
class RangeDelAggregator {
 public:
  // @param snapshots These are used to organize the tombstones into snapshot
  //    stripes, which is the seqnum range between consecutive snapshots,
  //    including the higher snapshot and excluding the lower one. Currently,
  //    this is used by ShouldDelete() to prevent deletion of keys that are
  //    covered by range tombstones in other snapshot stripes. This constructor
  //    is used for writes (flush/compaction). All DB snapshots are provided
  //    such that no keys are removed that are uncovered according to any DB
  //    snapshot.
  // Note this overload does not lazily initialize Rep.
  RangeDelAggregator(const InternalKeyComparator& icmp,
                     const std::vector<SequenceNumber>& snapshots,
                     bool collapse_deletions = true);

  // @param upper_bound Similar to snapshots above, except with a single
  //    snapshot, which allows us to store the snapshot on the stack and defer
  //    initialization of heap-allocating members (in Rep) until the first range
  //    deletion is encountered. This constructor is used in case of reads (get/
  //    iterator), for which only the user snapshot (upper_bound) is provided
  //    such that the seqnum space is divided into two stripes. Only the older
  //    stripe will be used by ShouldDelete().
  RangeDelAggregator(const InternalKeyComparator& icmp,
                     SequenceNumber upper_bound,
                     bool collapse_deletions = false);

  // Returns whether the key should be deleted, which is the case when it is
  // covered by a range tombstone residing in the same snapshot stripe.
  // @param mode If collapse_deletions_ is true, this dictates how we will find
  //    the deletion whose interval contains this key. Otherwise, its
  //    value must be kFullScan indicating linear scan from beginning.
  //
  // The inline wrappers short-circuit to false when rep_ was never lazily
  // initialized, i.e. no tombstones have been added (cheap common case for
  // reads).
  bool ShouldDelete(
      const ParsedInternalKey& parsed,
      RangeDelPositioningMode mode = RangeDelPositioningMode::kFullScan) {
    if (rep_ == nullptr) {
      return false;
    }
    return ShouldDeleteImpl(parsed, mode);
  }
  // Same as above, taking an encoded internal key instead of a parsed one.
  bool ShouldDelete(
      const Slice& internal_key,
      RangeDelPositioningMode mode = RangeDelPositioningMode::kFullScan) {
    if (rep_ == nullptr) {
      return false;
    }
    return ShouldDeleteImpl(internal_key, mode);
  }
  // Implementations of the above; must only be called when rep_ != nullptr
  // (the inline wrappers guarantee this).
  bool ShouldDeleteImpl(const ParsedInternalKey& parsed,
                        RangeDelPositioningMode mode);
  bool ShouldDeleteImpl(const Slice& internal_key,
                        RangeDelPositioningMode mode);

  // Checks whether range deletions cover any keys between `start` and `end`,
  // inclusive.
  //
  // @param start User key representing beginning of range to check for overlap.
  // @param end User key representing end of range to check for overlap. This
  //    argument is inclusive, so the existence of a range deletion covering
  //    `end` causes this to return true.
  bool IsRangeOverlapped(const Slice& start, const Slice& end);

  // Adds tombstones to the tombstone aggregation structure maintained by this
  // object. Tombstones are truncated to smallest and largest. If smallest (or
  // largest) is null, it is not used for truncation. When adding range
  // tombstones present in an sstable, smallest and largest should be set to
  // the smallest and largest keys from the sstable file metadata. Note that
  // tombstones end keys are exclusive while largest is inclusive.
  // @return non-OK status if any of the tombstone keys are corrupted.
  Status AddTombstones(std::unique_ptr<InternalIterator> input,
                       const InternalKey* smallest = nullptr,
                       const InternalKey* largest = nullptr);

  // Resets iterators maintained across calls to ShouldDelete(). This may be
  // called when the tombstones change, or the owner may call explicitly, e.g.,
  // if it's an iterator that just seeked to an arbitrary position. The effect
  // of invalidation is that the following call to ShouldDelete() will binary
  // search for its tombstone.
  void InvalidateRangeDelMapPositions();

  // Returns true if no tombstones have been aggregated.
  bool IsEmpty();
  // Records that tombstones from SST file `file_number` were added (tracked
  // via Rep::added_files_). NOTE(review): presumably returns true iff the file
  // was not previously added, so callers can skip duplicate work — confirm
  // against the .cc.
  bool AddFile(uint64_t file_number);

  // Create a new iterator over the range deletion tombstones in all of the
  // snapshot stripes in this aggregator. Tombstones are presented in start key
  // order. Tombstones with the same start key are presented in arbitrary order.
  //
  // The iterator is invalidated after any call to AddTombstones. It is the
  // caller's responsibility to avoid using invalid iterators.
  std::unique_ptr<RangeDelIterator> NewIterator();

 private:
  // Maps snapshot seqnum -> map of tombstones that fall in that stripe, i.e.,
  // their seqnums are greater than the next smaller snapshot's seqnum.
  typedef std::map<SequenceNumber, std::unique_ptr<RangeDelMap>> StripeMap;

  // Heap-allocated state, created lazily (see InitRep) so that read paths
  // that never encounter a range deletion pay nothing.
  struct Rep {
    StripeMap stripe_map_;
    // Pins iterators/slices consumed by AddTombstones so tombstone key
    // memory stays valid for the aggregator's lifetime.
    PinnedIteratorsManager pinned_iters_mgr_;
    // Owned copies of key data backing stored tombstones (e.g. truncated
    // keys) — NOTE(review): exact usage lives in the .cc.
    std::list<std::string> pinned_slices_;
    // File numbers already registered via AddFile().
    std::set<uint64_t> added_files_;
  };
  // Initializes rep_ lazily. This aggregator object is constructed for every
  // read, so expensive members should only be created when necessary, i.e.,
  // once the first range deletion is encountered.
  void InitRep(const std::vector<SequenceNumber>& snapshots);

  // Factory for the concrete RangeDelMap (collapsed or uncollapsed,
  // depending on collapse_deletions_ — implementation in the .cc).
  std::unique_ptr<RangeDelMap> NewRangeDelMap();
  // Returns the stripe map responsible for sequence number `seq`.
  RangeDelMap& GetRangeDelMap(SequenceNumber seq);

  // Single snapshot seqnum used by the read-path constructor; divides the
  // seqnum space into two stripes.
  SequenceNumber upper_bound_;
  std::unique_ptr<Rep> rep_;  // lazily initialized; nullptr until first tombstone
  const InternalKeyComparator& icmp_;
  // collapse range deletions so they're binary searchable
  const bool collapse_deletions_;
};
194
195 } // namespace rocksdb