]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/include/rocksdb/utilities/write_batch_with_index.h
bump version to 18.2.2-pve1
[ceph.git] / ceph / src / rocksdb / include / rocksdb / utilities / write_batch_with_index.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
6 // Use of this source code is governed by a BSD-style license that can be
7 // found in the LICENSE file. See the AUTHORS file for names of contributors.
8 //
9 // A WriteBatchWithIndex with a binary searchable index built for all the keys
10 // inserted.
11 #pragma once
12
13 #ifndef ROCKSDB_LITE
14
15 #include <memory>
16 #include <string>
17 #include <vector>
18
19 #include "rocksdb/comparator.h"
20 #include "rocksdb/iterator.h"
21 #include "rocksdb/slice.h"
22 #include "rocksdb/status.h"
23 #include "rocksdb/write_batch.h"
24 #include "rocksdb/write_batch_base.h"
25
26 namespace ROCKSDB_NAMESPACE {
27
28 class ColumnFamilyHandle;
29 class Comparator;
30 class DB;
31 class ReadCallback;
32 struct ReadOptions;
33 struct DBOptions;
34
35 enum WriteType {
36 kPutRecord,
37 kMergeRecord,
38 kDeleteRecord,
39 kSingleDeleteRecord,
40 kDeleteRangeRecord,
41 kLogDataRecord,
42 kXIDRecord,
43 kUnknownRecord,
44 };
45
46 // an entry for Put, Merge, Delete, or SingleDelete entry for write batches.
47 // Used in WBWIIterator.
48 struct WriteEntry {
49 WriteType type = kUnknownRecord;
50 Slice key;
51 Slice value;
52 };
53
54 // Iterator of one column family out of a WriteBatchWithIndex.
55 class WBWIIterator {
56 public:
57 virtual ~WBWIIterator() {}
58
59 virtual bool Valid() const = 0;
60
61 virtual void SeekToFirst() = 0;
62
63 virtual void SeekToLast() = 0;
64
65 virtual void Seek(const Slice& key) = 0;
66
67 virtual void SeekForPrev(const Slice& key) = 0;
68
69 virtual void Next() = 0;
70
71 virtual void Prev() = 0;
72
73 // the return WriteEntry is only valid until the next mutation of
74 // WriteBatchWithIndex
75 virtual WriteEntry Entry() const = 0;
76
77 virtual Status status() const = 0;
78 };
79
80 // A WriteBatchWithIndex with a binary searchable index built for all the keys
81 // inserted.
82 // In Put(), Merge() Delete(), or SingleDelete(), the same function of the
83 // wrapped will be called. At the same time, indexes will be built.
84 // By calling GetWriteBatch(), a user will get the WriteBatch for the data
85 // they inserted, which can be used for DB::Write().
86 // A user can call NewIterator() to create an iterator.
87 class WriteBatchWithIndex : public WriteBatchBase {
88 public:
89 // backup_index_comparator: the backup comparator used to compare keys
90 // within the same column family, if column family is not given in the
91 // interface, or we can't find a column family from the column family handle
92 // passed in, backup_index_comparator will be used for the column family.
93 // reserved_bytes: reserved bytes in underlying WriteBatch
94 // max_bytes: maximum size of underlying WriteBatch in bytes
95 // overwrite_key: if true, overwrite the key in the index when inserting
96 // the same key as previously, so iterator will never
97 // show two entries with the same key.
98 explicit WriteBatchWithIndex(
99 const Comparator* backup_index_comparator = BytewiseComparator(),
100 size_t reserved_bytes = 0, bool overwrite_key = false,
101 size_t max_bytes = 0, size_t protection_bytes_per_key = 0);
102
103 ~WriteBatchWithIndex() override;
104 WriteBatchWithIndex(WriteBatchWithIndex&&);
105 WriteBatchWithIndex& operator=(WriteBatchWithIndex&&);
106
107 using WriteBatchBase::Put;
108 Status Put(ColumnFamilyHandle* column_family, const Slice& key,
109 const Slice& value) override;
110
111 Status Put(const Slice& key, const Slice& value) override;
112
113 Status Put(ColumnFamilyHandle* column_family, const Slice& key,
114 const Slice& ts, const Slice& value) override;
115
116 Status PutEntity(ColumnFamilyHandle* column_family, const Slice& /* key */,
117 const WideColumns& /* columns */) override {
118 if (!column_family) {
119 return Status::InvalidArgument(
120 "Cannot call this method without a column family handle");
121 }
122
123 return Status::NotSupported(
124 "PutEntity not supported by WriteBatchWithIndex");
125 }
126
127 using WriteBatchBase::Merge;
128 Status Merge(ColumnFamilyHandle* column_family, const Slice& key,
129 const Slice& value) override;
130
131 Status Merge(const Slice& key, const Slice& value) override;
132 Status Merge(ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/,
133 const Slice& /*ts*/, const Slice& /*value*/) override {
134 return Status::NotSupported(
135 "Merge does not support user-defined timestamp");
136 }
137
138 using WriteBatchBase::Delete;
139 Status Delete(ColumnFamilyHandle* column_family, const Slice& key) override;
140 Status Delete(const Slice& key) override;
141 Status Delete(ColumnFamilyHandle* column_family, const Slice& key,
142 const Slice& ts) override;
143
144 using WriteBatchBase::SingleDelete;
145 Status SingleDelete(ColumnFamilyHandle* column_family,
146 const Slice& key) override;
147 Status SingleDelete(const Slice& key) override;
148 Status SingleDelete(ColumnFamilyHandle* column_family, const Slice& key,
149 const Slice& ts) override;
150
151 using WriteBatchBase::DeleteRange;
152 Status DeleteRange(ColumnFamilyHandle* /* column_family */,
153 const Slice& /* begin_key */,
154 const Slice& /* end_key */) override {
155 return Status::NotSupported(
156 "DeleteRange unsupported in WriteBatchWithIndex");
157 }
158 Status DeleteRange(const Slice& /* begin_key */,
159 const Slice& /* end_key */) override {
160 return Status::NotSupported(
161 "DeleteRange unsupported in WriteBatchWithIndex");
162 }
163 Status DeleteRange(ColumnFamilyHandle* /*column_family*/,
164 const Slice& /*begin_key*/, const Slice& /*end_key*/,
165 const Slice& /*ts*/) override {
166 return Status::NotSupported(
167 "DeleteRange unsupported in WriteBatchWithIndex");
168 }
169
170 using WriteBatchBase::PutLogData;
171 Status PutLogData(const Slice& blob) override;
172
173 using WriteBatchBase::Clear;
174 void Clear() override;
175
176 using WriteBatchBase::GetWriteBatch;
177 WriteBatch* GetWriteBatch() override;
178
179 // Create an iterator of a column family. User can call iterator.Seek() to
180 // search to the next entry of or after a key. Keys will be iterated in the
181 // order given by index_comparator. For multiple updates on the same key,
182 // each update will be returned as a separate entry, in the order of update
183 // time.
184 //
185 // The returned iterator should be deleted by the caller.
186 WBWIIterator* NewIterator(ColumnFamilyHandle* column_family);
187 // Create an iterator of the default column family.
188 WBWIIterator* NewIterator();
189
190 // Will create a new Iterator that will use WBWIIterator as a delta and
191 // base_iterator as base.
192 //
193 // This function is only supported if the WriteBatchWithIndex was
194 // constructed with overwrite_key=true.
195 //
196 // The returned iterator should be deleted by the caller.
197 // The base_iterator is now 'owned' by the returned iterator. Deleting the
198 // returned iterator will also delete the base_iterator.
199 //
200 // Updating write batch with the current key of the iterator is not safe.
201 // We strongly recommend users not to do it. It will invalidate the current
202 // key() and value() of the iterator. This invalidation happens even before
203 // the write batch update finishes. The state may recover after Next() is
204 // called.
205 Iterator* NewIteratorWithBase(ColumnFamilyHandle* column_family,
206 Iterator* base_iterator,
207 const ReadOptions* opts = nullptr);
208 // default column family
209 Iterator* NewIteratorWithBase(Iterator* base_iterator);
210
211 // Similar to DB::Get() but will only read the key from this batch.
212 // If the batch does not have enough data to resolve Merge operations,
213 // MergeInProgress status may be returned.
214 Status GetFromBatch(ColumnFamilyHandle* column_family,
215 const DBOptions& options, const Slice& key,
216 std::string* value);
217
218 // Similar to previous function but does not require a column_family.
219 // Note: An InvalidArgument status will be returned if there are any Merge
220 // operators for this key. Use previous method instead.
221 Status GetFromBatch(const DBOptions& options, const Slice& key,
222 std::string* value) {
223 return GetFromBatch(nullptr, options, key, value);
224 }
225
226 // Similar to DB::Get() but will also read writes from this batch.
227 //
228 // This function will query both this batch and the DB and then merge
229 // the results using the DB's merge operator (if the batch contains any
230 // merge requests).
231 //
232 // Setting read_options.snapshot will affect what is read from the DB
233 // but will NOT change which keys are read from the batch (the keys in
234 // this batch do not yet belong to any snapshot and will be fetched
235 // regardless).
236 Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
237 const Slice& key, std::string* value);
238
239 // An overload of the above method that receives a PinnableSlice
240 Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
241 const Slice& key, PinnableSlice* value);
242
243 Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
244 ColumnFamilyHandle* column_family, const Slice& key,
245 std::string* value);
246
247 // An overload of the above method that receives a PinnableSlice
248 Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
249 ColumnFamilyHandle* column_family, const Slice& key,
250 PinnableSlice* value);
251
252 void MultiGetFromBatchAndDB(DB* db, const ReadOptions& read_options,
253 ColumnFamilyHandle* column_family,
254 const size_t num_keys, const Slice* keys,
255 PinnableSlice* values, Status* statuses,
256 bool sorted_input);
257
258 // Records the state of the batch for future calls to RollbackToSavePoint().
259 // May be called multiple times to set multiple save points.
260 void SetSavePoint() override;
261
262 // Remove all entries in this batch (Put, Merge, Delete, SingleDelete,
263 // PutLogData) since the most recent call to SetSavePoint() and removes the
264 // most recent save point.
265 // If there is no previous call to SetSavePoint(), behaves the same as
266 // Clear().
267 //
268 // Calling RollbackToSavePoint invalidates any open iterators on this batch.
269 //
270 // Returns Status::OK() on success,
271 // Status::NotFound() if no previous call to SetSavePoint(),
272 // or other Status on corruption.
273 Status RollbackToSavePoint() override;
274
275 // Pop the most recent save point.
276 // If there is no previous call to SetSavePoint(), Status::NotFound()
277 // will be returned.
278 // Otherwise returns Status::OK().
279 Status PopSavePoint() override;
280
281 void SetMaxBytes(size_t max_bytes) override;
282 size_t GetDataSize() const;
283
284 private:
285 friend class PessimisticTransactionDB;
286 friend class WritePreparedTxn;
287 friend class WriteUnpreparedTxn;
288 friend class WriteBatchWithIndex_SubBatchCnt_Test;
289 friend class WriteBatchWithIndexInternal;
290 // Returns the number of sub-batches inside the write batch. A sub-batch
291 // starts right before inserting a key that is a duplicate of a key in the
292 // last sub-batch.
293 size_t SubBatchCnt();
294
295 Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
296 ColumnFamilyHandle* column_family, const Slice& key,
297 PinnableSlice* value, ReadCallback* callback);
298 void MultiGetFromBatchAndDB(DB* db, const ReadOptions& read_options,
299 ColumnFamilyHandle* column_family,
300 const size_t num_keys, const Slice* keys,
301 PinnableSlice* values, Status* statuses,
302 bool sorted_input, ReadCallback* callback);
303 struct Rep;
304 std::unique_ptr<Rep> rep;
305 };
306
307 } // namespace ROCKSDB_NAMESPACE
308
309 #endif // !ROCKSDB_LITE