]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/include/rocksdb/write_batch.h
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / rocksdb / include / rocksdb / write_batch.h
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
6// Use of this source code is governed by a BSD-style license that can be
7// found in the LICENSE file. See the AUTHORS file for names of contributors.
8//
9// WriteBatch holds a collection of updates to apply atomically to a DB.
10//
11// The updates are applied in the order in which they are added
12// to the WriteBatch. For example, the value of "key" will be "v3"
13// after the following batch is written:
14//
15// batch.Put("key", "v1");
16// batch.Delete("key");
17// batch.Put("key", "v2");
18// batch.Put("key", "v3");
19//
20// Multiple threads can invoke const methods on a WriteBatch without
21// external synchronization, but if any of the threads may call a
22// non-const method, all threads accessing the same WriteBatch must use
23// external synchronization.
24
11fdf7f2 25#pragma once
7c673cae
FG
26
27#include <atomic>
28#include <stack>
29#include <string>
30#include <stdint.h>
31#include "rocksdb/status.h"
32#include "rocksdb/write_batch_base.h"
33
34namespace rocksdb {
35
36class Slice;
37class ColumnFamilyHandle;
38struct SavePoints;
39struct SliceParts;
40
// Plain record of a position inside a WriteBatch: the size of the serialized
// representation (rep_), the number of elements in it, and the content flags
// recorded at that position. WriteBatch exposes one of these through
// GetWalTerminationPoint().
struct SavePoint {
  size_t size;  // size of rep_
  int count;    // count of elements in rep_
  uint32_t content_flags;

  // Creates a cleared save point (all fields zero).
  SavePoint() : size(0), count(0), content_flags(0) {}

  // Creates a save point holding exactly the given values.
  SavePoint(size_t _size, int _count, uint32_t _flags)
      : size(_size), count(_count), content_flags(_flags) {}

  // Resets every field to zero, i.e. back to the default-constructed state.
  void clear() {
    size = 0;
    count = 0;
    content_flags = 0;
  }

  // Returns true iff every field is zero. Each field is compared on its own
  // rather than OR-ing the three together, so no signed/unsigned conversion
  // of `count` is involved; the result is the same.
  bool is_cleared() const {
    return size == 0 && count == 0 && content_flags == 0;
  }
};
59
60class WriteBatch : public WriteBatchBase {
61 public:
62 explicit WriteBatch(size_t reserved_bytes = 0, size_t max_bytes = 0);
11fdf7f2 63 ~WriteBatch() override;
7c673cae
FG
64
65 using WriteBatchBase::Put;
66 // Store the mapping "key->value" in the database.
67 Status Put(ColumnFamilyHandle* column_family, const Slice& key,
68 const Slice& value) override;
69 Status Put(const Slice& key, const Slice& value) override {
70 return Put(nullptr, key, value);
71 }
72
73 // Variant of Put() that gathers output like writev(2). The key and value
11fdf7f2 74 // that will be written to the database are concatenations of arrays of
7c673cae
FG
75 // slices.
76 Status Put(ColumnFamilyHandle* column_family, const SliceParts& key,
77 const SliceParts& value) override;
78 Status Put(const SliceParts& key, const SliceParts& value) override {
79 return Put(nullptr, key, value);
80 }
81
82 using WriteBatchBase::Delete;
83 // If the database contains a mapping for "key", erase it. Else do nothing.
84 Status Delete(ColumnFamilyHandle* column_family, const Slice& key) override;
85 Status Delete(const Slice& key) override { return Delete(nullptr, key); }
86
87 // variant that takes SliceParts
88 Status Delete(ColumnFamilyHandle* column_family,
89 const SliceParts& key) override;
90 Status Delete(const SliceParts& key) override { return Delete(nullptr, key); }
91
92 using WriteBatchBase::SingleDelete;
93 // WriteBatch implementation of DB::SingleDelete(). See db.h.
94 Status SingleDelete(ColumnFamilyHandle* column_family,
95 const Slice& key) override;
96 Status SingleDelete(const Slice& key) override {
97 return SingleDelete(nullptr, key);
98 }
99
100 // variant that takes SliceParts
101 Status SingleDelete(ColumnFamilyHandle* column_family,
102 const SliceParts& key) override;
103 Status SingleDelete(const SliceParts& key) override {
104 return SingleDelete(nullptr, key);
105 }
106
107 using WriteBatchBase::DeleteRange;
108 // WriteBatch implementation of DB::DeleteRange(). See db.h.
109 Status DeleteRange(ColumnFamilyHandle* column_family, const Slice& begin_key,
110 const Slice& end_key) override;
111 Status DeleteRange(const Slice& begin_key, const Slice& end_key) override {
112 return DeleteRange(nullptr, begin_key, end_key);
113 }
114
115 // variant that takes SliceParts
116 Status DeleteRange(ColumnFamilyHandle* column_family,
117 const SliceParts& begin_key,
118 const SliceParts& end_key) override;
119 Status DeleteRange(const SliceParts& begin_key,
120 const SliceParts& end_key) override {
121 return DeleteRange(nullptr, begin_key, end_key);
122 }
123
124 using WriteBatchBase::Merge;
125 // Merge "value" with the existing value of "key" in the database.
126 // "key->merge(existing, value)"
127 Status Merge(ColumnFamilyHandle* column_family, const Slice& key,
128 const Slice& value) override;
129 Status Merge(const Slice& key, const Slice& value) override {
130 return Merge(nullptr, key, value);
131 }
132
133 // variant that takes SliceParts
134 Status Merge(ColumnFamilyHandle* column_family, const SliceParts& key,
135 const SliceParts& value) override;
136 Status Merge(const SliceParts& key, const SliceParts& value) override {
137 return Merge(nullptr, key, value);
138 }
139
140 using WriteBatchBase::PutLogData;
141 // Append a blob of arbitrary size to the records in this batch. The blob will
142 // be stored in the transaction log but not in any other file. In particular,
143 // it will not be persisted to the SST files. When iterating over this
144 // WriteBatch, WriteBatch::Handler::LogData will be called with the contents
145 // of the blob as it is encountered. Blobs, puts, deletes, and merges will be
11fdf7f2 146 // encountered in the same order in which they were inserted. The blob will
7c673cae
FG
147 // NOT consume sequence number(s) and will NOT increase the count of the batch
148 //
149 // Example application: add timestamps to the transaction log for use in
150 // replication.
151 Status PutLogData(const Slice& blob) override;
152
153 using WriteBatchBase::Clear;
154 // Clear all updates buffered in this batch.
155 void Clear() override;
156
157 // Records the state of the batch for future calls to RollbackToSavePoint().
158 // May be called multiple times to set multiple save points.
159 void SetSavePoint() override;
160
161 // Remove all entries in this batch (Put, Merge, Delete, PutLogData) since the
162 // most recent call to SetSavePoint() and removes the most recent save point.
163 // If there is no previous call to SetSavePoint(), Status::NotFound()
164 // will be returned.
165 // Otherwise returns Status::OK().
166 Status RollbackToSavePoint() override;
167
11fdf7f2
TL
168 // Pop the most recent save point.
169 // If there is no previous call to SetSavePoint(), Status::NotFound()
170 // will be returned.
171 // Otherwise returns Status::OK().
172 Status PopSavePoint() override;
173
7c673cae
FG
174 // Support for iterating over the contents of a batch.
175 class Handler {
176 public:
177 virtual ~Handler();
178 // All handler functions in this class provide default implementations so
179 // we won't break existing clients of Handler on a source code level when
180 // adding a new member function.
181
182 // default implementation will just call Put without column family for
183 // backwards compatibility. If the column family is not default,
184 // the function is noop
185 virtual Status PutCF(uint32_t column_family_id, const Slice& key,
186 const Slice& value) {
187 if (column_family_id == 0) {
188 // Put() historically doesn't return status. We didn't want to be
189 // backwards incompatible so we didn't change the return status
190 // (this is a public API). We do an ordinary get and return Status::OK()
191 Put(key, value);
192 return Status::OK();
193 }
194 return Status::InvalidArgument(
195 "non-default column family and PutCF not implemented");
196 }
197 virtual void Put(const Slice& /*key*/, const Slice& /*value*/) {}
198
199 virtual Status DeleteCF(uint32_t column_family_id, const Slice& key) {
200 if (column_family_id == 0) {
201 Delete(key);
202 return Status::OK();
203 }
204 return Status::InvalidArgument(
205 "non-default column family and DeleteCF not implemented");
206 }
207 virtual void Delete(const Slice& /*key*/) {}
208
209 virtual Status SingleDeleteCF(uint32_t column_family_id, const Slice& key) {
210 if (column_family_id == 0) {
211 SingleDelete(key);
212 return Status::OK();
213 }
214 return Status::InvalidArgument(
215 "non-default column family and SingleDeleteCF not implemented");
216 }
217 virtual void SingleDelete(const Slice& /*key*/) {}
218
11fdf7f2
TL
219 virtual Status DeleteRangeCF(uint32_t /*column_family_id*/,
220 const Slice& /*begin_key*/,
221 const Slice& /*end_key*/) {
7c673cae
FG
222 return Status::InvalidArgument("DeleteRangeCF not implemented");
223 }
224
225 virtual Status MergeCF(uint32_t column_family_id, const Slice& key,
226 const Slice& value) {
227 if (column_family_id == 0) {
228 Merge(key, value);
229 return Status::OK();
230 }
231 return Status::InvalidArgument(
232 "non-default column family and MergeCF not implemented");
233 }
234 virtual void Merge(const Slice& /*key*/, const Slice& /*value*/) {}
235
11fdf7f2
TL
236 virtual Status PutBlobIndexCF(uint32_t /*column_family_id*/,
237 const Slice& /*key*/,
238 const Slice& /*value*/) {
239 return Status::InvalidArgument("PutBlobIndexCF not implemented");
240 }
241
7c673cae
FG
242 // The default implementation of LogData does nothing.
243 virtual void LogData(const Slice& blob);
244
11fdf7f2 245 virtual Status MarkBeginPrepare(bool = false) {
7c673cae
FG
246 return Status::InvalidArgument("MarkBeginPrepare() handler not defined.");
247 }
248
11fdf7f2 249 virtual Status MarkEndPrepare(const Slice& /*xid*/) {
7c673cae
FG
250 return Status::InvalidArgument("MarkEndPrepare() handler not defined.");
251 }
252
11fdf7f2
TL
253 virtual Status MarkNoop(bool /*empty_batch*/) {
254 return Status::InvalidArgument("MarkNoop() handler not defined.");
255 }
256
257 virtual Status MarkRollback(const Slice& /*xid*/) {
7c673cae
FG
258 return Status::InvalidArgument(
259 "MarkRollbackPrepare() handler not defined.");
260 }
261
11fdf7f2 262 virtual Status MarkCommit(const Slice& /*xid*/) {
7c673cae
FG
263 return Status::InvalidArgument("MarkCommit() handler not defined.");
264 }
265
266 // Continue is called by WriteBatch::Iterate. If it returns false,
267 // iteration is halted. Otherwise, it continues iterating. The default
268 // implementation always returns true.
269 virtual bool Continue();
11fdf7f2
TL
270
271 protected:
272 friend class WriteBatch;
273 virtual bool WriteAfterCommit() const { return true; }
274 virtual bool WriteBeforePrepare() const { return false; }
7c673cae
FG
275 };
276 Status Iterate(Handler* handler) const;
277
278 // Retrieve the serialized version of this batch.
279 const std::string& Data() const { return rep_; }
280
281 // Retrieve data size of the batch.
282 size_t GetDataSize() const { return rep_.size(); }
283
284 // Returns the number of updates in the batch
285 int Count() const;
286
287 // Returns true if PutCF will be called during Iterate
288 bool HasPut() const;
289
290 // Returns true if DeleteCF will be called during Iterate
291 bool HasDelete() const;
292
293 // Returns true if SingleDeleteCF will be called during Iterate
294 bool HasSingleDelete() const;
295
296 // Returns true if DeleteRangeCF will be called during Iterate
297 bool HasDeleteRange() const;
298
299 // Returns true if MergeCF will be called during Iterate
300 bool HasMerge() const;
301
302 // Returns true if MarkBeginPrepare will be called during Iterate
303 bool HasBeginPrepare() const;
304
305 // Returns true if MarkEndPrepare will be called during Iterate
306 bool HasEndPrepare() const;
307
308 // Returns trie if MarkCommit will be called during Iterate
309 bool HasCommit() const;
310
311 // Returns trie if MarkRollback will be called during Iterate
312 bool HasRollback() const;
313
314 using WriteBatchBase::GetWriteBatch;
315 WriteBatch* GetWriteBatch() override { return this; }
316
317 // Constructor with a serialized string object
318 explicit WriteBatch(const std::string& rep);
11fdf7f2 319 explicit WriteBatch(std::string&& rep);
7c673cae
FG
320
321 WriteBatch(const WriteBatch& src);
11fdf7f2 322 WriteBatch(WriteBatch&& src) noexcept;
7c673cae
FG
323 WriteBatch& operator=(const WriteBatch& src);
324 WriteBatch& operator=(WriteBatch&& src);
325
326 // marks this point in the WriteBatch as the last record to
327 // be inserted into the WAL, provided the WAL is enabled
328 void MarkWalTerminationPoint();
329 const SavePoint& GetWalTerminationPoint() const { return wal_term_point_; }
330
331 void SetMaxBytes(size_t max_bytes) override { max_bytes_ = max_bytes; }
332
333 private:
334 friend class WriteBatchInternal;
335 friend class LocalSavePoint;
11fdf7f2
TL
336 // TODO(myabandeh): this is needed for a hack to collapse the write batch and
337 // remove duplicate keys. Remove it when the hack is replaced with a proper
338 // solution.
339 friend class WriteBatchWithIndex;
7c673cae
FG
340 SavePoints* save_points_;
341
342 // When sending a WriteBatch through WriteImpl we might want to
343 // specify that only the first x records of the batch be written to
344 // the WAL.
345 SavePoint wal_term_point_;
346
347 // For HasXYZ. Mutable to allow lazy computation of results
348 mutable std::atomic<uint32_t> content_flags_;
349
350 // Performs deferred computation of content_flags if necessary
351 uint32_t ComputeContentFlags() const;
352
353 // Maximum size of rep_.
354 size_t max_bytes_;
355
11fdf7f2
TL
356 // Is the content of the batch the application's latest state that meant only
357 // to be used for recovery? Refer to
358 // TransactionOptions::use_only_the_last_commit_time_batch_for_recovery for
359 // more details.
360 bool is_latest_persistent_state_ = false;
361
7c673cae
FG
362 protected:
363 std::string rep_; // See comment in write_batch.cc for the format of rep_
364
365 // Intentionally copyable
366};
367
368} // namespace rocksdb