]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
6 | // Use of this source code is governed by a BSD-style license that can be | |
7 | // found in the LICENSE file. See the AUTHORS file for names of contributors. | |
8 | // | |
9 | // WriteBatch holds a collection of updates to apply atomically to a DB. | |
10 | // | |
11 | // The updates are applied in the order in which they are added | |
12 | // to the WriteBatch. For example, the value of "key" will be "v3" | |
13 | // after the following batch is written: | |
14 | // | |
15 | // batch.Put("key", "v1"); | |
16 | // batch.Delete("key"); | |
17 | // batch.Put("key", "v2"); | |
18 | // batch.Put("key", "v3"); | |
19 | // | |
20 | // Multiple threads can invoke const methods on a WriteBatch without | |
21 | // external synchronization, but if any of the threads may call a | |
22 | // non-const method, all threads accessing the same WriteBatch must use | |
23 | // external synchronization. | |
24 | ||
11fdf7f2 | 25 | #pragma once |
7c673cae FG |
26 | |
27 | #include <atomic> | |
28 | #include <stack> | |
29 | #include <string> | |
30 | #include <stdint.h> | |
31 | #include "rocksdb/status.h" | |
32 | #include "rocksdb/write_batch_base.h" | |
33 | ||
34 | namespace rocksdb { | |
35 | ||
36 | class Slice; | |
37 | class ColumnFamilyHandle; | |
38 | struct SavePoints; | |
39 | struct SliceParts; | |
40 | ||
41 | struct SavePoint { | |
42 | size_t size; // size of rep_ | |
43 | int count; // count of elements in rep_ | |
44 | uint32_t content_flags; | |
45 | ||
46 | SavePoint() : size(0), count(0), content_flags(0) {} | |
47 | ||
48 | SavePoint(size_t _size, int _count, uint32_t _flags) | |
49 | : size(_size), count(_count), content_flags(_flags) {} | |
50 | ||
51 | void clear() { | |
52 | size = 0; | |
53 | count = 0; | |
54 | content_flags = 0; | |
55 | } | |
56 | ||
57 | bool is_cleared() const { return (size | count | content_flags) == 0; } | |
58 | }; | |
59 | ||
60 | class WriteBatch : public WriteBatchBase { | |
61 | public: | |
62 | explicit WriteBatch(size_t reserved_bytes = 0, size_t max_bytes = 0); | |
11fdf7f2 | 63 | ~WriteBatch() override; |
7c673cae FG |
64 | |
65 | using WriteBatchBase::Put; | |
66 | // Store the mapping "key->value" in the database. | |
67 | Status Put(ColumnFamilyHandle* column_family, const Slice& key, | |
68 | const Slice& value) override; | |
69 | Status Put(const Slice& key, const Slice& value) override { | |
70 | return Put(nullptr, key, value); | |
71 | } | |
72 | ||
73 | // Variant of Put() that gathers output like writev(2). The key and value | |
11fdf7f2 | 74 | // that will be written to the database are concatenations of arrays of |
7c673cae FG |
75 | // slices. |
76 | Status Put(ColumnFamilyHandle* column_family, const SliceParts& key, | |
77 | const SliceParts& value) override; | |
78 | Status Put(const SliceParts& key, const SliceParts& value) override { | |
79 | return Put(nullptr, key, value); | |
80 | } | |
81 | ||
82 | using WriteBatchBase::Delete; | |
83 | // If the database contains a mapping for "key", erase it. Else do nothing. | |
84 | Status Delete(ColumnFamilyHandle* column_family, const Slice& key) override; | |
85 | Status Delete(const Slice& key) override { return Delete(nullptr, key); } | |
86 | ||
87 | // variant that takes SliceParts | |
88 | Status Delete(ColumnFamilyHandle* column_family, | |
89 | const SliceParts& key) override; | |
90 | Status Delete(const SliceParts& key) override { return Delete(nullptr, key); } | |
91 | ||
92 | using WriteBatchBase::SingleDelete; | |
93 | // WriteBatch implementation of DB::SingleDelete(). See db.h. | |
94 | Status SingleDelete(ColumnFamilyHandle* column_family, | |
95 | const Slice& key) override; | |
96 | Status SingleDelete(const Slice& key) override { | |
97 | return SingleDelete(nullptr, key); | |
98 | } | |
99 | ||
100 | // variant that takes SliceParts | |
101 | Status SingleDelete(ColumnFamilyHandle* column_family, | |
102 | const SliceParts& key) override; | |
103 | Status SingleDelete(const SliceParts& key) override { | |
104 | return SingleDelete(nullptr, key); | |
105 | } | |
106 | ||
107 | using WriteBatchBase::DeleteRange; | |
108 | // WriteBatch implementation of DB::DeleteRange(). See db.h. | |
109 | Status DeleteRange(ColumnFamilyHandle* column_family, const Slice& begin_key, | |
110 | const Slice& end_key) override; | |
111 | Status DeleteRange(const Slice& begin_key, const Slice& end_key) override { | |
112 | return DeleteRange(nullptr, begin_key, end_key); | |
113 | } | |
114 | ||
115 | // variant that takes SliceParts | |
116 | Status DeleteRange(ColumnFamilyHandle* column_family, | |
117 | const SliceParts& begin_key, | |
118 | const SliceParts& end_key) override; | |
119 | Status DeleteRange(const SliceParts& begin_key, | |
120 | const SliceParts& end_key) override { | |
121 | return DeleteRange(nullptr, begin_key, end_key); | |
122 | } | |
123 | ||
124 | using WriteBatchBase::Merge; | |
125 | // Merge "value" with the existing value of "key" in the database. | |
126 | // "key->merge(existing, value)" | |
127 | Status Merge(ColumnFamilyHandle* column_family, const Slice& key, | |
128 | const Slice& value) override; | |
129 | Status Merge(const Slice& key, const Slice& value) override { | |
130 | return Merge(nullptr, key, value); | |
131 | } | |
132 | ||
133 | // variant that takes SliceParts | |
134 | Status Merge(ColumnFamilyHandle* column_family, const SliceParts& key, | |
135 | const SliceParts& value) override; | |
136 | Status Merge(const SliceParts& key, const SliceParts& value) override { | |
137 | return Merge(nullptr, key, value); | |
138 | } | |
139 | ||
140 | using WriteBatchBase::PutLogData; | |
141 | // Append a blob of arbitrary size to the records in this batch. The blob will | |
142 | // be stored in the transaction log but not in any other file. In particular, | |
143 | // it will not be persisted to the SST files. When iterating over this | |
144 | // WriteBatch, WriteBatch::Handler::LogData will be called with the contents | |
145 | // of the blob as it is encountered. Blobs, puts, deletes, and merges will be | |
11fdf7f2 | 146 | // encountered in the same order in which they were inserted. The blob will |
7c673cae FG |
147 | // NOT consume sequence number(s) and will NOT increase the count of the batch. |
148 | // | |
149 | // Example application: add timestamps to the transaction log for use in | |
150 | // replication. | |
151 | Status PutLogData(const Slice& blob) override; | |
152 | ||
153 | using WriteBatchBase::Clear; | |
154 | // Clear all updates buffered in this batch. | |
155 | void Clear() override; | |
156 | ||
157 | // Records the state of the batch for future calls to RollbackToSavePoint(). | |
158 | // May be called multiple times to set multiple save points. | |
159 | void SetSavePoint() override; | |
160 | ||
161 | // Remove all entries in this batch (Put, Merge, Delete, PutLogData) since the | |
162 | // most recent call to SetSavePoint() and remove the most recent save point. | |
163 | // If there is no previous call to SetSavePoint(), Status::NotFound() | |
164 | // will be returned. | |
165 | // Otherwise returns Status::OK(). | |
166 | Status RollbackToSavePoint() override; | |
167 | ||
11fdf7f2 TL |
168 | // Pop the most recent save point. |
169 | // If there is no previous call to SetSavePoint(), Status::NotFound() | |
170 | // will be returned. | |
171 | // Otherwise returns Status::OK(). | |
172 | Status PopSavePoint() override; | |
173 | ||
7c673cae FG |
174 | // Support for iterating over the contents of a batch. |
175 | class Handler { | |
176 | public: | |
177 | virtual ~Handler(); | |
178 | // All handler functions in this class provide default implementations so | |
179 | // we won't break existing clients of Handler on a source code level when | |
180 | // adding a new member function. | |
181 | ||
182 | // default implementation will just call Put without column family for | |
183 | // backwards compatibility. If the column family is not default, | |
184 | // the function returns Status::InvalidArgument without calling Put. | |
185 | virtual Status PutCF(uint32_t column_family_id, const Slice& key, | |
186 | const Slice& value) { | |
187 | if (column_family_id == 0) { | |
188 | // Put() historically doesn't return status. We didn't want to be | |
189 | // backwards incompatible so we didn't change the return status | |
190 | // (this is a public API). We call Put() and return Status::OK(). | |
191 | Put(key, value); | |
192 | return Status::OK(); | |
193 | } | |
194 | return Status::InvalidArgument( | |
195 | "non-default column family and PutCF not implemented"); | |
196 | } | |
197 | virtual void Put(const Slice& /*key*/, const Slice& /*value*/) {} | |
198 | ||
199 | virtual Status DeleteCF(uint32_t column_family_id, const Slice& key) { | |
200 | if (column_family_id == 0) { | |
201 | Delete(key); | |
202 | return Status::OK(); | |
203 | } | |
204 | return Status::InvalidArgument( | |
205 | "non-default column family and DeleteCF not implemented"); | |
206 | } | |
207 | virtual void Delete(const Slice& /*key*/) {} | |
208 | ||
209 | virtual Status SingleDeleteCF(uint32_t column_family_id, const Slice& key) { | |
210 | if (column_family_id == 0) { | |
211 | SingleDelete(key); | |
212 | return Status::OK(); | |
213 | } | |
214 | return Status::InvalidArgument( | |
215 | "non-default column family and SingleDeleteCF not implemented"); | |
216 | } | |
217 | virtual void SingleDelete(const Slice& /*key*/) {} | |
218 | ||
11fdf7f2 TL |
219 | virtual Status DeleteRangeCF(uint32_t /*column_family_id*/, |
220 | const Slice& /*begin_key*/, | |
221 | const Slice& /*end_key*/) { | |
7c673cae FG |
222 | return Status::InvalidArgument("DeleteRangeCF not implemented"); |
223 | } | |
224 | ||
225 | virtual Status MergeCF(uint32_t column_family_id, const Slice& key, | |
226 | const Slice& value) { | |
227 | if (column_family_id == 0) { | |
228 | Merge(key, value); | |
229 | return Status::OK(); | |
230 | } | |
231 | return Status::InvalidArgument( | |
232 | "non-default column family and MergeCF not implemented"); | |
233 | } | |
234 | virtual void Merge(const Slice& /*key*/, const Slice& /*value*/) {} | |
235 | ||
11fdf7f2 TL |
236 | virtual Status PutBlobIndexCF(uint32_t /*column_family_id*/, |
237 | const Slice& /*key*/, | |
238 | const Slice& /*value*/) { | |
239 | return Status::InvalidArgument("PutBlobIndexCF not implemented"); | |
240 | } | |
241 | ||
7c673cae FG |
242 | // The default implementation of LogData does nothing. |
243 | virtual void LogData(const Slice& blob); | |
244 | ||
11fdf7f2 | 245 | virtual Status MarkBeginPrepare(bool = false) { |
7c673cae FG |
246 | return Status::InvalidArgument("MarkBeginPrepare() handler not defined."); |
247 | } | |
248 | ||
11fdf7f2 | 249 | virtual Status MarkEndPrepare(const Slice& /*xid*/) { |
7c673cae FG |
250 | return Status::InvalidArgument("MarkEndPrepare() handler not defined."); |
251 | } | |
252 | ||
11fdf7f2 TL |
253 | virtual Status MarkNoop(bool /*empty_batch*/) { |
254 | return Status::InvalidArgument("MarkNoop() handler not defined."); | |
255 | } | |
256 | ||
257 | virtual Status MarkRollback(const Slice& /*xid*/) { | |
7c673cae FG |
258 | return Status::InvalidArgument( |
259 | "MarkRollbackPrepare() handler not defined."); | |
260 | } | |
261 | ||
11fdf7f2 | 262 | virtual Status MarkCommit(const Slice& /*xid*/) { |
7c673cae FG |
263 | return Status::InvalidArgument("MarkCommit() handler not defined."); |
264 | } | |
265 | ||
266 | // Continue is called by WriteBatch::Iterate. If it returns false, | |
267 | // iteration is halted. Otherwise, it continues iterating. The default | |
268 | // implementation always returns true. | |
269 | virtual bool Continue(); | |
11fdf7f2 TL |
270 | |
271 | protected: | |
272 | friend class WriteBatch; | |
273 | virtual bool WriteAfterCommit() const { return true; } | |
274 | virtual bool WriteBeforePrepare() const { return false; } | |
7c673cae FG |
275 | }; |
276 | Status Iterate(Handler* handler) const; | |
277 | ||
278 | // Retrieve the serialized version of this batch. | |
279 | const std::string& Data() const { return rep_; } | |
280 | ||
281 | // Retrieve data size of the batch. | |
282 | size_t GetDataSize() const { return rep_.size(); } | |
283 | ||
284 | // Returns the number of updates in the batch | |
285 | int Count() const; | |
286 | ||
287 | // Returns true if PutCF will be called during Iterate | |
288 | bool HasPut() const; | |
289 | ||
290 | // Returns true if DeleteCF will be called during Iterate | |
291 | bool HasDelete() const; | |
292 | ||
293 | // Returns true if SingleDeleteCF will be called during Iterate | |
294 | bool HasSingleDelete() const; | |
295 | ||
296 | // Returns true if DeleteRangeCF will be called during Iterate | |
297 | bool HasDeleteRange() const; | |
298 | ||
299 | // Returns true if MergeCF will be called during Iterate | |
300 | bool HasMerge() const; | |
301 | ||
302 | // Returns true if MarkBeginPrepare will be called during Iterate | |
303 | bool HasBeginPrepare() const; | |
304 | ||
305 | // Returns true if MarkEndPrepare will be called during Iterate | |
306 | bool HasEndPrepare() const; | |
307 | ||
308 | // Returns true if MarkCommit will be called during Iterate | |
309 | bool HasCommit() const; | |
310 | ||
311 | // Returns true if MarkRollback will be called during Iterate | |
312 | bool HasRollback() const; | |
313 | ||
314 | using WriteBatchBase::GetWriteBatch; | |
315 | WriteBatch* GetWriteBatch() override { return this; } | |
316 | ||
317 | // Constructor with a serialized string object | |
318 | explicit WriteBatch(const std::string& rep); | |
11fdf7f2 | 319 | explicit WriteBatch(std::string&& rep); |
7c673cae FG |
320 | |
321 | WriteBatch(const WriteBatch& src); | |
11fdf7f2 | 322 | WriteBatch(WriteBatch&& src) noexcept; |
7c673cae FG |
323 | WriteBatch& operator=(const WriteBatch& src); |
324 | WriteBatch& operator=(WriteBatch&& src); | |
325 | ||
326 | // marks this point in the WriteBatch as the last record to | |
327 | // be inserted into the WAL, provided the WAL is enabled | |
328 | void MarkWalTerminationPoint(); | |
329 | const SavePoint& GetWalTerminationPoint() const { return wal_term_point_; } | |
330 | ||
331 | void SetMaxBytes(size_t max_bytes) override { max_bytes_ = max_bytes; } | |
332 | ||
333 | private: | |
334 | friend class WriteBatchInternal; | |
335 | friend class LocalSavePoint; | |
11fdf7f2 TL |
336 | // TODO(myabandeh): this is needed for a hack to collapse the write batch and |
337 | // remove duplicate keys. Remove it when the hack is replaced with a proper | |
338 | // solution. | |
339 | friend class WriteBatchWithIndex; | |
7c673cae FG |
340 | SavePoints* save_points_; |
341 | ||
342 | // When sending a WriteBatch through WriteImpl we might want to | |
343 | // specify that only the first x records of the batch be written to | |
344 | // the WAL. | |
345 | SavePoint wal_term_point_; | |
346 | ||
347 | // For HasXYZ. Mutable to allow lazy computation of results | |
348 | mutable std::atomic<uint32_t> content_flags_; | |
349 | ||
350 | // Performs deferred computation of content_flags if necessary | |
351 | uint32_t ComputeContentFlags() const; | |
352 | ||
353 | // Maximum size of rep_. | |
354 | size_t max_bytes_; | |
355 | ||
11fdf7f2 TL |
356 | // Is the content of the batch the application's latest state that is meant only |
357 | // to be used for recovery? Refer to | |
358 | // TransactionOptions::use_only_the_last_commit_time_batch_for_recovery for | |
359 | // more details. | |
360 | bool is_latest_persistent_state_ = false; | |
361 | ||
7c673cae FG |
362 | protected: |
363 | std::string rep_; // See comment in write_batch.cc for the format of rep_ | |
364 | ||
365 | // Intentionally copyable | |
366 | }; | |
367 | ||
368 | } // namespace rocksdb |