]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/include/rocksdb/utilities/write_batch_with_index.h
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / rocksdb / include / rocksdb / utilities / write_batch_with_index.h
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
6// Use of this source code is governed by a BSD-style license that can be
7// found in the LICENSE file. See the AUTHORS file for names of contributors.
8//
9// A WriteBatchWithIndex with a binary searchable index built for all the keys
10// inserted.
11#pragma once
12
13#ifndef ROCKSDB_LITE
14
15#include <memory>
16#include <string>
17
18#include "rocksdb/comparator.h"
19#include "rocksdb/iterator.h"
20#include "rocksdb/slice.h"
21#include "rocksdb/status.h"
22#include "rocksdb/write_batch.h"
23#include "rocksdb/write_batch_base.h"
24
25namespace rocksdb {
26
27class ColumnFamilyHandle;
28class Comparator;
29class DB;
11fdf7f2 30class ReadCallback;
7c673cae
FG
31struct ReadOptions;
32struct DBOptions;
33
// Kind of record that an indexed write-batch entry represents; stored in
// WriteEntry::type.
// The enumerators have no explicit initializers, so their numeric values are
// assigned implicitly in declaration order (0, 1, 2, ...). Reordering them or
// inserting a new one in the middle changes those values.
// NOTE(review): the names presumably correspond to Put/Merge/Delete/
// SingleDelete/DeleteRange/PutLogData on WriteBatchWithIndex; kXIDRecord has
// no matching method in this header -- confirm against the implementation.
34enum WriteType {
35 kPutRecord,
36 kMergeRecord,
37 kDeleteRecord,
38 kSingleDeleteRecord,
39 kDeleteRangeRecord,
40 kLogDataRecord,
41 kXIDRecord,
42};
43
44// An entry for a Put, Merge, Delete, or SingleDelete operation in a write
45// batch. Used in WBWIIterator (returned by value from WBWIIterator::Entry()).
// Per the comment on WBWIIterator::Entry(), a returned WriteEntry is only
// valid until the next mutation of the WriteBatchWithIndex.
46struct WriteEntry {
47 WriteType type;  // which kind of record this entry is
48 Slice key;  // key of the entry
49 Slice value;  // value of the entry (NOTE(review): presumably unused for deletes -- confirm)
50};
51
52// Iterator of one column family out of a WriteBatchWithIndex.
// This is a pure-virtual interface: every member except the destructor is
// declared "= 0". Instances are obtained from
// WriteBatchWithIndex::NewIterator(), and the caller deletes them.
// NOTE(review): the positioning methods presumably follow the same
// conventions as rocksdb::Iterator (Seek = first entry at or after key,
// SeekForPrev = last entry at or before key) -- confirm in the implementation.
53class WBWIIterator {
54 public:
55 virtual ~WBWIIterator() {}
56
  // Whether the iterator is currently positioned at an entry.
57 virtual bool Valid() const = 0;
58
59 virtual void SeekToFirst() = 0;
60
61 virtual void SeekToLast() = 0;
62
63 virtual void Seek(const Slice& key) = 0;
64
65 virtual void SeekForPrev(const Slice& key) = 0;
66
67 virtual void Next() = 0;
68
69 virtual void Prev() = 0;
70
71 // The returned WriteEntry is only valid until the next mutation of the
72 // WriteBatchWithIndex.
73 virtual WriteEntry Entry() const = 0;
74
  // Status of the iterator.
75 virtual Status status() const = 0;
76};
77
78// A WriteBatchWithIndex with a binary searchable index built for all the keys
79// inserted.
80// In Put(), Merge(), Delete(), or SingleDelete(), the same function of the
81// wrapped WriteBatch will be called. At the same time, indexes will be built.
82// By calling GetWriteBatch(), a user will get the WriteBatch for the data
83// they inserted, which can be used for DB::Write().
84// A user can call NewIterator() to create an iterator.
85class WriteBatchWithIndex : public WriteBatchBase {
86 public:
87 // backup_index_comparator: the backup comparator used to compare keys
88 // within the same column family. If a column family is not given in the
89 // interface, or we can't find a column family from the column family handle
90 // passed in, backup_index_comparator will be used for the column family.
91 // reserved_bytes: reserved bytes in underlying WriteBatch
92 // max_bytes: maximum size of underlying WriteBatch in bytes
93 // overwrite_key: if true, overwrite the key in the index when inserting
94 // the same key as previously, so iterator will never
95 // show two entries with the same key.
  // Note that the declaration order of the parameters is
  // (backup_index_comparator, reserved_bytes, overwrite_key, max_bytes), which
  // differs from the order in which they are described above.
96 explicit WriteBatchWithIndex(
97 const Comparator* backup_index_comparator = BytewiseComparator(),
98 size_t reserved_bytes = 0, bool overwrite_key = false,
99 size_t max_bytes = 0);
100
11fdf7f2 101 ~WriteBatchWithIndex() override;
7c673cae
FG
102
  // The using-declarations below keep the WriteBatchBase overloads of each
  // operation visible next to the overrides declared in this class.
103 using WriteBatchBase::Put;
104 Status Put(ColumnFamilyHandle* column_family, const Slice& key,
105 const Slice& value) override;
106
107 Status Put(const Slice& key, const Slice& value) override;
108
109 using WriteBatchBase::Merge;
110 Status Merge(ColumnFamilyHandle* column_family, const Slice& key,
111 const Slice& value) override;
112
113 Status Merge(const Slice& key, const Slice& value) override;
114
115 using WriteBatchBase::Delete;
116 Status Delete(ColumnFamilyHandle* column_family, const Slice& key) override;
117 Status Delete(const Slice& key) override;
118
119 using WriteBatchBase::SingleDelete;
120 Status SingleDelete(ColumnFamilyHandle* column_family,
121 const Slice& key) override;
122 Status SingleDelete(const Slice& key) override;
123
124 using WriteBatchBase::DeleteRange;
125 Status DeleteRange(ColumnFamilyHandle* column_family, const Slice& begin_key,
126 const Slice& end_key) override;
127 Status DeleteRange(const Slice& begin_key, const Slice& end_key) override;
128
129 using WriteBatchBase::PutLogData;
130 Status PutLogData(const Slice& blob) override;
131
132 using WriteBatchBase::Clear;
133 void Clear() override;
134
135 using WriteBatchBase::GetWriteBatch;
136 WriteBatch* GetWriteBatch() override;
137
138 // Create an iterator of a column family. User can call iterator.Seek() to
139 // search to the next entry of or after a key. Keys will be iterated in the
140 // order given by index_comparator. For multiple updates on the same key,
141 // each update will be returned as a separate entry, in the order of update
142 // time.
143 //
144 // The returned iterator should be deleted by the caller.
145 WBWIIterator* NewIterator(ColumnFamilyHandle* column_family);
146 // Create an iterator of the default column family.
147 WBWIIterator* NewIterator();
148
149 // Will create a new Iterator that will use WBWIIterator as a delta and
150 // base_iterator as base.
151 //
152 // This function is only supported if the WriteBatchWithIndex was
153 // constructed with overwrite_key=true.
154 //
155 // The returned iterator should be deleted by the caller.
156 // The base_iterator is now 'owned' by the returned iterator. Deleting the
157 // returned iterator will also delete the base_iterator.
11fdf7f2
TL
158 //
159 // Updating write batch with the current key of the iterator is not safe.
160 // We strongly recommend users not to do it. It will invalidate the current
161 // key() and value() of the iterator. This invalidation happens even before
162 // the write batch update finishes. The state may recover after Next() is
163 // called.
7c673cae
FG
164 Iterator* NewIteratorWithBase(ColumnFamilyHandle* column_family,
165 Iterator* base_iterator);
166 // Same as above, for the default column family.
167 Iterator* NewIteratorWithBase(Iterator* base_iterator);
168
169 // Similar to DB::Get() but will only read the key from this batch.
170 // If the batch does not have enough data to resolve Merge operations,
171 // MergeInProgress status may be returned.
172 Status GetFromBatch(ColumnFamilyHandle* column_family,
173 const DBOptions& options, const Slice& key,
174 std::string* value);
175
176 // Similar to previous function but does not require a column_family.
177 // Note: An InvalidArgument status will be returned if there are any Merge
178 // operators for this key. Use previous method instead.
  // Implemented by forwarding to the overload above with a null column_family.
179 Status GetFromBatch(const DBOptions& options, const Slice& key,
180 std::string* value) {
181 return GetFromBatch(nullptr, options, key, value);
182 }
183
184 // Similar to DB::Get() but will also read writes from this batch.
185 //
186 // This function will query both this batch and the DB and then merge
187 // the results using the DB's merge operator (if the batch contains any
188 // merge requests).
189 //
190 // Setting read_options.snapshot will affect what is read from the DB
191 // but will NOT change which keys are read from the batch (the keys in
192 // this batch do not yet belong to any snapshot and will be fetched
193 // regardless).
194 Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
195 const Slice& key, std::string* value);
11fdf7f2
TL
196
197 // An overload of the above method that receives a PinnableSlice
198 Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
199 const Slice& key, PinnableSlice* value);
200
7c673cae
FG
  // An overload of GetFromBatchAndDB() that additionally takes the
  // column_family to read from; the result is written to a std::string.
201 Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
202 ColumnFamilyHandle* column_family, const Slice& key,
203 std::string* value);
204
11fdf7f2
TL
205 // An overload of the above method that receives a PinnableSlice
206 Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
207 ColumnFamilyHandle* column_family, const Slice& key,
208 PinnableSlice* value);
209
7c673cae
FG
210 // Records the state of the batch for future calls to RollbackToSavePoint().
211 // May be called multiple times to set multiple save points.
212 void SetSavePoint() override;
213
214 // Remove all entries in this batch (Put, Merge, Delete, SingleDelete,
215 // PutLogData) since the most recent call to SetSavePoint() and removes the
216 // most recent save point.
217 // If there is no previous call to SetSavePoint(), behaves the same as
218 // Clear().
219 //
220 // Calling RollbackToSavePoint invalidates any open iterators on this batch.
221 //
222 // Returns Status::OK() on success,
223 // Status::NotFound() if no previous call to SetSavePoint(),
224 // or other Status on corruption.
225 Status RollbackToSavePoint() override;
226
11fdf7f2
TL
227 // Pop the most recent save point.
228 // If there is no previous call to SetSavePoint(), Status::NotFound()
229 // will be returned.
230 // Otherwise returns Status::OK().
231 Status PopSavePoint() override;
232
  // NOTE(review): presumably updates the max_bytes limit described on the
  // constructor (maximum size of the underlying WriteBatch) -- confirm.
7c673cae 233 void SetMaxBytes(size_t max_bytes) override;
  // NOTE(review): presumably the size in bytes of the underlying WriteBatch
  // data -- confirm. Unlike its neighbours this is not marked override.
11fdf7f2 234 size_t GetDataSize() const;
7c673cae
FG
235
236 private:
11fdf7f2
TL
  // These classes are granted access to the private members declared below.
237 friend class PessimisticTransactionDB;
238 friend class WritePreparedTxn;
239 friend class WriteUnpreparedTxn;
240 friend class WriteBatchWithIndex_SubBatchCnt_Test;
241 // Returns the number of sub-batches inside the write batch. A sub-batch
242 // starts right before inserting a key that is a duplicate of a key in the
243 // last sub-batch.
244 size_t SubBatchCnt();
245
  // Private overload of GetFromBatchAndDB() that additionally takes a
  // ReadCallback. NOTE(review): how the callback is used is not visible in
  // this header -- see the implementation.
246 Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
247 ColumnFamilyHandle* column_family, const Slice& key,
248 PinnableSlice* value, ReadCallback* callback);
7c673cae
FG
  // Rep is only forward-declared in this header; its definition, and hence
  // the state of this class, lives elsewhere. The instance is owned through
  // the unique_ptr below.
249 struct Rep;
250 std::unique_ptr<Rep> rep;
251};
252
253} // namespace rocksdb
254
255#endif // !ROCKSDB_LITE