]> git.proxmox.com Git - ceph.git/blob - ceph/src/kv/KeyValueDB.h
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / kv / KeyValueDB.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 #ifndef KEY_VALUE_DB_H
4 #define KEY_VALUE_DB_H
5
6 #include "include/buffer.h"
7 #include <ostream>
8 #include <set>
9 #include <map>
10 #include <optional>
11 #include <string>
12 #include <boost/scoped_ptr.hpp>
13 #include "include/encoding.h"
14 #include "common/Formatter.h"
15 #include "common/perf_counters.h"
16 #include "common/PriorityCache.h"
17
18 /**
19 * Defines virtual interface to be implemented by key value store
20 *
21 * Kyoto Cabinet should implement this
22 */
23 class KeyValueDB {
24 public:
25 class TransactionImpl {
26 public:
27 /// Set Keys
28 void set(
29 const std::string &prefix, ///< [in] Prefix for keys, or CF name
30 const std::map<std::string, ceph::buffer::list> &to_set ///< [in] keys/values to set
31 ) {
32 for (auto it = to_set.cbegin(); it != to_set.cend(); ++it)
33 set(prefix, it->first, it->second);
34 }
35
36 /// Set Keys (via encoded ceph::buffer::list)
37 void set(
38 const std::string &prefix, ///< [in] prefix, or CF name
39 ceph::buffer::list& to_set_bl ///< [in] encoded key/values to set
40 ) {
41 using ceph::decode;
42 auto p = std::cbegin(to_set_bl);
43 uint32_t num;
44 decode(num, p);
45 while (num--) {
46 std::string key;
47 ceph::buffer::list value;
48 decode(key, p);
49 decode(value, p);
50 set(prefix, key, value);
51 }
52 }
53
54 /// Set Key
55 virtual void set(
56 const std::string &prefix, ///< [in] Prefix or CF for the key
57 const std::string &k, ///< [in] Key to set
58 const ceph::buffer::list &bl ///< [in] Value to set
59 ) = 0;
60 virtual void set(
61 const std::string &prefix,
62 const char *k,
63 size_t keylen,
64 const ceph::buffer::list& bl) {
65 set(prefix, std::string(k, keylen), bl);
66 }
67
68 /// Removes Keys (via encoded ceph::buffer::list)
69 void rmkeys(
70 const std::string &prefix, ///< [in] Prefix or CF to search for
71 ceph::buffer::list &keys_bl ///< [in] Keys to remove
72 ) {
73 using ceph::decode;
74 auto p = std::cbegin(keys_bl);
75 uint32_t num;
76 decode(num, p);
77 while (num--) {
78 std::string key;
79 decode(key, p);
80 rmkey(prefix, key);
81 }
82 }
83
84 /// Removes Keys
85 void rmkeys(
86 const std::string &prefix, ///< [in] Prefix/CF to search for
87 const std::set<std::string> &keys ///< [in] Keys to remove
88 ) {
89 for (auto it = keys.cbegin(); it != keys.cend(); ++it)
90 rmkey(prefix, *it);
91 }
92
93 /// Remove Key
94 virtual void rmkey(
95 const std::string &prefix, ///< [in] Prefix/CF to search for
96 const std::string &k ///< [in] Key to remove
97 ) = 0;
98 virtual void rmkey(
99 const std::string &prefix, ///< [in] Prefix to search for
100 const char *k, ///< [in] Key to remove
101 size_t keylen
102 ) {
103 rmkey(prefix, std::string(k, keylen));
104 }
105
106 /// Remove Single Key which exists and was not overwritten.
107 /// This API is only related to performance optimization, and should only be
108 /// re-implemented by log-insert-merge tree based keyvalue stores(such as RocksDB).
109 /// If a key is overwritten (by calling set multiple times), then the result
110 /// of calling rm_single_key on this key is undefined.
111 virtual void rm_single_key(
112 const std::string &prefix, ///< [in] Prefix/CF to search for
113 const std::string &k ///< [in] Key to remove
114 ) { return rmkey(prefix, k);}
115
116 /// Removes keys beginning with prefix
117 virtual void rmkeys_by_prefix(
118 const std::string &prefix ///< [in] Prefix/CF by which to remove keys
119 ) = 0;
120
121 virtual void rm_range_keys(
122 const std::string &prefix, ///< [in] Prefix by which to remove keys
123 const std::string &start, ///< [in] The start bound of remove keys
124 const std::string &end ///< [in] The start bound of remove keys
125 ) = 0;
126
127 /// Merge value into key
128 virtual void merge(
129 const std::string &prefix, ///< [in] Prefix/CF ==> MUST match some established merge operator
130 const std::string &key, ///< [in] Key to be merged
131 const ceph::buffer::list &value ///< [in] value to be merged into key
132 ) { ceph_abort_msg("Not implemented"); }
133
134 virtual ~TransactionImpl() {}
135 };
136 typedef std::shared_ptr< TransactionImpl > Transaction;
137
138 /// create a new instance
139 static KeyValueDB *create(CephContext *cct, const std::string& type,
140 const std::string& dir,
141 std::map<std::string,std::string> options = {},
142 void *p = NULL);
143
144 /// test whether we can successfully initialize; may have side effects (e.g., create)
145 static int test_init(const std::string& type, const std::string& dir);
146 virtual int init(std::string option_str="") = 0;
147 virtual int open(std::ostream &out, const std::string& cfs="") = 0;
148 // std::vector cfs contains column families to be created when db is created.
149 virtual int create_and_open(std::ostream &out, const std::string& cfs="") = 0;
150
151 virtual int open_read_only(std::ostream &out, const std::string& cfs="") {
152 return -ENOTSUP;
153 }
154
155 virtual void close() { }
156
157 /// Try to repair K/V database. rocksdb requires that database must be not opened.
158 virtual int repair(std::ostream &out) { return 0; }
159
160 virtual Transaction get_transaction() = 0;
161 virtual int submit_transaction(Transaction) = 0;
162 virtual int submit_transaction_sync(Transaction t) {
163 return submit_transaction(t);
164 }
165
166 /// Retrieve Keys
167 virtual int get(
168 const std::string &prefix, ///< [in] Prefix/CF for key
169 const std::set<std::string> &key, ///< [in] Key to retrieve
170 std::map<std::string, ceph::buffer::list> *out ///< [out] Key value retrieved
171 ) = 0;
172 virtual int get(const std::string &prefix, ///< [in] prefix or CF name
173 const std::string &key, ///< [in] key
174 ceph::buffer::list *value) { ///< [out] value
175 std::set<std::string> ks;
176 ks.insert(key);
177 std::map<std::string,ceph::buffer::list> om;
178 int r = get(prefix, ks, &om);
179 if (om.find(key) != om.end()) {
180 *value = std::move(om[key]);
181 } else {
182 *value = ceph::buffer::list();
183 r = -ENOENT;
184 }
185 return r;
186 }
187 virtual int get(const std::string &prefix,
188 const char *key, size_t keylen,
189 ceph::buffer::list *value) {
190 return get(prefix, std::string(key, keylen), value);
191 }
192
193 // This superclass is used both by kv iterators *and* by the ObjectMap
194 // omap iterator. The class hierarchies are unfortunately tied together
195 // by the legacy DBOjectMap implementation :(.
196 class SimplestIteratorImpl {
197 public:
198 virtual int seek_to_first() = 0;
199 virtual int upper_bound(const std::string &after) = 0;
200 virtual int lower_bound(const std::string &to) = 0;
201 virtual bool valid() = 0;
202 virtual int next() = 0;
203 virtual std::string key() = 0;
204 virtual std::string tail_key() {
205 return "";
206 }
207 virtual ceph::buffer::list value() = 0;
208 virtual int status() = 0;
209 virtual ~SimplestIteratorImpl() {}
210 };
211
212 class IteratorImpl : public SimplestIteratorImpl {
213 public:
214 virtual ~IteratorImpl() {}
215 virtual int seek_to_last() = 0;
216 virtual int prev() = 0;
217 virtual std::pair<std::string, std::string> raw_key() = 0;
218 virtual ceph::buffer::ptr value_as_ptr() {
219 ceph::buffer::list bl = value();
220 if (bl.length() == 1) {
221 return *bl.buffers().begin();
222 } else if (bl.length() == 0) {
223 return ceph::buffer::ptr();
224 } else {
225 ceph_abort();
226 }
227 }
228 };
229 typedef std::shared_ptr< IteratorImpl > Iterator;
230
231 // This is the low-level iterator implemented by the underlying KV store.
232 class WholeSpaceIteratorImpl {
233 public:
234 virtual int seek_to_first() = 0;
235 virtual int seek_to_first(const std::string &prefix) = 0;
236 virtual int seek_to_last() = 0;
237 virtual int seek_to_last(const std::string &prefix) = 0;
238 virtual int upper_bound(const std::string &prefix, const std::string &after) = 0;
239 virtual int lower_bound(const std::string &prefix, const std::string &to) = 0;
240 virtual bool valid() = 0;
241 virtual int next() = 0;
242 virtual int prev() = 0;
243 virtual std::string key() = 0;
244 virtual std::pair<std::string,std::string> raw_key() = 0;
245 virtual bool raw_key_is_prefixed(const std::string &prefix) = 0;
246 virtual ceph::buffer::list value() = 0;
247 virtual ceph::buffer::ptr value_as_ptr() {
248 ceph::buffer::list bl = value();
249 if (bl.length()) {
250 return *bl.buffers().begin();
251 } else {
252 return ceph::buffer::ptr();
253 }
254 }
255 virtual int status() = 0;
256 virtual size_t key_size() {
257 return 0;
258 }
259 virtual size_t value_size() {
260 return 0;
261 }
262 virtual ~WholeSpaceIteratorImpl() { }
263 };
264 typedef std::shared_ptr< WholeSpaceIteratorImpl > WholeSpaceIterator;
265
266 private:
267 // This class filters a WholeSpaceIterator by a prefix.
268 // Performs as a dummy wrapper over WholeSpaceIterator
269 // if prefix is empty
270 class PrefixIteratorImpl : public IteratorImpl {
271 const std::string prefix;
272 WholeSpaceIterator generic_iter;
273 public:
274 PrefixIteratorImpl(const std::string &prefix, WholeSpaceIterator iter) :
275 prefix(prefix), generic_iter(iter) { }
276 ~PrefixIteratorImpl() override { }
277
278 int seek_to_first() override {
279 return prefix.empty() ?
280 generic_iter->seek_to_first() :
281 generic_iter->seek_to_first(prefix);
282 }
283 int seek_to_last() override {
284 return prefix.empty() ?
285 generic_iter->seek_to_last() :
286 generic_iter->seek_to_last(prefix);
287 }
288 int upper_bound(const std::string &after) override {
289 return generic_iter->upper_bound(prefix, after);
290 }
291 int lower_bound(const std::string &to) override {
292 return generic_iter->lower_bound(prefix, to);
293 }
294 bool valid() override {
295 if (!generic_iter->valid())
296 return false;
297 if (prefix.empty())
298 return true;
299 return prefix.empty() ?
300 true :
301 generic_iter->raw_key_is_prefixed(prefix);
302 }
303 int next() override {
304 return generic_iter->next();
305 }
306 int prev() override {
307 return generic_iter->prev();
308 }
309 std::string key() override {
310 return generic_iter->key();
311 }
312 std::pair<std::string, std::string> raw_key() override {
313 return generic_iter->raw_key();
314 }
315 ceph::buffer::list value() override {
316 return generic_iter->value();
317 }
318 ceph::buffer::ptr value_as_ptr() override {
319 return generic_iter->value_as_ptr();
320 }
321 int status() override {
322 return generic_iter->status();
323 }
324 };
325 protected:
326 Iterator make_iterator(const std::string &prefix, WholeSpaceIterator w_iter) {
327 return std::make_shared<PrefixIteratorImpl>(
328 prefix,
329 w_iter);
330 }
331 public:
/// Option flags accepted by the iterator factory functions.
using IteratorOpts = uint32_t;
/// NOTE(review): presumably asks the backend not to populate its cache while
/// iterating; the flag is not interpreted anywhere in this header.
static const uint32_t ITERATOR_NOCACHE = 1;
334
/// Optional key bounds that can be handed to get_iterator().
/// Either side may be left unset.
struct IteratorBounds {
  std::optional<std::string> lower_bound;  ///< unset when there is no lower bound
  std::optional<std::string> upper_bound;  ///< unset when there is no upper bound
};
339
340 virtual WholeSpaceIterator get_wholespace_iterator(IteratorOpts opts = 0) = 0;
341 virtual Iterator get_iterator(const std::string &prefix, IteratorOpts opts = 0, IteratorBounds bounds = IteratorBounds()) {
342 return make_iterator(prefix,
343 get_wholespace_iterator(opts));
344 }
345
346 virtual uint64_t get_estimated_size(std::map<std::string,uint64_t> &extra) = 0;
347 virtual int get_statfs(struct store_statfs_t *buf) {
348 return -EOPNOTSUPP;
349 }
350
351 virtual int set_cache_size(uint64_t) {
352 return -EOPNOTSUPP;
353 }
354
355 virtual int set_cache_high_pri_pool_ratio(double ratio) {
356 return -EOPNOTSUPP;
357 }
358
359 virtual int64_t get_cache_usage() const {
360 return -EOPNOTSUPP;
361 }
362
363 virtual int64_t get_cache_usage(std::string prefix) const {
364 return -EOPNOTSUPP;
365 }
366
367 virtual std::shared_ptr<PriorityCache::PriCache> get_priority_cache() const {
368 return nullptr;
369 }
370
371 virtual std::shared_ptr<PriorityCache::PriCache> get_priority_cache(std::string prefix) const {
372 return nullptr;
373 }
374
375
376
377 virtual ~KeyValueDB() {}
378
379 /// estimate space utilization for a prefix (in bytes)
380 virtual int64_t estimate_prefix_size(const std::string& prefix,
381 const std::string& key_prefix) {
382 return 0;
383 }
384
385 /// compact the underlying store
386 virtual void compact() {}
387
388 /// compact the underlying store in async mode
389 virtual void compact_async() {}
390
391 /// compact db for all keys with a given prefix
392 virtual void compact_prefix(const std::string& prefix) {}
393 /// compact db for all keys with a given prefix, async
394 virtual void compact_prefix_async(const std::string& prefix) {}
395 virtual void compact_range(const std::string& prefix,
396 const std::string& start, const std::string& end) {}
397 virtual void compact_range_async(const std::string& prefix,
398 const std::string& start, const std::string& end) {}
399
// See RocksDB merge operator definition, we support the basic
// associative merge only right now.
//
// Operands are passed as raw (pointer, length) byte ranges and the result
// is written into *new_value.
class MergeOperator {
public:
  virtual ~MergeOperator() = default;

  /// Merge into a key that doesn't exist
  virtual void merge_nonexistent(
    const char *rdata,
    size_t rlen,
    std::string *new_value) = 0;

  /// Merge into a key that does exist
  virtual void merge(
    const char *ldata,
    size_t llen,
    const char *rdata,
    size_t rlen,
    std::string *new_value) = 0;

  /// We use each operator name and each prefix to construct the overall
  /// RocksDB operator name for consistency check at open time.
  virtual const char *name() const = 0;
};
418
419 /// Setup one or more operators, this needs to be done BEFORE the DB is opened.
420 virtual int set_merge_operator(const std::string& prefix,
421 std::shared_ptr<MergeOperator> mop) {
422 return -EOPNOTSUPP;
423 }
424
425 virtual void get_statistics(ceph::Formatter *f) {
426 return;
427 }
428
429 /**
430 * Return your perf counters if you have any. Subclasses are not
431 * required to implement this, and callers must respect a null return
432 * value.
433 */
434 virtual PerfCounters *get_perf_counters() {
435 return nullptr;
436 }
437
438 /**
439 * Access implementation specific integral property corresponding
440 * to passed property and prefic.
441 * Return value is true if property is valid for prefix, populates out.
442 */
443 virtual bool get_property(
444 const std::string &property,
445 uint64_t *out) {
446 return false;
447 }
448 protected:
449 /// List of matching prefixes/ColumnFamilies and merge operators
450 std::vector<std::pair<std::string,
451 std::shared_ptr<MergeOperator> > > merge_ops;
452
453 };
454
455 #endif