]> git.proxmox.com Git - ceph.git/blame - ceph/src/kv/KeyValueDB.h
import 15.2.0 Octopus source
[ceph.git] / ceph / src / kv / KeyValueDB.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3#ifndef KEY_VALUE_DB_H
4#define KEY_VALUE_DB_H
5
6#include "include/buffer.h"
7#include <ostream>
8#include <set>
9#include <map>
10#include <string>
7c673cae
FG
11#include <boost/scoped_ptr.hpp>
12#include "include/encoding.h"
13#include "common/Formatter.h"
3efd9988 14#include "common/perf_counters.h"
91327a77 15#include "common/PriorityCache.h"
7c673cae
FG
16
17using std::string;
11fdf7f2 18using std::vector;
7c673cae
FG
19/**
20 * Defines virtual interface to be implemented by key value store
21 *
22 * Kyoto Cabinet or LevelDB should implement this
23 */
11fdf7f2 24class KeyValueDB {
7c673cae 25public:
11fdf7f2
TL
/**
 * Descriptor of one column family (CF); see RocksDB's definition of a
 * column family and how to use it.
 *
 * When a column family is created the KeyValueDB interfaces are extended:
 * the prefix passed to them is then the name of the column family to use.
 */
struct ColumnFamily {
  std::string name;    ///< name of this individual column family
  std::string option;  ///< configure option string for this CF

  /// Store copies of the given name and option string.
  ColumnFamily(const std::string &name, const std::string &option)
    : name{name}, option{option} {}
};
37
7c673cae
FG
38 class TransactionImpl {
39 public:
40 /// Set Keys
41 void set(
11fdf7f2 42 const std::string &prefix, ///< [in] Prefix for keys, or CF name
9f95a23c 43 const std::map<std::string, ceph::buffer::list> &to_set ///< [in] keys/values to set
7c673cae 44 ) {
9f95a23c 45 for (auto it = to_set.cbegin(); it != to_set.cend(); ++it)
7c673cae
FG
46 set(prefix, it->first, it->second);
47 }
48
9f95a23c 49 /// Set Keys (via encoded ceph::buffer::list)
7c673cae 50 void set(
11fdf7f2 51 const std::string &prefix, ///< [in] prefix, or CF name
9f95a23c 52 ceph::buffer::list& to_set_bl ///< [in] encoded key/values to set
7c673cae 53 ) {
9f95a23c 54 using ceph::decode;
11fdf7f2 55 auto p = std::cbegin(to_set_bl);
7c673cae 56 uint32_t num;
11fdf7f2 57 decode(num, p);
7c673cae
FG
58 while (num--) {
59 string key;
9f95a23c 60 ceph::buffer::list value;
11fdf7f2
TL
61 decode(key, p);
62 decode(value, p);
7c673cae
FG
63 set(prefix, key, value);
64 }
65 }
66
67 /// Set Key
68 virtual void set(
11fdf7f2 69 const std::string &prefix, ///< [in] Prefix or CF for the key
7c673cae 70 const std::string &k, ///< [in] Key to set
9f95a23c 71 const ceph::buffer::list &bl ///< [in] Value to set
7c673cae
FG
72 ) = 0;
73 virtual void set(
74 const std::string &prefix,
75 const char *k,
76 size_t keylen,
9f95a23c 77 const ceph::buffer::list& bl) {
7c673cae
FG
78 set(prefix, string(k, keylen), bl);
79 }
80
9f95a23c 81 /// Removes Keys (via encoded ceph::buffer::list)
7c673cae 82 void rmkeys(
11fdf7f2 83 const std::string &prefix, ///< [in] Prefix or CF to search for
9f95a23c 84 ceph::buffer::list &keys_bl ///< [in] Keys to remove
7c673cae 85 ) {
9f95a23c 86 using ceph::decode;
11fdf7f2 87 auto p = std::cbegin(keys_bl);
7c673cae 88 uint32_t num;
11fdf7f2 89 decode(num, p);
7c673cae
FG
90 while (num--) {
91 string key;
11fdf7f2 92 decode(key, p);
7c673cae
FG
93 rmkey(prefix, key);
94 }
95 }
96
97 /// Removes Keys
98 void rmkeys(
11fdf7f2 99 const std::string &prefix, ///< [in] Prefix/CF to search for
7c673cae
FG
100 const std::set<std::string> &keys ///< [in] Keys to remove
101 ) {
9f95a23c 102 for (auto it = keys.cbegin(); it != keys.cend(); ++it)
7c673cae
FG
103 rmkey(prefix, *it);
104 }
105
106 /// Remove Key
107 virtual void rmkey(
11fdf7f2
TL
108 const std::string &prefix, ///< [in] Prefix/CF to search for
109 const std::string &k ///< [in] Key to remove
7c673cae
FG
110 ) = 0;
111 virtual void rmkey(
112 const std::string &prefix, ///< [in] Prefix to search for
113 const char *k, ///< [in] Key to remove
114 size_t keylen
115 ) {
116 rmkey(prefix, string(k, keylen));
117 }
118
119 /// Remove Single Key which exists and was not overwritten.
120 /// This API is only related to performance optimization, and should only be
121 /// re-implemented by log-insert-merge tree based keyvalue stores(such as RocksDB).
122 /// If a key is overwritten (by calling set multiple times), then the result
123 /// of calling rm_single_key on this key is undefined.
124 virtual void rm_single_key(
11fdf7f2 125 const std::string &prefix, ///< [in] Prefix/CF to search for
7c673cae
FG
126 const std::string &k ///< [in] Key to remove
127 ) { return rmkey(prefix, k);}
128
129 /// Removes keys beginning with prefix
130 virtual void rmkeys_by_prefix(
11fdf7f2 131 const std::string &prefix ///< [in] Prefix/CF by which to remove keys
7c673cae
FG
132 ) = 0;
133
134 virtual void rm_range_keys(
135 const string &prefix, ///< [in] Prefix by which to remove keys
136 const string &start, ///< [in] The start bound of remove keys
137 const string &end ///< [in] The start bound of remove keys
138 ) = 0;
139
140 /// Merge value into key
141 virtual void merge(
11fdf7f2 142 const std::string &prefix, ///< [in] Prefix/CF ==> MUST match some established merge operator
7c673cae 143 const std::string &key, ///< [in] Key to be merged
9f95a23c 144 const ceph::buffer::list &value ///< [in] value to be merged into key
11fdf7f2 145 ) { ceph_abort_msg("Not implemented"); }
7c673cae
FG
146
147 virtual ~TransactionImpl() {}
148 };
11fdf7f2 149 typedef std::shared_ptr< TransactionImpl > Transaction;
7c673cae
FG
150
151 /// create a new instance
152 static KeyValueDB *create(CephContext *cct, const std::string& type,
153 const std::string& dir,
9f95a23c 154 std::map<std::string,std::string> options = {},
7c673cae
FG
155 void *p = NULL);
156
157 /// test whether we can successfully initialize; may have side effects (e.g., create)
158 static int test_init(const std::string& type, const std::string& dir);
159 virtual int init(string option_str="") = 0;
9f95a23c
TL
160 virtual int open(std::ostream &out, const std::vector<ColumnFamily>& cfs = {}) = 0;
161 // std::vector cfs contains column families to be created when db is created.
11fdf7f2 162 virtual int create_and_open(std::ostream &out,
9f95a23c 163 const std::vector<ColumnFamily>& cfs = {}) = 0;
11fdf7f2 164
9f95a23c 165 virtual int open_read_only(std::ostream &out, const std::vector<ColumnFamily>& cfs = {}) {
11fdf7f2
TL
166 return -ENOTSUP;
167 }
168
7c673cae
FG
169 virtual void close() { }
170
11fdf7f2
TL
171 /// Try to repair K/V database. leveldb and rocksdb require that database must be not opened.
172 virtual int repair(std::ostream &out) { return 0; }
173
7c673cae
FG
174 virtual Transaction get_transaction() = 0;
175 virtual int submit_transaction(Transaction) = 0;
176 virtual int submit_transaction_sync(Transaction t) {
177 return submit_transaction(t);
178 }
179
180 /// Retrieve Keys
181 virtual int get(
11fdf7f2
TL
182 const std::string &prefix, ///< [in] Prefix/CF for key
183 const std::set<std::string> &key, ///< [in] Key to retrieve
9f95a23c 184 std::map<std::string, ceph::buffer::list> *out ///< [out] Key value retrieved
7c673cae 185 ) = 0;
11fdf7f2 186 virtual int get(const std::string &prefix, ///< [in] prefix or CF name
7c673cae 187 const std::string &key, ///< [in] key
9f95a23c 188 ceph::buffer::list *value) { ///< [out] value
7c673cae
FG
189 std::set<std::string> ks;
190 ks.insert(key);
9f95a23c 191 std::map<std::string,ceph::buffer::list> om;
7c673cae
FG
192 int r = get(prefix, ks, &om);
193 if (om.find(key) != om.end()) {
11fdf7f2 194 *value = std::move(om[key]);
7c673cae 195 } else {
9f95a23c 196 *value = ceph::buffer::list();
7c673cae
FG
197 r = -ENOENT;
198 }
199 return r;
200 }
201 virtual int get(const string &prefix,
202 const char *key, size_t keylen,
9f95a23c 203 ceph::buffer::list *value) {
7c673cae
FG
204 return get(prefix, string(key, keylen), value);
205 }
206
11fdf7f2
TL
207 // This superclass is used both by kv iterators *and* by the ObjectMap
208 // omap iterator. The class hierarchies are unfortunately tied together
209 // by the legacy DBOjectMap implementation :(.
210 class SimplestIteratorImpl {
7c673cae
FG
211 public:
212 virtual int seek_to_first() = 0;
213 virtual int upper_bound(const std::string &after) = 0;
214 virtual int lower_bound(const std::string &to) = 0;
215 virtual bool valid() = 0;
11fdf7f2 216 virtual int next() = 0;
7c673cae 217 virtual std::string key() = 0;
9f95a23c
TL
218 virtual std::string tail_key() {
219 return "";
220 }
221 virtual ceph::buffer::list value() = 0;
7c673cae 222 virtual int status() = 0;
11fdf7f2
TL
223 virtual ~SimplestIteratorImpl() {}
224 };
225
226 class IteratorImpl : public SimplestIteratorImpl {
227 public:
228 virtual ~IteratorImpl() {}
229 virtual int seek_to_last() = 0;
230 virtual int prev() = 0;
231 virtual std::pair<std::string, std::string> raw_key() = 0;
9f95a23c
TL
232 virtual ceph::buffer::ptr value_as_ptr() {
233 ceph::buffer::list bl = value();
11fdf7f2
TL
234 if (bl.length() == 1) {
235 return *bl.buffers().begin();
236 } else if (bl.length() == 0) {
9f95a23c 237 return ceph::buffer::ptr();
11fdf7f2
TL
238 } else {
239 ceph_abort();
240 }
241 }
7c673cae 242 };
11fdf7f2 243 typedef std::shared_ptr< IteratorImpl > Iterator;
7c673cae 244
11fdf7f2 245 // This is the low-level iterator implemented by the underlying KV store.
7c673cae
FG
246 class WholeSpaceIteratorImpl {
247 public:
248 virtual int seek_to_first() = 0;
249 virtual int seek_to_first(const std::string &prefix) = 0;
250 virtual int seek_to_last() = 0;
251 virtual int seek_to_last(const std::string &prefix) = 0;
252 virtual int upper_bound(const std::string &prefix, const std::string &after) = 0;
253 virtual int lower_bound(const std::string &prefix, const std::string &to) = 0;
254 virtual bool valid() = 0;
255 virtual int next() = 0;
256 virtual int prev() = 0;
257 virtual std::string key() = 0;
258 virtual std::pair<std::string,std::string> raw_key() = 0;
259 virtual bool raw_key_is_prefixed(const std::string &prefix) = 0;
9f95a23c
TL
260 virtual ceph::buffer::list value() = 0;
261 virtual ceph::buffer::ptr value_as_ptr() {
262 ceph::buffer::list bl = value();
7c673cae
FG
263 if (bl.length()) {
264 return *bl.buffers().begin();
265 } else {
9f95a23c 266 return ceph::buffer::ptr();
7c673cae
FG
267 }
268 }
269 virtual int status() = 0;
270 virtual size_t key_size() {
271 return 0;
272 }
273 virtual size_t value_size() {
274 return 0;
275 }
276 virtual ~WholeSpaceIteratorImpl() { }
277 };
11fdf7f2 278 typedef std::shared_ptr< WholeSpaceIteratorImpl > WholeSpaceIterator;
7c673cae 279
11fdf7f2
TL
280private:
281 // This class filters a WholeSpaceIterator by a prefix.
282 class PrefixIteratorImpl : public IteratorImpl {
7c673cae
FG
283 const std::string prefix;
284 WholeSpaceIterator generic_iter;
285 public:
11fdf7f2 286 PrefixIteratorImpl(const std::string &prefix, WholeSpaceIterator iter) :
7c673cae 287 prefix(prefix), generic_iter(iter) { }
11fdf7f2 288 ~PrefixIteratorImpl() override { }
7c673cae
FG
289
290 int seek_to_first() override {
291 return generic_iter->seek_to_first(prefix);
292 }
11fdf7f2 293 int seek_to_last() override {
7c673cae
FG
294 return generic_iter->seek_to_last(prefix);
295 }
296 int upper_bound(const std::string &after) override {
297 return generic_iter->upper_bound(prefix, after);
298 }
299 int lower_bound(const std::string &to) override {
300 return generic_iter->lower_bound(prefix, to);
301 }
302 bool valid() override {
303 if (!generic_iter->valid())
304 return false;
305 return generic_iter->raw_key_is_prefixed(prefix);
306 }
11fdf7f2
TL
307 int next() override {
308 return generic_iter->next();
7c673cae 309 }
11fdf7f2
TL
310 int prev() override {
311 return generic_iter->prev();
7c673cae
FG
312 }
313 std::string key() override {
314 return generic_iter->key();
315 }
11fdf7f2 316 std::pair<std::string, std::string> raw_key() override {
7c673cae
FG
317 return generic_iter->raw_key();
318 }
9f95a23c 319 ceph::buffer::list value() override {
7c673cae
FG
320 return generic_iter->value();
321 }
9f95a23c 322 ceph::buffer::ptr value_as_ptr() override {
7c673cae
FG
323 return generic_iter->value_as_ptr();
324 }
325 int status() override {
326 return generic_iter->status();
327 }
328 };
11fdf7f2 329public:
7c673cae 330
11fdf7f2
TL
331 virtual WholeSpaceIterator get_wholespace_iterator() = 0;
332 virtual Iterator get_iterator(const std::string &prefix) {
333 return std::make_shared<PrefixIteratorImpl>(
334 prefix,
335 get_wholespace_iterator());
336 }
7c673cae 337
11fdf7f2
TL
338 void add_column_family(const std::string& cf_name, void *handle) {
339 cf_handles.insert(std::make_pair(cf_name, handle));
7c673cae
FG
340 }
341
11fdf7f2
TL
342 bool is_column_family(const std::string& prefix) {
343 return cf_handles.count(prefix);
7c673cae
FG
344 }
345
346 virtual uint64_t get_estimated_size(std::map<std::string,uint64_t> &extra) = 0;
347 virtual int get_statfs(struct store_statfs_t *buf) {
348 return -EOPNOTSUPP;
349 }
350
31f18b77
FG
351 virtual int set_cache_size(uint64_t) {
352 return -EOPNOTSUPP;
353 }
354
11fdf7f2 355 virtual int set_cache_high_pri_pool_ratio(double ratio) {
91327a77
AA
356 return -EOPNOTSUPP;
357 }
358
11fdf7f2 359 virtual int64_t get_cache_usage() const {
91327a77
AA
360 return -EOPNOTSUPP;
361 }
362
11fdf7f2
TL
363 virtual std::shared_ptr<PriorityCache::PriCache> get_priority_cache() const {
364 return nullptr;
91327a77
AA
365 }
366
11fdf7f2 367 virtual ~KeyValueDB() {}
91327a77 368
11fdf7f2 369 /// estimate space utilization for a prefix (in bytes)
9f95a23c
TL
370 virtual int64_t estimate_prefix_size(const string& prefix,
371 const string& key_prefix) {
11fdf7f2 372 return 0;
91327a77
AA
373 }
374
7c673cae
FG
375 /// compact the underlying store
376 virtual void compact() {}
377
11fdf7f2
TL
378 /// compact the underlying store in async mode
379 virtual void compact_async() {}
380
7c673cae
FG
381 /// compact db for all keys with a given prefix
382 virtual void compact_prefix(const std::string& prefix) {}
383 /// compact db for all keys with a given prefix, async
384 virtual void compact_prefix_async(const std::string& prefix) {}
385 virtual void compact_range(const std::string& prefix,
386 const std::string& start, const std::string& end) {}
387 virtual void compact_range_async(const std::string& prefix,
388 const std::string& start, const std::string& end) {}
389
// See RocksDB merge operator definition, we support the basic
// associative merge only right now.
class MergeOperator {
public:
  /// Merge into a key that doesn't exist
  /// (operand bytes in rdata/rlen, result written to *new_value).
  virtual void merge_nonexistent(const char *rdata, size_t rlen,
                                 std::string *new_value) = 0;

  /// Merge into a key that does exist
  /// (two byte ranges ldata/llen and rdata/rlen, result written to
  /// *new_value).
  virtual void merge(const char *ldata, size_t llen,
                     const char *rdata, size_t rlen,
                     std::string *new_value) = 0;

  /// We use each operator name and each prefix to construct the overall
  /// RocksDB operator name for consistency check at open time.
  virtual const char *name() const = 0;

  virtual ~MergeOperator() {}
};
408
409 /// Setup one or more operators, this needs to be done BEFORE the DB is opened.
410 virtual int set_merge_operator(const std::string& prefix,
411 std::shared_ptr<MergeOperator> mop) {
412 return -EOPNOTSUPP;
413 }
414
9f95a23c 415 virtual void get_statistics(ceph::Formatter *f) {
7c673cae
FG
416 return;
417 }
3efd9988
FG
418
419 /**
420 * Return your perf counters if you have any. Subclasses are not
421 * required to implement this, and callers must respect a null return
422 * value.
423 */
424 virtual PerfCounters *get_perf_counters() {
425 return nullptr;
426 }
9f95a23c
TL
427
428 /**
429 * Access implementation specific integral property corresponding
430 * to passed property and prefic.
431 * Return value is true if property is valid for prefix, populates out.
432 */
433 virtual bool get_property(
434 const std::string &property,
435 uint64_t *out) {
436 return false;
437 }
7c673cae 438protected:
11fdf7f2 439 /// List of matching prefixes/ColumnFamilies and merge operators
7c673cae
FG
440 std::vector<std::pair<std::string,
441 std::shared_ptr<MergeOperator> > > merge_ops;
442
11fdf7f2
TL
443 /// column families in use, name->handle
444 std::unordered_map<std::string, void *> cf_handles;
7c673cae
FG
445};
446
447#endif