]> git.proxmox.com Git - ceph.git/blame - ceph/src/kv/KeyValueDB.h
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / kv / KeyValueDB.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3#ifndef KEY_VALUE_DB_H
4#define KEY_VALUE_DB_H
5
6#include "include/buffer.h"
7#include <ostream>
8#include <set>
9#include <map>
10#include <string>
7c673cae
FG
11#include <boost/scoped_ptr.hpp>
12#include "include/encoding.h"
13#include "common/Formatter.h"
3efd9988 14#include "common/perf_counters.h"
91327a77 15#include "common/PriorityCache.h"
7c673cae 16
7c673cae
FG
17/**
18 * Defines virtual interface to be implemented by key value store
19 *
20 * Kyoto Cabinet or LevelDB should implement this
21 */
11fdf7f2 22class KeyValueDB {
7c673cae
FG
23public:
24 class TransactionImpl {
25 public:
26 /// Set Keys
27 void set(
11fdf7f2 28 const std::string &prefix, ///< [in] Prefix for keys, or CF name
9f95a23c 29 const std::map<std::string, ceph::buffer::list> &to_set ///< [in] keys/values to set
7c673cae 30 ) {
9f95a23c 31 for (auto it = to_set.cbegin(); it != to_set.cend(); ++it)
7c673cae
FG
32 set(prefix, it->first, it->second);
33 }
34
9f95a23c 35 /// Set Keys (via encoded ceph::buffer::list)
7c673cae 36 void set(
11fdf7f2 37 const std::string &prefix, ///< [in] prefix, or CF name
9f95a23c 38 ceph::buffer::list& to_set_bl ///< [in] encoded key/values to set
7c673cae 39 ) {
9f95a23c 40 using ceph::decode;
11fdf7f2 41 auto p = std::cbegin(to_set_bl);
7c673cae 42 uint32_t num;
11fdf7f2 43 decode(num, p);
7c673cae 44 while (num--) {
f67539c2 45 std::string key;
9f95a23c 46 ceph::buffer::list value;
11fdf7f2
TL
47 decode(key, p);
48 decode(value, p);
7c673cae
FG
49 set(prefix, key, value);
50 }
51 }
52
53 /// Set Key
54 virtual void set(
11fdf7f2 55 const std::string &prefix, ///< [in] Prefix or CF for the key
7c673cae 56 const std::string &k, ///< [in] Key to set
9f95a23c 57 const ceph::buffer::list &bl ///< [in] Value to set
7c673cae
FG
58 ) = 0;
59 virtual void set(
60 const std::string &prefix,
61 const char *k,
62 size_t keylen,
9f95a23c 63 const ceph::buffer::list& bl) {
f67539c2 64 set(prefix, std::string(k, keylen), bl);
7c673cae
FG
65 }
66
9f95a23c 67 /// Removes Keys (via encoded ceph::buffer::list)
7c673cae 68 void rmkeys(
11fdf7f2 69 const std::string &prefix, ///< [in] Prefix or CF to search for
9f95a23c 70 ceph::buffer::list &keys_bl ///< [in] Keys to remove
7c673cae 71 ) {
9f95a23c 72 using ceph::decode;
11fdf7f2 73 auto p = std::cbegin(keys_bl);
7c673cae 74 uint32_t num;
11fdf7f2 75 decode(num, p);
7c673cae 76 while (num--) {
f67539c2 77 std::string key;
11fdf7f2 78 decode(key, p);
7c673cae
FG
79 rmkey(prefix, key);
80 }
81 }
82
83 /// Removes Keys
84 void rmkeys(
11fdf7f2 85 const std::string &prefix, ///< [in] Prefix/CF to search for
7c673cae
FG
86 const std::set<std::string> &keys ///< [in] Keys to remove
87 ) {
9f95a23c 88 for (auto it = keys.cbegin(); it != keys.cend(); ++it)
7c673cae
FG
89 rmkey(prefix, *it);
90 }
91
92 /// Remove Key
93 virtual void rmkey(
11fdf7f2
TL
94 const std::string &prefix, ///< [in] Prefix/CF to search for
95 const std::string &k ///< [in] Key to remove
7c673cae
FG
96 ) = 0;
97 virtual void rmkey(
98 const std::string &prefix, ///< [in] Prefix to search for
99 const char *k, ///< [in] Key to remove
100 size_t keylen
101 ) {
f67539c2 102 rmkey(prefix, std::string(k, keylen));
7c673cae
FG
103 }
104
105 /// Remove Single Key which exists and was not overwritten.
106 /// This API is only related to performance optimization, and should only be
107 /// re-implemented by log-insert-merge tree based keyvalue stores(such as RocksDB).
108 /// If a key is overwritten (by calling set multiple times), then the result
109 /// of calling rm_single_key on this key is undefined.
110 virtual void rm_single_key(
11fdf7f2 111 const std::string &prefix, ///< [in] Prefix/CF to search for
7c673cae
FG
112 const std::string &k ///< [in] Key to remove
113 ) { return rmkey(prefix, k);}
114
115 /// Removes keys beginning with prefix
116 virtual void rmkeys_by_prefix(
11fdf7f2 117 const std::string &prefix ///< [in] Prefix/CF by which to remove keys
7c673cae
FG
118 ) = 0;
119
120 virtual void rm_range_keys(
f67539c2
TL
121 const std::string &prefix, ///< [in] Prefix by which to remove keys
122 const std::string &start, ///< [in] The start bound of remove keys
123 const std::string &end ///< [in] The start bound of remove keys
7c673cae
FG
124 ) = 0;
125
126 /// Merge value into key
127 virtual void merge(
11fdf7f2 128 const std::string &prefix, ///< [in] Prefix/CF ==> MUST match some established merge operator
7c673cae 129 const std::string &key, ///< [in] Key to be merged
9f95a23c 130 const ceph::buffer::list &value ///< [in] value to be merged into key
11fdf7f2 131 ) { ceph_abort_msg("Not implemented"); }
7c673cae
FG
132
133 virtual ~TransactionImpl() {}
134 };
11fdf7f2 135 typedef std::shared_ptr< TransactionImpl > Transaction;
7c673cae
FG
136
137 /// create a new instance
138 static KeyValueDB *create(CephContext *cct, const std::string& type,
139 const std::string& dir,
9f95a23c 140 std::map<std::string,std::string> options = {},
7c673cae
FG
141 void *p = NULL);
142
143 /// test whether we can successfully initialize; may have side effects (e.g., create)
144 static int test_init(const std::string& type, const std::string& dir);
f67539c2
TL
145 virtual int init(std::string option_str="") = 0;
146 virtual int open(std::ostream &out, const std::string& cfs="") = 0;
9f95a23c 147 // std::vector cfs contains column families to be created when db is created.
f67539c2 148 virtual int create_and_open(std::ostream &out, const std::string& cfs="") = 0;
11fdf7f2 149
f67539c2 150 virtual int open_read_only(std::ostream &out, const std::string& cfs="") {
11fdf7f2
TL
151 return -ENOTSUP;
152 }
153
7c673cae
FG
154 virtual void close() { }
155
11fdf7f2
TL
156 /// Try to repair K/V database. leveldb and rocksdb require that database must be not opened.
157 virtual int repair(std::ostream &out) { return 0; }
158
7c673cae
FG
159 virtual Transaction get_transaction() = 0;
160 virtual int submit_transaction(Transaction) = 0;
161 virtual int submit_transaction_sync(Transaction t) {
162 return submit_transaction(t);
163 }
164
165 /// Retrieve Keys
166 virtual int get(
11fdf7f2
TL
167 const std::string &prefix, ///< [in] Prefix/CF for key
168 const std::set<std::string> &key, ///< [in] Key to retrieve
9f95a23c 169 std::map<std::string, ceph::buffer::list> *out ///< [out] Key value retrieved
7c673cae 170 ) = 0;
11fdf7f2 171 virtual int get(const std::string &prefix, ///< [in] prefix or CF name
7c673cae 172 const std::string &key, ///< [in] key
9f95a23c 173 ceph::buffer::list *value) { ///< [out] value
7c673cae
FG
174 std::set<std::string> ks;
175 ks.insert(key);
9f95a23c 176 std::map<std::string,ceph::buffer::list> om;
7c673cae
FG
177 int r = get(prefix, ks, &om);
178 if (om.find(key) != om.end()) {
11fdf7f2 179 *value = std::move(om[key]);
7c673cae 180 } else {
9f95a23c 181 *value = ceph::buffer::list();
7c673cae
FG
182 r = -ENOENT;
183 }
184 return r;
185 }
f67539c2 186 virtual int get(const std::string &prefix,
7c673cae 187 const char *key, size_t keylen,
9f95a23c 188 ceph::buffer::list *value) {
f67539c2 189 return get(prefix, std::string(key, keylen), value);
7c673cae
FG
190 }
191
11fdf7f2
TL
192 // This superclass is used both by kv iterators *and* by the ObjectMap
193 // omap iterator. The class hierarchies are unfortunately tied together
194 // by the legacy DBOjectMap implementation :(.
195 class SimplestIteratorImpl {
7c673cae
FG
196 public:
197 virtual int seek_to_first() = 0;
198 virtual int upper_bound(const std::string &after) = 0;
199 virtual int lower_bound(const std::string &to) = 0;
200 virtual bool valid() = 0;
11fdf7f2 201 virtual int next() = 0;
7c673cae 202 virtual std::string key() = 0;
9f95a23c
TL
203 virtual std::string tail_key() {
204 return "";
205 }
206 virtual ceph::buffer::list value() = 0;
7c673cae 207 virtual int status() = 0;
11fdf7f2
TL
208 virtual ~SimplestIteratorImpl() {}
209 };
210
211 class IteratorImpl : public SimplestIteratorImpl {
212 public:
213 virtual ~IteratorImpl() {}
214 virtual int seek_to_last() = 0;
215 virtual int prev() = 0;
216 virtual std::pair<std::string, std::string> raw_key() = 0;
9f95a23c
TL
217 virtual ceph::buffer::ptr value_as_ptr() {
218 ceph::buffer::list bl = value();
11fdf7f2
TL
219 if (bl.length() == 1) {
220 return *bl.buffers().begin();
221 } else if (bl.length() == 0) {
9f95a23c 222 return ceph::buffer::ptr();
11fdf7f2
TL
223 } else {
224 ceph_abort();
225 }
226 }
7c673cae 227 };
11fdf7f2 228 typedef std::shared_ptr< IteratorImpl > Iterator;
7c673cae 229
11fdf7f2 230 // This is the low-level iterator implemented by the underlying KV store.
7c673cae
FG
231 class WholeSpaceIteratorImpl {
232 public:
233 virtual int seek_to_first() = 0;
234 virtual int seek_to_first(const std::string &prefix) = 0;
235 virtual int seek_to_last() = 0;
236 virtual int seek_to_last(const std::string &prefix) = 0;
237 virtual int upper_bound(const std::string &prefix, const std::string &after) = 0;
238 virtual int lower_bound(const std::string &prefix, const std::string &to) = 0;
239 virtual bool valid() = 0;
240 virtual int next() = 0;
241 virtual int prev() = 0;
242 virtual std::string key() = 0;
243 virtual std::pair<std::string,std::string> raw_key() = 0;
244 virtual bool raw_key_is_prefixed(const std::string &prefix) = 0;
9f95a23c
TL
245 virtual ceph::buffer::list value() = 0;
246 virtual ceph::buffer::ptr value_as_ptr() {
247 ceph::buffer::list bl = value();
7c673cae
FG
248 if (bl.length()) {
249 return *bl.buffers().begin();
250 } else {
9f95a23c 251 return ceph::buffer::ptr();
7c673cae
FG
252 }
253 }
254 virtual int status() = 0;
255 virtual size_t key_size() {
256 return 0;
257 }
258 virtual size_t value_size() {
259 return 0;
260 }
261 virtual ~WholeSpaceIteratorImpl() { }
262 };
11fdf7f2 263 typedef std::shared_ptr< WholeSpaceIteratorImpl > WholeSpaceIterator;
7c673cae 264
11fdf7f2
TL
265private:
266 // This class filters a WholeSpaceIterator by a prefix.
267 class PrefixIteratorImpl : public IteratorImpl {
7c673cae
FG
268 const std::string prefix;
269 WholeSpaceIterator generic_iter;
270 public:
11fdf7f2 271 PrefixIteratorImpl(const std::string &prefix, WholeSpaceIterator iter) :
7c673cae 272 prefix(prefix), generic_iter(iter) { }
11fdf7f2 273 ~PrefixIteratorImpl() override { }
7c673cae
FG
274
275 int seek_to_first() override {
276 return generic_iter->seek_to_first(prefix);
277 }
11fdf7f2 278 int seek_to_last() override {
7c673cae
FG
279 return generic_iter->seek_to_last(prefix);
280 }
281 int upper_bound(const std::string &after) override {
282 return generic_iter->upper_bound(prefix, after);
283 }
284 int lower_bound(const std::string &to) override {
285 return generic_iter->lower_bound(prefix, to);
286 }
287 bool valid() override {
288 if (!generic_iter->valid())
289 return false;
290 return generic_iter->raw_key_is_prefixed(prefix);
291 }
11fdf7f2
TL
292 int next() override {
293 return generic_iter->next();
7c673cae 294 }
11fdf7f2
TL
295 int prev() override {
296 return generic_iter->prev();
7c673cae
FG
297 }
298 std::string key() override {
299 return generic_iter->key();
300 }
11fdf7f2 301 std::pair<std::string, std::string> raw_key() override {
7c673cae
FG
302 return generic_iter->raw_key();
303 }
9f95a23c 304 ceph::buffer::list value() override {
7c673cae
FG
305 return generic_iter->value();
306 }
9f95a23c 307 ceph::buffer::ptr value_as_ptr() override {
7c673cae
FG
308 return generic_iter->value_as_ptr();
309 }
310 int status() override {
311 return generic_iter->status();
312 }
313 };
11fdf7f2 314public:
f67539c2
TL
315 typedef uint32_t IteratorOpts;
316 static const uint32_t ITERATOR_NOCACHE = 1;
317 virtual WholeSpaceIterator get_wholespace_iterator(IteratorOpts opts = 0) = 0;
318 virtual Iterator get_iterator(const std::string &prefix, IteratorOpts opts = 0) {
11fdf7f2
TL
319 return std::make_shared<PrefixIteratorImpl>(
320 prefix,
f67539c2 321 get_wholespace_iterator(opts));
7c673cae
FG
322 }
323
324 virtual uint64_t get_estimated_size(std::map<std::string,uint64_t> &extra) = 0;
325 virtual int get_statfs(struct store_statfs_t *buf) {
326 return -EOPNOTSUPP;
327 }
328
31f18b77
FG
329 virtual int set_cache_size(uint64_t) {
330 return -EOPNOTSUPP;
331 }
332
11fdf7f2 333 virtual int set_cache_high_pri_pool_ratio(double ratio) {
91327a77
AA
334 return -EOPNOTSUPP;
335 }
336
11fdf7f2 337 virtual int64_t get_cache_usage() const {
91327a77
AA
338 return -EOPNOTSUPP;
339 }
340
f67539c2
TL
341 virtual int64_t get_cache_usage(std::string prefix) const {
342 return -EOPNOTSUPP;
343 }
344
11fdf7f2
TL
345 virtual std::shared_ptr<PriorityCache::PriCache> get_priority_cache() const {
346 return nullptr;
91327a77
AA
347 }
348
f67539c2
TL
349 virtual std::shared_ptr<PriorityCache::PriCache> get_priority_cache(std::string prefix) const {
350 return nullptr;
351 }
352
353
354
11fdf7f2 355 virtual ~KeyValueDB() {}
91327a77 356
11fdf7f2 357 /// estimate space utilization for a prefix (in bytes)
f67539c2
TL
358 virtual int64_t estimate_prefix_size(const std::string& prefix,
359 const std::string& key_prefix) {
11fdf7f2 360 return 0;
91327a77
AA
361 }
362
7c673cae
FG
363 /// compact the underlying store
364 virtual void compact() {}
365
11fdf7f2
TL
366 /// compact the underlying store in async mode
367 virtual void compact_async() {}
368
7c673cae
FG
369 /// compact db for all keys with a given prefix
370 virtual void compact_prefix(const std::string& prefix) {}
371 /// compact db for all keys with a given prefix, async
372 virtual void compact_prefix_async(const std::string& prefix) {}
373 virtual void compact_range(const std::string& prefix,
374 const std::string& start, const std::string& end) {}
375 virtual void compact_range_async(const std::string& prefix,
376 const std::string& start, const std::string& end) {}
377
// See RocksDB merge operator definition, we support the basic
// associative merge only right now.
class MergeOperator {
public:
  /// Merge into a key that doesn't exist: only the right-hand operand
  /// (rdata, rlen) is available; the result goes to *new_value.
  virtual void merge_nonexistent(const char *rdata,
                                 size_t rlen,
                                 std::string *new_value) = 0;

  /// Merge into a key that does exist: combine the existing value
  /// (ldata, llen) with the operand (rdata, rlen) into *new_value.
  virtual void merge(const char *ldata,
                     size_t llen,
                     const char *rdata,
                     size_t rlen,
                     std::string *new_value) = 0;

  /// We use each operator name and each prefix to construct the overall
  /// RocksDB operator name for consistency check at open time.
  virtual const char *name() const = 0;

  virtual ~MergeOperator() {}
};
396
397 /// Setup one or more operators, this needs to be done BEFORE the DB is opened.
398 virtual int set_merge_operator(const std::string& prefix,
399 std::shared_ptr<MergeOperator> mop) {
400 return -EOPNOTSUPP;
401 }
402
9f95a23c 403 virtual void get_statistics(ceph::Formatter *f) {
7c673cae
FG
404 return;
405 }
3efd9988
FG
406
407 /**
408 * Return your perf counters if you have any. Subclasses are not
409 * required to implement this, and callers must respect a null return
410 * value.
411 */
412 virtual PerfCounters *get_perf_counters() {
413 return nullptr;
414 }
9f95a23c
TL
415
416 /**
417 * Access implementation specific integral property corresponding
418 * to passed property and prefic.
419 * Return value is true if property is valid for prefix, populates out.
420 */
421 virtual bool get_property(
422 const std::string &property,
423 uint64_t *out) {
424 return false;
425 }
7c673cae 426protected:
11fdf7f2 427 /// List of matching prefixes/ColumnFamilies and merge operators
7c673cae
FG
428 std::vector<std::pair<std::string,
429 std::shared_ptr<MergeOperator> > > merge_ops;
430
7c673cae
FG
431};
432
433#endif