1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
6 #include "include/buffer.h"
12 #include <boost/scoped_ptr.hpp>
13 #include "include/encoding.h"
14 #include "common/Formatter.h"
15 #include "common/perf_counters.h"
16 #include "common/PriorityCache.h"
19 * Defines virtual interface to be implemented by key value store
21 * Kyoto Cabinet should implement this
25 class TransactionImpl
{
29 const std::string
&prefix
, ///< [in] Prefix for keys, or CF name
30 const std::map
<std::string
, ceph::buffer::list
> &to_set
///< [in] keys/values to set
32 for (auto it
= to_set
.cbegin(); it
!= to_set
.cend(); ++it
)
33 set(prefix
, it
->first
, it
->second
);
36 /// Set Keys (via encoded ceph::buffer::list)
38 const std::string
&prefix
, ///< [in] prefix, or CF name
39 ceph::buffer::list
& to_set_bl
///< [in] encoded key/values to set
42 auto p
= std::cbegin(to_set_bl
);
47 ceph::buffer::list value
;
50 set(prefix
, key
, value
);
56 const std::string
&prefix
, ///< [in] Prefix or CF for the key
57 const std::string
&k
, ///< [in] Key to set
58 const ceph::buffer::list
&bl
///< [in] Value to set
61 const std::string
&prefix
,
64 const ceph::buffer::list
& bl
) {
65 set(prefix
, std::string(k
, keylen
), bl
);
68 /// Removes Keys (via encoded ceph::buffer::list)
70 const std::string
&prefix
, ///< [in] Prefix or CF to search for
71 ceph::buffer::list
&keys_bl
///< [in] Keys to remove
74 auto p
= std::cbegin(keys_bl
);
86 const std::string
&prefix
, ///< [in] Prefix/CF to search for
87 const std::set
<std::string
> &keys
///< [in] Keys to remove
89 for (auto it
= keys
.cbegin(); it
!= keys
.cend(); ++it
)
95 const std::string
&prefix
, ///< [in] Prefix/CF to search for
96 const std::string
&k
///< [in] Key to remove
99 const std::string
&prefix
, ///< [in] Prefix to search for
100 const char *k
, ///< [in] Key to remove
103 rmkey(prefix
, std::string(k
, keylen
));
106 /// Remove Single Key which exists and was not overwritten.
107 /// This API is only related to performance optimization, and should only be
108 /// re-implemented by log-insert-merge tree based keyvalue stores(such as RocksDB).
109 /// If a key is overwritten (by calling set multiple times), then the result
110 /// of calling rm_single_key on this key is undefined.
111 virtual void rm_single_key(
112 const std::string
&prefix
, ///< [in] Prefix/CF to search for
113 const std::string
&k
///< [in] Key to remove
114 ) { return rmkey(prefix
, k
);}
116 /// Removes keys beginning with prefix
117 virtual void rmkeys_by_prefix(
118 const std::string
&prefix
///< [in] Prefix/CF by which to remove keys
121 virtual void rm_range_keys(
122 const std::string
&prefix
, ///< [in] Prefix by which to remove keys
123 const std::string
&start
, ///< [in] The start bound of remove keys
124 const std::string
&end
///< [in] The end bound of remove keys
127 /// Merge value into key
129 const std::string
&prefix
, ///< [in] Prefix/CF ==> MUST match some established merge operator
130 const std::string
&key
, ///< [in] Key to be merged
131 const ceph::buffer::list
&value
///< [in] value to be merged into key
132 ) { ceph_abort_msg("Not implemented"); }
134 virtual ~TransactionImpl() {}
136 typedef std::shared_ptr
< TransactionImpl
> Transaction
;
138 /// create a new instance
139 static KeyValueDB
*create(CephContext
*cct
, const std::string
& type
,
140 const std::string
& dir
,
141 std::map
<std::string
,std::string
> options
= {},
144 /// test whether we can successfully initialize; may have side effects (e.g., create)
145 static int test_init(const std::string
& type
, const std::string
& dir
);
146 virtual int init(std::string option_str
="") = 0;
147 virtual int open(std::ostream
&out
, const std::string
& cfs
="") = 0;
148 // cfs lists the column families to be created when the db is created.
149 virtual int create_and_open(std::ostream
&out
, const std::string
& cfs
="") = 0;
151 virtual int open_read_only(std::ostream
&out
, const std::string
& cfs
="") {
155 virtual void close() { }
157 /// Try to repair K/V database. rocksdb requires that database must be not opened.
158 virtual int repair(std::ostream
&out
) { return 0; }
160 virtual Transaction
get_transaction() = 0;
161 virtual int submit_transaction(Transaction
) = 0;
162 virtual int submit_transaction_sync(Transaction t
) {
163 return submit_transaction(t
);
168 const std::string
&prefix
, ///< [in] Prefix/CF for key
169 const std::set
<std::string
> &key
, ///< [in] Key to retrieve
170 std::map
<std::string
, ceph::buffer::list
> *out
///< [out] Key value retrieved
172 virtual int get(const std::string
&prefix
, ///< [in] prefix or CF name
173 const std::string
&key
, ///< [in] key
174 ceph::buffer::list
*value
) { ///< [out] value
175 std::set
<std::string
> ks
;
177 std::map
<std::string
,ceph::buffer::list
> om
;
178 int r
= get(prefix
, ks
, &om
);
179 if (om
.find(key
) != om
.end()) {
180 *value
= std::move(om
[key
]);
182 *value
= ceph::buffer::list();
187 virtual int get(const std::string
&prefix
,
188 const char *key
, size_t keylen
,
189 ceph::buffer::list
*value
) {
190 return get(prefix
, std::string(key
, keylen
), value
);
193 // This superclass is used both by kv iterators *and* by the ObjectMap
194 // omap iterator. The class hierarchies are unfortunately tied together
195 // by the legacy DBObjectMap implementation :(.
196 class SimplestIteratorImpl
{
198 virtual int seek_to_first() = 0;
199 virtual int upper_bound(const std::string
&after
) = 0;
200 virtual int lower_bound(const std::string
&to
) = 0;
201 virtual bool valid() = 0;
202 virtual int next() = 0;
203 virtual std::string
key() = 0;
204 virtual std::string
tail_key() {
207 virtual ceph::buffer::list
value() = 0;
208 virtual int status() = 0;
209 virtual ~SimplestIteratorImpl() {}
212 class IteratorImpl
: public SimplestIteratorImpl
{
214 virtual ~IteratorImpl() {}
215 virtual int seek_to_last() = 0;
216 virtual int prev() = 0;
217 virtual std::pair
<std::string
, std::string
> raw_key() = 0;
218 virtual ceph::buffer::ptr
value_as_ptr() {
219 ceph::buffer::list bl
= value();
220 if (bl
.length() == 1) {
221 return *bl
.buffers().begin();
222 } else if (bl
.length() == 0) {
223 return ceph::buffer::ptr();
229 typedef std::shared_ptr
< IteratorImpl
> Iterator
;
231 // This is the low-level iterator implemented by the underlying KV store.
232 class WholeSpaceIteratorImpl
{
234 virtual int seek_to_first() = 0;
235 virtual int seek_to_first(const std::string
&prefix
) = 0;
236 virtual int seek_to_last() = 0;
237 virtual int seek_to_last(const std::string
&prefix
) = 0;
238 virtual int upper_bound(const std::string
&prefix
, const std::string
&after
) = 0;
239 virtual int lower_bound(const std::string
&prefix
, const std::string
&to
) = 0;
240 virtual bool valid() = 0;
241 virtual int next() = 0;
242 virtual int prev() = 0;
243 virtual std::string
key() = 0;
244 virtual std::pair
<std::string
,std::string
> raw_key() = 0;
245 virtual bool raw_key_is_prefixed(const std::string
&prefix
) = 0;
246 virtual ceph::buffer::list
value() = 0;
247 virtual ceph::buffer::ptr
value_as_ptr() {
248 ceph::buffer::list bl
= value();
250 return *bl
.buffers().begin();
252 return ceph::buffer::ptr();
255 virtual int status() = 0;
256 virtual size_t key_size() {
259 virtual size_t value_size() {
262 virtual ~WholeSpaceIteratorImpl() { }
264 typedef std::shared_ptr
< WholeSpaceIteratorImpl
> WholeSpaceIterator
;
267 // This class filters a WholeSpaceIterator by a prefix.
268 // Performs as a dummy wrapper over WholeSpaceIterator
269 // if prefix is empty
270 class PrefixIteratorImpl
: public IteratorImpl
{
271 const std::string prefix
;
272 WholeSpaceIterator generic_iter
;
274 PrefixIteratorImpl(const std::string
&prefix
, WholeSpaceIterator iter
) :
275 prefix(prefix
), generic_iter(iter
) { }
276 ~PrefixIteratorImpl() override
{ }
278 int seek_to_first() override
{
279 return prefix
.empty() ?
280 generic_iter
->seek_to_first() :
281 generic_iter
->seek_to_first(prefix
);
283 int seek_to_last() override
{
284 return prefix
.empty() ?
285 generic_iter
->seek_to_last() :
286 generic_iter
->seek_to_last(prefix
);
288 int upper_bound(const std::string
&after
) override
{
289 return generic_iter
->upper_bound(prefix
, after
);
291 int lower_bound(const std::string
&to
) override
{
292 return generic_iter
->lower_bound(prefix
, to
);
294 bool valid() override
{
295 if (!generic_iter
->valid())
299 return prefix
.empty() ?
301 generic_iter
->raw_key_is_prefixed(prefix
);
303 int next() override
{
304 return generic_iter
->next();
306 int prev() override
{
307 return generic_iter
->prev();
309 std::string
key() override
{
310 return generic_iter
->key();
312 std::pair
<std::string
, std::string
> raw_key() override
{
313 return generic_iter
->raw_key();
315 ceph::buffer::list
value() override
{
316 return generic_iter
->value();
318 ceph::buffer::ptr
value_as_ptr() override
{
319 return generic_iter
->value_as_ptr();
321 int status() override
{
322 return generic_iter
->status();
326 Iterator
make_iterator(const std::string
&prefix
, WholeSpaceIterator w_iter
) {
327 return std::make_shared
<PrefixIteratorImpl
>(
/// Integer type used to pass iterator options.
using IteratorOpts = uint32_t;
/// Iterator option value 1.
static const IteratorOpts ITERATOR_NOCACHE = 1;
335 struct IteratorBounds
{
336 std::optional
<std::string
> lower_bound
;
337 std::optional
<std::string
> upper_bound
;
340 virtual WholeSpaceIterator
get_wholespace_iterator(IteratorOpts opts
= 0) = 0;
341 virtual Iterator
get_iterator(const std::string
&prefix
, IteratorOpts opts
= 0, IteratorBounds bounds
= IteratorBounds()) {
342 return make_iterator(prefix
,
343 get_wholespace_iterator(opts
));
346 virtual uint64_t get_estimated_size(std::map
<std::string
,uint64_t> &extra
) = 0;
347 virtual int get_statfs(struct store_statfs_t
*buf
) {
351 virtual int set_cache_size(uint64_t) {
355 virtual int set_cache_high_pri_pool_ratio(double ratio
) {
359 virtual int64_t get_cache_usage() const {
363 virtual int64_t get_cache_usage(std::string prefix
) const {
367 virtual std::shared_ptr
<PriorityCache::PriCache
> get_priority_cache() const {
371 virtual std::shared_ptr
<PriorityCache::PriCache
> get_priority_cache(std::string prefix
) const {
377 virtual ~KeyValueDB() {}
379 /// estimate space utilization for a prefix (in bytes)
380 virtual int64_t estimate_prefix_size(const std::string
& prefix
,
381 const std::string
& key_prefix
) {
385 /// compact the underlying store
386 virtual void compact() {}
388 /// compact the underlying store in async mode
389 virtual void compact_async() {}
391 /// compact db for all keys with a given prefix
392 virtual void compact_prefix(const std::string
& prefix
) {}
393 /// compact db for all keys with a given prefix, async
394 virtual void compact_prefix_async(const std::string
& prefix
) {}
395 virtual void compact_range(const std::string
& prefix
,
396 const std::string
& start
, const std::string
& end
) {}
397 virtual void compact_range_async(const std::string
& prefix
,
398 const std::string
& start
, const std::string
& end
) {}
400 // See RocksDB merge operator definition, we support the basic
401 // associative merge only right now.
402 class MergeOperator
{
404 /// Merge into a key that doesn't exist
405 virtual void merge_nonexistent(
406 const char *rdata
, size_t rlen
,
407 std::string
*new_value
) = 0;
408 /// Merge into a key that does exist
410 const char *ldata
, size_t llen
,
411 const char *rdata
, size_t rlen
,
412 std::string
*new_value
) = 0;
413 /// We use each operator name and each prefix to construct the overall RocksDB operator name for consistency check at open time.
414 virtual const char *name() const = 0;
416 virtual ~MergeOperator() {}
419 /// Setup one or more operators, this needs to be done BEFORE the DB is opened.
420 virtual int set_merge_operator(const std::string
& prefix
,
421 std::shared_ptr
<MergeOperator
> mop
) {
425 virtual void get_statistics(ceph::Formatter
*f
) {
430 * Return your perf counters if you have any. Subclasses are not
431 * required to implement this, and callers must respect a null return
434 virtual PerfCounters
*get_perf_counters() {
439 * Access implementation specific integral property corresponding
440 * to the passed property and prefix.
441 * Return value is true if property is valid for prefix, populates out.
443 virtual bool get_property(
444 const std::string
&property
,
449 /// List of matching prefixes/ColumnFamilies and merge operators
450 std::vector
<std::pair
<std::string
,
451 std::shared_ptr
<MergeOperator
> > > merge_ops
;