1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
6 #include "include/buffer.h"
12 #include <boost/scoped_ptr.hpp>
13 #include "include/encoding.h"
14 #include "common/Formatter.h"
15 #include "common/perf_counters.h"
16 #include "common/PriorityCache.h"
19 * Defines virtual interface to be implemented by key value store
21 * Kyoto Cabinet should implement this
25 class TransactionImpl
{
29 const std::string
&prefix
, ///< [in] Prefix for keys, or CF name
30 const std::map
<std::string
, ceph::buffer::list
> &to_set
///< [in] keys/values to set
32 for (auto it
= to_set
.cbegin(); it
!= to_set
.cend(); ++it
)
33 set(prefix
, it
->first
, it
->second
);
36 /// Set Keys (via encoded ceph::buffer::list)
38 const std::string
&prefix
, ///< [in] prefix, or CF name
39 ceph::buffer::list
& to_set_bl
///< [in] encoded key/values to set
42 auto p
= std::cbegin(to_set_bl
);
47 ceph::buffer::list value
;
50 set(prefix
, key
, value
);
56 const std::string
&prefix
, ///< [in] Prefix or CF for the key
57 const std::string
&k
, ///< [in] Key to set
58 const ceph::buffer::list
&bl
///< [in] Value to set
61 const std::string
&prefix
,
64 const ceph::buffer::list
& bl
) {
65 set(prefix
, std::string(k
, keylen
), bl
);
68 /// Removes Keys (via encoded ceph::buffer::list)
70 const std::string
&prefix
, ///< [in] Prefix or CF to search for
71 ceph::buffer::list
&keys_bl
///< [in] Keys to remove
74 auto p
= std::cbegin(keys_bl
);
86 const std::string
&prefix
, ///< [in] Prefix/CF to search for
87 const std::set
<std::string
> &keys
///< [in] Keys to remove
89 for (auto it
= keys
.cbegin(); it
!= keys
.cend(); ++it
)
95 const std::string
&prefix
, ///< [in] Prefix/CF to search for
96 const std::string
&k
///< [in] Key to remove
99 const std::string
&prefix
, ///< [in] Prefix to search for
100 const char *k
, ///< [in] Key to remove
103 rmkey(prefix
, std::string(k
, keylen
));
106 /// Remove a single key which exists and was not overwritten.
107 /// This API is purely a performance optimization, and should only be
108 /// re-implemented by log-structured merge (LSM) tree based key/value stores (such as RocksDB).
109 /// If a key is overwritten (by calling set multiple times), then the result
110 /// of calling rm_single_key on this key is undefined.
/// The default implementation simply forwards to rmkey(prefix, k).
111 virtual void rm_single_key(
112 const std::string
&prefix
, ///< [in] Prefix/CF to search for
113 const std::string
&k
///< [in] Key to remove
114 ) { return rmkey(prefix
, k
);}
116 /// Removes keys beginning with prefix
117 virtual void rmkeys_by_prefix(
118 const std::string
&prefix
///< [in] Prefix/CF by which to remove keys
121 virtual void rm_range_keys(
122 const std::string
&prefix
, ///< [in] Prefix by which to remove keys
123 const std::string
&start
, ///< [in] The start bound of remove keys
124 const std::string
&end
///< [in] The end bound of remove keys
127 /// Merge value into key
129 const std::string
&prefix
, ///< [in] Prefix/CF ==> MUST match some established merge operator
130 const std::string
&key
, ///< [in] Key to be merged
131 const ceph::buffer::list
&value
///< [in] value to be merged into key
132 ) { ceph_abort_msg("Not implemented"); }
134 virtual ~TransactionImpl() {}
136 typedef std::shared_ptr
< TransactionImpl
> Transaction
;
138 /// create a new instance
139 static KeyValueDB
*create(CephContext
*cct
, const std::string
& type
,
140 const std::string
& dir
,
141 std::map
<std::string
,std::string
> options
= {},
144 /// test whether we can successfully initialize; may have side effects (e.g., create)
145 static int test_init(const std::string
& type
, const std::string
& dir
);
146 virtual int init(std::string option_str
="") = 0;
147 virtual int open(std::ostream
&out
, const std::string
& cfs
="") = 0;
148 // cfs is a std::string describing the column families to be created when the db is created.
// NOTE(review): the exact format of the cfs string is backend-defined and not visible here -- confirm.
149 virtual int create_and_open(std::ostream
&out
, const std::string
& cfs
="") = 0;
151 virtual int open_read_only(std::ostream
&out
, const std::string
& cfs
="") {
155 virtual void close() { }
157 /// Try to repair the K/V database. RocksDB requires that the database not be open.
/// The default implementation does nothing and returns 0.
158 virtual int repair(std::ostream
&out
) { return 0; }
160 virtual Transaction
get_transaction() = 0;
161 virtual int submit_transaction(Transaction
) = 0;
162 virtual int submit_transaction_sync(Transaction t
) {
163 return submit_transaction(t
);
168 const std::string
&prefix
, ///< [in] Prefix/CF for key
169 const std::set
<std::string
> &key
, ///< [in] Key to retrieve
170 std::map
<std::string
, ceph::buffer::list
> *out
///< [out] Key value retrieved
172 virtual int get(const std::string
&prefix
, ///< [in] prefix or CF name
173 const std::string
&key
, ///< [in] key
174 ceph::buffer::list
*value
) { ///< [out] value
175 std::set
<std::string
> ks
;
177 std::map
<std::string
,ceph::buffer::list
> om
;
178 int r
= get(prefix
, ks
, &om
);
179 if (om
.find(key
) != om
.end()) {
180 *value
= std::move(om
[key
]);
182 *value
= ceph::buffer::list();
187 virtual int get(const std::string
&prefix
,
188 const char *key
, size_t keylen
,
189 ceph::buffer::list
*value
) {
190 return get(prefix
, std::string(key
, keylen
), value
);
193 // This superclass is used both by kv iterators *and* by the ObjectMap
194 // omap iterator. The class hierarchies are unfortunately tied together
195 // by the legacy DBObjectMap implementation :(.
196 class SimplestIteratorImpl
{
198 virtual int seek_to_first() = 0;
199 virtual int upper_bound(const std::string
&after
) = 0;
200 virtual int lower_bound(const std::string
&to
) = 0;
201 virtual bool valid() = 0;
202 virtual int next() = 0;
203 virtual std::string
key() = 0;
204 virtual std::string
tail_key() {
207 virtual ceph::buffer::list
value() = 0;
208 virtual int status() = 0;
209 virtual ~SimplestIteratorImpl() {}
212 class IteratorImpl
: public SimplestIteratorImpl
{
214 virtual ~IteratorImpl() {}
215 virtual int seek_to_last() = 0;
216 virtual int prev() = 0;
217 virtual std::pair
<std::string
, std::string
> raw_key() = 0;
218 virtual ceph::buffer::ptr
value_as_ptr() {
219 ceph::buffer::list bl
= value();
220 if (bl
.length() == 1) {
221 return *bl
.buffers().begin();
222 } else if (bl
.length() == 0) {
223 return ceph::buffer::ptr();
229 typedef std::shared_ptr
< IteratorImpl
> Iterator
;
231 // This is the low-level iterator implemented by the underlying KV store.
232 class WholeSpaceIteratorImpl
{
234 virtual int seek_to_first() = 0;
235 virtual int seek_to_first(const std::string
&prefix
) = 0;
236 virtual int seek_to_last() = 0;
237 virtual int seek_to_last(const std::string
&prefix
) = 0;
238 virtual int upper_bound(const std::string
&prefix
, const std::string
&after
) = 0;
239 virtual int lower_bound(const std::string
&prefix
, const std::string
&to
) = 0;
240 virtual bool valid() = 0;
241 virtual int next() = 0;
242 virtual int prev() = 0;
243 virtual std::string
key() = 0;
244 virtual std::pair
<std::string
,std::string
> raw_key() = 0;
245 virtual bool raw_key_is_prefixed(const std::string
&prefix
) = 0;
246 virtual ceph::buffer::list
value() = 0;
247 virtual ceph::buffer::ptr
value_as_ptr() {
248 ceph::buffer::list bl
= value();
250 return *bl
.buffers().begin();
252 return ceph::buffer::ptr();
255 virtual int status() = 0;
256 virtual size_t key_size() {
259 virtual size_t value_size() {
262 virtual ~WholeSpaceIteratorImpl() { }
264 typedef std::shared_ptr
< WholeSpaceIteratorImpl
> WholeSpaceIterator
;
267 // This class filters a WholeSpaceIterator by a prefix.
268 // Performs as a dummy wrapper over WholeSpaceIterator
269 // if prefix is empty
270 class PrefixIteratorImpl
: public IteratorImpl
{
271 const std::string prefix
;
272 WholeSpaceIterator generic_iter
;
274 PrefixIteratorImpl(const std::string
&prefix
, WholeSpaceIterator iter
) :
275 prefix(prefix
), generic_iter(iter
) { }
276 ~PrefixIteratorImpl() override
{ }
278 int seek_to_first() override
{
279 return prefix
.empty() ?
280 generic_iter
->seek_to_first() :
281 generic_iter
->seek_to_first(prefix
);
283 int seek_to_last() override
{
284 return prefix
.empty() ?
285 generic_iter
->seek_to_last() :
286 generic_iter
->seek_to_last(prefix
);
288 int upper_bound(const std::string
&after
) override
{
289 return generic_iter
->upper_bound(prefix
, after
);
291 int lower_bound(const std::string
&to
) override
{
292 return generic_iter
->lower_bound(prefix
, to
);
294 bool valid() override
{
295 if (!generic_iter
->valid())
299 return prefix
.empty() ?
301 generic_iter
->raw_key_is_prefixed(prefix
);
303 int next() override
{
304 return generic_iter
->next();
306 int prev() override
{
307 return generic_iter
->prev();
309 std::string
key() override
{
310 return generic_iter
->key();
312 std::pair
<std::string
, std::string
> raw_key() override
{
313 return generic_iter
->raw_key();
315 ceph::buffer::list
value() override
{
316 return generic_iter
->value();
318 ceph::buffer::ptr
value_as_ptr() override
{
319 return generic_iter
->value_as_ptr();
321 int status() override
{
322 return generic_iter
->status();
326 typedef uint32_t IteratorOpts
;
327 static const uint32_t ITERATOR_NOCACHE
= 1;
329 struct IteratorBounds
{
330 std::optional
<std::string
> lower_bound
;
331 std::optional
<std::string
> upper_bound
;
334 virtual WholeSpaceIterator
get_wholespace_iterator(IteratorOpts opts
= 0) = 0;
335 virtual Iterator
get_iterator(const std::string
&prefix
, IteratorOpts opts
= 0, IteratorBounds bounds
= IteratorBounds()) {
336 return std::make_shared
<PrefixIteratorImpl
>(
338 get_wholespace_iterator(opts
));
341 virtual uint64_t get_estimated_size(std::map
<std::string
,uint64_t> &extra
) = 0;
342 virtual int get_statfs(struct store_statfs_t
*buf
) {
346 virtual int set_cache_size(uint64_t) {
350 virtual int set_cache_high_pri_pool_ratio(double ratio
) {
354 virtual int64_t get_cache_usage() const {
358 virtual int64_t get_cache_usage(std::string prefix
) const {
362 virtual std::shared_ptr
<PriorityCache::PriCache
> get_priority_cache() const {
366 virtual std::shared_ptr
<PriorityCache::PriCache
> get_priority_cache(std::string prefix
) const {
372 virtual ~KeyValueDB() {}
374 /// estimate space utilization for a prefix (in bytes)
375 virtual int64_t estimate_prefix_size(const std::string
& prefix
,
376 const std::string
& key_prefix
) {
380 /// Compact the underlying store.
/// The default implementation is a no-op.
381 virtual void compact() {}
383 /// Compact the underlying store in async mode.
/// The default implementation is a no-op.
384 virtual void compact_async() {}
386 /// Compact the db for all keys with a given prefix.
/// The default implementation is a no-op.
387 virtual void compact_prefix(const std::string
& prefix
) {}
388 /// Compact the db for all keys with a given prefix, async.
/// The default implementation is a no-op.
389 virtual void compact_prefix_async(const std::string
& prefix
) {}
/// Presumably compacts the keys between `start` and `end` under `prefix`
/// (NOTE(review): bound inclusivity is not visible here -- confirm against the backend).
/// The default implementation is a no-op.
390 virtual void compact_range(const std::string
& prefix
,
391 const std::string
& start
, const std::string
& end
) {}
/// Presumably the async counterpart of compact_range for the keys between
/// `start` and `end` under `prefix` (NOTE(review): confirm against the backend).
/// The default implementation is a no-op.
392 virtual void compact_range_async(const std::string
& prefix
,
393 const std::string
& start
, const std::string
& end
) {}
395 // See RocksDB merge operator definition, we support the basic
396 // associative merge only right now.
397 class MergeOperator
{
399 /// Merge into a key that doesn't exist
400 virtual void merge_nonexistent(
401 const char *rdata
, size_t rlen
,
402 std::string
*new_value
) = 0;
403 /// Merge into a key that does exist
405 const char *ldata
, size_t llen
,
406 const char *rdata
, size_t rlen
,
407 std::string
*new_value
) = 0;
408 /// We use each operator name and each prefix to construct the overall RocksDB operator name for consistency check at open time.
409 virtual const char *name() const = 0;
411 virtual ~MergeOperator() {}
414 /// Setup one or more operators, this needs to be done BEFORE the DB is opened.
415 virtual int set_merge_operator(const std::string
& prefix
,
416 std::shared_ptr
<MergeOperator
> mop
) {
420 virtual void get_statistics(ceph::Formatter
*f
) {
425 * Return your perf counters if you have any. Subclasses are not
426 * required to implement this, and callers must respect a null return
429 virtual PerfCounters
*get_perf_counters() {
434 * Access an implementation-specific integral property corresponding
435 * to the passed property and prefix.
436 * Returns true if the property is valid for the prefix, and populates out.
438 virtual bool get_property(
439 const std::string
&property
,
444 /// List of matching prefixes/ColumnFamilies and merge operators
445 std::vector
<std::pair
<std::string
,
446 std::shared_ptr
<MergeOperator
> > > merge_ops
;