]>
git.proxmox.com Git - ceph.git/blob - ceph/src/kv/MemDB.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * In-memory, non-crash-safe key/value db
5 * Author: Ramesh Chander, Ramesh.Chander@sandisk.com
8 #include "include/compat.h"
15 #include <sys/types.h>
18 #include "common/perf_counters.h"
19 #include "common/debug.h"
20 #include "include/str_list.h"
21 #include "include/str_map.h"
22 #include "KeyValueDB.h"
25 #include "include/assert.h"
26 #include "common/debug.h"
27 #include "common/errno.h"
28 #include "include/compat.h"
30 #define dout_context g_ceph_context
31 #define dout_subsys ceph_subsys_memdb
33 #define dout_prefix *_dout << "memdb: "
34 #define dtrace dout(30)
38 static void split_key(const string
& raw_key
, string
*prefix
, string
*key
)
40 size_t pos
= raw_key
.find(KEY_DELIM
, 0);
41 assert(pos
!= std::string::npos
);
42 *prefix
= raw_key
.substr(0, pos
);
43 *key
= raw_key
.substr(pos
+ 1, raw_key
.length());
46 static string
make_key(const string
&prefix
, const string
&value
)
49 out
.push_back(KEY_DELIM
);
54 void MemDB::_encode(mdb_iter_t iter
, bufferlist
&bl
)
56 ::encode(iter
->first
, bl
);
57 ::encode(iter
->second
, bl
);
60 std::string
MemDB::_get_data_fn()
62 string fn
= m_db_path
+ "/" + "MemDB.db";
68 std::lock_guard
<std::mutex
> l(m_lock
);
69 dout(10) << __func__
<< " Saving MemDB to file: "<< _get_data_fn().c_str() << dendl
;
71 int fd
= TEMP_FAILURE_RETRY(::open(_get_data_fn().c_str(),
72 O_WRONLY
|O_CREAT
|O_TRUNC
, mode
));
75 cerr
<< "write_file(" << _get_data_fn().c_str() << "): failed to open file: "
76 << cpp_strerror(err
) << std::endl
;
80 mdb_iter_t iter
= m_map
.begin();
81 while (iter
!= m_map
.end()) {
82 dout(10) << __func__
<< " Key:"<< iter
->first
<< dendl
;
88 VOID_TEMP_FAILURE_RETRY(::close(fd
));
93 std::lock_guard
<std::mutex
> l(m_lock
);
94 dout(10) << __func__
<< " Reading MemDB from file: "<< _get_data_fn().c_str() << dendl
;
96 * Open file and read it in single shot.
98 int fd
= TEMP_FAILURE_RETRY(::open(_get_data_fn().c_str(), O_RDONLY
));
101 cerr
<< "can't open " << _get_data_fn().c_str() << ": "
102 << cpp_strerror(err
) << std::endl
;
107 memset(&st
, 0, sizeof(st
));
108 if (::fstat(fd
, &st
) < 0) {
110 cerr
<< "can't stat file " << _get_data_fn().c_str() << ": "
111 << cpp_strerror(err
) << std::endl
;
112 VOID_TEMP_FAILURE_RETRY(::close(fd
));
116 ssize_t file_size
= st
.st_size
;
117 ssize_t bytes_done
= 0;
118 while (bytes_done
< file_size
) {
122 bytes_done
+= ::decode_file(fd
, key
);
123 bytes_done
+= ::decode_file(fd
, datap
);
125 dout(10) << __func__
<< " Key:"<< key
<< dendl
;
127 m_total_bytes
+= datap
.length();
129 VOID_TEMP_FAILURE_RETRY(::close(fd
));
133 int MemDB::_init(bool create
)
136 dout(1) << __func__
<< dendl
;
138 r
= ::mkdir(m_db_path
.c_str(), 0700);
142 derr
<< __func__
<< " mkdir failed: " << cpp_strerror(r
) << dendl
;
145 return 0; // ignore EEXIST
154 int MemDB::set_merge_operator(
155 const string
& prefix
,
156 std::shared_ptr
<KeyValueDB::MergeOperator
> mop
)
158 merge_ops
.push_back(std::make_pair(prefix
, mop
));
162 int MemDB::do_open(ostream
&out
, bool create
)
165 m_allocated_bytes
= 1;
167 return _init(create
);
173 dout(10) << __func__
<< " Destroying MemDB instance: "<< dendl
;
179 * Save whatever in memory btree.
184 int MemDB::submit_transaction(KeyValueDB::Transaction t
)
186 MDBTransactionImpl
* mt
= static_cast<MDBTransactionImpl
*>(t
.get());
188 dtrace
<< __func__
<< " " << mt
->get_ops().size() << dendl
;
189 for(auto& op
: mt
->get_ops()) {
190 if(op
.first
== MDBTransactionImpl::WRITE
) {
191 ms_op_t set_op
= op
.second
;
193 } else if (op
.first
== MDBTransactionImpl::MERGE
) {
194 ms_op_t merge_op
= op
.second
;
197 ms_op_t rm_op
= op
.second
;
198 assert(op
.first
== MDBTransactionImpl::DELETE
);
206 int MemDB::submit_transaction_sync(KeyValueDB::Transaction tsync
)
208 dtrace
<< __func__
<< " " << dendl
;
209 submit_transaction(tsync
);
213 int MemDB::transaction_rollback(KeyValueDB::Transaction t
)
215 MDBTransactionImpl
* mt
= static_cast<MDBTransactionImpl
*>(t
.get());
220 void MemDB::MDBTransactionImpl::set(
221 const string
&prefix
, const string
&k
, const bufferlist
&to_set_bl
)
223 dtrace
<< __func__
<< " " << prefix
<< " " << k
<< dendl
;
224 ops
.push_back(make_pair(WRITE
, std::make_pair(std::make_pair(prefix
, k
),
228 void MemDB::MDBTransactionImpl::rmkey(const string
&prefix
,
231 dtrace
<< __func__
<< " " << prefix
<< " " << k
<< dendl
;
232 ops
.push_back(make_pair(DELETE
,
233 std::make_pair(std::make_pair(prefix
, k
),
237 void MemDB::MDBTransactionImpl::rmkeys_by_prefix(const string
&prefix
)
239 KeyValueDB::Iterator it
= m_db
->get_iterator(prefix
);
240 for (it
->seek_to_first(); it
->valid(); it
->next()) {
241 rmkey(prefix
, it
->key());
245 void MemDB::MDBTransactionImpl::rm_range_keys(const string
&prefix
, const string
&start
, const string
&end
)
247 KeyValueDB::Iterator it
= m_db
->get_iterator(prefix
);
248 it
->lower_bound(start
);
249 while (it
->valid()) {
250 if (it
->key() >= end
) {
253 rmkey(prefix
, it
->key());
258 void MemDB::MDBTransactionImpl::merge(
259 const std::string
&prefix
, const std::string
&key
, const bufferlist
&value
)
262 dtrace
<< __func__
<< " " << prefix
<< " " << key
<< dendl
;
263 ops
.push_back(make_pair(MERGE
, make_pair(std::make_pair(prefix
, key
), value
)));
267 int MemDB::_setkey(ms_op_t
&op
)
269 std::lock_guard
<std::mutex
> l(m_lock
);
270 std::string key
= make_key(op
.first
.first
, op
.first
.second
);
271 bufferlist bl
= op
.second
;
273 m_total_bytes
+= bl
.length();
276 if (_get(op
.first
.first
, op
.first
.second
, &bl_old
)) {
278 * delete and free existing key.
280 assert(m_total_bytes
>= bl_old
.length());
281 m_total_bytes
-= bl_old
.length();
285 m_map
[key
] = bufferptr((char *) bl
.c_str(), bl
.length());
290 int MemDB::_rmkey(ms_op_t
&op
)
292 std::lock_guard
<std::mutex
> l(m_lock
);
293 std::string key
= make_key(op
.first
.first
, op
.first
.second
);
296 if (_get(op
.first
.first
, op
.first
.second
, &bl_old
)) {
297 assert(m_total_bytes
>= bl_old
.length());
298 m_total_bytes
-= bl_old
.length();
302 * Erase will call the destructor for bufferptr.
304 return m_map
.erase(key
);
307 std::shared_ptr
<KeyValueDB::MergeOperator
> MemDB::_find_merge_op(std::string prefix
)
309 for (const auto& i
: merge_ops
) {
310 if (i
.first
== prefix
) {
315 dtrace
<< __func__
<< " No merge op for " << prefix
<< dendl
;
320 int MemDB::_merge(ms_op_t
&op
)
322 std::lock_guard
<std::mutex
> l(m_lock
);
323 std::string prefix
= op
.first
.first
;
324 std::string key
= make_key(op
.first
.first
, op
.first
.second
);
325 bufferlist bl
= op
.second
;
326 int64_t bytes_adjusted
= bl
.length();
329 * find the operator for this prefix
331 std::shared_ptr
<MergeOperator
> mop
= _find_merge_op(prefix
);
335 * call the merge operator with value and non value
338 if (_get(op
.first
.first
, op
.first
.second
, &bl_old
) == false) {
341 * Merge non existent.
343 mop
->merge_nonexistent(bl
.c_str(), bl
.length(), &new_val
);
344 m_map
[key
] = bufferptr(new_val
.c_str(), new_val
.length());
350 mop
->merge(bl_old
.c_str(), bl_old
.length(), bl
.c_str(), bl
.length(), &new_val
);
351 m_map
[key
] = bufferptr(new_val
.c_str(), new_val
.length());
352 bytes_adjusted
-= bl_old
.length();
356 assert((int64_t)m_total_bytes
+ bytes_adjusted
>= 0);
357 m_total_bytes
+= bytes_adjusted
;
363 * Caller must take the btree lock.
365 bool MemDB::_get(const string
&prefix
, const string
&k
, bufferlist
*out
)
367 string key
= make_key(prefix
, k
);
369 mdb_iter_t iter
= m_map
.find(key
);
370 if (iter
== m_map
.end()) {
374 out
->push_back((m_map
[key
].clone()));
378 bool MemDB::_get_locked(const string
&prefix
, const string
&k
, bufferlist
*out
)
380 std::lock_guard
<std::mutex
> l(m_lock
);
381 return _get(prefix
, k
, out
);
385 int MemDB::get(const string
&prefix
, const std::string
& key
,
388 if (_get_locked(prefix
, key
, out
)) {
394 int MemDB::get(const string
&prefix
, const std::set
<string
> &keys
,
395 std::map
<string
, bufferlist
> *out
)
397 for (const auto& i
: keys
) {
399 if (_get_locked(prefix
, i
, &bl
))
400 out
->insert(make_pair(i
, bl
));
406 void MemDB::MDBWholeSpaceIteratorImpl::fill_current()
409 bl
.append(m_iter
->second
.clone());
410 m_key_value
= std::make_pair(m_iter
->first
, bl
);
413 bool MemDB::MDBWholeSpaceIteratorImpl::valid()
415 if (m_key_value
.first
.empty()) {
421 bool MemDB::MDBWholeSpaceIteratorImpl::iterator_validate() {
423 if (this_seq_no
!= *global_seq_no
) {
424 auto key
= m_key_value
.first
;
425 assert(!key
.empty());
427 bool restart_iter
= false;
428 if (!m_using_btree
) {
430 * Map is modified and marker key does not exists,
431 * restart the iterator from next key.
433 if (m_map_p
->find(key
) == m_map_p
->end()) {
441 m_iter
= m_map_p
->lower_bound(key
);
442 if (m_iter
== m_map_p
->end()) {
448 * This iter is valid now.
450 this_seq_no
= *global_seq_no
;
457 MemDB::MDBWholeSpaceIteratorImpl::free_last()
459 m_key_value
.first
.clear();
460 m_key_value
.second
.clear();
463 string
MemDB::MDBWholeSpaceIteratorImpl::key()
465 dtrace
<< __func__
<< " " << m_key_value
.first
<< dendl
;
467 split_key(m_key_value
.first
, &prefix
, &key
);
471 pair
<string
,string
> MemDB::MDBWholeSpaceIteratorImpl::raw_key()
474 split_key(m_key_value
.first
, &prefix
, &key
);
475 return make_pair(prefix
, key
);
478 bool MemDB::MDBWholeSpaceIteratorImpl::raw_key_is_prefixed(
479 const string
&prefix
)
482 split_key(m_key_value
.first
, &p
, &k
);
483 return (p
== prefix
);
486 bufferlist
MemDB::MDBWholeSpaceIteratorImpl::value()
488 dtrace
<< __func__
<< " " << m_key_value
<< dendl
;
489 return m_key_value
.second
;
492 int MemDB::MDBWholeSpaceIteratorImpl::next()
494 std::lock_guard
<std::mutex
> l(*m_map_lock_p
);
495 if (!iterator_validate()) {
501 if (m_iter
!= m_map_p
->end()) {
509 int MemDB::MDBWholeSpaceIteratorImpl:: prev()
511 std::lock_guard
<std::mutex
> l(*m_map_lock_p
);
512 if (!iterator_validate()) {
517 if (m_iter
!= m_map_p
->begin()) {
527 * First key >= the given key; if the key is null, the first key in the btree.
529 int MemDB::MDBWholeSpaceIteratorImpl::seek_to_first(const std::string
&k
)
531 std::lock_guard
<std::mutex
> l(*m_map_lock_p
);
534 m_iter
= m_map_p
->begin();
536 m_iter
= m_map_p
->lower_bound(k
);
539 if (m_iter
== m_map_p
->end()) {
546 int MemDB::MDBWholeSpaceIteratorImpl::seek_to_last(const std::string
&k
)
548 std::lock_guard
<std::mutex
> l(*m_map_lock_p
);
551 m_iter
= m_map_p
->end();
554 m_iter
= m_map_p
->lower_bound(k
);
557 if (m_iter
== m_map_p
->end()) {
564 MemDB::MDBWholeSpaceIteratorImpl::~MDBWholeSpaceIteratorImpl()
569 int MemDB::MDBWholeSpaceIteratorImpl::upper_bound(const std::string
&prefix
,
570 const std::string
&after
) {
572 std::lock_guard
<std::mutex
> l(*m_map_lock_p
);
574 dtrace
<< "upper_bound " << prefix
.c_str() << after
.c_str() << dendl
;
575 string k
= make_key(prefix
, after
);
576 m_iter
= m_map_p
->upper_bound(k
);
577 if (m_iter
!= m_map_p
->end()) {
584 int MemDB::MDBWholeSpaceIteratorImpl::lower_bound(const std::string
&prefix
,
585 const std::string
&to
) {
586 std::lock_guard
<std::mutex
> l(*m_map_lock_p
);
587 dtrace
<< "lower_bound " << prefix
.c_str() << to
.c_str() << dendl
;
588 string k
= make_key(prefix
, to
);
589 m_iter
= m_map_p
->lower_bound(k
);
590 if (m_iter
!= m_map_p
->end()) {