]> git.proxmox.com Git - ceph.git/blame - ceph/src/kv/MemDB.cc
update sources to 12.2.10
[ceph.git] / ceph / src / kv / MemDB.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * In-memory crash non-safe keyvalue db
5 * Author: Ramesh Chander, Ramesh.Chander@sandisk.com
6 */
7
8#include "include/compat.h"
9#include <set>
10#include <map>
11#include <string>
12#include <memory>
13#include <errno.h>
14#include <unistd.h>
15#include <sys/types.h>
16#include <sys/stat.h>
17
18#include "common/perf_counters.h"
19#include "common/debug.h"
20#include "include/str_list.h"
21#include "include/str_map.h"
22#include "KeyValueDB.h"
23#include "MemDB.h"
24
25#include "include/assert.h"
26#include "common/debug.h"
27#include "common/errno.h"
28#include "include/compat.h"
29
30#define dout_context g_ceph_context
31#define dout_subsys ceph_subsys_memdb
32#undef dout_prefix
33#define dout_prefix *_dout << "memdb: "
34#define dtrace dout(30)
35#define dwarn dout(0)
36#define dinfo dout(0)
37
38static void split_key(const string& raw_key, string *prefix, string *key)
39{
40 size_t pos = raw_key.find(KEY_DELIM, 0);
41 assert(pos != std::string::npos);
42 *prefix = raw_key.substr(0, pos);
43 *key = raw_key.substr(pos + 1, raw_key.length());
44}
45
46static string make_key(const string &prefix, const string &value)
47{
48 string out = prefix;
49 out.push_back(KEY_DELIM);
50 out.append(value);
51 return out;
52}
53
54void MemDB::_encode(mdb_iter_t iter, bufferlist &bl)
55{
56 ::encode(iter->first, bl);
57 ::encode(iter->second, bl);
58}
59
60std::string MemDB::_get_data_fn()
61{
62 string fn = m_db_path + "/" + "MemDB.db";
63 return fn;
64}
65
66void MemDB::_save()
67{
68 std::lock_guard<std::mutex> l(m_lock);
69 dout(10) << __func__ << " Saving MemDB to file: "<< _get_data_fn().c_str() << dendl;
70 int mode = 0644;
71 int fd = TEMP_FAILURE_RETRY(::open(_get_data_fn().c_str(),
91327a77 72 O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode));
7c673cae
FG
73 if (fd < 0) {
74 int err = errno;
75 cerr << "write_file(" << _get_data_fn().c_str() << "): failed to open file: "
76 << cpp_strerror(err) << std::endl;
77 return;
78 }
79 bufferlist bl;
80 mdb_iter_t iter = m_map.begin();
81 while (iter != m_map.end()) {
82 dout(10) << __func__ << " Key:"<< iter->first << dendl;
83 _encode(iter, bl);
84 ++iter;
85 }
86 bl.write_fd(fd);
87
88 VOID_TEMP_FAILURE_RETRY(::close(fd));
89}
90
91int MemDB::_load()
92{
93 std::lock_guard<std::mutex> l(m_lock);
94 dout(10) << __func__ << " Reading MemDB from file: "<< _get_data_fn().c_str() << dendl;
95 /*
96 * Open file and read it in single shot.
97 */
91327a77 98 int fd = TEMP_FAILURE_RETRY(::open(_get_data_fn().c_str(), O_RDONLY|O_CLOEXEC));
7c673cae
FG
99 if (fd < 0) {
100 int err = errno;
101 cerr << "can't open " << _get_data_fn().c_str() << ": "
102 << cpp_strerror(err) << std::endl;
103 return -err;
104 }
105
106 struct stat st;
107 memset(&st, 0, sizeof(st));
108 if (::fstat(fd, &st) < 0) {
109 int err = errno;
110 cerr << "can't stat file " << _get_data_fn().c_str() << ": "
111 << cpp_strerror(err) << std::endl;
112 VOID_TEMP_FAILURE_RETRY(::close(fd));
113 return -err;
114 }
115
116 ssize_t file_size = st.st_size;
117 ssize_t bytes_done = 0;
118 while (bytes_done < file_size) {
119 string key;
120 bufferptr datap;
121
122 bytes_done += ::decode_file(fd, key);
123 bytes_done += ::decode_file(fd, datap);
124
125 dout(10) << __func__ << " Key:"<< key << dendl;
126 m_map[key] = datap;
127 m_total_bytes += datap.length();
128 }
129 VOID_TEMP_FAILURE_RETRY(::close(fd));
130 return 0;
131}
132
133int MemDB::_init(bool create)
134{
135 int r;
136 dout(1) << __func__ << dendl;
137 if (create) {
138 r = ::mkdir(m_db_path.c_str(), 0700);
139 if (r < 0) {
140 r = -errno;
141 if (r != -EEXIST) {
142 derr << __func__ << " mkdir failed: " << cpp_strerror(r) << dendl;
143 return r;
144 }
145 return 0; // ignore EEXIST
146 }
147 } else {
148 r = _load();
149 }
150
151 return r;
152}
153
154int MemDB::set_merge_operator(
155 const string& prefix,
156 std::shared_ptr<KeyValueDB::MergeOperator> mop)
157{
158 merge_ops.push_back(std::make_pair(prefix, mop));
159 return 0;
160}
161
162int MemDB::do_open(ostream &out, bool create)
163{
164 m_total_bytes = 0;
165 m_allocated_bytes = 1;
166
167 return _init(create);
168}
169
170MemDB::~MemDB()
171{
172 close();
173 dout(10) << __func__ << " Destroying MemDB instance: "<< dendl;
174}
175
176void MemDB::close()
177{
178 /*
179 * Save whatever in memory btree.
180 */
181 _save();
182}
183
184int MemDB::submit_transaction(KeyValueDB::Transaction t)
185{
186 MDBTransactionImpl* mt = static_cast<MDBTransactionImpl*>(t.get());
187
188 dtrace << __func__ << " " << mt->get_ops().size() << dendl;
189 for(auto& op : mt->get_ops()) {
190 if(op.first == MDBTransactionImpl::WRITE) {
191 ms_op_t set_op = op.second;
192 _setkey(set_op);
193 } else if (op.first == MDBTransactionImpl::MERGE) {
194 ms_op_t merge_op = op.second;
195 _merge(merge_op);
196 } else {
197 ms_op_t rm_op = op.second;
198 assert(op.first == MDBTransactionImpl::DELETE);
199 _rmkey(rm_op);
200 }
201 }
202
203 return 0;
204}
205
206int MemDB::submit_transaction_sync(KeyValueDB::Transaction tsync)
207{
208 dtrace << __func__ << " " << dendl;
209 submit_transaction(tsync);
210 return 0;
211}
212
213int MemDB::transaction_rollback(KeyValueDB::Transaction t)
214{
215 MDBTransactionImpl* mt = static_cast<MDBTransactionImpl*>(t.get());
216 mt->clear();
217 return 0;
218}
219
220void MemDB::MDBTransactionImpl::set(
221 const string &prefix, const string &k, const bufferlist &to_set_bl)
222{
223 dtrace << __func__ << " " << prefix << " " << k << dendl;
224 ops.push_back(make_pair(WRITE, std::make_pair(std::make_pair(prefix, k),
225 to_set_bl)));
226}
227
228void MemDB::MDBTransactionImpl::rmkey(const string &prefix,
229 const string &k)
230{
231 dtrace << __func__ << " " << prefix << " " << k << dendl;
232 ops.push_back(make_pair(DELETE,
233 std::make_pair(std::make_pair(prefix, k),
234 bufferlist())));
235}
236
237void MemDB::MDBTransactionImpl::rmkeys_by_prefix(const string &prefix)
238{
239 KeyValueDB::Iterator it = m_db->get_iterator(prefix);
240 for (it->seek_to_first(); it->valid(); it->next()) {
241 rmkey(prefix, it->key());
242 }
243}
244
245void MemDB::MDBTransactionImpl::rm_range_keys(const string &prefix, const string &start, const string &end)
246{
247 KeyValueDB::Iterator it = m_db->get_iterator(prefix);
248 it->lower_bound(start);
249 while (it->valid()) {
250 if (it->key() >= end) {
251 break;
252 }
253 rmkey(prefix, it->key());
254 it->next();
255 }
256}
257
258void MemDB::MDBTransactionImpl::merge(
259 const std::string &prefix, const std::string &key, const bufferlist &value)
260{
261
262 dtrace << __func__ << " " << prefix << " " << key << dendl;
263 ops.push_back(make_pair(MERGE, make_pair(std::make_pair(prefix, key), value)));
264 return;
265}
266
267int MemDB::_setkey(ms_op_t &op)
268{
269 std::lock_guard<std::mutex> l(m_lock);
270 std::string key = make_key(op.first.first, op.first.second);
271 bufferlist bl = op.second;
272
273 m_total_bytes += bl.length();
274
275 bufferlist bl_old;
276 if (_get(op.first.first, op.first.second, &bl_old)) {
277 /*
278 * delete and free existing key.
279 */
280 assert(m_total_bytes >= bl_old.length());
281 m_total_bytes -= bl_old.length();
282 m_map.erase(key);
283 }
284
285 m_map[key] = bufferptr((char *) bl.c_str(), bl.length());
286 iterator_seq_no++;
287 return 0;
288}
289
290int MemDB::_rmkey(ms_op_t &op)
291{
292 std::lock_guard<std::mutex> l(m_lock);
293 std::string key = make_key(op.first.first, op.first.second);
294
295 bufferlist bl_old;
296 if (_get(op.first.first, op.first.second, &bl_old)) {
297 assert(m_total_bytes >= bl_old.length());
298 m_total_bytes -= bl_old.length();
299 }
300 iterator_seq_no++;
301 /*
302 * Erase will call the destructor for bufferptr.
303 */
304 return m_map.erase(key);
305}
306
307std::shared_ptr<KeyValueDB::MergeOperator> MemDB::_find_merge_op(std::string prefix)
308{
309 for (const auto& i : merge_ops) {
310 if (i.first == prefix) {
311 return i.second;
312 }
313 }
314
315 dtrace << __func__ << " No merge op for " << prefix << dendl;
316 return NULL;
317}
318
319
320int MemDB::_merge(ms_op_t &op)
321{
322 std::lock_guard<std::mutex> l(m_lock);
323 std::string prefix = op.first.first;
324 std::string key = make_key(op.first.first, op.first.second);
325 bufferlist bl = op.second;
326 int64_t bytes_adjusted = bl.length();
327
328 /*
329 * find the operator for this prefix
330 */
331 std::shared_ptr<MergeOperator> mop = _find_merge_op(prefix);
332 assert(mop);
333
334 /*
335 * call the merge operator with value and non value
336 */
337 bufferlist bl_old;
338 if (_get(op.first.first, op.first.second, &bl_old) == false) {
339 std::string new_val;
340 /*
341 * Merge non existent.
342 */
343 mop->merge_nonexistent(bl.c_str(), bl.length(), &new_val);
344 m_map[key] = bufferptr(new_val.c_str(), new_val.length());
345 } else {
346 /*
347 * Merge existing.
348 */
349 std::string new_val;
350 mop->merge(bl_old.c_str(), bl_old.length(), bl.c_str(), bl.length(), &new_val);
351 m_map[key] = bufferptr(new_val.c_str(), new_val.length());
352 bytes_adjusted -= bl_old.length();
353 bl_old.clear();
354 }
355
356 assert((int64_t)m_total_bytes + bytes_adjusted >= 0);
357 m_total_bytes += bytes_adjusted;
358 iterator_seq_no++;
359 return 0;
360}
361
362/*
363 * Caller take btree lock.
364 */
365bool MemDB::_get(const string &prefix, const string &k, bufferlist *out)
366{
367 string key = make_key(prefix, k);
368
369 mdb_iter_t iter = m_map.find(key);
370 if (iter == m_map.end()) {
371 return false;
372 }
373
374 out->push_back((m_map[key].clone()));
375 return true;
376}
377
378bool MemDB::_get_locked(const string &prefix, const string &k, bufferlist *out)
379{
380 std::lock_guard<std::mutex> l(m_lock);
381 return _get(prefix, k, out);
382}
383
384
385int MemDB::get(const string &prefix, const std::string& key,
386 bufferlist *out)
387{
388 if (_get_locked(prefix, key, out)) {
389 return 0;
390 }
391 return -ENOENT;
392}
393
394int MemDB::get(const string &prefix, const std::set<string> &keys,
395 std::map<string, bufferlist> *out)
396{
397 for (const auto& i : keys) {
398 bufferlist bl;
399 if (_get_locked(prefix, i, &bl))
400 out->insert(make_pair(i, bl));
401 }
402
403 return 0;
404}
405
406void MemDB::MDBWholeSpaceIteratorImpl::fill_current()
407{
408 bufferlist bl;
409 bl.append(m_iter->second.clone());
410 m_key_value = std::make_pair(m_iter->first, bl);
411}
412
413bool MemDB::MDBWholeSpaceIteratorImpl::valid()
414{
415 if (m_key_value.first.empty()) {
416 return false;
417 }
418 return true;
419}
420
421bool MemDB::MDBWholeSpaceIteratorImpl::iterator_validate() {
422
423 if (this_seq_no != *global_seq_no) {
424 auto key = m_key_value.first;
425 assert(!key.empty());
426
427 bool restart_iter = false;
428 if (!m_using_btree) {
429 /*
430 * Map is modified and marker key does not exists,
431 * restart the iterator from next key.
432 */
433 if (m_map_p->find(key) == m_map_p->end()) {
434 restart_iter = true;
435 }
436 } else {
437 restart_iter = true;
438 }
439
440 if (restart_iter) {
441 m_iter = m_map_p->lower_bound(key);
442 if (m_iter == m_map_p->end()) {
443 return false;
444 }
445 }
446
447 /*
448 * This iter is valid now.
449 */
450 this_seq_no = *global_seq_no;
451 }
452
453 return true;
454}
455
456void
457MemDB::MDBWholeSpaceIteratorImpl::free_last()
458{
459 m_key_value.first.clear();
460 m_key_value.second.clear();
461}
462
463string MemDB::MDBWholeSpaceIteratorImpl::key()
464{
465 dtrace << __func__ << " " << m_key_value.first << dendl;
466 string prefix, key;
467 split_key(m_key_value.first, &prefix, &key);
468 return key;
469}
470
471pair<string,string> MemDB::MDBWholeSpaceIteratorImpl::raw_key()
472{
473 string prefix, key;
474 split_key(m_key_value.first, &prefix, &key);
475 return make_pair(prefix, key);
476}
477
478bool MemDB::MDBWholeSpaceIteratorImpl::raw_key_is_prefixed(
479 const string &prefix)
480{
481 string p, k;
482 split_key(m_key_value.first, &p, &k);
483 return (p == prefix);
484}
485
486bufferlist MemDB::MDBWholeSpaceIteratorImpl::value()
487{
488 dtrace << __func__ << " " << m_key_value << dendl;
489 return m_key_value.second;
490}
491
492int MemDB::MDBWholeSpaceIteratorImpl::next()
493{
494 std::lock_guard<std::mutex> l(*m_map_lock_p);
495 if (!iterator_validate()) {
496 free_last();
497 return -1;
498 }
499 free_last();
500 ++m_iter;
501 if (m_iter != m_map_p->end()) {
502 fill_current();
503 return 0;
504 } else {
505 return -1;
506 }
507}
508
509int MemDB::MDBWholeSpaceIteratorImpl:: prev()
510{
511 std::lock_guard<std::mutex> l(*m_map_lock_p);
512 if (!iterator_validate()) {
513 free_last();
514 return -1;
515 }
516 free_last();
517 if (m_iter != m_map_p->begin()) {
518 --m_iter;
519 fill_current();
520 return 0;
521 } else {
522 return -1;
523 }
524}
525
526/*
527 * First key >= to given key, if key is null then first key in btree.
528 */
529int MemDB::MDBWholeSpaceIteratorImpl::seek_to_first(const std::string &k)
530{
531 std::lock_guard<std::mutex> l(*m_map_lock_p);
532 free_last();
533 if (k.empty()) {
534 m_iter = m_map_p->begin();
535 } else {
536 m_iter = m_map_p->lower_bound(k);
537 }
538
539 if (m_iter == m_map_p->end()) {
540 return -1;
541 }
542 fill_current();
543 return 0;
544}
545
546int MemDB::MDBWholeSpaceIteratorImpl::seek_to_last(const std::string &k)
547{
548 std::lock_guard<std::mutex> l(*m_map_lock_p);
549 free_last();
550 if (k.empty()) {
551 m_iter = m_map_p->end();
552 --m_iter;
553 } else {
554 m_iter = m_map_p->lower_bound(k);
555 }
556
557 if (m_iter == m_map_p->end()) {
558 return -1;
559 }
560 fill_current();
561 return 0;
562}
563
564MemDB::MDBWholeSpaceIteratorImpl::~MDBWholeSpaceIteratorImpl()
565{
566 free_last();
567}
568
569int MemDB::MDBWholeSpaceIteratorImpl::upper_bound(const std::string &prefix,
570 const std::string &after) {
571
572 std::lock_guard<std::mutex> l(*m_map_lock_p);
573
574 dtrace << "upper_bound " << prefix.c_str() << after.c_str() << dendl;
575 string k = make_key(prefix, after);
576 m_iter = m_map_p->upper_bound(k);
577 if (m_iter != m_map_p->end()) {
578 fill_current();
579 return 0;
580 }
581 return -1;
582}
583
584int MemDB::MDBWholeSpaceIteratorImpl::lower_bound(const std::string &prefix,
585 const std::string &to) {
586 std::lock_guard<std::mutex> l(*m_map_lock_p);
587 dtrace << "lower_bound " << prefix.c_str() << to.c_str() << dendl;
588 string k = make_key(prefix, to);
589 m_iter = m_map_p->lower_bound(k);
590 if (m_iter != m_map_p->end()) {
591 fill_current();
592 return 0;
593 }
594 return -1;
595}