1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 #ifndef DBOBJECTMAP_DB_H
3 #define DBOBJECTMAP_DB_H
5 #include "include/buffer_fwd.h"
11 #include "include/memory.h"
12 #include <boost/scoped_ptr.hpp>
14 #include "os/ObjectMap.h"
15 #include "kv/KeyValueDB.h"
16 #include "osd/osd_types.h"
17 #include "common/Mutex.h"
18 #include "common/Cond.h"
19 #include "common/simple_cache.hpp"
20 #include <boost/optional/optional_io.hpp>
22 #include "SequencerPosition.h"
25 * DBObjectMap: Implements ObjectMap in terms of KeyValueDB
27 * Prefix space structure:
29 * @see complete_prefix
33 * - HOBJECT_TO_SEQ: Contains leaf mapping from ghobject_t->header.seq and
34 * corresponding omap header
35 * - SYS_PREFIX: GLOBAL_STATE_KEY - contains next seq number
39 * @see generate_new_header
40 * - USER_PREFIX + header_key(header->seq) + USER_PREFIX
41 * : key->value for header->seq
42 * - USER_PREFIX + header_key(header->seq) + COMPLETE_PREFIX: see below
43 * - USER_PREFIX + header_key(header->seq) + XATTR_PREFIX: xattrs
44 * - USER_PREFIX + header_key(header->seq) + SYS_PREFIX
45 * : USER_HEADER_KEY - omap header for header->seq
46 * : HEADER_KEY - encoding of header for header->seq
48 * For each node (represented by a header), we
49 * store three mappings: the key mapping, the complete mapping, and the parent.
50 * The complete mapping (COMPLETE_PREFIX space) is key->key. Each x->y entry in
51 * this mapping indicates that the key mapping contains all entries on [x,y).
52 * Note, max string is represented by "", so ""->"" indicates that the parent
53 * is unnecessary (@see rm_keys). When looking up a key not contained in the
54 * complete set, we have to check the parent if we don't find it in the
55 * key set. During rm_keys, we copy keys from the parent and update the
56 * complete set to reflect the change @see rm_keys.
58 class DBObjectMap
: public ObjectMap
{
60 boost::scoped_ptr
<KeyValueDB
> db
;
63 * Serializes access to next_seq as well as the in_use set
70 * Set of headers currently in use
73 set
<ghobject_t
> map_header_in_use
;
76 * Takes the map_header_in_use entry in constructor, releases in
81 boost::optional
<ghobject_t
> locked
;
// Copy construction and copy assignment are only declared here; no inline
// definition is provided.
// NOTE(review): presumably left unimplemented on purpose so that a
// MapHeaderLock (which tracks an entry in map_header_in_use) cannot be
// copied — confirm against the .cc file.
83 MapHeaderLock(const MapHeaderLock
&);
84 MapHeaderLock
&operator=(const MapHeaderLock
&);
86 explicit MapHeaderLock(DBObjectMap
*db
) : db(db
) {}
// Claim oid in db->map_header_in_use, waiting until no other holder has it.
// @param db   owning DBObjectMap whose header_lock / map_header_cond are used
// @param oid  object to mark as in use; remembered in `locked`
87 MapHeaderLock(DBObjectMap
*db
, const ghobject_t
&oid
) : db(db
), locked(oid
) {
// Take db->header_lock before inspecting or modifying map_header_in_use.
88 Mutex::Locker
l(db
->header_lock
);
// While oid is already present in the in-use set, wait on map_header_cond
// (the Wait call is passed header_lock).
89 while (db
->map_header_in_use
.count(*locked
))
90 db
->map_header_cond
.Wait(db
->header_lock
);
// oid is no longer in the set: record it as in use by this lock object.
91 db
->map_header_in_use
.insert(*locked
);
94 const ghobject_t
&get_locked() const {
99 void swap(MapHeaderLock
&o
) {
102 // centos6's boost optional doesn't seem to have swap :(
103 boost::optional
<ghobject_t
> _locked
= o
.locked
;
110 Mutex::Locker
l(db
->header_lock
);
111 assert(db
->map_header_in_use
.count(*locked
));
112 db
->map_header_cond
.Signal();
113 db
->map_header_in_use
.erase(*locked
);
// Construct a DBObjectMap on top of an existing KeyValueDB.
// @param cct  context forwarded to the ObjectMap base; its _conf also
//             supplies filestore_omap_header_cache_size for `caches`
// @param db   backing store; stored in the boost::scoped_ptr member `db`,
//             so this object takes ownership of the pointer
118 DBObjectMap(CephContext
* cct
, KeyValueDB
*db
)
// header_lock and cache_lock are constructed with the string names below.
119 : ObjectMap(cct
), db(db
), header_lock("DBOBjectMap"),
120 cache_lock("DBObjectMap::CacheLock"),
// The header cache is sized from the filestore_omap_header_cache_size option.
121 caches(cct
->_conf
->filestore_omap_header_cache_size
)
125 const ghobject_t
&oid
,
126 const map
<string
, bufferlist
> &set
,
127 const SequencerPosition
*spos
=0
131 const ghobject_t
&oid
,
132 const bufferlist
&bl
,
133 const SequencerPosition
*spos
=0
137 const ghobject_t
&oid
,
142 const ghobject_t
&oid
,
143 const SequencerPosition
*spos
=0
146 int clear_keys_header(
147 const ghobject_t
&oid
,
148 const SequencerPosition
*spos
=0
152 const ghobject_t
&oid
,
153 const set
<string
> &to_clear
,
154 const SequencerPosition
*spos
=0
158 const ghobject_t
&oid
,
160 map
<string
, bufferlist
> *out
164 const ghobject_t
&oid
,
169 const ghobject_t
&oid
,
170 const set
<string
> &keys
,
171 map
<string
, bufferlist
> *out
175 const ghobject_t
&oid
,
176 const set
<string
> &keys
,
181 const ghobject_t
&oid
,
182 const set
<string
> &to_get
,
183 map
<string
, bufferlist
> *out
187 const ghobject_t
&oid
,
192 const ghobject_t
&oid
,
193 const map
<string
, bufferlist
> &to_set
,
194 const SequencerPosition
*spos
=0
198 const ghobject_t
&oid
,
199 const set
<string
> &to_remove
,
200 const SequencerPosition
*spos
=0
204 const ghobject_t
&oid
,
205 const ghobject_t
&target
,
206 const SequencerPosition
*spos
=0
210 const ghobject_t
&from
,
211 const ghobject_t
&to
,
212 const SequencerPosition
*spos
=0
216 const ghobject_t
&oid
,
217 const ghobject_t
&target
,
218 const SequencerPosition
*spos
=0
221 /// Read initial state from backing store
222 int init(bool upgrade
= false);
224 /// Upgrade store to current version
227 /// Consistency check, debug, there must be no parallel writes
228 int check(std::ostream
&out
, bool repair
= false) override
;
230 /// Ensure that all previous operations are durable
231 int sync(const ghobject_t
*oid
=0, const SequencerPosition
*spos
=0) override
;
233 void compact() override
{
238 /// Util, get all objects, there must be no other concurrent access
239 int list_objects(vector
<ghobject_t
> *objs
///< [out] objects
243 /// Util, get all object headers, there must be no other concurrent access
244 int list_object_headers(vector
<_Header
> *out
///< [out] headers
247 ObjectMapIterator
get_iterator(const ghobject_t
&oid
) override
;
249 static const string USER_PREFIX
;
250 static const string XATTR_PREFIX
;
251 static const string SYS_PREFIX
;
252 static const string COMPLETE_PREFIX
;
253 static const string HEADER_KEY
;
254 static const string USER_HEADER_KEY
;
255 static const string GLOBAL_STATE_KEY
;
256 static const string HOBJECT_TO_SEQ
;
259 static const string LEAF_PREFIX
;
260 static const string REVERSE_LEAF_PREFIX
;
262 /// persistent state for store @see generate_new_header
266 State() : v(0), seq(1) {}
267 explicit State(uint64_t seq
) : v(0), seq(seq
) {}
269 void encode(bufferlist
&bl
) const {
270 ENCODE_START(2, 1, bl
);
276 void decode(bufferlist::iterator
&bl
) {
286 void dump(Formatter
*f
) const {
287 f
->dump_unsigned("seq", seq
);
290 static void generate_test_instances(list
<State
*> &o
) {
291 o
.push_back(new State(0));
292 o
.push_back(new State(20));
299 uint64_t num_children
;
303 SequencerPosition spos
;
305 void encode(bufferlist
&bl
) const {
307 ENCODE_START(2, 1, bl
);
309 ::encode(parent
, bl
);
310 ::encode(num_children
, bl
);
311 ::encode(unused
, bl
);
317 void decode(bufferlist::iterator
&bl
) {
321 ::decode(parent
, bl
);
322 ::decode(num_children
, bl
);
323 ::decode(unused
, bl
);
330 void dump(Formatter
*f
) const {
331 f
->dump_unsigned("seq", seq
);
332 f
->dump_unsigned("parent", parent
);
333 f
->dump_unsigned("num_children", num_children
);
334 f
->dump_stream("oid") << oid
;
337 static void generate_test_instances(list
<_Header
*> &o
) {
338 o
.push_back(new _Header
);
339 o
.push_back(new _Header
);
340 o
.back()->parent
= 20;
344 _Header() : seq(0), parent(0), num_children(1) {}
347 /// String munging (public for testing)
348 static string
ghobject_key(const ghobject_t
&oid
);
349 static string
ghobject_key_v0(coll_t c
, const ghobject_t
&oid
);
350 static int is_buggy_ghobject_key_v1(CephContext
* cct
,
353 /// Implicit lock on Header->seq
354 typedef ceph::shared_ptr
<_Header
> Header
;
356 SimpleLRU
<ghobject_t
, _Header
> caches
;
358 string
map_header_key(const ghobject_t
&oid
);
359 string
header_key(uint64_t seq
);
360 string
complete_prefix(Header header
);
361 string
user_prefix(Header header
);
362 string
sys_prefix(Header header
);
363 string
xattr_prefix(Header header
);
364 string
sys_parent_prefix(_Header header
);
// Convenience overload: dereferences the shared Header and forwards to the
// sys_parent_prefix(_Header) overload. The pointer is dereferenced without a
// check, so header must be non-null.
365 string
sys_parent_prefix(Header header
) {
366 return sys_parent_prefix(*header
);
// Iterator over nothing: positioning calls succeed with 0, valid() is always
// false, and any attempt to advance or read an entry aborts.
369 class EmptyIteratorImpl
: public ObjectMapIteratorImpl
{
// Positioning is a no-op that reports success (0).
371 int seek_to_first() override
{ return 0; }
// NOTE(review): unlike its siblings this one is not marked override —
// presumably the base class does not declare seek_to_last; confirm before
// adding the specifier.
372 int seek_to_last() { return 0; }
373 int upper_bound(const string
&after
) override
{ return 0; }
374 int lower_bound(const string
&to
) override
{ return 0; }
// There is never a current entry.
375 bool valid() override
{ return false; }
// Since valid() is always false, the three accessors below call
// ceph_abort(); the return statements after it only satisfy the signature.
376 int next(bool validate
=true) override
{ ceph_abort(); return 0; }
377 string
key() override
{ ceph_abort(); return ""; }
378 bufferlist
value() override
{ ceph_abort(); return bufferlist(); }
// Always reports 0.
379 int status() override
{ return 0; }
384 class DBObjectMapIteratorImpl
: public ObjectMapIteratorImpl
{
388 /// NOTE: implicit lock hlock->get_locked() when returned out of the class
390 /// NOTE: implicit lock on header->seq AND for all ancestors
393 /// parent_iter == NULL iff no parent
394 ceph::shared_ptr
<DBObjectMapIteratorImpl
> parent_iter
;
395 KeyValueDB::Iterator key_iter
;
396 KeyValueDB::Iterator complete_iter
;
398 /// cur_iter points to currently valid iterator
399 ceph::shared_ptr
<ObjectMapIteratorImpl
> cur_iter
;
402 /// init() called, key_iter, complete_iter, parent_iter filled in
407 DBObjectMapIteratorImpl(DBObjectMap
*map
, Header header
) :
408 map(map
), hlock(map
), header(header
), r(0), ready(false), invalid(true) {}
409 int seek_to_first() override
;
411 int upper_bound(const string
&after
) override
;
412 int lower_bound(const string
&to
) override
;
413 bool valid() override
;
414 int next(bool validate
=true) override
;
415 string
key() override
;
416 bufferlist
value() override
;
417 int status() override
;
420 return cur_iter
== parent_iter
;
423 /// skips to next valid parent entry
426 /// first parent() >= to
427 int lower_bound_parent(const string
&to
);
430 * Tests whether to_test is in complete region
432 * postcondition: complete_iter will be max s.t. complete_iter->value > to_test
434 int in_complete_region(const string
&to_test
, ///< [in] key to test
435 string
*begin
, ///< [out] beginning of region
436 string
*end
///< [out] end of region
437 ); ///< @returns true if to_test is in the complete region, else false
445 typedef ceph::shared_ptr
<DBObjectMapIteratorImpl
> DBObjectMapIterator
;
// Create a new DBObjectMapIteratorImpl for this map and the given header,
// returned as a shared pointer (DBObjectMapIterator).
// The result of std::make_shared is returned through the ceph::shared_ptr
// typedef. NOTE(review): this relies on ceph::shared_ptr being (convertible
// from) std::shared_ptr — confirm in include/memory.h.
446 DBObjectMapIterator
_get_iterator(Header header
) {
447 return std::make_shared
<DBObjectMapIteratorImpl
>(this, header
);
452 /// Removes node corresponding to header
453 void clear_header(Header header
, KeyValueDB::Transaction t
);
455 /// Set node containing input to new contents
456 void set_header(Header input
, KeyValueDB::Transaction t
);
458 /// Remove leaf node corresponding to oid in c
459 void remove_map_header(
460 const MapHeaderLock
&l
,
461 const ghobject_t
&oid
,
463 KeyValueDB::Transaction t
);
465 /// Set leaf node for c and oid to the value of header
467 const MapHeaderLock
&l
,
468 const ghobject_t
&oid
, _Header header
,
469 KeyValueDB::Transaction t
);
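471 /// Check the sequencer position spos for oid (NOTE(review): the previous text duplicated set_map_header's comment; confirm the exact meaning of the bool result)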
472 bool check_spos(const ghobject_t
&oid
,
474 const SequencerPosition
*spos
);
476 /// Lookup or create header for c oid
477 Header
lookup_create_map_header(
478 const MapHeaderLock
&l
,
479 const ghobject_t
&oid
,
480 KeyValueDB::Transaction t
);
483 * Generate new header for c oid with new seq number
485 * Has the side effect of synchronously saving the new DBObjectMap state
487 Header
_generate_new_header(const ghobject_t
&oid
, Header parent
);
// Locking wrapper: takes header_lock and then delegates to
// _generate_new_header(oid, parent), returning its result unchanged.
// Callers that already hold header_lock would call the underscore variant
// directly (NOTE(review): inferred from the naming split — confirm).
488 Header
generate_new_header(const ghobject_t
&oid
, Header parent
) {
489 Mutex::Locker
l(header_lock
);
490 return _generate_new_header(oid
, parent
);
493 /// Lookup leaf header for c oid
494 Header
_lookup_map_header(
495 const MapHeaderLock
&l
,
496 const ghobject_t
&oid
);
// Locking wrapper: takes header_lock and then delegates to
// _lookup_map_header(l2, oid), returning its result unchanged.
// The MapHeaderLock parameter is named l2 because the local Mutex::Locker
// below is named l.
497 Header
lookup_map_header(
498 const MapHeaderLock
&l2
,
499 const ghobject_t
&oid
) {
500 Mutex::Locker
l(header_lock
);
501 return _lookup_map_header(l2
, oid
);
504 /// Lookup header node for input
505 Header
lookup_parent(Header input
);
509 int _get_header(Header header
, bufferlist
*bl
);
511 /// Scan keys in header into out_keys and out_values (if nonnull)
512 int scan(Header header
,
513 const set
<string
> &in_keys
,
514 set
<string
> *out_keys
,
515 map
<string
, bufferlist
> *out_values
);
517 /// Remove header and all related prefixes
518 int _clear(Header header
,
519 KeyValueDB::Transaction t
);
521 /* Scan complete region bumping *begin to the beginning of any
522 * containing region and adding all complete region keys between
523 * the updated begin and end to the complete_keys_to_remove set */
524 int merge_new_complete(DBObjectMapIterator
&iter
,
527 set
<string
> *complete_keys_to_remove
);
529 /// Writes out State (mainly next_seq)
530 int write_state(KeyValueDB::Transaction _t
=
531 KeyValueDB::Transaction());
533 /// Copies header entry from parent @see rm_keys
534 int copy_up_header(Header header
,
535 KeyValueDB::Transaction t
);
537 /// Sets header @see set_header
538 void _set_header(Header header
, const bufferlist
&bl
,
539 KeyValueDB::Transaction t
);
542 * Removes header seq lock and possibly object lock
543 * once Header is out of scope
545 * @see generate_new_header
547 class RemoveOnDelete
{
550 explicit RemoveOnDelete(DBObjectMap
*db
) :
// Deleter call operator: releases the in_use entry for header->seq.
// Only the statements visible here are described; any lines after the
// Signal() call are not shown in this view.
552 void operator() (_Header
*header
) {
// Take db->header_lock before touching in_use.
553 Mutex::Locker
l(db
->header_lock
);
// The seq must currently be registered as in use.
554 assert(db
->in_use
.count(header
->seq
));
555 db
->in_use
.erase(header
->seq
);
// Signal header_cond after removing the entry from in_use.
556 db
->header_cond
.Signal();
560 friend class RemoveOnDelete
;
562 WRITE_CLASS_ENCODER(DBObjectMap::_Header
)
563 WRITE_CLASS_ENCODER(DBObjectMap::State
)
565 ostream
& operator<<(ostream
& out
, const DBObjectMap::_Header
& h
);