1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 #ifndef DBOBJECTMAP_DB_H
3 #define DBOBJECTMAP_DB_H
5 #include "include/buffer_fwd.h"
11 #include "include/memory.h"
12 #include <boost/scoped_ptr.hpp>
14 #include "os/ObjectMap.h"
15 #include "kv/KeyValueDB.h"
16 #include "osd/osd_types.h"
17 #include "common/Mutex.h"
18 #include "common/Cond.h"
19 #include "common/simple_cache.hpp"
20 #include <boost/optional/optional_io.hpp>
22 #include "SequencerPosition.h"
25 * DBObjectMap: Implements ObjectMap in terms of KeyValueDB
27 * Prefix space structure:
29 * @see complete_prefix
33 * - HOBJECT_TO_SEQ: Contains leaf mapping from ghobject_t->header.seq and
34 * corresponding omap header
35 * - SYS_PREFIX: GLOBAL_STATE_KEY - contains next seq number
39 * @see generate_new_header
40 * - USER_PREFIX + header_key(header->seq) + USER_PREFIX
41 * : key->value for header->seq
42 * - USER_PREFIX + header_key(header->seq) + COMPLETE_PREFIX: see below
43 * - USER_PREFIX + header_key(header->seq) + XATTR_PREFIX: xattrs
44 * - USER_PREFIX + header_key(header->seq) + SYS_PREFIX
45 * : USER_HEADER_KEY - omap header for header->seq
46 * : HEADER_KEY - encoding of header for header->seq
48 * For each node (represented by a header), we
49 * store three mappings: the key mapping, the complete mapping, and the parent.
50 * The complete mapping (COMPLETE_PREFIX space) is key->key. Each x->y entry in
51 * this mapping indicates that the key mapping contains all entries on [x,y).
52 * Note, max string is represented by "", so ""->"" indicates that the parent
53 * is unnecessary (@see rm_keys). When looking up a key not contained in the
54 * complete set, we have to check the parent if we don't find it in the
55 * key set. During rm_keys, we copy keys from the parent and update the
56 * complete set to reflect the change @see rm_keys.
58 class DBObjectMap
: public ObjectMap
{
60 boost::scoped_ptr
<KeyValueDB
> db
;
63 * Serializes access to next_seq as well as the in_use set
70 * Set of headers currently in use
73 set
<ghobject_t
> map_header_in_use
;
76 * Takes the map_header_in_use entry in constructor, releases in
81 boost::optional
<ghobject_t
> locked
;
// Copy constructor and copy assignment are only declared here; no body is
// visible. NOTE(review): presumably declared-but-undefined to forbid copying
// of the lock holder -- confirm the access specifier and the .cc file before
// converting these to `= delete`.
83 MapHeaderLock(const MapHeaderLock
&);
84 MapHeaderLock
&operator=(const MapHeaderLock
&);
/// Bind the holder to db without claiming any object: only the db member is
/// initialized, so the optional `locked` member starts out empty.
86 explicit MapHeaderLock(DBObjectMap
*db
) : db(db
) {}
/// Claim oid in db->map_header_in_use.
///
/// Records oid in `locked`, takes db->header_lock for the rest of the body,
/// waits until oid is no longer present in the in-use set, and then inserts
/// it.
87 MapHeaderLock(DBObjectMap
*db
, const ghobject_t
&oid
) : db(db
), locked(oid
) {
88 Mutex::Locker
l(db
->header_lock
);
// Re-test the set after every wake-up on map_header_cond; Wait() is handed
// header_lock (presumably released while blocked -- see Cond for details).
89 while (db
->map_header_in_use
.count(*locked
))
90 db
->map_header_cond
.Wait(db
->header_lock
);
91 db
->map_header_in_use
.insert(*locked
);
94 const ghobject_t
&get_locked() const {
99 void swap(MapHeaderLock
&o
) {
102 // centos6's boost optional doesn't seem to have swap :(
103 boost::optional
<ghobject_t
> _locked
= o
.locked
;
110 Mutex::Locker
l(db
->header_lock
);
111 assert(db
->map_header_in_use
.count(*locked
));
112 db
->map_header_cond
.Signal();
113 db
->map_header_in_use
.erase(*locked
);
/// Construct the object map on top of an existing KeyValueDB.
///
/// Passes cct to the ObjectMap base, wraps db in the scoped_ptr member, names
/// the two mutexes, and sizes `caches` from
/// cct->_conf->filestore_omap_header_cache_size.
//
// NOTE(review): header_lock is named "DBOBjectMap" (capital 'B' after "DBO"),
// unlike cache_lock's "DBObjectMap::CacheLock". This looks like a typo, but
// the name is a runtime string, so it is deliberately left unchanged here.
118 DBObjectMap(CephContext
* cct
, KeyValueDB
*db
)
119 : ObjectMap(cct
), db(db
), header_lock("DBOBjectMap"),
120 cache_lock("DBObjectMap::CacheLock"),
121 caches(cct
->_conf
->filestore_omap_header_cache_size
)
125 const ghobject_t
&oid
,
126 const map
<string
, bufferlist
> &set
,
127 const SequencerPosition
*spos
=0
131 const ghobject_t
&oid
,
132 const bufferlist
&bl
,
133 const SequencerPosition
*spos
=0
137 const ghobject_t
&oid
,
142 const ghobject_t
&oid
,
143 const SequencerPosition
*spos
=0
146 int clear_keys_header(
147 const ghobject_t
&oid
,
148 const SequencerPosition
*spos
=0
152 const ghobject_t
&oid
,
153 const set
<string
> &to_clear
,
154 const SequencerPosition
*spos
=0
158 const ghobject_t
&oid
,
160 map
<string
, bufferlist
> *out
164 const ghobject_t
&oid
,
169 const ghobject_t
&oid
,
170 const set
<string
> &keys
,
171 map
<string
, bufferlist
> *out
175 const ghobject_t
&oid
,
176 const set
<string
> &keys
,
181 const ghobject_t
&oid
,
182 const set
<string
> &to_get
,
183 map
<string
, bufferlist
> *out
187 const ghobject_t
&oid
,
192 const ghobject_t
&oid
,
193 const map
<string
, bufferlist
> &to_set
,
194 const SequencerPosition
*spos
=0
198 const ghobject_t
&oid
,
199 const set
<string
> &to_remove
,
200 const SequencerPosition
*spos
=0
204 const ghobject_t
&oid
,
205 const ghobject_t
&target
,
206 const SequencerPosition
*spos
=0
210 const ghobject_t
&from
,
211 const ghobject_t
&to
,
212 const SequencerPosition
*spos
=0
216 const ghobject_t
&oid
,
217 const ghobject_t
&target
,
218 const SequencerPosition
*spos
=0
221 /// Read initial state from backing store
222 int init(bool upgrade
= false);
224 /// Upgrade store to current version
227 /// Consistency check, debug, there must be no parallel writes
228 int check(std::ostream
&out
, bool repair
= false) override
;
230 /// Ensure that all previous operations are durable
231 int sync(const ghobject_t
*oid
=0, const SequencerPosition
*spos
=0) override
;
233 /// Util, get all objects, there must be no other concurrent access
234 int list_objects(vector
<ghobject_t
> *objs
///< [out] objects
238 // Util, get all object headers, there must be no other concurrent access
239 int list_object_headers(vector
<_Header
> *out
///< [out] headers
242 ObjectMapIterator
get_iterator(const ghobject_t
&oid
) override
;
// Prefix and key names used to lay out the key-value store. The roles of
// USER_PREFIX, XATTR_PREFIX, SYS_PREFIX, COMPLETE_PREFIX, HEADER_KEY,
// USER_HEADER_KEY, GLOBAL_STATE_KEY and HOBJECT_TO_SEQ are described in the
// "Prefix space structure" comment at the top of this class. The string
// values are only declared here; their definitions are not visible in this
// header.
244 static const string USER_PREFIX
;
245 static const string XATTR_PREFIX
;
246 static const string SYS_PREFIX
;
247 static const string COMPLETE_PREFIX
;
248 static const string HEADER_KEY
;
249 static const string USER_HEADER_KEY
;
250 static const string GLOBAL_STATE_KEY
;
251 static const string HOBJECT_TO_SEQ
;
// NOTE(review): LEAF_PREFIX and REVERSE_LEAF_PREFIX are not mentioned in the
// prefix-space comment -- confirm their purpose where they are defined.
254 static const string LEAF_PREFIX
;
255 static const string REVERSE_LEAF_PREFIX
;
257 /// persistent state for store @see generate_header
/// Default state: v = 0 and seq = 1.
261 State() : v(0), seq(1) {}
/// State starting at the given seq; v is still initialized to 0.
262 explicit State(uint64_t seq
) : v(0), seq(seq
) {}
264 void encode(bufferlist
&bl
) const {
265 ENCODE_START(2, 1, bl
);
271 void decode(bufferlist::iterator
&bl
) {
281 void dump(Formatter
*f
) const {
282 f
->dump_unsigned("seq", seq
);
285 static void generate_test_instances(list
<State
*> &o
) {
286 o
.push_back(new State(0));
287 o
.push_back(new State(20));
294 uint64_t num_children
;
298 SequencerPosition spos
;
300 void encode(bufferlist
&bl
) const {
302 ENCODE_START(2, 1, bl
);
304 ::encode(parent
, bl
);
305 ::encode(num_children
, bl
);
306 ::encode(unused
, bl
);
312 void decode(bufferlist::iterator
&bl
) {
316 ::decode(parent
, bl
);
317 ::decode(num_children
, bl
);
318 ::decode(unused
, bl
);
325 void dump(Formatter
*f
) const {
326 f
->dump_unsigned("seq", seq
);
327 f
->dump_unsigned("parent", parent
);
328 f
->dump_unsigned("num_children", num_children
);
329 f
->dump_stream("oid") << oid
;
332 static void generate_test_instances(list
<_Header
*> &o
) {
333 o
.push_back(new _Header
);
334 o
.push_back(new _Header
);
335 o
.back()->parent
= 20;
/// Default header: seq = 0, parent = 0, num_children = 1. Members not named
/// in this initializer list (e.g. oid, spos) get their own default
/// construction.
339 _Header() : seq(0), parent(0), num_children(1) {}
342 /// String munging (public for testing)
343 static string
ghobject_key(const ghobject_t
&oid
);
344 static string
ghobject_key_v0(coll_t c
, const ghobject_t
&oid
);
345 static int is_buggy_ghobject_key_v1(CephContext
* cct
,
348 /// Implicit lock on Header->seq
349 typedef ceph::shared_ptr
<_Header
> Header
;
351 SimpleLRU
<ghobject_t
, _Header
> caches
;
353 string
map_header_key(const ghobject_t
&oid
);
354 string
header_key(uint64_t seq
);
355 string
complete_prefix(Header header
);
356 string
user_prefix(Header header
);
357 string
sys_prefix(Header header
);
358 string
xattr_prefix(Header header
);
359 string
sys_parent_prefix(_Header header
);
/// Convenience overload taking the shared Header handle: dereferences it and
/// forwards to the _Header overload declared just above. The handle is
/// dereferenced without a null check, so callers must pass a non-empty Header.
360 string
sys_parent_prefix(Header header
) {
361 return sys_parent_prefix(*header
);
/// Iterator implementation that never has a current entry.
///
/// All seek/bound operations and status() return 0, valid() always returns
/// false, and next(), key() and value() call ceph_abort() before their
/// (formally required) return statements.
//
// NOTE(review): seek_to_last() is the only member here without `override`;
// confirm whether ObjectMapIteratorImpl declares it before adding the keyword.
364 class EmptyIteratorImpl
: public ObjectMapIteratorImpl
{
366 int seek_to_first() override
{ return 0; }
367 int seek_to_last() { return 0; }
368 int upper_bound(const string
&after
) override
{ return 0; }
369 int lower_bound(const string
&to
) override
{ return 0; }
370 bool valid() override
{ return false; }
371 int next(bool validate
=true) override
{ ceph_abort(); return 0; }
372 string
key() override
{ ceph_abort(); return ""; }
373 bufferlist
value() override
{ ceph_abort(); return bufferlist(); }
374 int status() override
{ return 0; }
379 class DBObjectMapIteratorImpl
: public ObjectMapIteratorImpl
{
383 /// NOTE: implicit lock hlock->get_locked() when returned out of the class
385 /// NOTE: implicit lock on header->seq AND for all ancestors
388 /// parent_iter == NULL iff no parent
389 ceph::shared_ptr
<DBObjectMapIteratorImpl
> parent_iter
;
390 KeyValueDB::Iterator key_iter
;
391 KeyValueDB::Iterator complete_iter
;
393 /// cur_iter points to currently valid iterator
394 ceph::shared_ptr
<ObjectMapIteratorImpl
> cur_iter
;
397 /// init() called, key_iter, complete_iter, parent_iter filled in
/// Create an iterator over header within map.
///
/// Starts with r = 0, ready = false and invalid = true. hlock is constructed
/// from map alone (presumably the single-argument MapHeaderLock constructor,
/// i.e. no object is claimed at construction time -- TODO confirm hlock's
/// declared type).
402 DBObjectMapIteratorImpl(DBObjectMap
*map
, Header header
) :
403 map(map
), hlock(map
), header(header
), r(0), ready(false), invalid(true) {}
404 int seek_to_first() override
;
406 int upper_bound(const string
&after
) override
;
407 int lower_bound(const string
&to
) override
;
408 bool valid() override
;
409 int next(bool validate
=true) override
;
410 string
key() override
;
411 bufferlist
value() override
;
412 int status() override
;
415 return cur_iter
== parent_iter
;
418 /// skips to next valid parent entry
421 /// first parent() >= to
422 int lower_bound_parent(const string
&to
);
425 * Tests whether to_test is in complete region
427 * postcondition: complete_iter will be max s.t. complete_iter->value > to_test
429 int in_complete_region(const string
&to_test
, ///< [in] key to test
430 string
*begin
, ///< [out] beginning of region
431 string
*end
///< [out] end of region
432 ); ///< @returns true if to_test is in the complete region, else false
440 typedef ceph::shared_ptr
<DBObjectMapIteratorImpl
> DBObjectMapIterator
;
/// Build a DBObjectMapIteratorImpl for header, bound to this map, and return
/// it as a DBObjectMapIterator.
//
// NOTE(review): DBObjectMapIterator is declared with ceph::shared_ptr while
// the object is created with std::make_shared; this relies on the two being
// the same (or convertible) type -- confirm in include/memory.h.
441 DBObjectMapIterator
_get_iterator(Header header
) {
442 return std::make_shared
<DBObjectMapIteratorImpl
>(this, header
);
447 /// Removes node corresponding to header
448 void clear_header(Header header
, KeyValueDB::Transaction t
);
450 /// Set node containing input to new contents
451 void set_header(Header input
, KeyValueDB::Transaction t
);
453 /// Remove leaf node corresponding to oid in c
454 void remove_map_header(
455 const MapHeaderLock
&l
,
456 const ghobject_t
&oid
,
458 KeyValueDB::Transaction t
);
460 /// Set leaf node for c and oid to the value of header
462 const MapHeaderLock
&l
,
463 const ghobject_t
&oid
, _Header header
,
464 KeyValueDB::Transaction t
);
466 /// Check the sequencer position spos for oid -- TODO confirm exact semantics (this comment previously duplicated the one on the leaf-node setter above)
467 bool check_spos(const ghobject_t
&oid
,
469 const SequencerPosition
*spos
);
471 /// Lookup or create header for c oid
472 Header
lookup_create_map_header(
473 const MapHeaderLock
&l
,
474 const ghobject_t
&oid
,
475 KeyValueDB::Transaction t
);
478 * Generate new header for c oid with new seq number
480 * Has the side effect of synchronously saving the new DBObjectMap state
482 Header
_generate_new_header(const ghobject_t
&oid
, Header parent
);
/// Locking wrapper around _generate_new_header: takes header_lock for the
/// duration of the call and forwards oid and parent unchanged. The
/// underscore-prefixed variant is therefore invoked with header_lock held.
483 Header
generate_new_header(const ghobject_t
&oid
, Header parent
) {
484 Mutex::Locker
l(header_lock
);
485 return _generate_new_header(oid
, parent
);
488 /// Lookup leaf header for c oid
489 Header
_lookup_map_header(
490 const MapHeaderLock
&l
,
491 const ghobject_t
&oid
);
/// Locking wrapper around _lookup_map_header: takes header_lock, then
/// forwards the caller's MapHeaderLock (l2) and oid unchanged. The parameter
/// is named l2 because `l` is used for the local Mutex::Locker.
492 Header
lookup_map_header(
493 const MapHeaderLock
&l2
,
494 const ghobject_t
&oid
) {
495 Mutex::Locker
l(header_lock
);
496 return _lookup_map_header(l2
, oid
);
499 /// Lookup header node for input
500 Header
lookup_parent(Header input
);
504 int _get_header(Header header
, bufferlist
*bl
);
506 /// Scan keys in header into out_keys and out_values (if nonnull)
507 int scan(Header header
,
508 const set
<string
> &in_keys
,
509 set
<string
> *out_keys
,
510 map
<string
, bufferlist
> *out_values
);
512 /// Remove header and all related prefixes
513 int _clear(Header header
,
514 KeyValueDB::Transaction t
);
516 /* Scan complete region bumping *begin to the beginning of any
517 * containing region and adding all complete region keys between
518 * the updated begin and end to the complete_keys_to_remove set */
519 int merge_new_complete(DBObjectMapIterator
&iter
,
522 set
<string
> *complete_keys_to_remove
);
524 /// Writes out State (mainly next_seq)
525 int write_state(KeyValueDB::Transaction _t
=
526 KeyValueDB::Transaction());
528 /// Copies header entry from parent @see rm_keys
529 int copy_up_header(Header header
,
530 KeyValueDB::Transaction t
);
532 /// Sets header @see set_header
533 void _set_header(Header header
, const bufferlist
&bl
,
534 KeyValueDB::Transaction t
);
537 * Removes header seq lock and possibly object lock
538 * once Header is out of scope
540 * @see generate_new_header
542 class RemoveOnDelete
{
545 explicit RemoveOnDelete(DBObjectMap
*db
) :
/// Release the in-use mark for header->seq.
///
/// Under db->header_lock: asserts that header->seq is present in db->in_use,
/// erases it, and signals db->header_cond so a waiter can proceed.
/// Presumably invoked as the deleter of a shared Header handle (see the class
/// comment above) -- TODO confirm at the construction sites.
547 void operator() (_Header
*header
) {
548 Mutex::Locker
l(db
->header_lock
);
549 assert(db
->in_use
.count(header
->seq
));
550 db
->in_use
.erase(header
->seq
);
551 db
->header_cond
.Signal();
555 friend class RemoveOnDelete
;
557 WRITE_CLASS_ENCODER(DBObjectMap::_Header
)
558 WRITE_CLASS_ENCODER(DBObjectMap::State
)
560 ostream
& operator<<(ostream
& out
, const DBObjectMap::_Header
& h
);