]> git.proxmox.com Git - ceph.git/blob - ceph/src/os/filestore/DBObjectMap.h
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / os / filestore / DBObjectMap.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 #ifndef DBOBJECTMAP_DB_H
3 #define DBOBJECTMAP_DB_H
4
5 #include "include/buffer_fwd.h"
6 #include <set>
7 #include <map>
8 #include <string>
9
10 #include <vector>
11 #include <boost/scoped_ptr.hpp>
12
13 #include "os/ObjectMap.h"
14 #include "kv/KeyValueDB.h"
15 #include "osd/osd_types.h"
16 #include "common/ceph_mutex.h"
17 #include "common/simple_cache.hpp"
18 #include <boost/optional/optional_io.hpp>
19
20 #include "SequencerPosition.h"
21
22 /**
23 * DBObjectMap: Implements ObjectMap in terms of KeyValueDB
24 *
25 * Prefix space structure:
26 *
27 * @see complete_prefix
28 * @see user_prefix
29 * @see sys_prefix
30 *
31 * - HOBJECT_TO_SEQ: Contains leaf mapping from ghobject_t->header.seq and
32 * corresponding omap header
33 * - SYS_PREFIX: GLOBAL_STATE_KEY - contains next seq number
34 * @see State
35 * @see write_state
36 * @see init
37 * @see generate_new_header
38 * - USER_PREFIX + header_key(header->seq) + USER_PREFIX
39 * : key->value for header->seq
40 * - USER_PREFIX + header_key(header->seq) + COMPLETE_PREFIX: see below
41 * - USER_PREFIX + header_key(header->seq) + XATTR_PREFIX: xattrs
42 * - USER_PREFIX + header_key(header->seq) + SYS_PREFIX
43 * : USER_HEADER_KEY - omap header for header->seq
44 * : HEADER_KEY - encoding of header for header->seq
45 *
 * For each node (represented by a header), we
 * store three mappings: the key mapping, the complete mapping, and the parent.
 * The complete mapping (COMPLETE_PREFIX space) is key->key. Each x->y entry in
 * this mapping indicates that the key mapping contains all entries on [x,y).
 * Note, the maximum string is represented by "", so ""->"" indicates that the
 * parent is unnecessary (@see rm_keys). When looking up a key not contained in
 * the complete set, we have to check the parent if we don't find it in the
 * key set. During rm_keys, we copy keys from the parent and update the
 * complete set to reflect the change (@see rm_keys).
55 */
56 class DBObjectMap : public ObjectMap {
57 public:
58
/// Returns the raw pointer obtained from db.get(). `db` is not declared in
/// this class -- presumably inherited from ObjectMap; confirm there.
KeyValueDB *get_db() override { return db.get(); }

/**
 * Serializes access to next_seq as well as the in_use set
 */
ceph::mutex header_lock = ceph::make_mutex("DBOBjectMap");
/// Notified by RemoveOnDelete after a seq has been erased from in_use.
ceph::condition_variable header_cond;
/// Waited on by the MapHeaderLock constructor, notified by its destructor.
ceph::condition_variable map_header_cond;

/**
 * Set of headers currently in use
 */
std::set<uint64_t> in_use;
/// Objects currently claimed through a MapHeaderLock.
std::set<ghobject_t> map_header_in_use;
73
74 /**
75 * Takes the map_header_in_use entry in constructor, releases in
76 * destructor
77 */
78 class MapHeaderLock {
79 DBObjectMap *db;
80 boost::optional<ghobject_t> locked;
81
82 MapHeaderLock(const MapHeaderLock &);
83 MapHeaderLock &operator=(const MapHeaderLock &);
84 public:
85 explicit MapHeaderLock(DBObjectMap *db) : db(db) {}
86 MapHeaderLock(DBObjectMap *db, const ghobject_t &oid) : db(db), locked(oid) {
87 std::unique_lock l{db->header_lock};
88 db->map_header_cond.wait(l, [db, this] {
89 return !db->map_header_in_use.count(*locked);
90 });
91 db->map_header_in_use.insert(*locked);
92 }
93
94 const ghobject_t &get_locked() const {
95 ceph_assert(locked);
96 return *locked;
97 }
98
99 void swap(MapHeaderLock &o) {
100 ceph_assert(db == o.db);
101
102 // centos6's boost optional doesn't seem to have swap :(
103 boost::optional<ghobject_t> _locked = o.locked;
104 o.locked = locked;
105 locked = _locked;
106 }
107
108 ~MapHeaderLock() {
109 if (locked) {
110 std::lock_guard l{db->header_lock};
111 ceph_assert(db->map_header_in_use.count(*locked));
112 db->map_header_cond.notify_all();
113 db->map_header_in_use.erase(*locked);
114 }
115 }
116 };
117
/// Forwards cct and db to the ObjectMap base and sizes the header cache
/// (`caches`) from the filestore_omap_header_cache_size config value.
DBObjectMap(CephContext* cct, KeyValueDB *db)
  : ObjectMap(cct, db),
    caches(cct->_conf->filestore_omap_header_cache_size)
{}
122
123 int set_keys(
124 const ghobject_t &oid,
125 const std::map<std::string, ceph::buffer::list> &set,
126 const SequencerPosition *spos=0
127 ) override;
128
129 int set_header(
130 const ghobject_t &oid,
131 const ceph::buffer::list &bl,
132 const SequencerPosition *spos=0
133 ) override;
134
135 int get_header(
136 const ghobject_t &oid,
137 ceph::buffer::list *bl
138 ) override;
139
140 int clear(
141 const ghobject_t &oid,
142 const SequencerPosition *spos=0
143 ) override;
144
145 int clear_keys_header(
146 const ghobject_t &oid,
147 const SequencerPosition *spos=0
148 ) override;
149
150 int rm_keys(
151 const ghobject_t &oid,
152 const std::set<std::string> &to_clear,
153 const SequencerPosition *spos=0
154 ) override;
155
156 int get(
157 const ghobject_t &oid,
158 ceph::buffer::list *header,
159 std::map<std::string, ceph::buffer::list> *out
160 ) override;
161
162 int get_keys(
163 const ghobject_t &oid,
164 std::set<std::string> *keys
165 ) override;
166
167 int get_values(
168 const ghobject_t &oid,
169 const std::set<std::string> &keys,
170 std::map<std::string, ceph::buffer::list> *out
171 ) override;
172
173 int check_keys(
174 const ghobject_t &oid,
175 const std::set<std::string> &keys,
176 std::set<std::string> *out
177 ) override;
178
179 int get_xattrs(
180 const ghobject_t &oid,
181 const std::set<std::string> &to_get,
182 std::map<std::string, ceph::buffer::list> *out
183 ) override;
184
185 int get_all_xattrs(
186 const ghobject_t &oid,
187 std::set<std::string> *out
188 ) override;
189
190 int set_xattrs(
191 const ghobject_t &oid,
192 const std::map<std::string, ceph::buffer::list> &to_set,
193 const SequencerPosition *spos=0
194 ) override;
195
196 int remove_xattrs(
197 const ghobject_t &oid,
198 const std::set<std::string> &to_remove,
199 const SequencerPosition *spos=0
200 ) override;
201
202 int clone(
203 const ghobject_t &oid,
204 const ghobject_t &target,
205 const SequencerPosition *spos=0
206 ) override;
207
208 int rename(
209 const ghobject_t &from,
210 const ghobject_t &to,
211 const SequencerPosition *spos=0
212 ) override;
213
214 int legacy_clone(
215 const ghobject_t &oid,
216 const ghobject_t &target,
217 const SequencerPosition *spos=0
218 ) override;
219
220 /// Read initial state from backing store
221 int get_state();
222 /// Write current state settings to DB
223 void set_state();
224 /// Read initial state and upgrade or initialize state
225 int init(bool upgrade = false);
226
227 /// Upgrade store to current version
228 int upgrade_to_v2();
229
230 /// Consistency check, debug, there must be no parallel writes
231 int check(std::ostream &out, bool repair = false, bool force = false) override;
232
233 /// Ensure that all previous operations are durable
234 int sync(const ghobject_t *oid=0, const SequencerPosition *spos=0) override;
235
236 void compact() override {
237 ceph_assert(db);
238 db->compact();
239 }
240
241 /// Util, get all objects, there must be no other concurrent access
242 int list_objects(std::vector<ghobject_t> *objs ///< [out] objects
243 );
244
245 struct _Header;
246 // Util, get all object headers, there must be no other concurrent access
247 int list_object_headers(std::vector<_Header> *out ///< [out] headers
248 );
249
250 ObjectMapIterator get_iterator(const ghobject_t &oid) override;
251
// Key-space constants; values are defined outside this header.  The
// layout they form is described in the comment at the top of the class.
static const std::string USER_PREFIX;
static const std::string XATTR_PREFIX;
static const std::string SYS_PREFIX;
static const std::string COMPLETE_PREFIX;
/// Key for the encoding of the header for header->seq
static const std::string HEADER_KEY;
/// Key for the omap header for header->seq
static const std::string USER_HEADER_KEY;
/// Key under SYS_PREFIX that contains the next seq number
static const std::string GLOBAL_STATE_KEY;
/// Leaf mapping from ghobject_t to header.seq and corresponding omap header
static const std::string HOBJECT_TO_SEQ;

/// Legacy
static const std::string LEAF_PREFIX;
static const std::string REVERSE_LEAF_PREFIX;
264
265 /// persistent state for store @see generate_header
266 struct State {
267 static const __u8 CUR_VERSION = 3;
268 __u8 v;
269 uint64_t seq;
270 // legacy is false when complete regions never used
271 bool legacy;
272 State() : v(0), seq(1), legacy(false) {}
273 explicit State(uint64_t seq) : v(0), seq(seq), legacy(false) {}
274
275 void encode(ceph::buffer::list &bl) const {
276 ENCODE_START(3, 1, bl);
277 encode(v, bl);
278 encode(seq, bl);
279 encode(legacy, bl);
280 ENCODE_FINISH(bl);
281 }
282
283 void decode(ceph::buffer::list::const_iterator &bl) {
284 DECODE_START(3, bl);
285 if (struct_v >= 2)
286 decode(v, bl);
287 else
288 v = 0;
289 decode(seq, bl);
290 if (struct_v >= 3)
291 decode(legacy, bl);
292 else
293 legacy = false;
294 DECODE_FINISH(bl);
295 }
296
297 void dump(ceph::Formatter *f) const {
298 f->dump_unsigned("v", v);
299 f->dump_unsigned("seq", seq);
300 f->dump_bool("legacy", legacy);
301 }
302
303 static void generate_test_instances(std::list<State*> &o) {
304 o.push_back(new State(0));
305 o.push_back(new State(20));
306 }
307 } state;
308
309 struct _Header {
310 uint64_t seq;
311 uint64_t parent;
312 uint64_t num_children;
313
314 ghobject_t oid;
315
316 SequencerPosition spos;
317
318 void encode(ceph::buffer::list &bl) const {
319 coll_t unused;
320 ENCODE_START(2, 1, bl);
321 encode(seq, bl);
322 encode(parent, bl);
323 encode(num_children, bl);
324 encode(unused, bl);
325 encode(oid, bl);
326 encode(spos, bl);
327 ENCODE_FINISH(bl);
328 }
329
330 void decode(ceph::buffer::list::const_iterator &bl) {
331 coll_t unused;
332 DECODE_START(2, bl);
333 decode(seq, bl);
334 decode(parent, bl);
335 decode(num_children, bl);
336 decode(unused, bl);
337 decode(oid, bl);
338 if (struct_v >= 2)
339 decode(spos, bl);
340 DECODE_FINISH(bl);
341 }
342
343 void dump(ceph::Formatter *f) const {
344 f->dump_unsigned("seq", seq);
345 f->dump_unsigned("parent", parent);
346 f->dump_unsigned("num_children", num_children);
347 f->dump_stream("oid") << oid;
348 }
349
350 static void generate_test_instances(std::list<_Header*> &o) {
351 o.push_back(new _Header);
352 o.push_back(new _Header);
353 o.back()->parent = 20;
354 o.back()->seq = 30;
355 }
356
357 size_t length() {
358 return sizeof(_Header);
359 }
360
361 _Header() : seq(0), parent(0), num_children(1) {}
362 };
363
/// String munging (public for testing)
static std::string ghobject_key(const ghobject_t &oid);
static std::string ghobject_key_v0(coll_t c, const ghobject_t &oid);
static int is_buggy_ghobject_key_v1(CephContext* cct,
                                    const std::string &in);
369 private:
370 /// Implicit lock on Header->seq
371 typedef std::shared_ptr<_Header> Header;
372 ceph::mutex cache_lock = ceph::make_mutex("DBObjectMap::CacheLock");
373 SimpleLRU<ghobject_t, _Header> caches;
374
375 std::string map_header_key(const ghobject_t &oid);
376 std::string header_key(uint64_t seq);
377 std::string complete_prefix(Header header);
378 std::string user_prefix(Header header);
379 std::string sys_prefix(Header header);
380 std::string xattr_prefix(Header header);
381 std::string sys_parent_prefix(_Header header);
382 std::string sys_parent_prefix(Header header) {
383 return sys_parent_prefix(*header);
384 }
385
386 class EmptyIteratorImpl : public ObjectMapIteratorImpl {
387 public:
388 int seek_to_first() override { return 0; }
389 int seek_to_last() { return 0; }
390 int upper_bound(const std::string &after) override { return 0; }
391 int lower_bound(const std::string &to) override { return 0; }
392 bool valid() override { return false; }
393 int next() override { ceph_abort(); return 0; }
394 std::string key() override { ceph_abort(); return ""; }
395 ceph::buffer::list value() override { ceph_abort(); return ceph::buffer::list(); }
396 int status() override { return 0; }
397 };
398
399
/// Iterator
/// Iterator over the keys of one header.  Holds sub-iterators for the
/// header's own keys, its complete mapping and (optionally) its parent.
/// Member function bodies other than the constructor and on_parent()
/// are defined outside this header.
class DBObjectMapIteratorImpl : public ObjectMapIteratorImpl {
public:
  DBObjectMap *map;

  /// NOTE: implicit lock hlock->get_locked() when returned out of the class
  MapHeaderLock hlock;
  /// NOTE: implicit lock on header->seq AND for all ancestors
  Header header;

  /// parent_iter == NULL iff no parent
  std::shared_ptr<DBObjectMapIteratorImpl> parent_iter;
  KeyValueDB::Iterator key_iter;
  KeyValueDB::Iterator complete_iter;

  /// cur_iter points to currently valid iterator
  std::shared_ptr<ObjectMapIteratorImpl> cur_iter;
  int r;

  /// init() called, key_iter, complete_iter, parent_iter filled in
  bool ready;
  /// past end
  bool invalid;

  /// Starts not ready and invalid, with r = 0; hlock is built with the
  /// one-argument MapHeaderLock constructor and so holds no entry yet.
  DBObjectMapIteratorImpl(DBObjectMap *map, Header header) :
    map(map), hlock(map), header(header), r(0), ready(false), invalid(true) {}
  int seek_to_first() override;
  int seek_to_last();
  int upper_bound(const std::string &after) override;
  int lower_bound(const std::string &to) override;
  bool valid() override;
  int next() override;
  std::string key() override;
  ceph::buffer::list value() override;
  int status() override;

  /// True when cur_iter and parent_iter compare equal.
  bool on_parent() {
    return cur_iter == parent_iter;
  }

  /// skips to next valid parent entry
  int next_parent();

  /// first parent() >= to
  int lower_bound_parent(const std::string &to);

  /**
   * Tests whether to_test is in complete region
   *
   * postcondition: complete_iter will be max s.t. complete_iter->value > to_test
   */
  int in_complete_region(const std::string &to_test, ///< [in] key to test
                         std::string *begin, ///< [out] beginning of region
                         std::string *end ///< [out] end of region
    ); ///< @returns true if to_test is in the complete region, else false

private:
  int init();
  bool valid_parent();
  int adjust();
};
461
462 typedef std::shared_ptr<DBObjectMapIteratorImpl> DBObjectMapIterator;
463 DBObjectMapIterator _get_iterator(Header header) {
464 return std::make_shared<DBObjectMapIteratorImpl>(this, header);
465 }
466
/// sys

/// Removes node corresponding to header
void clear_header(Header header, KeyValueDB::Transaction t);

/// Set node containing input to new contents
void set_header(Header input, KeyValueDB::Transaction t);

/// Remove leaf node corresponding to oid in c
void remove_map_header(
  const MapHeaderLock &l,
  const ghobject_t &oid,
  Header header,
  KeyValueDB::Transaction t);

/// Set leaf node for c and oid to the value of header
void set_map_header(
  const MapHeaderLock &l,
  const ghobject_t &oid, _Header header,
  KeyValueDB::Transaction t);

/// NOTE(review): the comment previously here was a copy of the one on
/// set_map_header.  Presumably this compares *spos with the position
/// recorded in the header (_Header::spos) -- confirm against the
/// implementation, which is not in this header.
bool check_spos(const ghobject_t &oid,
                Header header,
                const SequencerPosition *spos);

/// Lookup or create header for c oid
Header lookup_create_map_header(
  const MapHeaderLock &l,
  const ghobject_t &oid,
  KeyValueDB::Transaction t);
498
499 /**
500 * Generate new header for c oid with new seq number
501 *
502 * Has the side effect of synchronously saving the new DBObjectMap state
503 */
504 Header _generate_new_header(const ghobject_t &oid, Header parent);
505 Header generate_new_header(const ghobject_t &oid, Header parent) {
506 std::lock_guard l{header_lock};
507 return _generate_new_header(oid, parent);
508 }
509
510 /// Lookup leaf header for c oid
511 Header _lookup_map_header(
512 const MapHeaderLock &l,
513 const ghobject_t &oid);
514 Header lookup_map_header(
515 const MapHeaderLock &l2,
516 const ghobject_t &oid) {
517 std::lock_guard l{header_lock};
518 return _lookup_map_header(l2, oid);
519 }
520
/// Lookup header node for input
Header lookup_parent(Header input);


/// Helpers
int _get_header(Header header, ceph::buffer::list *bl);

/// Scan keys in header into out_keys and out_values (if nonnull)
int scan(Header header,
         const std::set<std::string> &in_keys,
         std::set<std::string> *out_keys,
         std::map<std::string, ceph::buffer::list> *out_values);

/// Remove header and all related prefixes
int _clear(Header header,
           KeyValueDB::Transaction t);

/* Scan complete region bumping *begin to the beginning of any
 * containing region and adding all complete region keys between
 * the updated begin and end to the complete_keys_to_remove set */
int merge_new_complete(DBObjectMapIterator &iter,
                       std::string *begin,
                       const std::string &end,
                       std::set<std::string> *complete_keys_to_remove);

/// Writes out State (mainly next_seq)
/// The transaction argument defaults to a default-constructed one.
int write_state(KeyValueDB::Transaction _t =
                KeyValueDB::Transaction());

/// Copies header entry from parent @see rm_keys
int copy_up_header(Header header,
                   KeyValueDB::Transaction t);

/// Sets header @see set_header
void _set_header(Header header, const ceph::buffer::list &bl,
                 KeyValueDB::Transaction t);
557
558 /**
559 * Removes header seq lock and possibly object lock
560 * once Header is out of scope
561 * @see lookup_parent
562 * @see generate_new_header
563 */
564 class RemoveOnDelete {
565 public:
566 DBObjectMap *db;
567 explicit RemoveOnDelete(DBObjectMap *db) :
568 db(db) {}
569 void operator() (_Header *header) {
570 std::lock_guard l{db->header_lock};
571 ceph_assert(db->in_use.count(header->seq));
572 db->in_use.erase(header->seq);
573 db->header_cond.notify_all();
574 delete header;
575 }
576 };
577 friend class RemoveOnDelete;
578 };
// NOTE(review): WRITE_CLASS_ENCODER is defined outside this file;
// presumably it generates free encode()/decode() functions that forward
// to the member functions of the named type -- confirm.
WRITE_CLASS_ENCODER(DBObjectMap::_Header)
WRITE_CLASS_ENCODER(DBObjectMap::State)

/// Stream insertion for _Header; defined outside this header.
std::ostream& operator<<(std::ostream& out, const DBObjectMap::_Header& h);
583
584 #endif