]>
git.proxmox.com Git - ceph.git/blob - ceph/src/mds/SimpleLock.h
27eae7bebd4a0937b4daad52406eb2a608106678
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
16 #ifndef CEPH_SIMPLELOCK_H
17 #define CEPH_SIMPLELOCK_H
19 #include <boost/intrusive_ptr.hpp>
21 #include "MDSCacheObject.h"
22 #include "MDSContext.h"
27 inline const char *get_lock_type_name(int t
) {
29 case CEPH_LOCK_DN
: return "dn";
30 case CEPH_LOCK_DVERSION
: return "dversion";
31 case CEPH_LOCK_IVERSION
: return "iversion";
32 case CEPH_LOCK_IFILE
: return "ifile";
33 case CEPH_LOCK_IAUTH
: return "iauth";
34 case CEPH_LOCK_ILINK
: return "ilink";
35 case CEPH_LOCK_IDFT
: return "idft";
36 case CEPH_LOCK_INEST
: return "inest";
37 case CEPH_LOCK_IXATTR
: return "ixattr";
38 case CEPH_LOCK_ISNAP
: return "isnap";
39 case CEPH_LOCK_INO
: return "ino";
40 case CEPH_LOCK_IFLOCK
: return "iflock";
41 case CEPH_LOCK_IPOLICY
: return "ipolicy";
42 default: ceph_abort(); return 0;
46 #include "include/memory.h"
49 typedef boost::intrusive_ptr
<MutationImpl
> MutationRef
;
64 explicit LockType(int t
) : type(t
) {
69 case CEPH_LOCK_IXATTR
:
71 case CEPH_LOCK_IFLOCK
:
72 case CEPH_LOCK_IPOLICY
:
82 case CEPH_LOCK_DVERSION
:
83 case CEPH_LOCK_IVERSION
:
98 const char *get_state_name(int n
) const {
100 case LOCK_UNDEF
: return "UNDEF";
101 case LOCK_SYNC
: return "sync";
102 case LOCK_LOCK
: return "lock";
104 case LOCK_PREXLOCK
: return "prexlock";
105 case LOCK_XLOCK
: return "xlock";
106 case LOCK_XLOCKDONE
: return "xlockdone";
107 case LOCK_XLOCKSNAP
: return "xlocksnap";
108 case LOCK_LOCK_XLOCK
: return "lock->xlock";
110 case LOCK_SYNC_LOCK
: return "sync->lock";
111 case LOCK_LOCK_SYNC
: return "lock->sync";
112 case LOCK_REMOTEXLOCK
: return "remote_xlock";
113 case LOCK_EXCL
: return "excl";
114 case LOCK_EXCL_SYNC
: return "excl->sync";
115 case LOCK_EXCL_LOCK
: return "excl->lock";
116 case LOCK_SYNC_EXCL
: return "sync->excl";
117 case LOCK_LOCK_EXCL
: return "lock->excl";
119 case LOCK_XSYN
: return "xsyn";
120 case LOCK_XSYN_EXCL
: return "xsyn->excl";
121 case LOCK_EXCL_XSYN
: return "excl->xsyn";
122 case LOCK_XSYN_SYNC
: return "xsyn->sync";
124 case LOCK_SYNC_MIX
: return "sync->mix";
125 case LOCK_SYNC_MIX2
: return "sync->mix(2)";
126 case LOCK_LOCK_TSYN
: return "lock->tsyn";
128 case LOCK_MIX_LOCK
: return "mix->lock";
129 case LOCK_MIX_LOCK2
: return "mix->lock(2)";
130 case LOCK_MIX
: return "mix";
131 case LOCK_MIX_TSYN
: return "mix->tsyn";
133 case LOCK_TSYN_MIX
: return "tsyn->mix";
134 case LOCK_TSYN_LOCK
: return "tsyn->lock";
135 case LOCK_TSYN
: return "tsyn";
137 case LOCK_MIX_SYNC
: return "mix->sync";
138 case LOCK_MIX_SYNC2
: return "mix->sync(2)";
139 case LOCK_EXCL_MIX
: return "excl->mix";
140 case LOCK_MIX_EXCL
: return "mix->excl";
142 case LOCK_PRE_SCAN
: return "*->scan";
143 case LOCK_SCAN
: return "scan";
145 case LOCK_SNAP_SYNC
: return "snap->sync";
147 default: ceph_abort(); return 0;
// Wait-mask bits, one per reason a waiter can be queued on a lock.
// WAIT_XLOCK and WAIT_STABLE have the same value (bit 2); the original
// comment marks WAIT_XLOCK as the duplicate.
static const uint64_t WAIT_RD          = uint64_t(1) << 0;  // to read
static const uint64_t WAIT_WR          = uint64_t(1) << 1;  // to write
static const uint64_t WAIT_XLOCK       = uint64_t(1) << 2;  // to xlock (** dup)
static const uint64_t WAIT_STABLE      = uint64_t(1) << 2;  // for a stable state
static const uint64_t WAIT_REMOTEXLOCK = uint64_t(1) << 3;  // for a remote xlock
// Number of wait bits used per lock, and the mask covering all of them.
static const int WAIT_BITS             = 4;
static const uint64_t WAIT_ALL         = (uint64_t(1) << WAIT_BITS) - 1;
163 // parent (what i lock)
164 MDSCacheObject
*parent
;
171 __s32 num_client_lease
;
173 struct unstable_bits_t
{
174 set
<__s32
> gather_set
; // auth+rep. >= 0 is mds, < 0 is client
177 int num_wrlock
, num_xlock
;
178 MutationRef xlock_by
;
179 client_t xlock_by_client
;
180 client_t excl_client
;
184 gather_set
.empty() &&
187 xlock_by
.get() == NULL
&&
188 xlock_by_client
== -1 &&
192 unstable_bits_t() : num_wrlock(0),
199 mutable std::unique_ptr
<unstable_bits_t
> _unstable
;
201 bool have_more() const { return _unstable
? true : false; }
202 unstable_bits_t
*more() const {
204 _unstable
.reset(new unstable_bits_t
);
205 return _unstable
.get();
207 void try_clear_more() {
208 if (_unstable
&& _unstable
->empty()) {
215 client_t
get_excl_client() const {
216 return have_more() ? more()->excl_client
: -1;
218 void set_excl_client(client_t c
) {
219 if (c
< 0 && !have_more())
220 return; // default is -1
221 more()->excl_client
= c
;
224 SimpleLock(MDSCacheObject
*o
, LockType
*lt
) :
231 virtual ~SimpleLock() {}
233 virtual bool is_scatterlock() const {
236 virtual bool is_locallock() const {
241 MDSCacheObject
*get_parent() { return parent
; }
242 int get_type() const { return type
->type
; }
243 const sm_t
* get_sm() const { return type
->sm
; }
245 int get_wait_shift() const {
246 switch (get_type()) {
247 case CEPH_LOCK_DN
: return 8;
248 case CEPH_LOCK_DVERSION
: return 8 + 1*SimpleLock::WAIT_BITS
;
249 case CEPH_LOCK_IAUTH
: return 8 + 2*SimpleLock::WAIT_BITS
;
250 case CEPH_LOCK_ILINK
: return 8 + 3*SimpleLock::WAIT_BITS
;
251 case CEPH_LOCK_IDFT
: return 8 + 4*SimpleLock::WAIT_BITS
;
252 case CEPH_LOCK_IFILE
: return 8 + 5*SimpleLock::WAIT_BITS
;
253 case CEPH_LOCK_IVERSION
: return 8 + 6*SimpleLock::WAIT_BITS
;
254 case CEPH_LOCK_IXATTR
: return 8 + 7*SimpleLock::WAIT_BITS
;
255 case CEPH_LOCK_ISNAP
: return 8 + 8*SimpleLock::WAIT_BITS
;
256 case CEPH_LOCK_INEST
: return 8 + 9*SimpleLock::WAIT_BITS
;
257 case CEPH_LOCK_IFLOCK
: return 8 +10*SimpleLock::WAIT_BITS
;
258 case CEPH_LOCK_IPOLICY
: return 8 +11*SimpleLock::WAIT_BITS
;
264 int get_cap_shift() const {
265 switch (get_type()) {
266 case CEPH_LOCK_IAUTH
: return CEPH_CAP_SAUTH
;
267 case CEPH_LOCK_ILINK
: return CEPH_CAP_SLINK
;
268 case CEPH_LOCK_IFILE
: return CEPH_CAP_SFILE
;
269 case CEPH_LOCK_IXATTR
: return CEPH_CAP_SXATTR
;
273 int get_cap_mask() const {
274 switch (get_type()) {
275 case CEPH_LOCK_IFILE
: return (1 << CEPH_CAP_FILE_BITS
) - 1;
276 default: return (1 << CEPH_CAP_SIMPLE_BITS
) - 1;
281 bool operator()(const SimpleLock
* l
, const SimpleLock
* r
) const {
282 // first sort by object type (dn < inode)
283 if (!(l
->type
->type
> CEPH_LOCK_DN
) && (r
->type
->type
> CEPH_LOCK_DN
)) return true;
284 if ((l
->type
->type
> CEPH_LOCK_DN
) == (r
->type
->type
> CEPH_LOCK_DN
)) {
285 // then sort by object
286 if (l
->parent
->is_lt(r
->parent
)) return true;
287 if (l
->parent
== r
->parent
) {
288 // then sort by (inode) lock type
289 if (l
->type
->type
< r
->type
->type
) return true;
296 void decode_locked_state(bufferlist
& bl
) {
297 parent
->decode_lock_state(type
->type
, bl
);
299 void encode_locked_state(bufferlist
& bl
) {
300 parent
->encode_lock_state(type
->type
, bl
);
302 void finish_waiters(uint64_t mask
, int r
=0) {
303 parent
->finish_waiting(mask
<< get_wait_shift(), r
);
305 void take_waiting(uint64_t mask
, list
<MDSInternalContextBase
*>& ls
) {
306 parent
->take_waiting(mask
<< get_wait_shift(), ls
);
308 void add_waiter(uint64_t mask
, MDSInternalContextBase
*c
) {
309 parent
->add_waiter((mask
<< get_wait_shift()) | MDSCacheObject::WAIT_ORDERED
, c
);
311 bool is_waiter_for(uint64_t mask
) const {
312 return parent
->is_waiter_for(mask
<< get_wait_shift());
318 int get_state() const { return state
; }
319 int set_state(int s
) {
321 //assert(!is_stable() || gather_set.size() == 0); // gather should be empty in stable states.
324 void set_state_rejoin(int s
, list
<MDSInternalContextBase
*>& waiters
) {
325 if (!is_stable() && get_parent()->is_auth()) {
327 get_parent()->auth_unpin(this);
332 take_waiting(SimpleLock::WAIT_ALL
, waiters
);
335 bool is_stable() const {
336 return get_sm()->states
[state
].next
== 0;
338 bool is_unstable_and_locked() const {
341 return is_rdlocked() || is_wrlocked() || is_xlocked();
343 int get_next_state() {
344 return get_sm()->states
[state
].next
;
348 bool is_sync_and_unlocked() const {
350 get_state() == LOCK_SYNC
&&
359 bool fw_rdlock_to_auth() {
360 return get_sm()->states[state].can_rdlock == FW;
363 bool req_rdlock_from_auth() {
364 return get_sm()->states
[state
].can_rdlock
== REQ
;
368 static set
<int32_t> empty_gather_set
;
370 // int32_t: <0 is client, >=0 is MDS rank
371 const set
<int32_t>& get_gather_set() const {
372 return have_more() ? more()->gather_set
: empty_gather_set
;
376 for (compact_map
<mds_rank_t
,unsigned>::iterator p
= parent
->replicas_begin();
377 p
!= parent
->replicas_end();
379 more()->gather_set
.insert(p
->first
);
381 bool is_gathering() const {
382 return have_more() && !more()->gather_set
.empty();
384 bool is_gathering(int32_t i
) const {
385 return have_more() && more()->gather_set
.count(i
);
387 void clear_gather() {
389 more()->gather_set
.clear();
391 void remove_gather(int32_t i
) {
393 more()->gather_set
.erase(i
);
398 virtual bool is_dirty() const { return false; }
399 virtual bool is_stale() const { return false; }
400 virtual bool is_flushing() const { return false; }
401 virtual bool is_flushed() const { return false; }
402 virtual void clear_flushed() { }
405 bool can_lease(client_t client
) const {
406 return get_sm()->states
[state
].can_lease
== ANY
||
407 (get_sm()->states
[state
].can_lease
== AUTH
&& parent
->is_auth()) ||
408 (get_sm()->states
[state
].can_lease
== XCL
&& client
>= 0 && get_xlock_by_client() == client
);
410 bool can_read(client_t client
) const {
411 return get_sm()->states
[state
].can_read
== ANY
||
412 (get_sm()->states
[state
].can_read
== AUTH
&& parent
->is_auth()) ||
413 (get_sm()->states
[state
].can_read
== XCL
&& client
>= 0 && get_xlock_by_client() == client
);
415 bool can_read_projected(client_t client
) const {
416 return get_sm()->states
[state
].can_read_projected
== ANY
||
417 (get_sm()->states
[state
].can_read_projected
== AUTH
&& parent
->is_auth()) ||
418 (get_sm()->states
[state
].can_read_projected
== XCL
&& client
>= 0 && get_xlock_by_client() == client
);
420 bool can_rdlock(client_t client
) const {
421 return get_sm()->states
[state
].can_rdlock
== ANY
||
422 (get_sm()->states
[state
].can_rdlock
== AUTH
&& parent
->is_auth()) ||
423 (get_sm()->states
[state
].can_rdlock
== XCL
&& client
>= 0 && get_xlock_by_client() == client
);
425 bool can_wrlock(client_t client
) const {
426 return get_sm()->states
[state
].can_wrlock
== ANY
||
427 (get_sm()->states
[state
].can_wrlock
== AUTH
&& parent
->is_auth()) ||
428 (get_sm()->states
[state
].can_wrlock
== XCL
&& client
>= 0 && (get_xlock_by_client() == client
||
429 get_excl_client() == client
));
431 bool can_force_wrlock(client_t client
) const {
432 return get_sm()->states
[state
].can_force_wrlock
== ANY
||
433 (get_sm()->states
[state
].can_force_wrlock
== AUTH
&& parent
->is_auth()) ||
434 (get_sm()->states
[state
].can_force_wrlock
== XCL
&& client
>= 0 && (get_xlock_by_client() == client
||
435 get_excl_client() == client
));
437 bool can_xlock(client_t client
) const {
438 return get_sm()->states
[state
].can_xlock
== ANY
||
439 (get_sm()->states
[state
].can_xlock
== AUTH
&& parent
->is_auth()) ||
440 (get_sm()->states
[state
].can_xlock
== XCL
&& client
>= 0 && get_xlock_by_client() == client
);
444 bool is_rdlocked() const { return num_rdlock
> 0; }
447 parent
->get(MDSCacheObject::PIN_LOCK
);
451 assert(num_rdlock
>0);
454 parent
->put(MDSCacheObject::PIN_LOCK
);
457 int get_num_rdlocks() const {
462 void get_wrlock(bool force
=false) {
463 //assert(can_wrlock() || force);
464 if (more()->num_wrlock
== 0)
465 parent
->get(MDSCacheObject::PIN_LOCK
);
466 ++more()->num_wrlock
;
469 --more()->num_wrlock
;
470 if (more()->num_wrlock
== 0) {
471 parent
->put(MDSCacheObject::PIN_LOCK
);
475 bool is_wrlocked() const {
476 return have_more() && more()->num_wrlock
> 0;
478 int get_num_wrlocks() const {
479 return have_more() ? more()->num_wrlock
: 0;
483 void get_xlock(MutationRef who
, client_t client
) {
484 assert(get_xlock_by() == MutationRef());
485 assert(state
== LOCK_XLOCK
|| is_locallock() ||
486 state
== LOCK_LOCK
/* if we are a slave */);
487 parent
->get(MDSCacheObject::PIN_LOCK
);
489 more()->xlock_by
= who
;
490 more()->xlock_by_client
= client
;
492 void set_xlock_done() {
493 assert(more()->xlock_by
);
494 assert(state
== LOCK_XLOCK
|| is_locallock() ||
495 state
== LOCK_LOCK
/* if we are a slave */);
497 state
= LOCK_XLOCKDONE
;
498 more()->xlock_by
.reset();
501 assert(state
== LOCK_XLOCK
|| state
== LOCK_XLOCKDONE
||
502 state
== LOCK_XLOCKSNAP
|| is_locallock() ||
503 state
== LOCK_LOCK
/* if we are a master of a slave */);
505 parent
->put(MDSCacheObject::PIN_LOCK
);
506 if (more()->num_xlock
== 0) {
507 more()->xlock_by
.reset();
508 more()->xlock_by_client
= -1;
512 bool is_xlocked() const {
513 return have_more() && more()->num_xlock
> 0;
515 int get_num_xlocks() const {
516 return have_more() ? more()->num_xlock
: 0;
518 client_t
get_xlock_by_client() const {
519 return have_more() ? more()->xlock_by_client
: -1;
521 bool is_xlocked_by_client(client_t c
) const {
522 return have_more() ? more()->xlock_by_client
== c
: false;
524 MutationRef
get_xlock_by() const {
525 return have_more() ? more()->xlock_by
: MutationRef();
529 void get_client_lease() {
532 void put_client_lease() {
533 assert(num_client_lease
> 0);
535 if (num_client_lease
== 0) {
539 bool is_leased() const {
540 return num_client_lease
> 0;
542 int get_num_client_lease() const {
543 return num_client_lease
;
546 bool is_used() const {
547 return is_xlocked() || is_rdlocked() || is_wrlocked() || num_client_lease
;
551 void encode(bufferlist
& bl
) const {
552 ENCODE_START(2, 2, bl
);
555 ::encode(more()->gather_set
, bl
);
557 ::encode(empty_gather_set
, bl
);
560 void decode(bufferlist::iterator
& p
) {
566 more()->gather_set
.swap(g
);
569 void encode_state_for_replica(bufferlist
& bl
) const {
570 __s16 s
= get_replica_state();
573 void decode_state(bufferlist::iterator
& p
, bool is_new
=true) {
579 void decode_state_rejoin(bufferlist::iterator
& p
, list
<MDSInternalContextBase
*>& waiters
) {
582 set_state_rejoin(s
, waiters
);
587 bool is_loner_mode() const {
588 return get_sm()->states
[state
].loner
;
590 int gcaps_allowed_ever() const {
591 return parent
->is_auth() ? get_sm()->allowed_ever_auth
: get_sm()->allowed_ever_replica
;
593 int gcaps_allowed(int who
, int s
=-1) const {
594 if (s
< 0) s
= state
;
595 if (parent
->is_auth()) {
596 if (get_xlock_by_client() >= 0 && who
== CAP_XLOCKER
)
597 return get_sm()->states
[s
].xlocker_caps
| get_sm()->states
[s
].caps
; // xlocker always gets more
598 else if (is_loner_mode() && who
== CAP_ANY
)
599 return get_sm()->states
[s
].caps
;
601 return get_sm()->states
[s
].loner_caps
| get_sm()->states
[s
].caps
; // loner always gets more
603 return get_sm()->states
[s
].replica_caps
;
605 int gcaps_careful() const {
606 if (get_num_wrlocks())
607 return get_sm()->careful
;
612 int gcaps_xlocker_mask(client_t client
) const {
613 if (client
== get_xlock_by_client())
614 return type
->type
== CEPH_LOCK_IFILE
? 0xf : (CEPH_CAP_GSHARED
|CEPH_CAP_GEXCL
);
618 // simplelock specifics
619 int get_replica_state() const {
620 return get_sm()->states
[state
].replica_state
;
622 void export_twiddle() {
624 state
= get_replica_state();
628 * called on first replica creation.
630 void replicate_relax() {
631 assert(parent
->is_auth());
632 assert(!parent
->is_replicated());
633 if (state
== LOCK_LOCK
&& !is_used())
636 bool remove_replica(int from
) {
637 if (is_gathering(from
)) {
644 bool do_import(int from
, int to
) {
651 if (!is_stable() && !is_gathering())
656 void _print(ostream
& out
) const {
657 out
<< get_lock_type_name(get_type()) << " ";
658 out
<< get_state_name(get_state());
659 if (!get_gather_set().empty())
660 out
<< " g=" << get_gather_set();
661 if (num_client_lease
)
662 out
<< " l=" << num_client_lease
;
664 out
<< " r=" << get_num_rdlocks();
666 out
<< " w=" << get_num_wrlocks();
668 out
<< " x=" << get_num_xlocks();
670 out
<< " by " << get_xlock_by();
680 * Write bare values (caller must be in an object section)
681 * to formatter, or nothing if is_sync_and_unlocked.
683 void dump(Formatter
*f
) const;
685 virtual void print(ostream
& out
) const {
691 WRITE_CLASS_ENCODER(SimpleLock
)
693 inline ostream
& operator<<(ostream
& out
, const SimpleLock
& l
)