]>
git.proxmox.com Git - ceph.git/blob - ceph/src/mds/SimpleLock.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
16 #ifndef CEPH_SIMPLELOCK_H
17 #define CEPH_SIMPLELOCK_H
19 #include <boost/intrusive_ptr.hpp>
21 #include "MDSCacheObject.h"
22 #include "MDSContext.h"
27 inline const char *get_lock_type_name(int t
) {
29 case CEPH_LOCK_DN
: return "dn";
30 case CEPH_LOCK_DVERSION
: return "dversion";
31 case CEPH_LOCK_IVERSION
: return "iversion";
32 case CEPH_LOCK_IFILE
: return "ifile";
33 case CEPH_LOCK_IAUTH
: return "iauth";
34 case CEPH_LOCK_ILINK
: return "ilink";
35 case CEPH_LOCK_IDFT
: return "idft";
36 case CEPH_LOCK_INEST
: return "inest";
37 case CEPH_LOCK_IXATTR
: return "ixattr";
38 case CEPH_LOCK_ISNAP
: return "isnap";
39 case CEPH_LOCK_INO
: return "ino";
40 case CEPH_LOCK_IFLOCK
: return "iflock";
41 case CEPH_LOCK_IPOLICY
: return "ipolicy";
42 default: ceph_abort(); return 0;
46 #include "include/memory.h"
49 typedef boost::intrusive_ptr
<MutationImpl
> MutationRef
;
64 explicit LockType(int t
) : type(t
) {
69 case CEPH_LOCK_IXATTR
:
71 case CEPH_LOCK_IFLOCK
:
72 case CEPH_LOCK_IPOLICY
:
82 case CEPH_LOCK_DVERSION
:
83 case CEPH_LOCK_IVERSION
:
98 const char *get_state_name(int n
) const {
100 case LOCK_UNDEF
: return "UNDEF";
101 case LOCK_SYNC
: return "sync";
102 case LOCK_LOCK
: return "lock";
104 case LOCK_PREXLOCK
: return "prexlock";
105 case LOCK_XLOCK
: return "xlock";
106 case LOCK_XLOCKDONE
: return "xlockdone";
107 case LOCK_XLOCKSNAP
: return "xlocksnap";
108 case LOCK_LOCK_XLOCK
: return "lock->xlock";
110 case LOCK_SYNC_LOCK
: return "sync->lock";
111 case LOCK_LOCK_SYNC
: return "lock->sync";
112 case LOCK_REMOTEXLOCK
: return "remote_xlock";
113 case LOCK_EXCL
: return "excl";
114 case LOCK_EXCL_SYNC
: return "excl->sync";
115 case LOCK_EXCL_LOCK
: return "excl->lock";
116 case LOCK_SYNC_EXCL
: return "sync->excl";
117 case LOCK_LOCK_EXCL
: return "lock->excl";
119 case LOCK_XSYN
: return "xsyn";
120 case LOCK_XSYN_EXCL
: return "xsyn->excl";
121 case LOCK_EXCL_XSYN
: return "excl->xsyn";
122 case LOCK_XSYN_SYNC
: return "xsyn->sync";
123 case LOCK_XSYN_LOCK
: return "xsyn->lock";
124 case LOCK_XSYN_MIX
: return "xsyn->mix";
126 case LOCK_SYNC_MIX
: return "sync->mix";
127 case LOCK_SYNC_MIX2
: return "sync->mix(2)";
128 case LOCK_LOCK_TSYN
: return "lock->tsyn";
130 case LOCK_MIX_LOCK
: return "mix->lock";
131 case LOCK_MIX_LOCK2
: return "mix->lock(2)";
132 case LOCK_MIX
: return "mix";
133 case LOCK_MIX_TSYN
: return "mix->tsyn";
135 case LOCK_TSYN_MIX
: return "tsyn->mix";
136 case LOCK_TSYN_LOCK
: return "tsyn->lock";
137 case LOCK_TSYN
: return "tsyn";
139 case LOCK_MIX_SYNC
: return "mix->sync";
140 case LOCK_MIX_SYNC2
: return "mix->sync(2)";
141 case LOCK_EXCL_MIX
: return "excl->mix";
142 case LOCK_MIX_EXCL
: return "mix->excl";
144 case LOCK_PRE_SCAN
: return "*->scan";
145 case LOCK_SCAN
: return "scan";
147 case LOCK_SNAP_SYNC
: return "snap->sync";
149 default: ceph_abort(); return 0;
// Per-lock wait bits. Callers shift these by get_wait_shift() before
// handing them to the parent object's waiter list.
static const uint64_t WAIT_RD          = (1<<0);  // to read
static const uint64_t WAIT_WR          = (1<<1);  // to write
static const uint64_t WAIT_XLOCK       = (1<<2);  // to xlock (** dup: same bit as WAIT_STABLE)
static const uint64_t WAIT_STABLE      = (1<<2);  // for a stable state
static const uint64_t WAIT_REMOTEXLOCK = (1<<3);  // for a remote xlock
// Number of wait bits reserved per lock.
static const int WAIT_BITS             = 4;
// Mask covering every wait bit above.
static const uint64_t WAIT_ALL         = ((1<<WAIT_BITS)-1);
165 // parent (what i lock)
166 MDSCacheObject
*parent
;
174 NEED_RECOVER
= 1 << 1,
180 struct unstable_bits_t
{
181 set
<__s32
> gather_set
; // auth+rep. >= 0 is mds, < 0 is client
184 int num_wrlock
, num_xlock
;
185 MutationRef xlock_by
;
186 client_t xlock_by_client
;
187 client_t excl_client
;
191 gather_set
.empty() &&
194 xlock_by
.get() == NULL
&&
195 xlock_by_client
== -1 &&
199 unstable_bits_t() : num_wrlock(0),
206 mutable std::unique_ptr
<unstable_bits_t
> _unstable
;
208 bool have_more() const { return _unstable
? true : false; }
209 unstable_bits_t
*more() const {
211 _unstable
.reset(new unstable_bits_t
);
212 return _unstable
.get();
214 void try_clear_more() {
215 if (_unstable
&& _unstable
->empty()) {
222 client_t
get_excl_client() const {
223 return have_more() ? more()->excl_client
: -1;
225 void set_excl_client(client_t c
) {
226 if (c
< 0 && !have_more())
227 return; // default is -1
228 more()->excl_client
= c
;
231 SimpleLock(MDSCacheObject
*o
, LockType
*lt
) :
238 virtual ~SimpleLock() {}
240 virtual bool is_scatterlock() const {
243 virtual bool is_locallock() const {
248 MDSCacheObject
*get_parent() { return parent
; }
249 int get_type() const { return type
->type
; }
250 const sm_t
* get_sm() const { return type
->sm
; }
252 int get_wait_shift() const {
253 switch (get_type()) {
254 case CEPH_LOCK_DN
: return 8;
255 case CEPH_LOCK_DVERSION
: return 8 + 1*SimpleLock::WAIT_BITS
;
256 case CEPH_LOCK_IAUTH
: return 8 + 2*SimpleLock::WAIT_BITS
;
257 case CEPH_LOCK_ILINK
: return 8 + 3*SimpleLock::WAIT_BITS
;
258 case CEPH_LOCK_IDFT
: return 8 + 4*SimpleLock::WAIT_BITS
;
259 case CEPH_LOCK_IFILE
: return 8 + 5*SimpleLock::WAIT_BITS
;
260 case CEPH_LOCK_IVERSION
: return 8 + 6*SimpleLock::WAIT_BITS
;
261 case CEPH_LOCK_IXATTR
: return 8 + 7*SimpleLock::WAIT_BITS
;
262 case CEPH_LOCK_ISNAP
: return 8 + 8*SimpleLock::WAIT_BITS
;
263 case CEPH_LOCK_INEST
: return 8 + 9*SimpleLock::WAIT_BITS
;
264 case CEPH_LOCK_IFLOCK
: return 8 +10*SimpleLock::WAIT_BITS
;
265 case CEPH_LOCK_IPOLICY
: return 8 +11*SimpleLock::WAIT_BITS
;
271 int get_cap_shift() const {
272 switch (get_type()) {
273 case CEPH_LOCK_IAUTH
: return CEPH_CAP_SAUTH
;
274 case CEPH_LOCK_ILINK
: return CEPH_CAP_SLINK
;
275 case CEPH_LOCK_IFILE
: return CEPH_CAP_SFILE
;
276 case CEPH_LOCK_IXATTR
: return CEPH_CAP_SXATTR
;
280 int get_cap_mask() const {
281 switch (get_type()) {
282 case CEPH_LOCK_IFILE
: return (1 << CEPH_CAP_FILE_BITS
) - 1;
283 default: return (1 << CEPH_CAP_SIMPLE_BITS
) - 1;
288 bool operator()(const SimpleLock
* l
, const SimpleLock
* r
) const {
289 // first sort by object type (dn < inode)
290 if (!(l
->type
->type
> CEPH_LOCK_DN
) && (r
->type
->type
> CEPH_LOCK_DN
)) return true;
291 if ((l
->type
->type
> CEPH_LOCK_DN
) == (r
->type
->type
> CEPH_LOCK_DN
)) {
292 // then sort by object
293 if (l
->parent
->is_lt(r
->parent
)) return true;
294 if (l
->parent
== r
->parent
) {
295 // then sort by (inode) lock type
296 if (l
->type
->type
< r
->type
->type
) return true;
303 void decode_locked_state(bufferlist
& bl
) {
304 parent
->decode_lock_state(type
->type
, bl
);
306 void encode_locked_state(bufferlist
& bl
) {
307 parent
->encode_lock_state(type
->type
, bl
);
309 void finish_waiters(uint64_t mask
, int r
=0) {
310 parent
->finish_waiting(mask
<< get_wait_shift(), r
);
312 void take_waiting(uint64_t mask
, list
<MDSInternalContextBase
*>& ls
) {
313 parent
->take_waiting(mask
<< get_wait_shift(), ls
);
315 void add_waiter(uint64_t mask
, MDSInternalContextBase
*c
) {
316 parent
->add_waiter((mask
<< get_wait_shift()) | MDSCacheObject::WAIT_ORDERED
, c
);
318 bool is_waiter_for(uint64_t mask
) const {
319 return parent
->is_waiter_for(mask
<< get_wait_shift());
325 int get_state() const { return state
; }
326 int set_state(int s
) {
328 //assert(!is_stable() || gather_set.size() == 0); // gather should be empty in stable states.
331 void set_state_rejoin(int s
, list
<MDSInternalContextBase
*>& waiters
, bool survivor
) {
332 assert(!get_parent()->is_auth());
334 // If lock in the replica object was not in SYNC state when auth mds of the object failed.
335 // Auth mds of the object may take xlock on the lock and change the object when replaying
337 if (!survivor
|| state
!= LOCK_SYNC
)
343 take_waiting(SimpleLock::WAIT_ALL
, waiters
);
346 bool is_stable() const {
347 return get_sm()->states
[state
].next
== 0;
349 bool is_unstable_and_locked() const {
352 return is_rdlocked() || is_wrlocked() || is_xlocked();
354 int get_next_state() {
355 return get_sm()->states
[state
].next
;
359 bool is_sync_and_unlocked() const {
361 get_state() == LOCK_SYNC
&&
370 bool fw_rdlock_to_auth() {
371 return get_sm()->states[state].can_rdlock == FW;
374 bool req_rdlock_from_auth() {
375 return get_sm()->states
[state
].can_rdlock
== REQ
;
379 static set
<int32_t> empty_gather_set
;
381 // int32_t: <0 is client, >=0 is MDS rank
382 const set
<int32_t>& get_gather_set() const {
383 return have_more() ? more()->gather_set
: empty_gather_set
;
387 for (const auto p
: parent
->get_replicas()) {
388 more()->gather_set
.insert(p
.first
);
391 bool is_gathering() const {
392 return have_more() && !more()->gather_set
.empty();
394 bool is_gathering(int32_t i
) const {
395 return have_more() && more()->gather_set
.count(i
);
397 void clear_gather() {
399 more()->gather_set
.clear();
401 void remove_gather(int32_t i
) {
403 more()->gather_set
.erase(i
);
408 virtual bool is_dirty() const { return false; }
409 virtual bool is_stale() const { return false; }
410 virtual bool is_flushing() const { return false; }
411 virtual bool is_flushed() const { return false; }
412 virtual void clear_flushed() { }
415 bool can_lease(client_t client
) const {
416 return get_sm()->states
[state
].can_lease
== ANY
||
417 (get_sm()->states
[state
].can_lease
== AUTH
&& parent
->is_auth()) ||
418 (get_sm()->states
[state
].can_lease
== XCL
&& client
>= 0 && get_xlock_by_client() == client
);
420 bool can_read(client_t client
) const {
421 return get_sm()->states
[state
].can_read
== ANY
||
422 (get_sm()->states
[state
].can_read
== AUTH
&& parent
->is_auth()) ||
423 (get_sm()->states
[state
].can_read
== XCL
&& client
>= 0 && get_xlock_by_client() == client
);
425 bool can_read_projected(client_t client
) const {
426 return get_sm()->states
[state
].can_read_projected
== ANY
||
427 (get_sm()->states
[state
].can_read_projected
== AUTH
&& parent
->is_auth()) ||
428 (get_sm()->states
[state
].can_read_projected
== XCL
&& client
>= 0 && get_xlock_by_client() == client
);
430 bool can_rdlock(client_t client
) const {
431 return get_sm()->states
[state
].can_rdlock
== ANY
||
432 (get_sm()->states
[state
].can_rdlock
== AUTH
&& parent
->is_auth()) ||
433 (get_sm()->states
[state
].can_rdlock
== XCL
&& client
>= 0 && get_xlock_by_client() == client
);
435 bool can_wrlock(client_t client
) const {
436 return get_sm()->states
[state
].can_wrlock
== ANY
||
437 (get_sm()->states
[state
].can_wrlock
== AUTH
&& parent
->is_auth()) ||
438 (get_sm()->states
[state
].can_wrlock
== XCL
&& client
>= 0 && (get_xlock_by_client() == client
||
439 get_excl_client() == client
));
441 bool can_force_wrlock(client_t client
) const {
442 return get_sm()->states
[state
].can_force_wrlock
== ANY
||
443 (get_sm()->states
[state
].can_force_wrlock
== AUTH
&& parent
->is_auth()) ||
444 (get_sm()->states
[state
].can_force_wrlock
== XCL
&& client
>= 0 && (get_xlock_by_client() == client
||
445 get_excl_client() == client
));
447 bool can_xlock(client_t client
) const {
448 return get_sm()->states
[state
].can_xlock
== ANY
||
449 (get_sm()->states
[state
].can_xlock
== AUTH
&& parent
->is_auth()) ||
450 (get_sm()->states
[state
].can_xlock
== XCL
&& client
>= 0 && get_xlock_by_client() == client
);
454 bool is_rdlocked() const { return num_rdlock
> 0; }
457 parent
->get(MDSCacheObject::PIN_LOCK
);
461 assert(num_rdlock
>0);
464 parent
->put(MDSCacheObject::PIN_LOCK
);
467 int get_num_rdlocks() const {
472 void get_wrlock(bool force
=false) {
473 //assert(can_wrlock() || force);
474 if (more()->num_wrlock
== 0)
475 parent
->get(MDSCacheObject::PIN_LOCK
);
476 ++more()->num_wrlock
;
479 --more()->num_wrlock
;
480 if (more()->num_wrlock
== 0) {
481 parent
->put(MDSCacheObject::PIN_LOCK
);
485 bool is_wrlocked() const {
486 return have_more() && more()->num_wrlock
> 0;
488 int get_num_wrlocks() const {
489 return have_more() ? more()->num_wrlock
: 0;
493 void get_xlock(MutationRef who
, client_t client
) {
494 assert(get_xlock_by() == MutationRef());
495 assert(state
== LOCK_XLOCK
|| is_locallock() ||
496 state
== LOCK_LOCK
/* if we are a slave */);
497 parent
->get(MDSCacheObject::PIN_LOCK
);
499 more()->xlock_by
= who
;
500 more()->xlock_by_client
= client
;
502 void set_xlock_done() {
503 assert(more()->xlock_by
);
504 assert(state
== LOCK_XLOCK
|| is_locallock() ||
505 state
== LOCK_LOCK
/* if we are a slave */);
507 state
= LOCK_XLOCKDONE
;
508 more()->xlock_by
.reset();
511 assert(state
== LOCK_XLOCK
|| state
== LOCK_XLOCKDONE
||
512 state
== LOCK_XLOCKSNAP
|| is_locallock() ||
513 state
== LOCK_LOCK
/* if we are a master of a slave */);
515 parent
->put(MDSCacheObject::PIN_LOCK
);
516 if (more()->num_xlock
== 0) {
517 more()->xlock_by
.reset();
518 more()->xlock_by_client
= -1;
522 bool is_xlocked() const {
523 return have_more() && more()->num_xlock
> 0;
525 int get_num_xlocks() const {
526 return have_more() ? more()->num_xlock
: 0;
528 client_t
get_xlock_by_client() const {
529 return have_more() ? more()->xlock_by_client
: -1;
531 bool is_xlocked_by_client(client_t c
) const {
532 return have_more() ? more()->xlock_by_client
== c
: false;
534 MutationRef
get_xlock_by() const {
535 return have_more() ? more()->xlock_by
: MutationRef();
539 bool is_leased() const {
540 return state_flags
& LEASED
;
542 void get_client_lease() {
543 assert(!is_leased());
544 state_flags
|= LEASED
;
546 void put_client_lease() {
548 state_flags
&= ~LEASED
;
551 bool is_used() const {
552 return is_xlocked() || is_rdlocked() || is_wrlocked() || is_leased();
555 bool needs_recover() const {
556 return state_flags
& NEED_RECOVER
;
558 void mark_need_recover() {
559 state_flags
|= NEED_RECOVER
;
561 void clear_need_recover() {
562 state_flags
&= ~NEED_RECOVER
;
566 void encode(bufferlist
& bl
) const {
567 ENCODE_START(2, 2, bl
);
570 ::encode(more()->gather_set
, bl
);
572 ::encode(empty_gather_set
, bl
);
575 void decode(bufferlist::iterator
& p
) {
581 more()->gather_set
.swap(g
);
584 void encode_state_for_replica(bufferlist
& bl
) const {
585 __s16 s
= get_replica_state();
588 void decode_state(bufferlist::iterator
& p
, bool is_new
=true) {
594 void decode_state_rejoin(bufferlist::iterator
& p
, list
<MDSInternalContextBase
*>& waiters
, bool survivor
) {
597 set_state_rejoin(s
, waiters
, survivor
);
602 bool is_loner_mode() const {
603 return get_sm()->states
[state
].loner
;
605 int gcaps_allowed_ever() const {
606 return parent
->is_auth() ? get_sm()->allowed_ever_auth
: get_sm()->allowed_ever_replica
;
608 int gcaps_allowed(int who
, int s
=-1) const {
609 if (s
< 0) s
= state
;
610 if (parent
->is_auth()) {
611 if (get_xlock_by_client() >= 0 && who
== CAP_XLOCKER
)
612 return get_sm()->states
[s
].xlocker_caps
| get_sm()->states
[s
].caps
; // xlocker always gets more
613 else if (is_loner_mode() && who
== CAP_ANY
)
614 return get_sm()->states
[s
].caps
;
616 return get_sm()->states
[s
].loner_caps
| get_sm()->states
[s
].caps
; // loner always gets more
618 return get_sm()->states
[s
].replica_caps
;
620 int gcaps_careful() const {
621 if (get_num_wrlocks())
622 return get_sm()->careful
;
627 int gcaps_xlocker_mask(client_t client
) const {
628 if (client
== get_xlock_by_client())
629 return type
->type
== CEPH_LOCK_IFILE
? 0xf : (CEPH_CAP_GSHARED
|CEPH_CAP_GEXCL
);
633 // simplelock specifics
634 int get_replica_state() const {
635 return get_sm()->states
[state
].replica_state
;
637 void export_twiddle() {
639 state
= get_replica_state();
643 * called on first replica creation.
645 void replicate_relax() {
646 assert(parent
->is_auth());
647 assert(!parent
->is_replicated());
648 if (state
== LOCK_LOCK
&& !is_used())
651 bool remove_replica(int from
) {
652 if (is_gathering(from
)) {
659 bool do_import(int from
, int to
) {
666 if (!is_stable() && !is_gathering())
671 void _print(ostream
& out
) const {
672 out
<< get_lock_type_name(get_type()) << " ";
673 out
<< get_state_name(get_state());
674 if (!get_gather_set().empty())
675 out
<< " g=" << get_gather_set();
679 out
<< " r=" << get_num_rdlocks();
681 out
<< " w=" << get_num_wrlocks();
683 out
<< " x=" << get_num_xlocks();
685 out
<< " by " << get_xlock_by();
695 * Write bare values (caller must be in an object section)
696 * to formatter, or nothing if is_sync_and_unlocked.
698 void dump(Formatter
*f
) const;
700 virtual void print(ostream
& out
) const {
706 WRITE_CLASS_ENCODER(SimpleLock
)
708 inline ostream
& operator<<(ostream
& out
, const SimpleLock
& l
)