]> git.proxmox.com Git - ceph.git/blob - ceph/src/mds/SimpleLock.h
import 15.2.0 Octopus source
[ceph.git] / ceph / src / mds / SimpleLock.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15
16 #ifndef CEPH_SIMPLELOCK_H
17 #define CEPH_SIMPLELOCK_H
18
19 #include <boost/intrusive_ptr.hpp>
20
21 #include "MDSCacheObject.h"
22 #include "MDSContext.h"
23
24 // -- lock types --
25 // see CEPH_LOCK_*
26
27 extern "C" {
28 #include "locks.h"
29 }
30
31 #define CAP_ANY 0
32 #define CAP_LONER 1
33 #define CAP_XLOCKER 2
34
35 struct MDLockCache;
36 struct MDLockCacheItem;
37 struct MutationImpl;
38 typedef boost::intrusive_ptr<MutationImpl> MutationRef;
39
40 struct LockType {
41 explicit LockType(int t) : type(t) {
42 switch (type) {
43 case CEPH_LOCK_DN:
44 case CEPH_LOCK_IAUTH:
45 case CEPH_LOCK_ILINK:
46 case CEPH_LOCK_IXATTR:
47 case CEPH_LOCK_ISNAP:
48 case CEPH_LOCK_IFLOCK:
49 case CEPH_LOCK_IPOLICY:
50 sm = &sm_simplelock;
51 break;
52 case CEPH_LOCK_IDFT:
53 case CEPH_LOCK_INEST:
54 sm = &sm_scatterlock;
55 break;
56 case CEPH_LOCK_IFILE:
57 sm = &sm_filelock;
58 break;
59 case CEPH_LOCK_DVERSION:
60 case CEPH_LOCK_IVERSION:
61 sm = &sm_locallock;
62 break;
63 default:
64 sm = 0;
65 }
66 }
67
68 int type;
69 const sm_t *sm;
70 };
71
72
73 class SimpleLock {
74 public:
75 // waiting
// Wait-mask bits.  WAIT_XLOCK and WAIT_STABLE occupy the same bit
// (WAIT_XLOCK is the one marked as the duplicate).
static const uint64_t WAIT_RD          = uint64_t(1) << 0;  // to read
static const uint64_t WAIT_WR          = uint64_t(1) << 1;  // to write
static const uint64_t WAIT_XLOCK       = uint64_t(1) << 2;  // to xlock   (** dup)
static const uint64_t WAIT_STABLE      = uint64_t(1) << 2;  // for a stable state
static const uint64_t WAIT_REMOTEXLOCK = uint64_t(1) << 3;  // for a remote xlock
// Number of wait bits defined above, and the mask that covers all of them.
static const int WAIT_BITS = 4;
static const uint64_t WAIT_ALL = (uint64_t(1) << WAIT_BITS) - 1;
83
84 static std::string_view get_state_name(int n) {
85 switch (n) {
86 case LOCK_UNDEF: return "UNDEF";
87 case LOCK_SYNC: return "sync";
88 case LOCK_LOCK: return "lock";
89
90 case LOCK_PREXLOCK: return "prexlock";
91 case LOCK_XLOCK: return "xlock";
92 case LOCK_XLOCKDONE: return "xlockdone";
93 case LOCK_XLOCKSNAP: return "xlocksnap";
94 case LOCK_LOCK_XLOCK: return "lock->xlock";
95
96 case LOCK_SYNC_LOCK: return "sync->lock";
97 case LOCK_LOCK_SYNC: return "lock->sync";
98 case LOCK_REMOTEXLOCK: return "remote_xlock";
99 case LOCK_EXCL: return "excl";
100 case LOCK_EXCL_SYNC: return "excl->sync";
101 case LOCK_EXCL_LOCK: return "excl->lock";
102 case LOCK_SYNC_EXCL: return "sync->excl";
103 case LOCK_LOCK_EXCL: return "lock->excl";
104
105 case LOCK_XSYN: return "xsyn";
106 case LOCK_XSYN_EXCL: return "xsyn->excl";
107 case LOCK_EXCL_XSYN: return "excl->xsyn";
108 case LOCK_XSYN_SYNC: return "xsyn->sync";
109 case LOCK_XSYN_LOCK: return "xsyn->lock";
110 case LOCK_XSYN_MIX: return "xsyn->mix";
111
112 case LOCK_SYNC_MIX: return "sync->mix";
113 case LOCK_SYNC_MIX2: return "sync->mix(2)";
114 case LOCK_LOCK_TSYN: return "lock->tsyn";
115
116 case LOCK_MIX_LOCK: return "mix->lock";
117 case LOCK_MIX_LOCK2: return "mix->lock(2)";
118 case LOCK_MIX: return "mix";
119 case LOCK_MIX_TSYN: return "mix->tsyn";
120
121 case LOCK_TSYN_MIX: return "tsyn->mix";
122 case LOCK_TSYN_LOCK: return "tsyn->lock";
123 case LOCK_TSYN: return "tsyn";
124
125 case LOCK_MIX_SYNC: return "mix->sync";
126 case LOCK_MIX_SYNC2: return "mix->sync(2)";
127 case LOCK_EXCL_MIX: return "excl->mix";
128 case LOCK_MIX_EXCL: return "mix->excl";
129
130 case LOCK_PRE_SCAN: return "*->scan";
131 case LOCK_SCAN: return "scan";
132
133 case LOCK_SNAP_SYNC: return "snap->sync";
134
135 default: ceph_abort(); return std::string_view();
136 }
137 }
138
139 static std::string_view get_lock_type_name(int t) {
140 switch (t) {
141 case CEPH_LOCK_DN: return "dn";
142 case CEPH_LOCK_DVERSION: return "dversion";
143 case CEPH_LOCK_IVERSION: return "iversion";
144 case CEPH_LOCK_IFILE: return "ifile";
145 case CEPH_LOCK_IAUTH: return "iauth";
146 case CEPH_LOCK_ILINK: return "ilink";
147 case CEPH_LOCK_IDFT: return "idft";
148 case CEPH_LOCK_INEST: return "inest";
149 case CEPH_LOCK_IXATTR: return "ixattr";
150 case CEPH_LOCK_ISNAP: return "isnap";
151 case CEPH_LOCK_IFLOCK: return "iflock";
152 case CEPH_LOCK_IPOLICY: return "ipolicy";
153 default: return "unknown";
154 }
155 }
156
157 static std::string_view get_lock_action_name(int a) {
158 switch (a) {
159 case LOCK_AC_SYNC: return "sync";
160 case LOCK_AC_MIX: return "mix";
161 case LOCK_AC_LOCK: return "lock";
162 case LOCK_AC_LOCKFLUSHED: return "lockflushed";
163
164 case LOCK_AC_SYNCACK: return "syncack";
165 case LOCK_AC_MIXACK: return "mixack";
166 case LOCK_AC_LOCKACK: return "lockack";
167
168 case LOCK_AC_REQSCATTER: return "reqscatter";
169 case LOCK_AC_REQUNSCATTER: return "requnscatter";
170 case LOCK_AC_NUDGE: return "nudge";
171 case LOCK_AC_REQRDLOCK: return "reqrdlock";
172 default: return "???";
173 }
174 }
175
176 SimpleLock(MDSCacheObject *o, LockType *lt) :
177 type(lt),
178 parent(o)
179 {}
180 virtual ~SimpleLock() {}
181
182 client_t get_excl_client() const {
183 return have_more() ? more()->excl_client : -1;
184 }
185 void set_excl_client(client_t c) {
186 if (c < 0 && !have_more())
187 return; // default is -1
188 more()->excl_client = c;
189 }
190
191 virtual bool is_scatterlock() const {
192 return false;
193 }
194 virtual bool is_locallock() const {
195 return false;
196 }
197
198 // parent
199 MDSCacheObject *get_parent() { return parent; }
200 int get_type() const { return type->type; }
201 const sm_t* get_sm() const { return type->sm; }
202
203 int get_wait_shift() const;
204 int get_cap_shift() const;
205 int get_cap_mask() const;
206
207 void decode_locked_state(const bufferlist& bl) {
208 parent->decode_lock_state(type->type, bl);
209 }
210 void encode_locked_state(bufferlist& bl) {
211 parent->encode_lock_state(type->type, bl);
212 }
213 void finish_waiters(uint64_t mask, int r=0) {
214 parent->finish_waiting(mask << get_wait_shift(), r);
215 }
216 void take_waiting(uint64_t mask, MDSContext::vec& ls) {
217 parent->take_waiting(mask << get_wait_shift(), ls);
218 }
219 void add_waiter(uint64_t mask, MDSContext *c) {
220 parent->add_waiter((mask << get_wait_shift()) | MDSCacheObject::WAIT_ORDERED, c);
221 }
222 bool is_waiter_for(uint64_t mask) const {
223 return parent->is_waiter_for(mask << get_wait_shift());
224 }
225
226 bool is_cached() const {
227 return state_flags & CACHED;
228 }
229 void add_cache(MDLockCacheItem& item);
230 void remove_cache(MDLockCacheItem& item);
231 MDLockCache* get_first_cache();
232
233 // state
234 int get_state() const { return state; }
235 int set_state(int s) {
236 state = s;
237 //assert(!is_stable() || gather_set.size() == 0); // gather should be empty in stable states.
238 return s;
239 }
240 void set_state_rejoin(int s, MDSContext::vec& waiters, bool survivor) {
241 ceph_assert(!get_parent()->is_auth());
242
243 // If lock in the replica object was not in SYNC state when auth mds of the object failed.
244 // Auth mds of the object may take xlock on the lock and change the object when replaying
245 // unsafe requests.
246 if (!survivor || state != LOCK_SYNC)
247 mark_need_recover();
248
249 state = s;
250
251 if (is_stable())
252 take_waiting(SimpleLock::WAIT_ALL, waiters);
253 }
254
255 bool is_stable() const {
256 return get_sm()->states[state].next == 0;
257 }
258 bool is_unstable_and_locked() const {
259 if (is_stable())
260 return false;
261 return is_rdlocked() || is_wrlocked() || is_xlocked();
262 }
263 int get_next_state() {
264 return get_sm()->states[state].next;
265 }
266
267 bool is_sync_and_unlocked() const {
268 return
269 get_state() == LOCK_SYNC &&
270 !is_rdlocked() &&
271 !is_leased() &&
272 !is_wrlocked() &&
273 !is_xlocked();
274 }
275
276 /*
277 bool fw_rdlock_to_auth() {
278 return get_sm()->states[state].can_rdlock == FW;
279 }
280 */
281 bool req_rdlock_from_auth() {
282 return get_sm()->states[state].can_rdlock == REQ;
283 }
284
285 // gather set
286 static set<int32_t> empty_gather_set;
287
288 // int32_t: <0 is client, >=0 is MDS rank
289 const set<int32_t>& get_gather_set() const {
290 return have_more() ? more()->gather_set : empty_gather_set;
291 }
292
293 void init_gather() {
294 for (const auto p : parent->get_replicas()) {
295 more()->gather_set.insert(p.first);
296 }
297 }
298 bool is_gathering() const {
299 return have_more() && !more()->gather_set.empty();
300 }
301 bool is_gathering(int32_t i) const {
302 return have_more() && more()->gather_set.count(i);
303 }
304 void clear_gather() {
305 if (have_more())
306 more()->gather_set.clear();
307 }
308 void remove_gather(int32_t i) {
309 if (have_more())
310 more()->gather_set.erase(i);
311 }
312
313 virtual bool is_dirty() const { return false; }
314 virtual bool is_stale() const { return false; }
315 virtual bool is_flushing() const { return false; }
316 virtual bool is_flushed() const { return false; }
317 virtual void clear_flushed() { }
318
319 // can_*
320 bool can_lease(client_t client) const {
321 return get_sm()->states[state].can_lease == ANY ||
322 (get_sm()->states[state].can_lease == AUTH && parent->is_auth()) ||
323 (get_sm()->states[state].can_lease == XCL && client >= 0 && get_xlock_by_client() == client);
324 }
325 bool can_read(client_t client) const {
326 return get_sm()->states[state].can_read == ANY ||
327 (get_sm()->states[state].can_read == AUTH && parent->is_auth()) ||
328 (get_sm()->states[state].can_read == XCL && client >= 0 && get_xlock_by_client() == client);
329 }
330 bool can_read_projected(client_t client) const {
331 return get_sm()->states[state].can_read_projected == ANY ||
332 (get_sm()->states[state].can_read_projected == AUTH && parent->is_auth()) ||
333 (get_sm()->states[state].can_read_projected == XCL && client >= 0 && get_xlock_by_client() == client);
334 }
335 bool can_rdlock(client_t client) const {
336 return get_sm()->states[state].can_rdlock == ANY ||
337 (get_sm()->states[state].can_rdlock == AUTH && parent->is_auth()) ||
338 (get_sm()->states[state].can_rdlock == XCL && client >= 0 && get_xlock_by_client() == client);
339 }
340 bool can_wrlock(client_t client) const {
341 return get_sm()->states[state].can_wrlock == ANY ||
342 (get_sm()->states[state].can_wrlock == AUTH && parent->is_auth()) ||
343 (get_sm()->states[state].can_wrlock == XCL && client >= 0 && (get_xlock_by_client() == client ||
344 get_excl_client() == client));
345 }
346 bool can_force_wrlock(client_t client) const {
347 return get_sm()->states[state].can_force_wrlock == ANY ||
348 (get_sm()->states[state].can_force_wrlock == AUTH && parent->is_auth()) ||
349 (get_sm()->states[state].can_force_wrlock == XCL && client >= 0 && (get_xlock_by_client() == client ||
350 get_excl_client() == client));
351 }
352 bool can_xlock(client_t client) const {
353 return get_sm()->states[state].can_xlock == ANY ||
354 (get_sm()->states[state].can_xlock == AUTH && parent->is_auth()) ||
355 (get_sm()->states[state].can_xlock == XCL && client >= 0 && get_xlock_by_client() == client);
356 }
357
358 // rdlock
359 bool is_rdlocked() const { return num_rdlock > 0; }
360 int get_rdlock() {
361 if (!num_rdlock)
362 parent->get(MDSCacheObject::PIN_LOCK);
363 return ++num_rdlock;
364 }
365 int put_rdlock() {
366 ceph_assert(num_rdlock>0);
367 --num_rdlock;
368 if (num_rdlock == 0)
369 parent->put(MDSCacheObject::PIN_LOCK);
370 return num_rdlock;
371 }
372 int get_num_rdlocks() const {
373 return num_rdlock;
374 }
375
376 // wrlock
377 void get_wrlock(bool force=false) {
378 //assert(can_wrlock() || force);
379 if (more()->num_wrlock == 0)
380 parent->get(MDSCacheObject::PIN_LOCK);
381 ++more()->num_wrlock;
382 }
383 void put_wrlock() {
384 --more()->num_wrlock;
385 if (more()->num_wrlock == 0) {
386 parent->put(MDSCacheObject::PIN_LOCK);
387 try_clear_more();
388 }
389 }
390 bool is_wrlocked() const {
391 return have_more() && more()->num_wrlock > 0;
392 }
393 int get_num_wrlocks() const {
394 return have_more() ? more()->num_wrlock : 0;
395 }
396
397 // xlock
398 void get_xlock(MutationRef who, client_t client) {
399 ceph_assert(get_xlock_by() == MutationRef());
400 ceph_assert(state == LOCK_XLOCK || is_locallock() ||
401 state == LOCK_LOCK /* if we are a slave */);
402 parent->get(MDSCacheObject::PIN_LOCK);
403 more()->num_xlock++;
404 more()->xlock_by = who;
405 more()->xlock_by_client = client;
406 }
407 void set_xlock_done() {
408 ceph_assert(more()->xlock_by);
409 ceph_assert(state == LOCK_XLOCK || is_locallock() ||
410 state == LOCK_LOCK /* if we are a slave */);
411 if (!is_locallock())
412 state = LOCK_XLOCKDONE;
413 more()->xlock_by.reset();
414 }
415 void put_xlock() {
416 ceph_assert(state == LOCK_XLOCK || state == LOCK_XLOCKDONE ||
417 state == LOCK_XLOCKSNAP || state == LOCK_LOCK_XLOCK ||
418 state == LOCK_LOCK || /* if we are a master of a slave */
419 is_locallock());
420 --more()->num_xlock;
421 parent->put(MDSCacheObject::PIN_LOCK);
422 if (more()->num_xlock == 0) {
423 more()->xlock_by.reset();
424 more()->xlock_by_client = -1;
425 try_clear_more();
426 }
427 }
428 bool is_xlocked() const {
429 return have_more() && more()->num_xlock > 0;
430 }
431 int get_num_xlocks() const {
432 return have_more() ? more()->num_xlock : 0;
433 }
434 client_t get_xlock_by_client() const {
435 return have_more() ? more()->xlock_by_client : -1;
436 }
437 bool is_xlocked_by_client(client_t c) const {
438 return have_more() ? more()->xlock_by_client == c : false;
439 }
440 MutationRef get_xlock_by() const {
441 return have_more() ? more()->xlock_by : MutationRef();
442 }
443
444 // lease
445 bool is_leased() const {
446 return state_flags & LEASED;
447 }
448 void get_client_lease() {
449 ceph_assert(!is_leased());
450 state_flags |= LEASED;
451 }
452 void put_client_lease() {
453 ceph_assert(is_leased());
454 state_flags &= ~LEASED;
455 }
456
457 bool needs_recover() const {
458 return state_flags & NEED_RECOVER;
459 }
460 void mark_need_recover() {
461 state_flags |= NEED_RECOVER;
462 }
463 void clear_need_recover() {
464 state_flags &= ~NEED_RECOVER;
465 }
466
467 // encode/decode
468 void encode(bufferlist& bl) const {
469 ENCODE_START(2, 2, bl);
470 encode(state, bl);
471 if (have_more())
472 encode(more()->gather_set, bl);
473 else
474 encode(empty_gather_set, bl);
475 ENCODE_FINISH(bl);
476 }
477 void decode(bufferlist::const_iterator& p) {
478 DECODE_START(2, p);
479 decode(state, p);
480 set<__s32> g;
481 decode(g, p);
482 if (!g.empty())
483 more()->gather_set.swap(g);
484 DECODE_FINISH(p);
485 }
486 void encode_state_for_replica(bufferlist& bl) const {
487 __s16 s = get_replica_state();
488 using ceph::encode;
489 encode(s, bl);
490 }
491 void decode_state(bufferlist::const_iterator& p, bool is_new=true) {
492 using ceph::decode;
493 __s16 s;
494 decode(s, p);
495 if (is_new)
496 state = s;
497 }
498 void decode_state_rejoin(bufferlist::const_iterator& p, MDSContext::vec& waiters, bool survivor) {
499 __s16 s;
500 using ceph::decode;
501 decode(s, p);
502 set_state_rejoin(s, waiters, survivor);
503 }
504
505 // caps
506 bool is_loner_mode() const {
507 return get_sm()->states[state].loner;
508 }
509 int gcaps_allowed_ever() const {
510 return parent->is_auth() ? get_sm()->allowed_ever_auth : get_sm()->allowed_ever_replica;
511 }
512 int gcaps_allowed(int who, int s=-1) const {
513 if (s < 0) s = state;
514 if (parent->is_auth()) {
515 if (get_xlock_by_client() >= 0 && who == CAP_XLOCKER)
516 return get_sm()->states[s].xlocker_caps | get_sm()->states[s].caps; // xlocker always gets more
517 else if (is_loner_mode() && who == CAP_ANY)
518 return get_sm()->states[s].caps;
519 else
520 return get_sm()->states[s].loner_caps | get_sm()->states[s].caps; // loner always gets more
521 } else
522 return get_sm()->states[s].replica_caps;
523 }
524 int gcaps_careful() const {
525 if (get_num_wrlocks())
526 return get_sm()->careful;
527 return 0;
528 }
529
530 int gcaps_xlocker_mask(client_t client) const {
531 if (client == get_xlock_by_client())
532 return type->type == CEPH_LOCK_IFILE ? 0xf : (CEPH_CAP_GSHARED|CEPH_CAP_GEXCL);
533 return 0;
534 }
535
536 // simplelock specifics
537 int get_replica_state() const {
538 return get_sm()->states[state].replica_state;
539 }
540 void export_twiddle() {
541 clear_gather();
542 state = get_replica_state();
543 }
544
545 bool remove_replica(int from) {
546 if (is_gathering(from)) {
547 remove_gather(from);
548 if (!is_gathering())
549 return true;
550 }
551 return false;
552 }
553 bool do_import(int from, int to) {
554 if (!is_stable()) {
555 remove_gather(from);
556 remove_gather(to);
557 if (!is_gathering())
558 return true;
559 }
560 if (!is_stable() && !is_gathering())
561 return true;
562 return false;
563 }
564
565 void _print(ostream& out) const {
566 out << get_lock_type_name(get_type()) << " ";
567 out << get_state_name(get_state());
568 if (!get_gather_set().empty())
569 out << " g=" << get_gather_set();
570 if (is_leased())
571 out << " l";
572 if (is_rdlocked())
573 out << " r=" << get_num_rdlocks();
574 if (is_wrlocked())
575 out << " w=" << get_num_wrlocks();
576 if (is_xlocked()) {
577 out << " x=" << get_num_xlocks();
578 if (get_xlock_by())
579 out << " by " << get_xlock_by();
580 }
581 /*if (is_stable())
582 out << " stable";
583 else
584 out << " unstable";
585 */
586 }
587
588 /**
589 * Write bare values (caller must be in an object section)
590 * to formatter, or nothing if is_sync_and_unlocked.
591 */
592 void dump(Formatter *f) const;
593
594 virtual void print(ostream& out) const {
595 out << "(";
596 _print(out);
597 out << ")";
598 }
599
600 LockType *type;
601
602 protected:
603 // parent (what i lock)
604 MDSCacheObject *parent;
605
606 // lock state
607 __s16 state = LOCK_SYNC;
608 __s16 state_flags = 0;
609
610 enum {
611 LEASED = 1 << 0,
612 NEED_RECOVER = 1 << 1,
613 CACHED = 1 << 2,
614 };
615
616 private:
617 // XXX not in mempool
618 struct unstable_bits_t {
619 unstable_bits_t();
620
621 bool empty() {
622 return
623 gather_set.empty() &&
624 num_wrlock == 0 &&
625 num_xlock == 0 &&
626 xlock_by.get() == NULL &&
627 xlock_by_client == -1 &&
628 excl_client == -1 &&
629 lock_caches.empty();
630 }
631
632 set<__s32> gather_set; // auth+rep. >= 0 is mds, < 0 is client
633
634 // local state
635 int num_wrlock = 0, num_xlock = 0;
636 MutationRef xlock_by;
637 client_t xlock_by_client = -1;
638 client_t excl_client = -1;
639
640 elist<MDLockCacheItem*> lock_caches;
641 };
642
643 bool have_more() const { return _unstable ? true : false; }
644 unstable_bits_t *more() const {
645 if (!_unstable)
646 _unstable.reset(new unstable_bits_t);
647 return _unstable.get();
648 }
649 void try_clear_more() {
650 if (_unstable && _unstable->empty()) {
651 _unstable.reset();
652 }
653 }
654
655 int num_rdlock = 0;
656
657 mutable std::unique_ptr<unstable_bits_t> _unstable;
658 };
659 WRITE_CLASS_ENCODER(SimpleLock)
660
661 inline ostream& operator<<(ostream& out, const SimpleLock& l)
662 {
663 l.print(out);
664 return out;
665 }
666 #endif