]> git.proxmox.com Git - ceph.git/blame - ceph/src/mds/ScatterLock.h
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / mds / ScatterLock.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15
16#ifndef CEPH_SCATTERLOCK_H
17#define CEPH_SCATTERLOCK_H
18
19#include "SimpleLock.h"
20
11fdf7f2
TL
21#include "MDSContext.h"
22
7c673cae 23class ScatterLock : public SimpleLock {
7c673cae 24public:
b32b8144
FG
25 ScatterLock(MDSCacheObject *o, LockType *lt) :
26 SimpleLock(o, lt) {}
7c673cae 27 ~ScatterLock() override {
11fdf7f2 28 ceph_assert(!_more);
7c673cae
FG
29 }
30
31 bool is_scatterlock() const override {
32 return true;
33 }
34
35 bool is_sync_and_unlocked() const {
36 return
37 SimpleLock::is_sync_and_unlocked() &&
38 !is_dirty() &&
39 !is_flushing();
40 }
41
42 bool can_scatter_pin(client_t loner) {
43 /*
44 LOCK : NOT okay because it can MIX and force replicas to journal something
45 TSYN : also not okay for same reason
46 EXCL : also not okay
47
48 MIX : okay, replica can stall before sending AC_SYNCACK
49 SYNC : okay, replica can stall before sending AC_MIXACK or AC_LOCKACK
50 */
51 return
52 get_state() == LOCK_SYNC ||
53 get_state() == LOCK_MIX;
54 }
55
11fdf7f2 56 void set_xlock_snap_sync(MDSContext *c)
7c673cae 57 {
11fdf7f2
TL
58 ceph_assert(get_type() == CEPH_LOCK_IFILE);
59 ceph_assert(state == LOCK_XLOCK || state == LOCK_XLOCKDONE);
7c673cae
FG
60 state = LOCK_XLOCKSNAP;
61 add_waiter(WAIT_STABLE, c);
62 }
63
64 xlist<ScatterLock*>::item *get_updated_item() { return &more()->item_updated; }
65
66 utime_t get_update_stamp() {
b32b8144 67 return _more ? _more->update_stamp : utime_t();
7c673cae
FG
68 }
69
70 void set_update_stamp(utime_t t) { more()->update_stamp = t; }
71
72 void set_scatter_wanted() {
b32b8144 73 state_flags |= SCATTER_WANTED;
7c673cae
FG
74 }
75 void set_unscatter_wanted() {
b32b8144 76 state_flags |= UNSCATTER_WANTED;
7c673cae
FG
77 }
78 void clear_scatter_wanted() {
b32b8144 79 state_flags &= ~SCATTER_WANTED;
7c673cae
FG
80 }
81 void clear_unscatter_wanted() {
b32b8144 82 state_flags &= ~UNSCATTER_WANTED;
7c673cae
FG
83 }
84 bool get_scatter_wanted() const {
b32b8144 85 return state_flags & SCATTER_WANTED;
7c673cae
FG
86 }
87 bool get_unscatter_wanted() const {
b32b8144 88 return state_flags & UNSCATTER_WANTED;
7c673cae
FG
89 }
90
91 bool is_dirty() const override {
b32b8144 92 return state_flags & DIRTY;
7c673cae
FG
93 }
94 bool is_flushing() const override {
b32b8144 95 return state_flags & FLUSHING;
7c673cae
FG
96 }
97 bool is_flushed() const override {
b32b8144 98 return state_flags & FLUSHED;
7c673cae
FG
99 }
100 bool is_dirty_or_flushing() const {
b32b8144 101 return is_dirty() || is_flushing();
7c673cae
FG
102 }
103
104 void mark_dirty() {
105 if (!is_dirty()) {
106 if (!is_flushing())
107 parent->get(MDSCacheObject::PIN_DIRTYSCATTERED);
108 set_dirty();
109 }
110 }
111 void start_flush() {
112 if (is_dirty()) {
113 set_flushing();
114 clear_dirty();
115 }
116 }
117 void finish_flush() {
118 if (is_flushing()) {
119 clear_flushing();
120 set_flushed();
121 if (!is_dirty()) {
122 parent->put(MDSCacheObject::PIN_DIRTYSCATTERED);
123 parent->clear_dirty_scattered(get_type());
124 }
125 }
126 }
91327a77
AA
127 void clear_flushed() override {
128 state_flags &= ~FLUSHED;
129 }
7c673cae
FG
130 void remove_dirty() {
131 start_flush();
132 finish_flush();
91327a77 133 clear_flushed();
7c673cae
FG
134 }
135
136 void infer_state_from_strong_rejoin(int rstate, bool locktoo) {
137 if (rstate == LOCK_MIX ||
138 rstate == LOCK_MIX_LOCK || // replica still has wrlocks?
139 rstate == LOCK_MIX_SYNC)
140 state = LOCK_MIX;
141 else if (locktoo && rstate == LOCK_LOCK)
142 state = LOCK_LOCK;
143 }
144
f67539c2 145 void encode_state_for_rejoin(ceph::buffer::list& bl, int rep) {
7c673cae
FG
146 __s16 s = get_replica_state();
147 if (is_gathering(rep)) {
148 // the recovering mds may hold rejoined wrlocks
149 if (state == LOCK_MIX_SYNC)
150 s = LOCK_MIX_SYNC;
151 else
152 s = LOCK_MIX_LOCK;
153 }
154
b32b8144
FG
155 // If there is a recovering mds who replcated an object when it failed
156 // and scatterlock in the object was in MIX state, It's possible that
157 // the recovering mds needs to take wrlock on the scatterlock when it
158 // replays unsafe requests. So this mds should delay taking rdlock on
159 // the scatterlock until the recovering mds finishes replaying unsafe.
160 // Otherwise unsafe requests may get replayed after current request.
161 //
162 // For example:
163 // The recovering mds is auth mds of a dirfrag, this mds is auth mds
11fdf7f2 164 // of corresponding inode. when 'rm -rf' the direcotry, this mds should
b32b8144
FG
165 // delay the rmdir request until the recovering mds has replayed unlink
166 // requests.
7c673cae 167 if (s == LOCK_MIX || s == LOCK_MIX_LOCK || s == LOCK_MIX_SYNC)
b32b8144 168 mark_need_recover();
7c673cae 169
11fdf7f2
TL
170 using ceph::encode;
171 encode(s, bl);
7c673cae
FG
172 }
173
f67539c2 174 void decode_state_rejoin(ceph::buffer::list::const_iterator& p, MDSContext::vec& waiters, bool survivor) {
b32b8144 175 SimpleLock::decode_state_rejoin(p, waiters, survivor);
7c673cae
FG
176 if (is_flushing()) {
177 set_dirty();
178 clear_flushing();
179 }
180 }
181
182 bool remove_replica(int from, bool rejoin) {
183 if (rejoin &&
184 (state == LOCK_MIX ||
185 state == LOCK_MIX_SYNC ||
186 state == LOCK_MIX_LOCK2 ||
187 state == LOCK_MIX_TSYN ||
188 state == LOCK_MIX_EXCL))
189 return false;
190 return SimpleLock::remove_replica(from);
191 }
192
f67539c2 193 void print(std::ostream& out) const override {
7c673cae
FG
194 out << "(";
195 _print(out);
196 if (is_dirty())
197 out << " dirty";
198 if (is_flushing())
199 out << " flushing";
200 if (is_flushed())
201 out << " flushed";
202 if (get_scatter_wanted())
203 out << " scatter_wanted";
204 out << ")";
205 }
206
207private:
9f95a23c
TL
208 struct more_bits_t {
209 xlist<ScatterLock*>::item item_updated;
210 utime_t update_stamp;
211
212 explicit more_bits_t(ScatterLock *lock) :
213 item_updated(lock)
214 {}
215 };
216
217 more_bits_t *more() {
218 if (!_more)
219 _more.reset(new more_bits_t(this));
220 return _more.get();
221 }
222
223 enum {
224 SCATTER_WANTED = 1 << 8,
225 UNSCATTER_WANTED = 1 << 9,
226 DIRTY = 1 << 10,
227 FLUSHING = 1 << 11,
228 FLUSHED = 1 << 12,
229 };
230
7c673cae 231 void set_flushing() {
b32b8144 232 state_flags |= FLUSHING;
7c673cae
FG
233 }
234 void clear_flushing() {
b32b8144 235 state_flags &= ~FLUSHING;
7c673cae
FG
236 }
237 void set_flushed() {
b32b8144 238 state_flags |= FLUSHED;
7c673cae
FG
239 }
240 void set_dirty() {
b32b8144 241 state_flags |= DIRTY;
7c673cae
FG
242 }
243 void clear_dirty() {
b32b8144
FG
244 state_flags &= ~DIRTY;
245 if (_more) {
246 _more->item_updated.remove_myself();
247 _more.reset();
7c673cae
FG
248 }
249 }
9f95a23c
TL
250
251 mutable std::unique_ptr<more_bits_t> _more;
7c673cae
FG
252};
253
254#endif