]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | ||
16 | #ifndef CEPH_CAPABILITY_H | |
17 | #define CEPH_CAPABILITY_H | |
18 | ||
7c673cae | 19 | #include "include/buffer_fwd.h" |
94b18763 FG |
20 | #include "include/counter.h" |
21 | #include "include/mempool.h" | |
7c673cae | 22 | #include "include/xlist.h" |
9f95a23c | 23 | #include "include/elist.h" |
7c673cae FG |
24 | |
25 | #include "common/config.h" | |
26 | ||
27 | #include "mdstypes.h" | |
28 | ||
94b18763 | 29 | |
7c673cae FG |
30 | /* |
31 | ||
32 | Capability protocol notes. | |
33 | ||
34 | - two types of cap events from mds -> client: | |
35 | - cap "issue" in a MClientReply, or an MClientCaps IMPORT op. | |
36 | - cap "update" (revocation or grant) .. an MClientCaps message. | |
37 | - if client has cap, the mds should have it too. | |
38 | ||
39 | - if client has no dirty data, it can release it without waiting for an mds ack. | |
40 | - client may thus get a cap _update_ and not have the cap. ignore it. | |
41 | ||
42 | - mds should track seq of last issue. any release | |
43 | attempt will only succeed if the client has seen the latest. | |
44 | ||
45 | - an UPDATE updates the client's issued caps, wanted, etc. it may also flush dirty metadata. | |
46 | - 'caps' are which caps the client retains. | |
47 | - if 0, client wishes to release the cap | |
48 | - 'wanted' is which caps the client wants. | |
49 | - 'dirty' is which metadata is to be written. | |
50 | - client gets a FLUSH_ACK with matching dirty flags indicating which caps were written. | |
51 | ||
52 | - a FLUSH_ACK acks a FLUSH. | |
53 | - 'dirty' is the _original_ FLUSH's dirty (i.e., which metadata was written back) | |
54 | - 'seq' is the _original_ FLUSH's seq. | |
55 | - 'caps' is the _original_ FLUSH's caps (not actually important) | |
56 | - client can conclude that (dirty & ~caps) bits were successfully cleaned. | |
57 | ||
58 | - a FLUSHSNAP flushes snapshot metadata. | |
59 | - 'dirty' indicates which caps were dirty, if any. | |
60 | - mds writes metadata. if dirty!=0, replies with FLUSHSNAP_ACK. | |
61 | ||
62 | */ | |
63 | ||
64 | class CInode; | |
a8e16298 | 65 | class Session; |
9f95a23c | 66 | class MDLockCache; |
7c673cae FG |
67 | |
68 | namespace ceph { | |
69 | class Formatter; | |
70 | } | |
71 | ||
72 | class Capability : public Counter<Capability> { | |
73 | public: | |
94b18763 FG |
74 | MEMPOOL_CLASS_HELPERS(); |
75 | ||
7c673cae | 76 | struct Export { |
9f95a23c TL |
77 | Export() {} |
78 | Export(int64_t id, int w, int i, int p, snapid_t cf, | |
79 | ceph_seq_t s, ceph_seq_t m, utime_t lis, unsigned st) : | |
80 | cap_id(id), wanted(w), issued(i), pending(p), client_follows(cf), | |
81 | seq(s), mseq(m), last_issue_stamp(lis), state(st) {} | |
f67539c2 TL |
82 | void encode(ceph::buffer::list &bl) const; |
83 | void decode(ceph::buffer::list::const_iterator &p); | |
84 | void dump(ceph::Formatter *f) const; | |
9f95a23c TL |
85 | static void generate_test_instances(std::list<Export*>& ls); |
86 | ||
11fdf7f2 TL |
87 | int64_t cap_id = 0; |
88 | int32_t wanted = 0; | |
89 | int32_t issued = 0; | |
90 | int32_t pending = 0; | |
7c673cae | 91 | snapid_t client_follows; |
11fdf7f2 TL |
92 | ceph_seq_t seq = 0; |
93 | ceph_seq_t mseq = 0; | |
7c673cae | 94 | utime_t last_issue_stamp; |
11fdf7f2 | 95 | uint32_t state = 0; |
7c673cae FG |
96 | }; |
97 | struct Import { | |
9f95a23c | 98 | Import() {} |
7c673cae | 99 | Import(int64_t i, ceph_seq_t s, ceph_seq_t m) : cap_id(i), issue_seq(s), mseq(m) {} |
f67539c2 TL |
100 | void encode(ceph::buffer::list &bl) const; |
101 | void decode(ceph::buffer::list::const_iterator &p); | |
102 | void dump(ceph::Formatter *f) const; | |
9f95a23c TL |
103 | |
104 | int64_t cap_id = 0; | |
105 | ceph_seq_t issue_seq = 0; | |
106 | ceph_seq_t mseq = 0; | |
7c673cae FG |
107 | }; |
108 | struct revoke_info { | |
9f95a23c | 109 | revoke_info() {} |
7c673cae | 110 | revoke_info(__u32 b, ceph_seq_t s, ceph_seq_t li) : before(b), seq(s), last_issue(li) {} |
f67539c2 TL |
111 | void encode(ceph::buffer::list& bl) const; |
112 | void decode(ceph::buffer::list::const_iterator& bl); | |
113 | void dump(ceph::Formatter *f) const; | |
9f95a23c TL |
114 | static void generate_test_instances(std::list<revoke_info*>& ls); |
115 | ||
116 | __u32 before = 0; | |
117 | ceph_seq_t seq = 0; | |
118 | ceph_seq_t last_issue = 0; | |
7c673cae FG |
119 | }; |
120 | ||
a8e16298 | 121 | const static unsigned STATE_NOTABLE = (1<<0); |
7c673cae FG |
122 | const static unsigned STATE_NEW = (1<<1); |
123 | const static unsigned STATE_IMPORTING = (1<<2); | |
11fdf7f2 | 124 | const static unsigned STATE_NEEDSNAPFLUSH = (1<<3); |
a8e16298 | 125 | const static unsigned STATE_CLIENTWRITEABLE = (1<<4); |
11fdf7f2 TL |
126 | const static unsigned STATE_NOINLINE = (1<<5); |
127 | const static unsigned STATE_NOPOOLNS = (1<<6); | |
128 | const static unsigned STATE_NOQUOTA = (1<<7); | |
129 | ||
130 | const static unsigned MASK_STATE_EXPORTED = | |
131 | (STATE_CLIENTWRITEABLE | STATE_NOINLINE | STATE_NOPOOLNS | STATE_NOQUOTA); | |
7c673cae | 132 | |
a8e16298 | 133 | Capability(CInode *i=nullptr, Session *s=nullptr, uint64_t id=0); |
11fdf7f2 | 134 | Capability(const Capability& other) = delete; |
7c673cae | 135 | |
11fdf7f2 | 136 | const Capability& operator=(const Capability& other) = delete; |
7c673cae | 137 | |
a8e16298 | 138 | int pending() const { |
494da23a | 139 | return _pending; |
a8e16298 TL |
140 | } |
141 | int issued() const { | |
494da23a | 142 | return _issued; |
a8e16298 | 143 | } |
494da23a TL |
144 | int revoking() const { |
145 | return _issued & ~_pending; | |
146 | } | |
147 | ceph_seq_t issue(unsigned c, bool reval=false) { | |
148 | if (reval) | |
149 | revalidate(); | |
a8e16298 | 150 | |
7c673cae FG |
151 | if (_pending & ~c) { |
152 | // revoking (and maybe adding) bits. note caps prior to this revocation | |
94b18763 | 153 | _revokes.emplace_back(_pending, last_sent, last_issue); |
7c673cae FG |
154 | _pending = c; |
155 | _issued |= c; | |
a8e16298 TL |
156 | if (!is_notable()) |
157 | mark_notable(); | |
7c673cae FG |
158 | } else if (~_pending & c) { |
159 | // adding bits only. remove obsolete revocations? | |
160 | _pending |= c; | |
161 | _issued |= c; | |
162 | // drop old _revokes with no bits we don't have | |
163 | while (!_revokes.empty() && | |
164 | (_revokes.back().before & ~_pending) == 0) | |
165 | _revokes.pop_back(); | |
166 | } else { | |
167 | // no change. | |
11fdf7f2 | 168 | ceph_assert(_pending == c); |
7c673cae FG |
169 | } |
170 | //last_issue = | |
a8e16298 | 171 | inc_last_seq(); |
7c673cae FG |
172 | return last_sent; |
173 | } | |
494da23a TL |
174 | ceph_seq_t issue_norevoke(unsigned c, bool reval=false) { |
175 | if (reval) | |
176 | revalidate(); | |
a8e16298 | 177 | |
7c673cae FG |
178 | _pending |= c; |
179 | _issued |= c; | |
494da23a TL |
180 | clear_new(); |
181 | ||
a8e16298 | 182 | inc_last_seq(); |
7c673cae FG |
183 | return last_sent; |
184 | } | |
9f95a23c TL |
185 | int confirm_receipt(ceph_seq_t seq, unsigned caps) { |
186 | int was_revoking = (_issued & ~_pending); | |
7c673cae FG |
187 | if (seq == last_sent) { |
188 | _revokes.clear(); | |
189 | _issued = caps; | |
190 | // don't add bits | |
191 | _pending &= caps; | |
192 | } else { | |
193 | // can i forget any revocations? | |
194 | while (!_revokes.empty() && _revokes.front().seq < seq) | |
195 | _revokes.pop_front(); | |
196 | if (!_revokes.empty()) { | |
197 | if (_revokes.front().seq == seq) | |
198 | _revokes.begin()->before = caps; | |
a8e16298 | 199 | calc_issued(); |
7c673cae FG |
200 | } else { |
201 | // seq < last_sent | |
202 | _issued = caps | _pending; | |
203 | } | |
204 | } | |
205 | ||
a8e16298 | 206 | if (was_revoking && _issued == _pending) { |
7c673cae FG |
207 | item_revoking_caps.remove_myself(); |
208 | item_client_revoking_caps.remove_myself(); | |
a8e16298 | 209 | maybe_clear_notable(); |
7c673cae | 210 | } |
9f95a23c | 211 | return was_revoking & ~_issued; // return revoked |
7c673cae FG |
212 | } |
213 | // we may get a release racing with revocations, which means our revokes will be ignored | |
214 | // by the client. clean them out of our _revokes history so we don't wait on them. | |
215 | void clean_revoke_from(ceph_seq_t li) { | |
216 | bool changed = false; | |
217 | while (!_revokes.empty() && _revokes.front().last_issue <= li) { | |
218 | _revokes.pop_front(); | |
219 | changed = true; | |
220 | } | |
221 | if (changed) { | |
a8e16298 TL |
222 | bool was_revoking = (_issued & ~_pending); |
223 | calc_issued(); | |
224 | if (was_revoking && _issued == _pending) { | |
7c673cae FG |
225 | item_revoking_caps.remove_myself(); |
226 | item_client_revoking_caps.remove_myself(); | |
a8e16298 | 227 | maybe_clear_notable(); |
7c673cae FG |
228 | } |
229 | } | |
230 | } | |
11fdf7f2 | 231 | ceph_seq_t get_mseq() const { return mseq; } |
7c673cae FG |
232 | void inc_mseq() { mseq++; } |
233 | ||
a8e16298 TL |
234 | utime_t get_last_issue_stamp() const { return last_issue_stamp; } |
235 | utime_t get_last_revoke_stamp() const { return last_revoke_stamp; } | |
7c673cae FG |
236 | |
237 | void set_last_issue() { last_issue = last_sent; } | |
238 | void set_last_issue_stamp(utime_t t) { last_issue_stamp = t; } | |
239 | void set_last_revoke_stamp(utime_t t) { last_revoke_stamp = t; } | |
240 | void reset_num_revoke_warnings() { num_revoke_warnings = 0; } | |
241 | void inc_num_revoke_warnings() { ++num_revoke_warnings; } | |
11fdf7f2 | 242 | unsigned get_num_revoke_warnings() const { return num_revoke_warnings; } |
7c673cae FG |
243 | |
244 | void set_cap_id(uint64_t i) { cap_id = i; } | |
11fdf7f2 | 245 | uint64_t get_cap_id() const { return cap_id; } |
7c673cae FG |
246 | |
247 | //ceph_seq_t get_last_issue() { return last_issue; } | |
248 | ||
11fdf7f2 | 249 | bool is_suppress() const { return suppress > 0; } |
7c673cae FG |
250 | void inc_suppress() { suppress++; } |
251 | void dec_suppress() { suppress--; } | |
252 | ||
a8e16298 TL |
253 | static bool is_wanted_notable(int wanted) { |
254 | return wanted & (CEPH_CAP_ANY_WR|CEPH_CAP_FILE_WR|CEPH_CAP_FILE_RD); | |
255 | } | |
f91f0fd5 TL |
256 | bool is_wanted_notable() const { |
257 | return is_wanted_notable(wanted()); | |
258 | } | |
a8e16298 TL |
259 | bool is_notable() const { return state & STATE_NOTABLE; } |
260 | ||
261 | bool is_stale() const; | |
494da23a | 262 | bool is_valid() const; |
a8e16298 | 263 | bool is_new() const { return state & STATE_NEW; } |
7c673cae FG |
264 | void mark_new() { state |= STATE_NEW; } |
265 | void clear_new() { state &= ~STATE_NEW; } | |
11fdf7f2 | 266 | bool is_importing() const { return state & STATE_IMPORTING; } |
7c673cae FG |
267 | void mark_importing() { state |= STATE_IMPORTING; } |
268 | void clear_importing() { state &= ~STATE_IMPORTING; } | |
11fdf7f2 TL |
269 | bool need_snapflush() const { return state & STATE_NEEDSNAPFLUSH; } |
270 | void mark_needsnapflush() { state |= STATE_NEEDSNAPFLUSH; } | |
271 | void clear_needsnapflush() { state &= ~STATE_NEEDSNAPFLUSH; } | |
7c673cae | 272 | |
a8e16298 TL |
273 | bool is_clientwriteable() const { return state & STATE_CLIENTWRITEABLE; } |
274 | void mark_clientwriteable() { | |
275 | if (!is_clientwriteable()) { | |
276 | state |= STATE_CLIENTWRITEABLE; | |
277 | if (!is_notable()) | |
278 | mark_notable(); | |
279 | } | |
280 | } | |
281 | void clear_clientwriteable() { | |
282 | if (is_clientwriteable()) { | |
283 | state &= ~STATE_CLIENTWRITEABLE; | |
284 | maybe_clear_notable(); | |
285 | } | |
286 | } | |
287 | ||
11fdf7f2 TL |
288 | bool is_noinline() const { return state & STATE_NOINLINE; } |
289 | bool is_nopoolns() const { return state & STATE_NOPOOLNS; } | |
290 | bool is_noquota() const { return state & STATE_NOQUOTA; } | |
291 | ||
a8e16298 TL |
292 | CInode *get_inode() const { return inode; } |
293 | Session *get_session() const { return session; } | |
294 | client_t get_client() const; | |
7c673cae FG |
295 | |
296 | // caps this client wants to hold | |
a8e16298 TL |
297 | int wanted() const { return _wanted; } |
298 | void set_wanted(int w); | |
7c673cae FG |
299 | |
300 | void inc_last_seq() { last_sent++; } | |
a8e16298 | 301 | ceph_seq_t get_last_seq() const { |
a8e16298 TL |
302 | return last_sent; |
303 | } | |
304 | ceph_seq_t get_last_issue() const { return last_issue; } | |
7c673cae FG |
305 | |
306 | void reset_seq() { | |
307 | last_sent = 0; | |
308 | last_issue = 0; | |
309 | } | |
310 | ||
311 | // -- exports -- | |
a8e16298 | 312 | Export make_export() const { |
11fdf7f2 | 313 | return Export(cap_id, wanted(), issued(), pending(), client_follows, get_last_seq(), mseq+1, last_issue_stamp, state); |
7c673cae | 314 | } |
28e407b8 | 315 | void merge(const Export& other, bool auth_cap) { |
494da23a TL |
316 | // issued + pending |
317 | int newpending = other.pending | pending(); | |
318 | if (other.issued & ~newpending) | |
319 | issue(other.issued | newpending); | |
320 | else | |
321 | issue(newpending); | |
322 | last_issue_stamp = other.last_issue_stamp; | |
7c673cae FG |
323 | |
324 | client_follows = other.client_follows; | |
325 | ||
11fdf7f2 TL |
326 | state |= other.state & MASK_STATE_EXPORTED; |
327 | if ((other.state & STATE_CLIENTWRITEABLE) && !is_notable()) | |
328 | mark_notable(); | |
329 | ||
7c673cae | 330 | // wanted |
a8e16298 | 331 | set_wanted(wanted() | other.wanted); |
7c673cae FG |
332 | if (auth_cap) |
333 | mseq = other.mseq; | |
334 | } | |
335 | void merge(int otherwanted, int otherissued) { | |
494da23a TL |
336 | // issued + pending |
337 | int newpending = pending(); | |
338 | if (otherissued & ~newpending) | |
339 | issue(otherissued | newpending); | |
340 | else | |
341 | issue(newpending); | |
7c673cae FG |
342 | |
343 | // wanted | |
a8e16298 | 344 | set_wanted(wanted() | otherwanted); |
7c673cae FG |
345 | } |
346 | ||
9f95a23c | 347 | int revoke() { |
494da23a | 348 | if (revoking()) |
9f95a23c TL |
349 | return confirm_receipt(last_sent, pending()); |
350 | return 0; | |
7c673cae FG |
351 | } |
352 | ||
353 | // serializers | |
f67539c2 TL |
354 | void encode(ceph::buffer::list &bl) const; |
355 | void decode(ceph::buffer::list::const_iterator &bl); | |
356 | void dump(ceph::Formatter *f) const; | |
9f95a23c | 357 | static void generate_test_instances(std::list<Capability*>& ls); |
f67539c2 | 358 | |
9f95a23c TL |
359 | snapid_t client_follows = 0; |
360 | version_t client_xattr_version = 0; | |
361 | version_t client_inline_version = 0; | |
362 | int64_t last_rbytes = 0; | |
363 | int64_t last_rsize = 0; | |
7c673cae FG |
364 | |
365 | xlist<Capability*>::item item_session_caps; | |
366 | xlist<Capability*>::item item_snaprealm_caps; | |
367 | xlist<Capability*>::item item_revoking_caps; | |
368 | xlist<Capability*>::item item_client_revoking_caps; | |
369 | ||
9f95a23c TL |
370 | elist<MDLockCache*> lock_caches; |
371 | int get_lock_cache_allowed() const { return lock_cache_allowed; } | |
372 | void set_lock_cache_allowed(int c) { lock_cache_allowed |= c; } | |
373 | void clear_lock_cache_allowed(int c) { lock_cache_allowed &= ~c; } | |
374 | ||
7c673cae | 375 | private: |
9f95a23c TL |
376 | void calc_issued() { |
377 | _issued = _pending; | |
378 | for (const auto &r : _revokes) { | |
379 | _issued |= r.before; | |
380 | } | |
381 | } | |
382 | ||
383 | void revalidate(); | |
384 | ||
385 | void mark_notable(); | |
386 | void maybe_clear_notable(); | |
387 | ||
7c673cae | 388 | CInode *inode; |
a8e16298 | 389 | Session *session; |
7c673cae FG |
390 | |
391 | uint64_t cap_id; | |
a8e16298 | 392 | uint32_t cap_gen; |
7c673cae | 393 | |
9f95a23c | 394 | __u32 _wanted = 0; // what the client wants (ideally) |
7c673cae FG |
395 | |
396 | utime_t last_issue_stamp; | |
397 | utime_t last_revoke_stamp; | |
9f95a23c | 398 | unsigned num_revoke_warnings = 0; |
7c673cae FG |
399 | |
400 | // track in-flight caps -------------- | |
401 | // - add new caps to _pending | |
402 | // - track revocations in _revokes list | |
9f95a23c | 403 | __u32 _pending = 0, _issued = 0; |
94b18763 | 404 | mempool::mds_co::list<revoke_info> _revokes; |
7c673cae | 405 | |
9f95a23c TL |
406 | ceph_seq_t last_sent = 0; |
407 | ceph_seq_t last_issue = 0; | |
408 | ceph_seq_t mseq = 0; | |
7c673cae | 409 | |
9f95a23c TL |
410 | int suppress = 0; |
411 | unsigned state = 0; | |
a8e16298 | 412 | |
9f95a23c | 413 | int lock_cache_allowed = 0; |
7c673cae FG |
414 | }; |
415 | ||
// Encoder registration for the serializable types declared in this header.
// NOTE(review): WRITE_CLASS_ENCODER is defined elsewhere; presumably it
// generates the free encode()/decode() wrappers that forward to each type's
// member functions -- confirm against its definition.
WRITE_CLASS_ENCODER(Capability::Export)
WRITE_CLASS_ENCODER(Capability::Import)
WRITE_CLASS_ENCODER(Capability::revoke_info)
WRITE_CLASS_ENCODER(Capability)
420 | ||
421 | ||
422 | ||
423 | #endif |