]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | ||
16 | #ifndef CEPH_CAPABILITY_H | |
17 | #define CEPH_CAPABILITY_H | |
18 | ||
19 | #include "include/counter.h" | |
20 | #include "include/buffer_fwd.h" | |
21 | #include "include/xlist.h" | |
22 | ||
23 | #include "common/config.h" | |
24 | ||
25 | #include "mdstypes.h" | |
26 | ||
27 | /* | |
28 | ||
29 | Capability protocol notes. | |
30 | ||
31 | - two types of cap events from mds -> client: | |
32 | - cap "issue" in a MClientReply, or an MClientCaps IMPORT op. | |
33 | - cap "update" (revocation or grant) .. an MClientCaps message. | |
34 | - if client has cap, the mds should have it too. | |
35 | ||
36 | - if client has no dirty data, it can release the cap without waiting for an mds ack. | |
37 | - client may thus get a cap _update_ and not have the cap. ignore it. | |
38 | ||
39 | - mds should track seq of last issue. any release | |
40 | attempt will only succeed if the client has seen the latest. | |
41 | ||
42 | - an UPDATE updates the client's issued caps, wanted, etc. it may also flush dirty metadata. | |
43 | - 'caps' are which caps the client retains. | |
44 | - if 0, client wishes to release the cap | |
45 | - 'wanted' is which caps the client wants. | |
46 | - 'dirty' is which metadata is to be written. | |
47 | - client gets a FLUSH_ACK with matching dirty flags indicating which caps were written. | |
48 | ||
49 | - a FLUSH_ACK acks a FLUSH. | |
50 | - 'dirty' is the _original_ FLUSH's dirty (i.e., which metadata was written back) | |
51 | - 'seq' is the _original_ FLUSH's seq. | |
52 | - 'caps' is the _original_ FLUSH's caps (not actually important) | |
53 | - client can conclude that (dirty & ~caps) bits were successfully cleaned. | |
54 | ||
55 | - a FLUSHSNAP flushes snapshot metadata. | |
56 | - 'dirty' indicates which caps were dirty, if any. | |
57 | - mds writes metadata. if dirty!=0, replies with FLUSHSNAP_ACK. | |
58 | ||
59 | */ | |
60 | ||
// Forward declaration: this header refers to CInode only through pointers.
61 | class CInode; | |
62 | ||
// Forward declaration of ceph::Formatter (presumably the type taken by the
// dump(Formatter*) members declared in this header -- confirm).
63 | namespace ceph { | |
64 | class Formatter; | |
65 | } | |
66 | ||
67 | class Capability : public Counter<Capability> { | |
68 | public: | |
69 | struct Export { | |
70 | int64_t cap_id; | |
71 | int32_t wanted; | |
72 | int32_t issued; | |
73 | int32_t pending; | |
74 | snapid_t client_follows; | |
75 | ceph_seq_t seq; | |
76 | ceph_seq_t mseq; | |
77 | utime_t last_issue_stamp; | |
78 | Export() : cap_id(0), wanted(0), issued(0), pending(0), seq(0), mseq(0) {} | |
79 | Export(int64_t id, int w, int i, int p, snapid_t cf, ceph_seq_t s, ceph_seq_t m, utime_t lis) : | |
80 | cap_id(id), wanted(w), issued(i), pending(p), client_follows(cf), | |
81 | seq(s), mseq(m), last_issue_stamp(lis) {} | |
82 | void encode(bufferlist &bl) const; | |
83 | void decode(bufferlist::iterator &p); | |
84 | void dump(Formatter *f) const; | |
85 | static void generate_test_instances(list<Export*>& ls); | |
86 | }; | |
87 | struct Import { | |
88 | int64_t cap_id; | |
89 | ceph_seq_t issue_seq; | |
90 | ceph_seq_t mseq; | |
91 | Import() : cap_id(0), issue_seq(0), mseq(0) {} | |
92 | Import(int64_t i, ceph_seq_t s, ceph_seq_t m) : cap_id(i), issue_seq(s), mseq(m) {} | |
93 | void encode(bufferlist &bl) const; | |
94 | void decode(bufferlist::iterator &p); | |
95 | void dump(Formatter *f) const; | |
96 | }; | |
97 | struct revoke_info { | |
98 | __u32 before; | |
99 | ceph_seq_t seq, last_issue; | |
100 | revoke_info() : before(0), seq(0), last_issue(0) {} | |
101 | revoke_info(__u32 b, ceph_seq_t s, ceph_seq_t li) : before(b), seq(s), last_issue(li) {} | |
102 | void encode(bufferlist& bl) const; | |
103 | void decode(bufferlist::iterator& bl); | |
104 | void dump(Formatter *f) const; | |
105 | static void generate_test_instances(list<revoke_info*>& ls); | |
106 | }; | |
107 | ||
108 | ||
109 | const static unsigned STATE_STALE = (1<<0); | |
110 | const static unsigned STATE_NEW = (1<<1); | |
111 | const static unsigned STATE_IMPORTING = (1<<2); | |
112 | ||
113 | ||
114 | Capability(CInode *i = NULL, uint64_t id = 0, client_t c = 0) : | |
115 | client_follows(0), client_xattr_version(0), | |
116 | client_inline_version(0), | |
117 | last_rbytes(0), last_rsize(0), | |
118 | item_session_caps(this), item_snaprealm_caps(this), | |
119 | item_revoking_caps(this), item_client_revoking_caps(this), | |
120 | inode(i), client(c), | |
121 | cap_id(id), | |
122 | _wanted(0), num_revoke_warnings(0), | |
123 | _pending(0), _issued(0), | |
124 | last_sent(0), | |
125 | last_issue(0), | |
126 | mseq(0), | |
127 | suppress(0), state(0) { | |
128 | } | |
129 | Capability(const Capability& other); // no copying | |
130 | ||
131 | const Capability& operator=(const Capability& other); // no copying | |
132 | ||
133 | int pending() { return _pending; } | |
134 | int issued() { return _issued; } | |
135 | bool is_null() { return !_pending && _revokes.empty(); } | |
136 | ||
137 | ceph_seq_t issue(unsigned c) { | |
138 | if (_pending & ~c) { | |
139 | // revoking (and maybe adding) bits. note caps prior to this revocation | |
140 | _revokes.push_back(revoke_info(_pending, last_sent, last_issue)); | |
141 | _pending = c; | |
142 | _issued |= c; | |
143 | } else if (~_pending & c) { | |
144 | // adding bits only. remove obsolete revocations? | |
145 | _pending |= c; | |
146 | _issued |= c; | |
147 | // drop old _revokes with no bits we don't have | |
148 | while (!_revokes.empty() && | |
149 | (_revokes.back().before & ~_pending) == 0) | |
150 | _revokes.pop_back(); | |
151 | } else { | |
152 | // no change. | |
153 | assert(_pending == c); | |
154 | } | |
155 | //last_issue = | |
156 | ++last_sent; | |
157 | return last_sent; | |
158 | } | |
159 | ceph_seq_t issue_norevoke(unsigned c) { | |
160 | _pending |= c; | |
161 | _issued |= c; | |
162 | //check_rdcaps_list(); | |
163 | ++last_sent; | |
164 | return last_sent; | |
165 | } | |
166 | void _calc_issued() { | |
167 | _issued = _pending; | |
168 | for (list<revoke_info>::iterator p = _revokes.begin(); p != _revokes.end(); ++p) | |
169 | _issued |= p->before; | |
170 | } | |
171 | void confirm_receipt(ceph_seq_t seq, unsigned caps) { | |
172 | if (seq == last_sent) { | |
173 | _revokes.clear(); | |
174 | _issued = caps; | |
175 | // don't add bits | |
176 | _pending &= caps; | |
177 | } else { | |
178 | // can i forget any revocations? | |
179 | while (!_revokes.empty() && _revokes.front().seq < seq) | |
180 | _revokes.pop_front(); | |
181 | if (!_revokes.empty()) { | |
182 | if (_revokes.front().seq == seq) | |
183 | _revokes.begin()->before = caps; | |
184 | _calc_issued(); | |
185 | } else { | |
186 | // seq < last_sent | |
187 | _issued = caps | _pending; | |
188 | } | |
189 | } | |
190 | ||
191 | if (_issued == _pending) { | |
192 | item_revoking_caps.remove_myself(); | |
193 | item_client_revoking_caps.remove_myself(); | |
194 | } | |
195 | //check_rdcaps_list(); | |
196 | } | |
197 | // we may get a release racing with revocations, which means our revokes will be ignored | |
198 | // by the client. clean them out of our _revokes history so we don't wait on them. | |
199 | void clean_revoke_from(ceph_seq_t li) { | |
200 | bool changed = false; | |
201 | while (!_revokes.empty() && _revokes.front().last_issue <= li) { | |
202 | _revokes.pop_front(); | |
203 | changed = true; | |
204 | } | |
205 | if (changed) { | |
206 | _calc_issued(); | |
207 | if (_issued == _pending) { | |
208 | item_revoking_caps.remove_myself(); | |
209 | item_client_revoking_caps.remove_myself(); | |
210 | } | |
211 | } | |
212 | } | |
213 | ceph_seq_t get_mseq() { return mseq; } | |
214 | void inc_mseq() { mseq++; } | |
215 | ||
216 | ceph_seq_t get_last_sent() { return last_sent; } | |
217 | utime_t get_last_issue_stamp() { return last_issue_stamp; } | |
218 | utime_t get_last_revoke_stamp() { return last_revoke_stamp; } | |
219 | ||
220 | void set_last_issue() { last_issue = last_sent; } | |
221 | void set_last_issue_stamp(utime_t t) { last_issue_stamp = t; } | |
222 | void set_last_revoke_stamp(utime_t t) { last_revoke_stamp = t; } | |
223 | void reset_num_revoke_warnings() { num_revoke_warnings = 0; } | |
224 | void inc_num_revoke_warnings() { ++num_revoke_warnings; } | |
225 | unsigned get_num_revoke_warnings() { return num_revoke_warnings; } | |
226 | ||
227 | void set_cap_id(uint64_t i) { cap_id = i; } | |
228 | uint64_t get_cap_id() { return cap_id; } | |
229 | ||
230 | //ceph_seq_t get_last_issue() { return last_issue; } | |
231 | ||
232 | bool is_suppress() { return suppress > 0; } | |
233 | void inc_suppress() { suppress++; } | |
234 | void dec_suppress() { suppress--; } | |
235 | ||
236 | bool is_stale() { return state & STATE_STALE; } | |
237 | void mark_stale() { state |= STATE_STALE; } | |
238 | void clear_stale() { state &= ~STATE_STALE; } | |
239 | bool is_new() { return state & STATE_NEW; } | |
240 | void mark_new() { state |= STATE_NEW; } | |
241 | void clear_new() { state &= ~STATE_NEW; } | |
242 | bool is_importing() { return state & STATE_IMPORTING; } | |
243 | void mark_importing() { state |= STATE_IMPORTING; } | |
244 | void clear_importing() { state &= ~STATE_IMPORTING; } | |
245 | ||
246 | CInode *get_inode() { return inode; } | |
247 | client_t get_client() const { return client; } | |
248 | ||
249 | // caps this client wants to hold | |
250 | int wanted() { return _wanted; } | |
251 | void set_wanted(int w) { | |
252 | _wanted = w; | |
253 | //check_rdcaps_list(); | |
254 | } | |
255 | ||
256 | void inc_last_seq() { last_sent++; } | |
257 | ceph_seq_t get_last_seq() { return last_sent; } | |
258 | ceph_seq_t get_last_issue() { return last_issue; } | |
259 | ||
260 | void reset_seq() { | |
261 | last_sent = 0; | |
262 | last_issue = 0; | |
263 | } | |
264 | ||
265 | // -- exports -- | |
266 | Export make_export() { | |
267 | return Export(cap_id, _wanted, issued(), pending(), client_follows, last_sent, mseq+1, last_issue_stamp); | |
268 | } | |
269 | void merge(Export& other, bool auth_cap) { | |
270 | if (!is_stale()) { | |
271 | // issued + pending | |
272 | int newpending = other.pending | pending(); | |
273 | if (other.issued & ~newpending) | |
274 | issue(other.issued | newpending); | |
275 | else | |
276 | issue(newpending); | |
277 | last_issue_stamp = other.last_issue_stamp; | |
278 | } else { | |
279 | issue(CEPH_CAP_PIN); | |
280 | } | |
281 | ||
282 | client_follows = other.client_follows; | |
283 | ||
284 | // wanted | |
285 | _wanted = _wanted | other.wanted; | |
286 | if (auth_cap) | |
287 | mseq = other.mseq; | |
288 | } | |
289 | void merge(int otherwanted, int otherissued) { | |
290 | if (!is_stale()) { | |
291 | // issued + pending | |
292 | int newpending = pending(); | |
293 | if (otherissued & ~newpending) | |
294 | issue(otherissued | newpending); | |
295 | else | |
296 | issue(newpending); | |
297 | } else { | |
298 | issue(CEPH_CAP_PIN); | |
299 | } | |
300 | ||
301 | // wanted | |
302 | _wanted = _wanted | otherwanted; | |
303 | } | |
304 | ||
305 | void revoke() { | |
306 | if (pending() & ~CEPH_CAP_PIN) | |
307 | issue(CEPH_CAP_PIN); | |
308 | confirm_receipt(last_sent, CEPH_CAP_PIN); | |
309 | } | |
310 | ||
311 | // serializers | |
312 | void encode(bufferlist &bl) const; | |
313 | void decode(bufferlist::iterator &bl); | |
314 | void dump(Formatter *f) const; | |
315 | static void generate_test_instances(list<Capability*>& ls); | |
316 | ||
317 | snapid_t client_follows; | |
318 | version_t client_xattr_version; | |
319 | version_t client_inline_version; | |
320 | int64_t last_rbytes; | |
321 | int64_t last_rsize; | |
322 | ||
323 | xlist<Capability*>::item item_session_caps; | |
324 | xlist<Capability*>::item item_snaprealm_caps; | |
325 | xlist<Capability*>::item item_revoking_caps; | |
326 | xlist<Capability*>::item item_client_revoking_caps; | |
327 | ||
328 | private: | |
329 | CInode *inode; | |
330 | client_t client; | |
331 | ||
332 | uint64_t cap_id; | |
333 | ||
334 | __u32 _wanted; // what the client wants (ideally) | |
335 | ||
336 | utime_t last_issue_stamp; | |
337 | utime_t last_revoke_stamp; | |
338 | unsigned num_revoke_warnings; | |
339 | ||
340 | // track in-flight caps -------------- | |
341 | // - add new caps to _pending | |
342 | // - track revocations in _revokes list | |
343 | __u32 _pending, _issued; | |
344 | list<revoke_info> _revokes; | |
345 | ||
346 | ceph_seq_t last_sent; | |
347 | ceph_seq_t last_issue; | |
348 | ceph_seq_t mseq; | |
349 | ||
350 | int suppress; | |
351 | unsigned state; | |
352 | }; | |
353 | ||
// Hook the four serializable types into the encoding framework.
// NOTE(review): WRITE_CLASS_ENCODER is defined outside this file; presumably it
// generates free encode()/decode() wrappers that forward to the member
// functions declared above -- confirm against the encoding header.
354 | WRITE_CLASS_ENCODER(Capability::Export) | |
355 | WRITE_CLASS_ENCODER(Capability::Import) | |
356 | WRITE_CLASS_ENCODER(Capability::revoke_info) | |
357 | WRITE_CLASS_ENCODER(Capability) | |
358 | ||
359 | ||
360 | ||
361 | #endif |