]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #ifndef CEPH_MDS_MUTATION_H | |
16 | #define CEPH_MDS_MUTATION_H | |
17 | ||
18 | #include "include/interval_set.h" | |
19 | #include "include/elist.h" | |
20 | #include "include/filepath.h" | |
21 | ||
22 | #include "MDSCacheObject.h" | |
11fdf7f2 | 23 | #include "MDSContext.h" |
7c673cae FG |
24 | |
25 | #include "SimpleLock.h" | |
26 | #include "Capability.h" | |
27 | ||
28 | #include "common/TrackedOp.h" | |
11fdf7f2 TL |
29 | #include "messages/MClientRequest.h" |
30 | #include "messages/MMDSSlaveRequest.h" | |
7c673cae FG |
31 | |
32 | class LogSegment; | |
33 | class Capability; | |
34 | class CInode; | |
35 | class CDir; | |
36 | class CDentry; | |
37 | class Session; | |
38 | class ScatterLock; | |
11fdf7f2 | 39 | struct sr_t; |
7c673cae FG |
40 | |
41 | struct MutationImpl : public TrackedOp { | |
42 | metareqid_t reqid; | |
43 | __u32 attempt = 0; // which attempt for this request | |
44 | LogSegment *ls = nullptr; // the log segment i'm committing to | |
45 | ||
46 | private: | |
47 | utime_t mds_stamp; ///< mds-local timestamp (real time) | |
48 | utime_t op_stamp; ///< op timestamp (client provided) | |
49 | ||
50 | public: | |
51 | // flag mutation as slave | |
52 | mds_rank_t slave_to_mds = MDS_RANK_NONE; // this is a slave request if >= 0. | |
53 | ||
54 | // -- my pins and locks -- | |
55 | // cache pins (so things don't expire) | |
56 | set< MDSCacheObject* > pins; | |
11fdf7f2 | 57 | CInode* stickydiri = nullptr; |
7c673cae FG |
58 | |
59 | // auth pins | |
60 | map<MDSCacheObject*, mds_rank_t> remote_auth_pins; | |
11fdf7f2 | 61 | set<MDSCacheObject*> auth_pins; |
7c673cae FG |
62 | |
63 | // held locks | |
11fdf7f2 TL |
64 | struct LockOp { |
65 | enum { | |
66 | RDLOCK = 1, | |
67 | WRLOCK = 2, | |
68 | XLOCK = 4, | |
69 | REMOTE_WRLOCK = 8, | |
70 | }; | |
71 | SimpleLock* lock; | |
72 | mutable unsigned flags; | |
73 | mutable mds_rank_t wrlock_target; | |
74 | operator SimpleLock*() const { | |
75 | return lock; | |
76 | } | |
77 | LockOp(SimpleLock *l, unsigned f=0, mds_rank_t t=MDS_RANK_NONE) : | |
78 | lock(l), flags(f), wrlock_target(t) {} | |
79 | bool is_rdlock() const { return !!(flags & RDLOCK); } | |
80 | bool is_xlock() const { return !!(flags & XLOCK); } | |
81 | bool is_wrlock() const { return !!(flags & WRLOCK); } | |
82 | void clear_wrlock() const { flags &= ~WRLOCK; } | |
83 | bool is_remote_wrlock() const { return !!(flags & REMOTE_WRLOCK); } | |
84 | void clear_remote_wrlock() const { | |
85 | flags &= ~REMOTE_WRLOCK; | |
86 | wrlock_target = MDS_RANK_NONE; | |
87 | } | |
88 | }; | |
89 | ||
90 | struct LockOpVec : public vector<LockOp> { | |
91 | void add_rdlock(SimpleLock *lock) { | |
92 | emplace_back(lock, LockOp::RDLOCK); | |
93 | } | |
94 | void erase_rdlock(SimpleLock *lock); | |
95 | void add_xlock(SimpleLock *lock) { | |
96 | emplace_back(lock, LockOp::XLOCK); | |
97 | } | |
98 | void add_wrlock(SimpleLock *lock) { | |
99 | emplace_back(lock, LockOp::WRLOCK); | |
100 | } | |
101 | void add_remote_wrlock(SimpleLock *lock, mds_rank_t rank) { | |
102 | ceph_assert(rank != MDS_RANK_NONE); | |
103 | emplace_back(lock, LockOp::REMOTE_WRLOCK, rank); | |
104 | } | |
105 | void sort_and_merge(); | |
106 | ||
107 | LockOpVec() { | |
108 | reserve(32); | |
109 | } | |
110 | }; | |
111 | typedef set<LockOp, SimpleLock::ptr_lt> lock_set; | |
112 | typedef lock_set::iterator lock_iterator; | |
113 | lock_set locks; // full ordering | |
114 | ||
115 | bool is_rdlocked(SimpleLock *lock) const { | |
116 | auto it = locks.find(lock); | |
117 | return it != locks.end() && it->is_rdlock(); | |
118 | } | |
119 | bool is_xlocked(SimpleLock *lock) const { | |
120 | auto it = locks.find(lock); | |
121 | return it != locks.end() && it->is_xlock(); | |
122 | } | |
123 | bool is_wrlocked(SimpleLock *lock) const { | |
124 | auto it = locks.find(lock); | |
125 | return it != locks.end() && it->is_wrlock(); | |
126 | } | |
127 | bool is_remote_wrlocked(SimpleLock *lock) const { | |
128 | auto it = locks.find(lock); | |
129 | return it != locks.end() && it->is_remote_wrlock(); | |
130 | } | |
7c673cae FG |
131 | |
132 | // lock we are currently trying to acquire. if we give up for some reason, | |
133 | // be sure to eval() this. | |
134 | SimpleLock *locking = nullptr; | |
135 | mds_rank_t locking_target_mds = -1; | |
136 | ||
137 | // if this flag is set, do not attempt to acquire further locks. | |
138 | // (useful for wrlock, which may be a moving auth target) | |
139 | bool done_locking = false; | |
140 | bool committing = false; | |
141 | bool aborted = false; | |
142 | bool killed = false; | |
143 | ||
144 | // for applying projected inode changes | |
145 | list<CInode*> projected_inodes; | |
146 | list<CDir*> projected_fnodes; | |
147 | list<ScatterLock*> updated_locks; | |
148 | ||
149 | list<CInode*> dirty_cow_inodes; | |
150 | list<pair<CDentry*,version_t> > dirty_cow_dentries; | |
151 | ||
152 | // keep our default values synced with MDRequestParam's | |
153 | MutationImpl() : TrackedOp(nullptr, utime_t()) {} | |
154 | MutationImpl(OpTracker *tracker, utime_t initiated, | |
11fdf7f2 | 155 | const metareqid_t &ri, __u32 att=0, mds_rank_t slave_to=MDS_RANK_NONE) |
7c673cae FG |
156 | : TrackedOp(tracker, initiated), |
157 | reqid(ri), attempt(att), | |
158 | slave_to_mds(slave_to) { } | |
159 | ~MutationImpl() override { | |
11fdf7f2 TL |
160 | ceph_assert(locking == NULL); |
161 | ceph_assert(pins.empty()); | |
162 | ceph_assert(auth_pins.empty()); | |
7c673cae FG |
163 | } |
164 | ||
165 | bool is_master() const { return slave_to_mds == MDS_RANK_NONE; } | |
166 | bool is_slave() const { return slave_to_mds != MDS_RANK_NONE; } | |
167 | ||
168 | client_t get_client() const { | |
169 | if (reqid.name.is_client()) | |
170 | return client_t(reqid.name.num()); | |
171 | return -1; | |
172 | } | |
173 | ||
174 | void set_mds_stamp(utime_t t) { | |
175 | mds_stamp = t; | |
176 | } | |
177 | utime_t get_mds_stamp() const { | |
178 | return mds_stamp; | |
179 | } | |
180 | void set_op_stamp(utime_t t) { | |
181 | op_stamp = t; | |
182 | } | |
183 | utime_t get_op_stamp() const { | |
184 | if (op_stamp != utime_t()) | |
185 | return op_stamp; | |
186 | return get_mds_stamp(); | |
187 | } | |
188 | ||
189 | // pin items in cache | |
190 | void pin(MDSCacheObject *o); | |
191 | void unpin(MDSCacheObject *o); | |
192 | void set_stickydirs(CInode *in); | |
11fdf7f2 | 193 | void put_stickydirs(); |
7c673cae FG |
194 | void drop_pins(); |
195 | ||
196 | void start_locking(SimpleLock *lock, int target=-1); | |
197 | void finish_locking(SimpleLock *lock); | |
198 | ||
199 | // auth pins | |
200 | bool is_auth_pinned(MDSCacheObject *object) const; | |
201 | void auth_pin(MDSCacheObject *object); | |
202 | void auth_unpin(MDSCacheObject *object); | |
203 | void drop_local_auth_pins(); | |
204 | void add_projected_inode(CInode *in); | |
205 | void pop_and_dirty_projected_inodes(); | |
206 | void add_projected_fnode(CDir *dir); | |
207 | void pop_and_dirty_projected_fnodes(); | |
208 | void add_updated_lock(ScatterLock *lock); | |
209 | void add_cow_inode(CInode *in); | |
210 | void add_cow_dentry(CDentry *dn); | |
211 | void apply(); | |
212 | void cleanup(); | |
213 | ||
214 | virtual void print(ostream &out) const { | |
215 | out << "mutation(" << this << ")"; | |
216 | } | |
217 | ||
218 | virtual void dump(Formatter *f) const {} | |
219 | void _dump_op_descriptor_unlocked(ostream& stream) const override; | |
220 | }; | |
221 | ||
222 | inline ostream& operator<<(ostream &out, const MutationImpl &mut) | |
223 | { | |
224 | mut.print(out); | |
225 | return out; | |
226 | } | |
227 | ||
228 | typedef boost::intrusive_ptr<MutationImpl> MutationRef; | |
229 | ||
230 | ||
231 | ||
232 | /** active_request_t | |
233 | * state we track for requests we are currently processing. | |
234 | * mostly information about locks held, so that we can drop them all | |
235 | * the request is finished or forwarded. see request_*(). | |
236 | */ | |
237 | struct MDRequestImpl : public MutationImpl { | |
238 | Session *session; | |
239 | elist<MDRequestImpl*>::item item_session_request; // if not on list, op is aborted. | |
240 | ||
241 | // -- i am a client (master) request | |
11fdf7f2 | 242 | MClientRequest::const_ref client_request; // client request (if any) |
7c673cae FG |
243 | |
244 | // store up to two sets of dn vectors, inode pointers, for request path1 and path2. | |
245 | vector<CDentry*> dn[2]; | |
246 | CDentry *straydn; | |
247 | CInode *in[2]; | |
248 | snapid_t snapid; | |
249 | ||
250 | CInode *tracei; | |
251 | CDentry *tracedn; | |
252 | ||
253 | inodeno_t alloc_ino, used_prealloc_ino; | |
254 | interval_set<inodeno_t> prealloc_inos; | |
255 | ||
b32b8144 FG |
256 | int snap_caps = 0; |
257 | int getattr_caps = 0; ///< caps requested by getattr | |
258 | bool no_early_reply = false; | |
259 | bool did_early_reply = false; | |
260 | bool o_trunc = false; ///< request is an O_TRUNC mutation | |
261 | bool has_completed = false; ///< request has already completed | |
7c673cae FG |
262 | |
263 | bufferlist reply_extra_bl; | |
264 | ||
265 | // inos we did a embedded cap release on, and may need to eval if we haven't since reissued | |
266 | map<vinodeno_t, ceph_seq_t> cap_releases; | |
267 | ||
268 | // -- i am a slave request | |
11fdf7f2 | 269 | MMDSSlaveRequest::const_ref slave_request; // slave request (if one is pending; implies slave == true) |
7c673cae FG |
270 | |
271 | // -- i am an internal op | |
272 | int internal_op; | |
273 | Context *internal_op_finish; | |
274 | void *internal_op_private; | |
275 | ||
276 | // indicates how may retries of request have been made | |
277 | int retry; | |
278 | ||
279 | // indicator for vxattr osdmap update | |
280 | bool waited_for_osdmap; | |
281 | ||
282 | // break rarely-used fields into a separately allocated structure | |
283 | // to save memory for most ops | |
284 | struct More { | |
91327a77 | 285 | int slave_error = 0; |
7c673cae FG |
286 | set<mds_rank_t> slaves; // mds nodes that have slave requests to me (implies client_request) |
287 | set<mds_rank_t> waiting_on_slave; // peers i'm waiting for slavereq replies from. | |
288 | ||
289 | // for rename/link/unlink | |
290 | set<mds_rank_t> witnessed; // nodes who have journaled a RenamePrepare | |
291 | map<MDSCacheObject*,version_t> pvmap; | |
292 | ||
91327a77 AA |
293 | bool has_journaled_slaves = false; |
294 | bool slave_update_journaled = false; | |
295 | bool slave_rolling_back = false; | |
7c673cae FG |
296 | |
297 | // for rename | |
298 | set<mds_rank_t> extra_witnesses; // replica list from srcdn auth (rename) | |
91327a77 | 299 | mds_rank_t srcdn_auth_mds = MDS_RANK_NONE; |
7c673cae | 300 | bufferlist inode_import; |
91327a77 AA |
301 | version_t inode_import_v = 0; |
302 | CInode* rename_inode = nullptr; | |
303 | bool is_freeze_authpin = false; | |
304 | bool is_ambiguous_auth = false; | |
305 | bool is_remote_frozen_authpin = false; | |
306 | bool is_inode_exporter = false; | |
7c673cae | 307 | |
28e407b8 | 308 | map<client_t, pair<Session*, uint64_t> > imported_session_map; |
7c673cae FG |
309 | map<CInode*, map<client_t,Capability::Export> > cap_imports; |
310 | ||
311 | // for lock/flock | |
91327a77 | 312 | bool flock_was_waiting = false; |
7c673cae FG |
313 | |
314 | // for snaps | |
91327a77 | 315 | version_t stid = 0; |
7c673cae FG |
316 | bufferlist snapidbl; |
317 | ||
11fdf7f2 TL |
318 | sr_t *srci_srnode = nullptr; |
319 | sr_t *desti_srnode = nullptr; | |
320 | ||
7c673cae | 321 | // called when slave commits or aborts |
91327a77 | 322 | Context *slave_commit = nullptr; |
7c673cae FG |
323 | bufferlist rollback_bl; |
324 | ||
11fdf7f2 | 325 | MDSContext::vec waiting_for_finish; |
7c673cae FG |
326 | |
327 | // export & fragment | |
91327a77 | 328 | CDir* export_dir = nullptr; |
7c673cae FG |
329 | dirfrag_t fragment_base; |
330 | ||
331 | // for internal ops doing lookup | |
332 | filepath filepath1; | |
333 | filepath filepath2; | |
334 | ||
91327a77 | 335 | More() {} |
7c673cae FG |
336 | } *_more; |
337 | ||
338 | ||
339 | // --------------------------------------------------- | |
340 | struct Params { | |
341 | metareqid_t reqid; | |
342 | __u32 attempt; | |
11fdf7f2 TL |
343 | MClientRequest::const_ref client_req; |
344 | Message::const_ref triggering_slave_req; | |
7c673cae FG |
345 | mds_rank_t slave_to; |
346 | utime_t initiated; | |
347 | utime_t throttled, all_read, dispatched; | |
348 | int internal_op; | |
349 | // keep these default values synced to MutationImpl's | |
11fdf7f2 TL |
350 | Params() : attempt(0), slave_to(MDS_RANK_NONE), internal_op(-1) {} |
351 | const utime_t& get_recv_stamp() const { | |
352 | return initiated; | |
353 | } | |
354 | const utime_t& get_throttle_stamp() const { | |
355 | return throttled; | |
356 | } | |
357 | const utime_t& get_recv_complete_stamp() const { | |
358 | return all_read; | |
359 | } | |
360 | const utime_t& get_dispatch_stamp() const { | |
361 | return dispatched; | |
362 | } | |
7c673cae | 363 | }; |
11fdf7f2 TL |
364 | MDRequestImpl(const Params* params, OpTracker *tracker) : |
365 | MutationImpl(tracker, params->initiated, | |
366 | params->reqid, params->attempt, params->slave_to), | |
7c673cae | 367 | session(NULL), item_session_request(this), |
11fdf7f2 | 368 | client_request(params->client_req), straydn(NULL), snapid(CEPH_NOSNAP), |
7c673cae | 369 | tracei(NULL), tracedn(NULL), alloc_ino(0), used_prealloc_ino(0), |
11fdf7f2 | 370 | internal_op(params->internal_op), internal_op_finish(NULL), |
7c673cae FG |
371 | internal_op_private(NULL), |
372 | retry(0), | |
373 | waited_for_osdmap(false), _more(NULL) { | |
374 | in[0] = in[1] = NULL; | |
7c673cae FG |
375 | } |
376 | ~MDRequestImpl() override; | |
377 | ||
378 | More* more(); | |
379 | bool has_more() const; | |
380 | bool has_witnesses(); | |
381 | bool slave_did_prepare(); | |
382 | bool slave_rolling_back(); | |
383 | bool did_ino_allocation() const; | |
384 | bool freeze_auth_pin(CInode *inode); | |
385 | void unfreeze_auth_pin(bool clear_inode=false); | |
386 | void set_remote_frozen_auth_pin(CInode *inode); | |
387 | bool can_auth_pin(MDSCacheObject *object); | |
388 | void drop_local_auth_pins(); | |
389 | void set_ambiguous_auth(CInode *inode); | |
390 | void clear_ambiguous_auth(); | |
391 | const filepath& get_filepath(); | |
392 | const filepath& get_filepath2(); | |
393 | void set_filepath(const filepath& fp); | |
394 | void set_filepath2(const filepath& fp); | |
b32b8144 | 395 | bool is_queued_for_replay() const; |
7c673cae FG |
396 | |
397 | void print(ostream &out) const override; | |
398 | void dump(Formatter *f) const override; | |
399 | ||
11fdf7f2 TL |
400 | MClientRequest::const_ref release_client_request(); |
401 | void reset_slave_request(const MMDSSlaveRequest::const_ref& req=nullptr); | |
91327a77 | 402 | |
7c673cae FG |
403 | // TrackedOp stuff |
404 | typedef boost::intrusive_ptr<MDRequestImpl> Ref; | |
405 | protected: | |
406 | void _dump(Formatter *f) const override; | |
407 | void _dump_op_descriptor_unlocked(ostream& stream) const override; | |
91327a77 | 408 | private: |
11fdf7f2 | 409 | mutable ceph::spinlock msg_lock; |
7c673cae FG |
410 | }; |
411 | ||
412 | typedef boost::intrusive_ptr<MDRequestImpl> MDRequestRef; | |
413 | ||
414 | ||
415 | struct MDSlaveUpdate { | |
416 | int origop; | |
417 | bufferlist rollback; | |
418 | elist<MDSlaveUpdate*>::item item; | |
419 | Context *waiter; | |
420 | set<CInode*> olddirs; | |
421 | set<CInode*> unlinked; | |
422 | MDSlaveUpdate(int oo, bufferlist &rbl, elist<MDSlaveUpdate*> &list) : | |
423 | origop(oo), | |
424 | item(this), | |
425 | waiter(0) { | |
426 | rollback.claim(rbl); | |
427 | list.push_back(&item); | |
428 | } | |
429 | ~MDSlaveUpdate() { | |
430 | item.remove_myself(); | |
431 | if (waiter) | |
432 | waiter->complete(0); | |
433 | } | |
434 | }; | |
435 | ||
436 | ||
437 | #endif |