]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #ifndef CEPH_MDS_MUTATION_H | |
16 | #define CEPH_MDS_MUTATION_H | |
17 | ||
18 | #include "include/interval_set.h" | |
19 | #include "include/elist.h" | |
20 | #include "include/filepath.h" | |
21 | ||
22 | #include "MDSCacheObject.h" | |
23 | ||
24 | #include "SimpleLock.h" | |
25 | #include "Capability.h" | |
26 | ||
27 | #include "common/TrackedOp.h" | |
28 | ||
29 | class LogSegment; | |
30 | class Capability; | |
31 | class CInode; | |
32 | class CDir; | |
33 | class CDentry; | |
34 | class Session; | |
35 | class ScatterLock; | |
36 | class MClientRequest; | |
37 | class MMDSSlaveRequest; | |
38 | ||
39 | struct MutationImpl : public TrackedOp { /* state carried by one mutation: request id, target log segment, cache pins, auth pins, held locks and projected changes */ | |
40 | metareqid_t reqid; | |
41 | __u32 attempt = 0; // which attempt for this request | |
42 | LogSegment *ls = nullptr; // the log segment i'm committing to | |
43 | ||
44 | private: | |
45 | utime_t mds_stamp; ///< mds-local timestamp (real time) | |
46 | utime_t op_stamp; ///< op timestamp (client provided) | |
47 | ||
48 | public: | |
49 | // flag mutation as slave | |
50 | mds_rank_t slave_to_mds = MDS_RANK_NONE; // this is a slave request if set, i.e. != MDS_RANK_NONE (see is_slave()). | |
51 | ||
52 | // -- my pins and locks -- | |
53 | // cache pins (so things don't expire) | |
54 | set< MDSCacheObject* > pins; | |
55 | set<CInode*> stickydirs; | |
56 | ||
57 | // auth pins | |
58 | map<MDSCacheObject*, mds_rank_t> remote_auth_pins; | |
59 | set< MDSCacheObject* > auth_pins; | |
60 | ||
61 | // held locks | |
62 | set< SimpleLock* > rdlocks; // always local. | |
63 | set< SimpleLock* > wrlocks; // always local. | |
64 | map< SimpleLock*, mds_rank_t > remote_wrlocks; | |
65 | set< SimpleLock* > xlocks; // local or remote. | |
66 | set< SimpleLock*, SimpleLock::ptr_lt > locks; // full ordering | |
67 | ||
68 | // lock we are currently trying to acquire. if we give up for some reason, | |
69 | // be sure to eval() this. | |
70 | SimpleLock *locking = nullptr; | |
71 | mds_rank_t locking_target_mds = -1; | |
72 | ||
73 | // if this flag is set, do not attempt to acquire further locks. | |
74 | // (useful for wrlock, which may be a moving auth target) | |
75 | bool done_locking = false; | |
76 | bool committing = false; | |
77 | bool aborted = false; | |
78 | bool killed = false; | |
79 | ||
80 | // for applying projected inode changes | |
81 | list<CInode*> projected_inodes; | |
82 | list<CDir*> projected_fnodes; | |
83 | list<ScatterLock*> updated_locks; | |
84 | ||
85 | list<CInode*> dirty_cow_inodes; | |
86 | list<pair<CDentry*,version_t> > dirty_cow_dentries; | |
87 | ||
88 | // keep our default values synced with MDRequestImpl::Params's | |
89 | MutationImpl() : TrackedOp(nullptr, utime_t()) {} | |
90 | MutationImpl(OpTracker *tracker, utime_t initiated, | |
91 | metareqid_t ri, __u32 att=0, mds_rank_t slave_to=MDS_RANK_NONE) | |
92 | : TrackedOp(tracker, initiated), | |
93 | reqid(ri), attempt(att), | |
94 | slave_to_mds(slave_to) { } | |
95 | ~MutationImpl() override { /* asserts that no lock is still being acquired and that pins, auth_pins and the x/rd/wr/remote_wr lock sets are already empty */ | |
96 | assert(locking == NULL); | |
97 | assert(pins.empty()); | |
98 | assert(auth_pins.empty()); | |
99 | assert(xlocks.empty()); | |
100 | assert(rdlocks.empty()); | |
101 | assert(wrlocks.empty()); | |
102 | assert(remote_wrlocks.empty()); | |
103 | } | |
104 | ||
105 | bool is_master() const { return slave_to_mds == MDS_RANK_NONE; } | |
106 | bool is_slave() const { return slave_to_mds != MDS_RANK_NONE; } | |
107 | ||
108 | client_t get_client() const { /* client id taken from reqid.name when the requester is a client, otherwise -1 */ | |
109 | if (reqid.name.is_client()) | |
110 | return client_t(reqid.name.num()); | |
111 | return -1; | |
112 | } | |
113 | ||
114 | void set_mds_stamp(utime_t t) { | |
115 | mds_stamp = t; | |
116 | } | |
117 | utime_t get_mds_stamp() const { | |
118 | return mds_stamp; | |
119 | } | |
120 | void set_op_stamp(utime_t t) { | |
121 | op_stamp = t; | |
122 | } | |
123 | utime_t get_op_stamp() const { /* falls back to the mds stamp while op_stamp is still default-constructed */ | |
124 | if (op_stamp != utime_t()) | |
125 | return op_stamp; | |
126 | return get_mds_stamp(); | |
127 | } | |
128 | ||
129 | // pin items in cache | |
130 | void pin(MDSCacheObject *o); | |
131 | void unpin(MDSCacheObject *o); | |
132 | void set_stickydirs(CInode *in); | |
133 | void drop_pins(); | |
134 | ||
135 | void start_locking(SimpleLock *lock, int target=-1); | |
136 | void finish_locking(SimpleLock *lock); | |
137 | ||
138 | // auth pins | |
139 | bool is_auth_pinned(MDSCacheObject *object) const; | |
140 | void auth_pin(MDSCacheObject *object); | |
141 | void auth_unpin(MDSCacheObject *object); | |
142 | void drop_local_auth_pins(); | |
143 | void add_projected_inode(CInode *in); | |
144 | void pop_and_dirty_projected_inodes(); | |
145 | void add_projected_fnode(CDir *dir); | |
146 | void pop_and_dirty_projected_fnodes(); | |
147 | void add_updated_lock(ScatterLock *lock); | |
148 | void add_cow_inode(CInode *in); | |
149 | void add_cow_dentry(CDentry *dn); | |
150 | void apply(); | |
151 | void cleanup(); | |
152 | ||
153 | virtual void print(ostream &out) const { | |
154 | out << "mutation(" << this << ")"; | |
155 | } | |
156 | ||
157 | virtual void dump(Formatter *f) const {} /* no-op in the base struct; MDRequestImpl overrides it */ | |
158 | void _dump_op_descriptor_unlocked(ostream& stream) const override; | |
159 | }; | |
160 | ||
161 | inline ostream& operator<<(ostream &out, const MutationImpl &mut) /* streams a mutation by delegating to its virtual print() */ | |
162 | { | |
163 | mut.print(out); | |
164 | return out; | |
165 | } | |
166 | ||
167 | typedef boost::intrusive_ptr<MutationImpl> MutationRef; /* boost::intrusive_ptr handle to a MutationImpl */ | |
168 | ||
169 | ||
170 | ||
171 | /** active_request_t | |
172 | * state we track for requests we are currently processing. | |
173 | * mostly information about locks held, so that we can drop them all | |
174 | * when the request is finished or forwarded. see request_*(). | |
175 | */ | |
176 | struct MDRequestImpl : public MutationImpl { | |
177 | Session *session; | |
178 | elist<MDRequestImpl*>::item item_session_request; // if not on list, op is aborted. | |
179 | ||
180 | // -- i am a client (master) request | |
181 | MClientRequest *client_request; // client request (if any) | |
182 | ||
183 | // store up to two sets of dn vectors, inode pointers, for request path1 and path2. | |
184 | vector<CDentry*> dn[2]; | |
185 | CDentry *straydn; | |
186 | CInode *in[2]; | |
187 | snapid_t snapid; | |
188 | ||
189 | CInode *tracei; | |
190 | CDentry *tracedn; | |
191 | ||
192 | inodeno_t alloc_ino, used_prealloc_ino; | |
193 | interval_set<inodeno_t> prealloc_inos; | |
194 | ||
b32b8144 FG |
195 | int snap_caps = 0; |
196 | int getattr_caps = 0; ///< caps requested by getattr | |
197 | bool no_early_reply = false; | |
198 | bool did_early_reply = false; | |
199 | bool o_trunc = false; ///< request is an O_TRUNC mutation | |
200 | bool has_completed = false; ///< request has already completed | |
7c673cae FG |
201 | |
202 | bufferlist reply_extra_bl; | |
203 | ||
204 | // inos we did an embedded cap release on, and may need to eval if we haven't since reissued | |
205 | map<vinodeno_t, ceph_seq_t> cap_releases; | |
206 | ||
207 | // -- i am a slave request | |
208 | MMDSSlaveRequest *slave_request; // slave request (if one is pending; implies slave == true) | |
209 | ||
210 | // -- i am an internal op | |
211 | int internal_op; | |
212 | Context *internal_op_finish; | |
213 | void *internal_op_private; | |
214 | ||
215 | // indicates how many retries of the request have been made | |
216 | int retry; | |
217 | ||
218 | // indicator for vxattr osdmap update | |
219 | bool waited_for_osdmap; | |
220 | ||
221 | // break rarely-used fields into a separately allocated structure | |
222 | // to save memory for most ops | |
223 | struct More { | |
224 | int slave_error; | |
225 | set<mds_rank_t> slaves; // mds nodes that have slave requests to me (implies client_request) | |
226 | set<mds_rank_t> waiting_on_slave; // peers i'm waiting for slavereq replies from. | |
227 | ||
228 | // for rename/link/unlink | |
229 | set<mds_rank_t> witnessed; // nodes who have journaled a RenamePrepare | |
230 | map<MDSCacheObject*,version_t> pvmap; | |
231 | ||
232 | bool has_journaled_slaves; | |
233 | bool slave_update_journaled; | |
234 | bool slave_rolling_back; | |
235 | ||
236 | // for rename | |
237 | set<mds_rank_t> extra_witnesses; // replica list from srcdn auth (rename) | |
238 | mds_rank_t srcdn_auth_mds; | |
239 | bufferlist inode_import; | |
240 | version_t inode_import_v; | |
241 | CInode* rename_inode; | |
242 | bool is_freeze_authpin; | |
243 | bool is_ambiguous_auth; | |
244 | bool is_remote_frozen_authpin; | |
245 | bool is_inode_exporter; | |
246 | ||
28e407b8 | 247 | map<client_t, pair<Session*, uint64_t> > imported_session_map; |
7c673cae FG |
248 | map<CInode*, map<client_t,Capability::Export> > cap_imports; |
249 | ||
250 | // for lock/flock | |
251 | bool flock_was_waiting; | |
252 | ||
253 | // for snaps | |
254 | version_t stid; | |
255 | bufferlist snapidbl; | |
256 | ||
257 | // called when slave commits or aborts | |
258 | Context *slave_commit; | |
259 | bufferlist rollback_bl; | |
260 | ||
261 | list<MDSInternalContextBase*> waiting_for_finish; | |
262 | ||
263 | // export & fragment | |
264 | CDir* export_dir; | |
265 | dirfrag_t fragment_base; | |
266 | ||
267 | // for internal ops doing lookup | |
268 | filepath filepath1; | |
269 | filepath filepath2; | |
270 | ||
271 | More() : | |
272 | slave_error(0), | |
273 | has_journaled_slaves(false), slave_update_journaled(false), | |
274 | slave_rolling_back(false), | |
275 | srcdn_auth_mds(-1), inode_import_v(0), rename_inode(0), | |
276 | is_freeze_authpin(false), is_ambiguous_auth(false), | |
277 | is_remote_frozen_authpin(false), is_inode_exporter(false), | |
278 | flock_was_waiting(false), stid(0), slave_commit(0), export_dir(NULL) { } | |
279 | } *_more; | |
280 | ||
281 | ||
282 | // --------------------------------------------------- | |
283 | struct Params { /* construction arguments consumed by the MDRequestImpl constructor */ | |
284 | metareqid_t reqid; | |
285 | __u32 attempt; | |
286 | MClientRequest *client_req; | |
287 | class Message *triggering_slave_req; | |
288 | mds_rank_t slave_to; | |
289 | utime_t initiated; | |
290 | utime_t throttled, all_read, dispatched; | |
291 | int internal_op; | |
292 | // keep these default values synced to MutationImpl's | |
293 | Params() : attempt(0), client_req(NULL), | |
294 | triggering_slave_req(NULL), slave_to(MDS_RANK_NONE), internal_op(-1) {} | |
295 | }; | |
296 | MDRequestImpl(const Params& params, OpTracker *tracker) : /* _more starts NULL; non-zero throttled/all_read/dispatched stamps are recorded via mark_event() */ | |
297 | MutationImpl(tracker, params.initiated, | |
298 | params.reqid, params.attempt, params.slave_to), | |
299 | session(NULL), item_session_request(this), | |
300 | client_request(params.client_req), straydn(NULL), snapid(CEPH_NOSNAP), | |
301 | tracei(NULL), tracedn(NULL), alloc_ino(0), used_prealloc_ino(0), | |
7c673cae FG |
302 | slave_request(NULL), internal_op(params.internal_op), internal_op_finish(NULL), |
303 | internal_op_private(NULL), | |
304 | retry(0), | |
305 | waited_for_osdmap(false), _more(NULL) { | |
306 | in[0] = in[1] = NULL; | |
307 | if (!params.throttled.is_zero()) | |
308 | mark_event("throttled", params.throttled); | |
309 | if (!params.all_read.is_zero()) | |
310 | mark_event("all_read", params.all_read); | |
311 | if (!params.dispatched.is_zero()) | |
312 | mark_event("dispatched", params.dispatched); | |
313 | } | |
314 | ~MDRequestImpl() override; | |
315 | ||
316 | More* more(); | |
317 | bool has_more() const; | |
318 | bool has_witnesses(); | |
319 | bool slave_did_prepare(); | |
320 | bool slave_rolling_back(); | |
321 | bool did_ino_allocation() const; | |
322 | bool freeze_auth_pin(CInode *inode); | |
323 | void unfreeze_auth_pin(bool clear_inode=false); | |
324 | void set_remote_frozen_auth_pin(CInode *inode); | |
325 | bool can_auth_pin(MDSCacheObject *object); | |
326 | void drop_local_auth_pins(); | |
327 | void set_ambiguous_auth(CInode *inode); | |
328 | void clear_ambiguous_auth(); | |
329 | const filepath& get_filepath(); | |
330 | const filepath& get_filepath2(); | |
331 | void set_filepath(const filepath& fp); | |
332 | void set_filepath2(const filepath& fp); | |
b32b8144 | 333 | bool is_queued_for_replay() const; |
7c673cae FG |
334 | |
335 | void print(ostream &out) const override; | |
336 | void dump(Formatter *f) const override; | |
337 | ||
338 | // TrackedOp stuff | |
339 | typedef boost::intrusive_ptr<MDRequestImpl> Ref; | |
340 | protected: | |
341 | void _dump(Formatter *f) const override; | |
342 | void _dump_op_descriptor_unlocked(ostream& stream) const override; | |
343 | }; | |
344 | ||
345 | typedef boost::intrusive_ptr<MDRequestImpl> MDRequestRef; /* boost::intrusive_ptr handle to an MDRequestImpl */ | |
346 | ||
347 | ||
348 | struct MDSlaveUpdate { /* holds the original op code and rollback data of a slave update; links itself onto the supplied elist while it exists */ | |
349 | int origop; | |
350 | bufferlist rollback; | |
351 | elist<MDSlaveUpdate*>::item item; | |
352 | Context *waiter; | |
353 | set<CInode*> olddirs; | |
354 | set<CInode*> unlinked; | |
355 | MDSlaveUpdate(int oo, bufferlist &rbl, elist<MDSlaveUpdate*> &list) : | |
356 | origop(oo), | |
357 | item(this), | |
358 | waiter(0) { | |
359 | rollback.claim(rbl); | |
360 | list.push_back(&item); | |
361 | } | |
362 | ~MDSlaveUpdate() { /* unlinks from the list, then completes the waiter (if any) with result 0 */ | |
363 | item.remove_myself(); | |
364 | if (waiter) | |
365 | waiter->complete(0); | |
366 | } | |
367 | }; | |
368 | ||
369 | ||
370 | #endif |