]> git.proxmox.com Git - ceph.git/blame - ceph/src/mds/Mutation.h
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / mds / Mutation.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#ifndef CEPH_MDS_MUTATION_H
16#define CEPH_MDS_MUTATION_H
17
18#include "include/interval_set.h"
19#include "include/elist.h"
20#include "include/filepath.h"
21
22#include "MDSCacheObject.h"
11fdf7f2 23#include "MDSContext.h"
7c673cae
FG
24
25#include "SimpleLock.h"
26#include "Capability.h"
27
28#include "common/TrackedOp.h"
11fdf7f2
TL
29#include "messages/MClientRequest.h"
30#include "messages/MMDSSlaveRequest.h"
7c673cae
FG
31
32class LogSegment;
33class Capability;
34class CInode;
35class CDir;
36class CDentry;
37class Session;
38class ScatterLock;
11fdf7f2 39struct sr_t;
7c673cae
FG
40
41struct MutationImpl : public TrackedOp {
42 metareqid_t reqid;
43 __u32 attempt = 0; // which attempt for this request
44 LogSegment *ls = nullptr; // the log segment i'm committing to
45
46private:
47 utime_t mds_stamp; ///< mds-local timestamp (real time)
48 utime_t op_stamp; ///< op timestamp (client provided)
49
50public:
51 // flag mutation as slave
52 mds_rank_t slave_to_mds = MDS_RANK_NONE; // this is a slave request if >= 0.
53
54 // -- my pins and locks --
55 // cache pins (so things don't expire)
56 set< MDSCacheObject* > pins;
11fdf7f2 57 CInode* stickydiri = nullptr;
7c673cae
FG
58
59 // auth pins
60 map<MDSCacheObject*, mds_rank_t> remote_auth_pins;
11fdf7f2 61 set<MDSCacheObject*> auth_pins;
7c673cae
FG
62
63 // held locks
11fdf7f2
TL
64 struct LockOp {
65 enum {
66 RDLOCK = 1,
67 WRLOCK = 2,
68 XLOCK = 4,
69 REMOTE_WRLOCK = 8,
70 };
71 SimpleLock* lock;
72 mutable unsigned flags;
73 mutable mds_rank_t wrlock_target;
74 operator SimpleLock*() const {
75 return lock;
76 }
77 LockOp(SimpleLock *l, unsigned f=0, mds_rank_t t=MDS_RANK_NONE) :
78 lock(l), flags(f), wrlock_target(t) {}
79 bool is_rdlock() const { return !!(flags & RDLOCK); }
80 bool is_xlock() const { return !!(flags & XLOCK); }
81 bool is_wrlock() const { return !!(flags & WRLOCK); }
82 void clear_wrlock() const { flags &= ~WRLOCK; }
83 bool is_remote_wrlock() const { return !!(flags & REMOTE_WRLOCK); }
84 void clear_remote_wrlock() const {
85 flags &= ~REMOTE_WRLOCK;
86 wrlock_target = MDS_RANK_NONE;
87 }
88 };
89
90 struct LockOpVec : public vector<LockOp> {
91 void add_rdlock(SimpleLock *lock) {
92 emplace_back(lock, LockOp::RDLOCK);
93 }
94 void erase_rdlock(SimpleLock *lock);
95 void add_xlock(SimpleLock *lock) {
96 emplace_back(lock, LockOp::XLOCK);
97 }
98 void add_wrlock(SimpleLock *lock) {
99 emplace_back(lock, LockOp::WRLOCK);
100 }
101 void add_remote_wrlock(SimpleLock *lock, mds_rank_t rank) {
102 ceph_assert(rank != MDS_RANK_NONE);
103 emplace_back(lock, LockOp::REMOTE_WRLOCK, rank);
104 }
105 void sort_and_merge();
106
107 LockOpVec() {
108 reserve(32);
109 }
110 };
111 typedef set<LockOp, SimpleLock::ptr_lt> lock_set;
112 typedef lock_set::iterator lock_iterator;
113 lock_set locks; // full ordering
114
115 bool is_rdlocked(SimpleLock *lock) const {
116 auto it = locks.find(lock);
117 return it != locks.end() && it->is_rdlock();
118 }
119 bool is_xlocked(SimpleLock *lock) const {
120 auto it = locks.find(lock);
121 return it != locks.end() && it->is_xlock();
122 }
123 bool is_wrlocked(SimpleLock *lock) const {
124 auto it = locks.find(lock);
125 return it != locks.end() && it->is_wrlock();
126 }
127 bool is_remote_wrlocked(SimpleLock *lock) const {
128 auto it = locks.find(lock);
129 return it != locks.end() && it->is_remote_wrlock();
130 }
7c673cae
FG
131
132 // lock we are currently trying to acquire. if we give up for some reason,
133 // be sure to eval() this.
134 SimpleLock *locking = nullptr;
135 mds_rank_t locking_target_mds = -1;
136
137 // if this flag is set, do not attempt to acquire further locks.
138 // (useful for wrlock, which may be a moving auth target)
139 bool done_locking = false;
140 bool committing = false;
141 bool aborted = false;
142 bool killed = false;
143
144 // for applying projected inode changes
145 list<CInode*> projected_inodes;
146 list<CDir*> projected_fnodes;
147 list<ScatterLock*> updated_locks;
148
149 list<CInode*> dirty_cow_inodes;
150 list<pair<CDentry*,version_t> > dirty_cow_dentries;
151
152 // keep our default values synced with MDRequestParam's
153 MutationImpl() : TrackedOp(nullptr, utime_t()) {}
154 MutationImpl(OpTracker *tracker, utime_t initiated,
11fdf7f2 155 const metareqid_t &ri, __u32 att=0, mds_rank_t slave_to=MDS_RANK_NONE)
7c673cae
FG
156 : TrackedOp(tracker, initiated),
157 reqid(ri), attempt(att),
158 slave_to_mds(slave_to) { }
159 ~MutationImpl() override {
11fdf7f2
TL
160 ceph_assert(locking == NULL);
161 ceph_assert(pins.empty());
162 ceph_assert(auth_pins.empty());
7c673cae
FG
163 }
164
165 bool is_master() const { return slave_to_mds == MDS_RANK_NONE; }
166 bool is_slave() const { return slave_to_mds != MDS_RANK_NONE; }
167
168 client_t get_client() const {
169 if (reqid.name.is_client())
170 return client_t(reqid.name.num());
171 return -1;
172 }
173
174 void set_mds_stamp(utime_t t) {
175 mds_stamp = t;
176 }
177 utime_t get_mds_stamp() const {
178 return mds_stamp;
179 }
180 void set_op_stamp(utime_t t) {
181 op_stamp = t;
182 }
183 utime_t get_op_stamp() const {
184 if (op_stamp != utime_t())
185 return op_stamp;
186 return get_mds_stamp();
187 }
188
189 // pin items in cache
190 void pin(MDSCacheObject *o);
191 void unpin(MDSCacheObject *o);
192 void set_stickydirs(CInode *in);
11fdf7f2 193 void put_stickydirs();
7c673cae
FG
194 void drop_pins();
195
196 void start_locking(SimpleLock *lock, int target=-1);
197 void finish_locking(SimpleLock *lock);
198
199 // auth pins
200 bool is_auth_pinned(MDSCacheObject *object) const;
201 void auth_pin(MDSCacheObject *object);
202 void auth_unpin(MDSCacheObject *object);
203 void drop_local_auth_pins();
204 void add_projected_inode(CInode *in);
205 void pop_and_dirty_projected_inodes();
206 void add_projected_fnode(CDir *dir);
207 void pop_and_dirty_projected_fnodes();
208 void add_updated_lock(ScatterLock *lock);
209 void add_cow_inode(CInode *in);
210 void add_cow_dentry(CDentry *dn);
211 void apply();
212 void cleanup();
213
214 virtual void print(ostream &out) const {
215 out << "mutation(" << this << ")";
216 }
217
218 virtual void dump(Formatter *f) const {}
219 void _dump_op_descriptor_unlocked(ostream& stream) const override;
220};
221
222inline ostream& operator<<(ostream &out, const MutationImpl &mut)
223{
224 mut.print(out);
225 return out;
226}
227
228typedef boost::intrusive_ptr<MutationImpl> MutationRef;
229
230
231
232/** active_request_t
233 * state we track for requests we are currently processing.
234 * mostly information about locks held, so that we can drop them all
235 * the request is finished or forwarded. see request_*().
236 */
237struct MDRequestImpl : public MutationImpl {
238 Session *session;
239 elist<MDRequestImpl*>::item item_session_request; // if not on list, op is aborted.
240
241 // -- i am a client (master) request
11fdf7f2 242 MClientRequest::const_ref client_request; // client request (if any)
7c673cae
FG
243
244 // store up to two sets of dn vectors, inode pointers, for request path1 and path2.
245 vector<CDentry*> dn[2];
246 CDentry *straydn;
247 CInode *in[2];
248 snapid_t snapid;
249
250 CInode *tracei;
251 CDentry *tracedn;
252
253 inodeno_t alloc_ino, used_prealloc_ino;
254 interval_set<inodeno_t> prealloc_inos;
255
b32b8144
FG
256 int snap_caps = 0;
257 int getattr_caps = 0; ///< caps requested by getattr
258 bool no_early_reply = false;
259 bool did_early_reply = false;
260 bool o_trunc = false; ///< request is an O_TRUNC mutation
261 bool has_completed = false; ///< request has already completed
7c673cae
FG
262
263 bufferlist reply_extra_bl;
264
265 // inos we did a embedded cap release on, and may need to eval if we haven't since reissued
266 map<vinodeno_t, ceph_seq_t> cap_releases;
267
268 // -- i am a slave request
11fdf7f2 269 MMDSSlaveRequest::const_ref slave_request; // slave request (if one is pending; implies slave == true)
7c673cae
FG
270
271 // -- i am an internal op
272 int internal_op;
273 Context *internal_op_finish;
274 void *internal_op_private;
275
276 // indicates how may retries of request have been made
277 int retry;
278
279 // indicator for vxattr osdmap update
280 bool waited_for_osdmap;
281
282 // break rarely-used fields into a separately allocated structure
283 // to save memory for most ops
284 struct More {
91327a77 285 int slave_error = 0;
7c673cae
FG
286 set<mds_rank_t> slaves; // mds nodes that have slave requests to me (implies client_request)
287 set<mds_rank_t> waiting_on_slave; // peers i'm waiting for slavereq replies from.
288
289 // for rename/link/unlink
290 set<mds_rank_t> witnessed; // nodes who have journaled a RenamePrepare
291 map<MDSCacheObject*,version_t> pvmap;
292
91327a77
AA
293 bool has_journaled_slaves = false;
294 bool slave_update_journaled = false;
295 bool slave_rolling_back = false;
7c673cae
FG
296
297 // for rename
298 set<mds_rank_t> extra_witnesses; // replica list from srcdn auth (rename)
91327a77 299 mds_rank_t srcdn_auth_mds = MDS_RANK_NONE;
7c673cae 300 bufferlist inode_import;
91327a77
AA
301 version_t inode_import_v = 0;
302 CInode* rename_inode = nullptr;
303 bool is_freeze_authpin = false;
304 bool is_ambiguous_auth = false;
305 bool is_remote_frozen_authpin = false;
306 bool is_inode_exporter = false;
7c673cae 307
28e407b8 308 map<client_t, pair<Session*, uint64_t> > imported_session_map;
7c673cae
FG
309 map<CInode*, map<client_t,Capability::Export> > cap_imports;
310
311 // for lock/flock
91327a77 312 bool flock_was_waiting = false;
7c673cae
FG
313
314 // for snaps
91327a77 315 version_t stid = 0;
7c673cae
FG
316 bufferlist snapidbl;
317
11fdf7f2
TL
318 sr_t *srci_srnode = nullptr;
319 sr_t *desti_srnode = nullptr;
320
7c673cae 321 // called when slave commits or aborts
91327a77 322 Context *slave_commit = nullptr;
7c673cae
FG
323 bufferlist rollback_bl;
324
11fdf7f2 325 MDSContext::vec waiting_for_finish;
7c673cae
FG
326
327 // export & fragment
91327a77 328 CDir* export_dir = nullptr;
7c673cae
FG
329 dirfrag_t fragment_base;
330
331 // for internal ops doing lookup
332 filepath filepath1;
333 filepath filepath2;
334
91327a77 335 More() {}
7c673cae
FG
336 } *_more;
337
338
339 // ---------------------------------------------------
340 struct Params {
341 metareqid_t reqid;
342 __u32 attempt;
11fdf7f2
TL
343 MClientRequest::const_ref client_req;
344 Message::const_ref triggering_slave_req;
7c673cae
FG
345 mds_rank_t slave_to;
346 utime_t initiated;
347 utime_t throttled, all_read, dispatched;
348 int internal_op;
349 // keep these default values synced to MutationImpl's
11fdf7f2
TL
350 Params() : attempt(0), slave_to(MDS_RANK_NONE), internal_op(-1) {}
351 const utime_t& get_recv_stamp() const {
352 return initiated;
353 }
354 const utime_t& get_throttle_stamp() const {
355 return throttled;
356 }
357 const utime_t& get_recv_complete_stamp() const {
358 return all_read;
359 }
360 const utime_t& get_dispatch_stamp() const {
361 return dispatched;
362 }
7c673cae 363 };
11fdf7f2
TL
364 MDRequestImpl(const Params* params, OpTracker *tracker) :
365 MutationImpl(tracker, params->initiated,
366 params->reqid, params->attempt, params->slave_to),
7c673cae 367 session(NULL), item_session_request(this),
11fdf7f2 368 client_request(params->client_req), straydn(NULL), snapid(CEPH_NOSNAP),
7c673cae 369 tracei(NULL), tracedn(NULL), alloc_ino(0), used_prealloc_ino(0),
11fdf7f2 370 internal_op(params->internal_op), internal_op_finish(NULL),
7c673cae
FG
371 internal_op_private(NULL),
372 retry(0),
373 waited_for_osdmap(false), _more(NULL) {
374 in[0] = in[1] = NULL;
7c673cae
FG
375 }
376 ~MDRequestImpl() override;
377
378 More* more();
379 bool has_more() const;
380 bool has_witnesses();
381 bool slave_did_prepare();
382 bool slave_rolling_back();
383 bool did_ino_allocation() const;
384 bool freeze_auth_pin(CInode *inode);
385 void unfreeze_auth_pin(bool clear_inode=false);
386 void set_remote_frozen_auth_pin(CInode *inode);
387 bool can_auth_pin(MDSCacheObject *object);
388 void drop_local_auth_pins();
389 void set_ambiguous_auth(CInode *inode);
390 void clear_ambiguous_auth();
391 const filepath& get_filepath();
392 const filepath& get_filepath2();
393 void set_filepath(const filepath& fp);
394 void set_filepath2(const filepath& fp);
b32b8144 395 bool is_queued_for_replay() const;
7c673cae
FG
396
397 void print(ostream &out) const override;
398 void dump(Formatter *f) const override;
399
11fdf7f2
TL
400 MClientRequest::const_ref release_client_request();
401 void reset_slave_request(const MMDSSlaveRequest::const_ref& req=nullptr);
91327a77 402
7c673cae
FG
403 // TrackedOp stuff
404 typedef boost::intrusive_ptr<MDRequestImpl> Ref;
405protected:
406 void _dump(Formatter *f) const override;
407 void _dump_op_descriptor_unlocked(ostream& stream) const override;
91327a77 408private:
11fdf7f2 409 mutable ceph::spinlock msg_lock;
7c673cae
FG
410};
411
412typedef boost::intrusive_ptr<MDRequestImpl> MDRequestRef;
413
414
415struct MDSlaveUpdate {
416 int origop;
417 bufferlist rollback;
418 elist<MDSlaveUpdate*>::item item;
419 Context *waiter;
420 set<CInode*> olddirs;
421 set<CInode*> unlinked;
422 MDSlaveUpdate(int oo, bufferlist &rbl, elist<MDSlaveUpdate*> &list) :
423 origop(oo),
424 item(this),
425 waiter(0) {
426 rollback.claim(rbl);
427 list.push_back(&item);
428 }
429 ~MDSlaveUpdate() {
430 item.remove_myself();
431 if (waiter)
432 waiter->complete(0);
433 }
434};
435
436
437#endif