1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
15 #ifndef CEPH_MDS_MUTATION_H
16 #define CEPH_MDS_MUTATION_H
18 #include "include/interval_set.h"
19 #include "include/elist.h"
20 #include "include/filepath.h"
22 #include "MDSCacheObject.h"
24 #include "SimpleLock.h"
25 #include "Capability.h"
27 #include "common/TrackedOp.h"
37 class MMDSSlaveRequest
;
39 struct MutationImpl
: public TrackedOp
{
// Request bookkeeping. `attempt` and `slave_to_mds` are also set by the
// (tracker, initiated, ri, att, slave_to) constructor of this struct.
41 __u32 attempt
= 0; // which attempt for this request
42 LogSegment
*ls
= nullptr; // the log segment i'm committing to
// Timestamps. get_op_stamp() compares op_stamp against a default-constructed
// utime_t and has a return path that yields get_mds_stamp().
45 utime_t mds_stamp
; ///< mds-local timestamp (real time)
46 utime_t op_stamp
; ///< op timestamp (client provided)
49 // flag mutation as slave
// is_master() / is_slave() compare this field against MDS_RANK_NONE.
50 mds_rank_t slave_to_mds
= MDS_RANK_NONE
; // this is a slave request if >= 0.
52 // -- my pins and locks --
53 // cache pins (so things don't expire)
// pin() / unpin() taking an MDSCacheObject* are declared further down.
54 set
< MDSCacheObject
* > pins
;
// NOTE(review): presumably maintained by set_stickydirs(CInode*) -- confirm
// against the implementation file.
55 set
<CInode
*> stickydirs
;
// Auth-pin bookkeeping. remote_auth_pins maps a cache object to an mds rank.
// ~MutationImpl() asserts that auth_pins is empty.
58 map
<MDSCacheObject
*, mds_rank_t
> remote_auth_pins
;
59 set
< MDSCacheObject
* > auth_pins
;
// Lock bookkeeping. ~MutationImpl() asserts that rdlocks, wrlocks,
// remote_wrlocks and xlocks are all empty.
62 set
< SimpleLock
* > rdlocks
; // always local.
63 set
< SimpleLock
* > wrlocks
; // always local.
// Maps a lock to an mds rank.
64 map
< SimpleLock
*, mds_rank_t
> remote_wrlocks
;
65 set
< SimpleLock
* > xlocks
; // local or remote.
// Set ordered by the SimpleLock::ptr_lt comparator.
66 set
< SimpleLock
*, SimpleLock::ptr_lt
> locks
; // full ordering
68 // lock we are currently trying to acquire. if we give up for some reason,
69 // be sure to eval() this.
// ~MutationImpl() asserts this is NULL; start_locking() / finish_locking()
// are declared further down.
70 SimpleLock
*locking
= nullptr;
// The -1 default matches the default `target` argument of start_locking().
71 mds_rank_t locking_target_mds
= -1;
73 // if this flag is set, do not attempt to acquire further locks.
74 // (useful for wrlock, which may be a moving auth target)
75 bool done_locking
= false;
// NOTE(review): no visible line in this header reads or writes `committing`;
// its meaning must be confirmed at the use sites.
76 bool committing
= false;
80 // for applying projected inode changes
// NOTE(review): presumably appended to by add_projected_inode() /
// add_projected_fnode() and consumed by pop_and_dirty_projected_inodes() /
// pop_and_dirty_projected_fnodes(), which are only declared in this header --
// confirm in the implementation file.
81 list
<CInode
*> projected_inodes
;
82 list
<CDir
*> projected_fnodes
;
// NOTE(review): presumably filled by add_updated_lock(ScatterLock*) -- confirm.
83 list
<ScatterLock
*> updated_locks
;
// Copy-on-write bookkeeping; add_cow_inode() / add_cow_dentry() are declared
// further down. Each dentry entry is paired with a version_t.
85 list
<CInode
*> dirty_cow_inodes
;
86 list
<pair
<CDentry
*,version_t
> > dirty_cow_dentries
;
88 // keep our default values synced with MDRequestParam's
// NOTE(review): no type named MDRequestParam is visible in this header; the
// comment above presumably means the Params() constructor further down, whose
// initializers (attempt(0), slave_to(MDS_RANK_NONE)) mirror the defaults here.
// Default constructor: passes a null tracker and a default utime_t to TrackedOp.
89 MutationImpl() : TrackedOp(nullptr, utime_t()) {}
// Tracked constructor.
//   tracker, initiated -- forwarded unchanged to TrackedOp
//   ri                 -- stored in reqid
//   att                -- stored in attempt (default 0)
//   slave_to           -- stored in slave_to_mds (default MDS_RANK_NONE)
90 MutationImpl(OpTracker
*tracker
, utime_t initiated
,
91 metareqid_t ri
, __u32 att
=0, mds_rank_t slave_to
=MDS_RANK_NONE
)
92 : TrackedOp(tracker
, initiated
),
93 reqid(ri
), attempt(att
),
94 slave_to_mds(slave_to
) { }
// Destructor. It only checks invariants: by the time the mutation is
// destroyed no lock acquisition may be in progress, and the auth-pin and
// lock sets listed below must already be empty.
// NOTE(review): the visible checks do not cover pins, stickydirs,
// remote_auth_pins or locks; whether that is intentional cannot be told
// from this excerpt.
95 ~MutationImpl() override
{
96 assert(locking
== NULL
);
98 assert(auth_pins
.empty());
99 assert(xlocks
.empty());
100 assert(rdlocks
.empty());
101 assert(wrlocks
.empty());
102 assert(remote_wrlocks
.empty());
// True when slave_to_mds is MDS_RANK_NONE (its default value).
105 bool is_master() const { return slave_to_mds
== MDS_RANK_NONE
; }
// Exact negation of is_master(): true for any slave_to_mds other than
// MDS_RANK_NONE.
106 bool is_slave() const { return slave_to_mds
!= MDS_RANK_NONE
; }
// When reqid.name.is_client() holds, returns a client_t built from
// reqid.name.num().
// NOTE(review): the return for the non-client case and the closing brace are
// not visible in this excerpt -- confirm the fallback value.
108 client_t
get_client() const {
109 if (reqid
.name
.is_client())
110 return client_t(reqid
.name
.num());
// Accessors for the two utime_t stamps (mds_stamp, op_stamp).
// NOTE(review): the bodies of set_mds_stamp(), get_mds_stamp() and
// set_op_stamp() are not visible in this excerpt; presumably they assign or
// return the matching field -- confirm.
114 void set_mds_stamp(utime_t t
) {
117 utime_t
get_mds_stamp() const {
120 void set_op_stamp(utime_t t
) {
// Tests whether op_stamp differs from a default-constructed utime_t; the
// visible return yields get_mds_stamp().
// NOTE(review): the statement guarded by the `if` is missing here, so the
// visible return is presumably the fallback for an unset op_stamp -- confirm.
123 utime_t
get_op_stamp() const {
124 if (op_stamp
!= utime_t())
126 return get_mds_stamp();
// Member functions declared here and defined outside this header. Only the
// signatures are visible, so the notes below describe the interface only.
129 // pin items in cache
130 void pin(MDSCacheObject
*o
);
131 void unpin(MDSCacheObject
*o
);
132 void set_stickydirs(CInode
*in
);
// `target` defaults to -1, the same value locking_target_mds starts with.
135 void start_locking(SimpleLock
*lock
, int target
=-1);
136 void finish_locking(SimpleLock
*lock
);
// Auth-pin interface; is_auth_pinned() is the only const member of the group.
139 bool is_auth_pinned(MDSCacheObject
*object
) const;
140 void auth_pin(MDSCacheObject
*object
);
141 void auth_unpin(MDSCacheObject
*object
);
// Not declared virtual here; MDRequestImpl declares a member with the same
// name and signature.
142 void drop_local_auth_pins();
// Interface for projected inodes / fnodes, updated locks and cow objects.
// NOTE(review): presumably these operate on projected_inodes,
// projected_fnodes, updated_locks, dirty_cow_inodes and dirty_cow_dentries
// respectively -- confirm in the implementation file.
143 void add_projected_inode(CInode
*in
);
144 void pop_and_dirty_projected_inodes();
145 void add_projected_fnode(CDir
*dir
);
146 void pop_and_dirty_projected_fnodes();
147 void add_updated_lock(ScatterLock
*lock
);
148 void add_cow_inode(CInode
*in
);
149 void add_cow_dentry(CDentry
*dn
);
// Writes "mutation(" followed by this object's address and ")" to `out`.
// Virtual; MDRequestImpl declares an override.
153 virtual void print(ostream
&out
) const {
154 out
<< "mutation(" << this << ")";
// Base implementation has an empty body; MDRequestImpl declares an override.
157 virtual void dump(Formatter
*f
) const {}
// Overrides a virtual inherited from the base class; defined outside this
// header.
158 void _dump_op_descriptor_unlocked(ostream
& stream
) const override
;
// Stream-insertion operator for MutationImpl.
// NOTE(review): the function body is not visible in this excerpt; presumably
// it calls mut.print(out) and returns out -- confirm.
161 inline ostream
& operator<<(ostream
&out
, const MutationImpl
&mut
)
// Handle type for MutationImpl: a boost::intrusive_ptr to it.
167 typedef boost::intrusive_ptr
<MutationImpl
> MutationRef
;
172 * state we track for requests we are currently processing.
173 * mostly information about locks held, so that we can drop them all when
174 * the request is finished or forwarded. see request_*().
176 struct MDRequestImpl
: public MutationImpl
{
// elist hook; the MDRequestImpl constructor initializes it with `this`.
178 elist
<MDRequestImpl
*>::item item_session_request
; // if not on list, op is aborted.
180 // -- i am a client (master) request
// Set from params.client_req by the constructor.
181 MClientRequest
*client_request
; // client request (if any)
183 // store up to two sets of dn vectors, inode pointers, for request path1 and path2.
// NOTE(review): the companion inode array (`in[0]` / `in[1]`, nulled in the
// constructor body) is declared on a line not visible in this excerpt.
184 vector
<CDentry
*> dn
[2];
// Inode-number bookkeeping. The constructor initializes both scalars to 0;
// did_ino_allocation() is declared further down.
192 inodeno_t alloc_ino
, used_prealloc_ino
;
193 interval_set
<inodeno_t
> prealloc_inos
;
// Per-request flags, defaulted here by in-class initializers.
196 int getattr_caps
= 0; ///< caps requested by getattr
197 bool no_early_reply
= false;
198 bool did_early_reply
= false;
199 bool o_trunc
= false; ///< request is an O_TRUNC mutation
200 bool has_completed
= false; ///< request has already completed
// NOTE(review): presumably extra payload appended to the client reply --
// confirm at the use sites.
202 bufferlist reply_extra_bl
;
204 // inos we did an embedded cap release on, and may need to eval if we haven't since reissued
205 map
<vinodeno_t
, ceph_seq_t
> cap_releases
;
207 // -- i am a slave request
// Initialized to NULL by the constructor.
208 MMDSSlaveRequest
*slave_request
; // slave request (if one is pending; implies slave == true)
210 // -- i am an internal op
// Both pointers are initialized to NULL by the constructor. The constructor
// also initializes an `internal_op` member from params.internal_op; its
// declaration is not visible in this excerpt.
212 Context
*internal_op_finish
;
213 void *internal_op_private
;
215 // indicates how many retries of request have been made
// NOTE(review): the member the comment above describes is not visible in this
// excerpt.
218 // indicator for vxattr osdmap update
// Initialized to false by the constructor.
219 bool waited_for_osdmap
;
221 // break rarely-used fields into a separately allocated structure
222 // to save memory for most ops
// NOTE(review): the header line of the enclosing nested struct is not visible
// in this excerpt; it is presumably the type behind the `_more` pointer that
// the MDRequestImpl constructor sets to NULL -- confirm.
// The bool, pointer and scalar members below have no in-class initializers;
// the member-initializer list that follows this group supplies their defaults.
225 set
<mds_rank_t
> slaves
; // mds nodes that have slave requests to me (implies client_request)
226 set
<mds_rank_t
> waiting_on_slave
; // peers i'm waiting for slavereq replies from.
228 // for rename/link/unlink
229 set
<mds_rank_t
> witnessed
; // nodes who have journaled a RenamePrepare
// Maps a cache object to a version_t.
230 map
<MDSCacheObject
*,version_t
> pvmap
;
// All three are initialized to false.
232 bool has_journaled_slaves
;
233 bool slave_update_journaled
;
234 bool slave_rolling_back
;
237 set
<mds_rank_t
> extra_witnesses
; // replica list from srcdn auth (rename)
// Initialized to -1.
238 mds_rank_t srcdn_auth_mds
;
239 bufferlist inode_import
;
// Initialized to 0.
240 version_t inode_import_v
;
// Initialized to 0 (null).
241 CInode
* rename_inode
;
// All four flags are initialized to false.
242 bool is_freeze_authpin
;
243 bool is_ambiguous_auth
;
244 bool is_remote_frozen_authpin
;
245 bool is_inode_exporter
;
// Per-client session and cap-export state.
// NOTE(review): presumably used while importing an inode during rename --
// confirm at the use sites.
247 map
<client_t
, pair
<Session
*, uint64_t> > imported_session_map
;
248 map
<CInode
*, map
<client_t
,Capability::Export
> > cap_imports
;
// Initialized to false.
251 bool flock_was_waiting
;
257 // called when slave commits or aborts
// Initialized to 0 (null).
258 Context
*slave_commit
;
259 bufferlist rollback_bl
;
261 list
<MDSInternalContextBase
*> waiting_for_finish
;
265 dirfrag_t fragment_base
;
267 // for internal ops doing lookup
// NOTE(review): the member(s) the comment above introduces and the first line
// of the constructor that owns the initializer list below are not visible in
// this excerpt.
// Member-initializer list with an empty constructor body: every bool flag is
// set to false, srcdn_auth_mds to -1, inode_import_v and stid to 0, and the
// pointers rename_inode, slave_commit and export_dir to null.
273 has_journaled_slaves(false), slave_update_journaled(false),
274 slave_rolling_back(false),
275 srcdn_auth_mds(-1), inode_import_v(0), rename_inode(0),
276 is_freeze_authpin(false), is_ambiguous_auth(false),
277 is_remote_frozen_authpin(false), is_inode_exporter(false),
278 flock_was_waiting(false), stid(0), slave_commit(0), export_dir(NULL
) { }
282 // ---------------------------------------------------
// Construction parameters consumed by MDRequestImpl(const Params&, OpTracker*).
// NOTE(review): the struct header and the declarations of reqid, attempt,
// slave_to, initiated and internal_op are not visible in this excerpt; those
// members are referenced by the constructors below.
286 MClientRequest
*client_req
;
287 class Message
*triggering_slave_req
;
// Stamps that the MDRequestImpl constructor turns into "throttled",
// "all_read" and "dispatched" events when they are non-zero.
290 utime_t throttled
, all_read
, dispatched
;
292 // keep these default values synced to MutationImpl's
// attempt(0) and slave_to(MDS_RANK_NONE) match the default arguments of the
// MutationImpl tracked constructor; both pointers start out NULL and
// internal_op starts at -1.
293 Params() : attempt(0), client_req(NULL
),
294 triggering_slave_req(NULL
), slave_to(MDS_RANK_NONE
), internal_op(-1) {}
// Builds a request from `params`.
//   params  -- initiated, reqid, attempt and slave_to are forwarded to the
//              MutationImpl base; client_req and internal_op are copied into
//              client_request and internal_op.
//   tracker -- forwarded to the MutationImpl base.
// All remaining pointer members named in the initializer list start out NULL,
// alloc_ino and used_prealloc_ino start at 0, snapid at CEPH_NOSNAP, and
// waited_for_osdmap at false.
296 MDRequestImpl(const Params
& params
, OpTracker
*tracker
) :
297 MutationImpl(tracker
, params
.initiated
,
298 params
.reqid
, params
.attempt
, params
.slave_to
),
299 session(NULL
), item_session_request(this),
300 client_request(params
.client_req
), straydn(NULL
), snapid(CEPH_NOSNAP
),
301 tracei(NULL
), tracedn(NULL
), alloc_ino(0), used_prealloc_ino(0),
302 slave_request(NULL
), internal_op(params
.internal_op
), internal_op_finish(NULL
),
303 internal_op_private(NULL
),
305 waited_for_osdmap(false), _more(NULL
) {
// `in` is an array, so it cannot go in the initializer list above; null both
// slots here.
306 in
[0] = in
[1] = NULL
;
// Record each stamp as a tracked event, skipping any stamp that is zero.
307 if (!params
.throttled
.is_zero())
308 mark_event("throttled", params
.throttled
);
309 if (!params
.all_read
.is_zero())
310 mark_event("all_read", params
.all_read
);
311 if (!params
.dispatched
.is_zero())
312 mark_event("dispatched", params
.dispatched
);
// Destructor and member functions declared here and defined outside this
// header. Only the signatures are visible, so the notes below describe the
// interface only.
314 ~MDRequestImpl() override
;
// State queries.
// NOTE(review): has_more() presumably reports whether the `_more` pointer
// (set to NULL by the constructor) has been allocated -- confirm.
317 bool has_more() const;
318 bool has_witnesses();
319 bool slave_did_prepare();
320 bool slave_rolling_back();
321 bool did_ino_allocation() const;
// Auth-pin / freeze handling. `clear_inode` defaults to false.
322 bool freeze_auth_pin(CInode
*inode
);
323 void unfreeze_auth_pin(bool clear_inode
=false);
324 void set_remote_frozen_auth_pin(CInode
*inode
);
325 bool can_auth_pin(MDSCacheObject
*object
);
// Same name and signature as MutationImpl::drop_local_auth_pins(). Neither
// declaration is marked virtual or override, so this hides the base-class
// member rather than overriding it.
326 void drop_local_auth_pins();
327 void set_ambiguous_auth(CInode
*inode
);
328 void clear_ambiguous_auth();
// Accessors for two filepaths; the getters return const references.
329 const filepath
& get_filepath();
330 const filepath
& get_filepath2();
331 void set_filepath(const filepath
& fp
);
332 void set_filepath2(const filepath
& fp
);
333 bool is_queued_for_replay() const;
// Overrides of MutationImpl::print() and MutationImpl::dump(); defined
// outside this header.
335 void print(ostream
&out
) const override
;
336 void dump(Formatter
*f
) const override
;
// Handle type for MDRequestImpl: a boost::intrusive_ptr to it. The
// namespace-scope MDRequestRef typedef names the same type.
339 typedef boost::intrusive_ptr
<MDRequestImpl
> Ref
;
// Overrides of virtuals inherited from the base classes; defined outside this
// header.
341 void _dump(Formatter
*f
) const override
;
342 void _dump_op_descriptor_unlocked(ostream
& stream
) const override
;
// Namespace-scope handle type for MDRequestImpl: a boost::intrusive_ptr to it.
345 typedef boost::intrusive_ptr
<MDRequestImpl
> MDRequestRef
;
348 struct MDSlaveUpdate
{
351 elist
<MDSlaveUpdate
*>::item item
;
353 set
<CInode
*> olddirs
;
354 set
<CInode
*> unlinked
;
355 MDSlaveUpdate(int oo
, bufferlist
&rbl
, elist
<MDSlaveUpdate
*> &list
) :
360 list
.push_back(&item
);
363 item
.remove_myself();