1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation.  See file COPYING.
 */
15 #ifndef CEPH_MDS_MUTATION_H
16 #define CEPH_MDS_MUTATION_H
18 #include "include/interval_set.h"
19 #include "include/elist.h"
20 #include "include/filepath.h"
22 #include "MDSCacheObject.h"
23 #include "MDSContext.h"
25 #include "SimpleLock.h"
26 #include "Capability.h"
29 #include "common/TrackedOp.h"
30 #include "messages/MClientRequest.h"
31 #include "messages/MMDSPeerRequest.h"
32 #include "messages/MClientReply.h"
43 struct MutationImpl
: public TrackedOp
{
45 // -- my pins and auth_pins --
48 bool auth_pinned
= false;
49 mds_rank_t remote_auth_pinned
= MDS_RANK_NONE
;
59 STATE_PIN
= 16, // no RW after locked, just pin lock state
62 LockOp(SimpleLock
*l
, unsigned f
=0, mds_rank_t t
=MDS_RANK_NONE
) :
63 lock(l
), flags(f
), wrlock_target(t
) {}
65 bool is_rdlock() const { return !!(flags
& RDLOCK
); }
66 bool is_xlock() const { return !!(flags
& XLOCK
); }
67 bool is_wrlock() const { return !!(flags
& WRLOCK
); }
68 void clear_wrlock() const { flags
&= ~WRLOCK
; }
69 bool is_remote_wrlock() const { return !!(flags
& REMOTE_WRLOCK
); }
70 void clear_remote_wrlock() const {
71 flags
&= ~REMOTE_WRLOCK
;
72 wrlock_target
= MDS_RANK_NONE
;
74 bool is_state_pin() const { return !!(flags
& STATE_PIN
); }
75 bool operator<(const LockOp
& r
) const {
80 mutable unsigned flags
;
81 mutable mds_rank_t wrlock_target
;
84 struct LockOpVec
: public std::vector
<LockOp
> {
89 void add_rdlock(SimpleLock
*lock
) {
90 emplace_back(lock
, LockOp::RDLOCK
);
92 void erase_rdlock(SimpleLock
*lock
);
93 void add_xlock(SimpleLock
*lock
, int idx
=-1) {
95 emplace(cbegin() + idx
, lock
, LockOp::XLOCK
);
97 emplace_back(lock
, LockOp::XLOCK
);
99 void add_wrlock(SimpleLock
*lock
, int idx
=-1) {
101 emplace(cbegin() + idx
, lock
, LockOp::WRLOCK
);
103 emplace_back(lock
, LockOp::WRLOCK
);
105 void add_remote_wrlock(SimpleLock
*lock
, mds_rank_t rank
) {
106 ceph_assert(rank
!= MDS_RANK_NONE
);
107 emplace_back(lock
, LockOp::REMOTE_WRLOCK
, rank
);
109 void lock_scatter_gather(SimpleLock
*lock
) {
110 emplace_back(lock
, LockOp::WRLOCK
| LockOp::STATE_PIN
);
112 void sort_and_merge();
115 using lock_set
= std::set
<LockOp
>;
116 using lock_iterator
= lock_set::iterator
;
118 // keep our default values synced with MDRequestParam's
119 MutationImpl() : TrackedOp(nullptr, utime_t()) {}
120 MutationImpl(OpTracker
*tracker
, utime_t initiated
,
121 const metareqid_t
&ri
, __u32 att
=0, mds_rank_t peer_to
=MDS_RANK_NONE
)
122 : TrackedOp(tracker
, initiated
),
123 reqid(ri
), attempt(att
),
124 peer_to_mds(peer_to
) {}
125 ~MutationImpl() override
{
126 ceph_assert(!locking
);
127 ceph_assert(!lock_cache
);
128 ceph_assert(num_pins
== 0);
129 ceph_assert(num_auth_pins
== 0);
132 const ObjectState
* find_object_state(MDSCacheObject
*obj
) const {
133 auto it
= object_states
.find(obj
);
134 return it
!= object_states
.end() ? &it
->second
: nullptr;
137 bool is_any_remote_auth_pin() const { return num_remote_auth_pins
> 0; }
139 void disable_lock_cache() {
140 lock_cache_disabled
= true;
143 lock_iterator
emplace_lock(SimpleLock
*l
, unsigned f
=0, mds_rank_t t
=MDS_RANK_NONE
) {
145 return locks
.emplace(l
, f
, t
).first
;
148 bool is_rdlocked(SimpleLock
*lock
) const;
149 bool is_wrlocked(SimpleLock
*lock
) const;
150 bool is_xlocked(SimpleLock
*lock
) const {
151 auto it
= locks
.find(lock
);
152 return it
!= locks
.end() && it
->is_xlock();
154 bool is_remote_wrlocked(SimpleLock
*lock
) const {
155 auto it
= locks
.find(lock
);
156 return it
!= locks
.end() && it
->is_remote_wrlock();
158 bool is_last_locked(SimpleLock
*lock
) const {
159 return lock
== last_locked
;
162 bool is_leader() const { return peer_to_mds
== MDS_RANK_NONE
; }
163 bool is_peer() const { return peer_to_mds
!= MDS_RANK_NONE
; }
165 client_t
get_client() const {
166 if (reqid
.name
.is_client())
167 return client_t(reqid
.name
.num());
171 void set_mds_stamp(utime_t t
) {
174 utime_t
get_mds_stamp() const {
177 void set_op_stamp(utime_t t
) {
180 utime_t
get_op_stamp() const {
181 if (op_stamp
!= utime_t())
183 return get_mds_stamp();
186 // pin items in cache
187 void pin(MDSCacheObject
*object
);
188 void unpin(MDSCacheObject
*object
);
189 void set_stickydirs(CInode
*in
);
190 void put_stickydirs();
193 void start_locking(SimpleLock
*lock
, int target
=-1);
194 void finish_locking(SimpleLock
*lock
);
197 bool is_auth_pinned(MDSCacheObject
*object
) const;
198 void auth_pin(MDSCacheObject
*object
);
199 void auth_unpin(MDSCacheObject
*object
);
200 void drop_local_auth_pins();
201 void set_remote_auth_pinned(MDSCacheObject
* object
, mds_rank_t from
);
202 void _clear_remote_auth_pinned(ObjectState
& stat
);
204 void add_projected_node(MDSCacheObject
* obj
) {
205 projected_nodes
.insert(obj
);
207 void remove_projected_node(MDSCacheObject
* obj
) {
208 projected_nodes
.erase(obj
);
210 bool is_projected(MDSCacheObject
*obj
) const {
211 return projected_nodes
.count(obj
);
213 void add_updated_lock(ScatterLock
*lock
);
214 void add_cow_inode(CInode
*in
);
215 void add_cow_dentry(CDentry
*dn
);
219 virtual void print(std::ostream
&out
) const {
220 out
<< "mutation(" << this << ")";
223 virtual void dump(ceph::Formatter
*f
) const {}
224 void _dump_op_descriptor_unlocked(std::ostream
& stream
) const override
;
227 __u32 attempt
= 0; // which attempt for this request
228 LogSegment
*ls
= nullptr; // the log segment i'm committing to
230 // flag mutation as peer
231 mds_rank_t peer_to_mds
= MDS_RANK_NONE
; // this is a peer request if >= 0.
233 ceph::unordered_map
<MDSCacheObject
*, ObjectState
> object_states
;
235 int num_auth_pins
= 0;
236 int num_remote_auth_pins
= 0;
237 // cache pins (so things don't expire)
238 CInode
* stickydiri
= nullptr;
240 lock_set locks
; // full ordering
241 MDLockCache
* lock_cache
= nullptr;
242 bool lock_cache_disabled
= false;
243 SimpleLock
*last_locked
= nullptr;
244 // Lock we are currently trying to acquire. If we give up for some reason,
245 // be sure to eval() this.
246 SimpleLock
*locking
= nullptr;
247 mds_rank_t locking_target_mds
= -1;
249 // if this flag is set, do not attempt to acquire further locks.
250 // (useful for wrlock, which may be a moving auth target)
257 int locking_state
= 0;
259 bool committing
= false;
260 bool aborted
= false;
263 // for applying projected inode changes
264 std::set
<MDSCacheObject
*> projected_nodes
;
265 std::list
<ScatterLock
*> updated_locks
;
267 std::list
<CInode
*> dirty_cow_inodes
;
268 std::list
<std::pair
<CDentry
*,version_t
> > dirty_cow_dentries
;
271 utime_t mds_stamp
; ///< mds-local timestamp (real time)
272 utime_t op_stamp
; ///< op timestamp (client provided)
/**
 * MDRequestImpl: state we track for requests we are currently processing.
 * mostly information about locks held, so that we can drop them all when
 * the request is finished or forwarded. see request_*().
 */
280 struct MDRequestImpl
: public MutationImpl
{
282 typedef boost::intrusive_ptr
<MDRequestImpl
> Ref
;
284 // break rarely-used fields into a separately allocated structure
285 // to save memory for most ops
290 std::set
<mds_rank_t
> peers
; // mds nodes that have peer requests to me (implies client_request)
291 std::set
<mds_rank_t
> waiting_on_peer
; // peers i'm waiting for peerreq replies from.
293 // for rename/link/unlink
294 std::set
<mds_rank_t
> witnessed
; // nodes who have journaled a RenamePrepare
295 std::map
<MDSCacheObject
*,version_t
> pvmap
;
297 bool has_journaled_peers
= false;
298 bool peer_update_journaled
= false;
299 bool peer_rolling_back
= false;
302 std::set
<mds_rank_t
> extra_witnesses
; // replica list from srcdn auth (rename)
303 mds_rank_t srcdn_auth_mds
= MDS_RANK_NONE
;
304 ceph::buffer::list inode_import
;
305 version_t inode_import_v
= 0;
306 CInode
* rename_inode
= nullptr;
307 bool is_freeze_authpin
= false;
308 bool is_ambiguous_auth
= false;
309 bool is_remote_frozen_authpin
= false;
310 bool is_inode_exporter
= false;
312 std::map
<client_t
, std::pair
<Session
*, uint64_t> > imported_session_map
;
313 std::map
<CInode
*, std::map
<client_t
,Capability::Export
> > cap_imports
;
316 bool flock_was_waiting
= false;
320 ceph::buffer::list snapidbl
;
322 sr_t
*srci_srnode
= nullptr;
323 sr_t
*desti_srnode
= nullptr;
325 // called when peer commits or aborts
326 Context
*peer_commit
= nullptr;
327 ceph::buffer::list rollback_bl
;
329 MDSContext::vec waiting_for_finish
;
332 CDir
* export_dir
= nullptr;
333 dirfrag_t fragment_base
;
335 // for internal ops doing lookup
340 // ---------------------------------------------------
342 // keep these default values synced to MutationImpl's
344 const utime_t
& get_recv_stamp() const {
347 const utime_t
& get_throttle_stamp() const {
350 const utime_t
& get_recv_complete_stamp() const {
353 const utime_t
& get_dispatch_stamp() const {
358 ceph::cref_t
<MClientRequest
> client_req
;
359 ceph::cref_t
<Message
> triggering_peer_req
;
360 mds_rank_t peer_to
= MDS_RANK_NONE
;
362 utime_t throttled
, all_read
, dispatched
;
363 int internal_op
= -1;
365 MDRequestImpl(const Params
* params
, OpTracker
*tracker
) :
366 MutationImpl(tracker
, params
->initiated
,
367 params
->reqid
, params
->attempt
, params
->peer_to
),
368 item_session_request(this), client_request(params
->client_req
),
369 internal_op(params
->internal_op
) {}
370 ~MDRequestImpl() override
;
373 bool has_more() const;
374 bool has_witnesses();
375 bool peer_did_prepare();
376 bool peer_rolling_back();
377 bool freeze_auth_pin(CInode
*inode
);
378 void unfreeze_auth_pin(bool clear_inode
=false);
379 void set_remote_frozen_auth_pin(CInode
*inode
);
380 bool can_auth_pin(MDSCacheObject
*object
);
381 void drop_local_auth_pins();
382 void set_ambiguous_auth(CInode
*inode
);
383 void clear_ambiguous_auth();
384 const filepath
& get_filepath();
385 const filepath
& get_filepath2();
386 void set_filepath(const filepath
& fp
);
387 void set_filepath2(const filepath
& fp
);
388 bool is_queued_for_replay() const;
392 bool is_batch_head() {
393 return batch_op_map
!= nullptr;
395 std::unique_ptr
<BatchOp
> release_batch_op();
397 void print(std::ostream
&out
) const override
;
398 void dump(ceph::Formatter
*f
) const override
;
400 ceph::cref_t
<MClientRequest
> release_client_request();
401 void reset_peer_request(const ceph::cref_t
<MMDSPeerRequest
>& req
=nullptr);
403 Session
*session
= nullptr;
404 elist
<MDRequestImpl
*>::item item_session_request
; // if not on list, op is aborted.
406 // -- i am a client (leader) request
407 ceph::cref_t
<MClientRequest
> client_request
; // client request (if any)
409 // tree and depth info of path1 and path2
410 inodeno_t dir_root
[2] = {0, 0};
411 int dir_depth
[2] = {-1, -1};
412 file_layout_t dir_layout
;
413 // store up to two sets of dn vectors, inode pointers, for request path1 and path2.
414 std::vector
<CDentry
*> dn
[2];
416 CDentry
*straydn
= nullptr;
417 snapid_t snapid
= CEPH_NOSNAP
;
419 CInode
*tracei
= nullptr;
420 CDentry
*tracedn
= nullptr;
422 inodeno_t alloc_ino
= 0, used_prealloc_ino
= 0;
423 interval_set
<inodeno_t
> prealloc_inos
;
426 int getattr_caps
= 0; ///< caps requested by getattr
427 bool no_early_reply
= false;
428 bool did_early_reply
= false;
429 bool o_trunc
= false; ///< request is an O_TRUNC mutation
430 bool has_completed
= false; ///< request has already completed
432 ceph::buffer::list reply_extra_bl
;
434 // inos we did a embedded cap release on, and may need to eval if we haven't since reissued
435 std::map
<vinodeno_t
, ceph_seq_t
> cap_releases
;
437 // -- i am a peer request
438 ceph::cref_t
<MMDSPeerRequest
> peer_request
; // peer request (if one is pending; implies peer == true)
440 // -- i am an internal op
442 Context
*internal_op_finish
= nullptr;
443 void *internal_op_private
= nullptr;
445 // indicates how may retries of request have been made
448 std::map
<int, std::unique_ptr
<BatchOp
> > *batch_op_map
= nullptr;
450 // indicator for vxattr osdmap update
451 bool waited_for_osdmap
= false;
454 void _dump(ceph::Formatter
*f
) const override
;
455 void _dump_op_descriptor_unlocked(std::ostream
& stream
) const override
;
457 mutable ceph::spinlock msg_lock
;
460 struct MDPeerUpdate
{
461 MDPeerUpdate(int oo
, ceph::buffer::list
&rbl
) :
463 rollback
= std::move(rbl
);
470 ceph::buffer::list rollback
;
471 Context
*waiter
= nullptr;
472 std::set
<CInode
*> olddirs
;
473 std::set
<CInode
*> unlinked
;
476 struct MDLockCacheItem
{
477 MDLockCache
*parent
= nullptr;
478 elist
<MDLockCacheItem
*>::item item_lock
;
481 struct MDLockCache
: public MutationImpl
{
482 using LockItem
= MDLockCacheItem
;
485 MDLockCache
*parent
= nullptr;
486 elist
<DirItem
*>::item item_dir
;
489 MDLockCache(Capability
*cap
, int op
) :
490 MutationImpl(), diri(cap
->get_inode()), client_cap(cap
), opcode(op
) {
491 client_cap
->lock_caches
.push_back(&item_cap_lock_cache
);
494 CInode
*get_dir_inode() { return diri
; }
495 void set_dir_layout(file_layout_t
& layout
) {
498 const file_layout_t
& get_dir_layout() const {
503 void attach_dirfrags(std::vector
<CDir
*>&& dfv
);
505 void detach_dirfrags();
508 Capability
*client_cap
;
510 file_layout_t dir_layout
;
512 elist
<MDLockCache
*>::item item_cap_lock_cache
;
514 // link myself to locked locks
515 std::unique_ptr
<LockItem
[]> items_lock
;
517 // link myself to auth-pinned dirfrags
518 std::unique_ptr
<DirItem
[]> items_dir
;
519 std::vector
<CDir
*> auth_pinned_dirfrags
;
522 bool invalidating
= false;
525 typedef boost::intrusive_ptr
<MutationImpl
> MutationRef
;
526 typedef boost::intrusive_ptr
<MDRequestImpl
> MDRequestRef
;
528 inline std::ostream
& operator<<(std::ostream
&out
, const MutationImpl
&mut
)