]> git.proxmox.com Git - ceph.git/blame - ceph/src/mds/Mutation.h
bump version to 18.2.2-pve1
[ceph.git] / ceph / src / mds / Mutation.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#ifndef CEPH_MDS_MUTATION_H
16#define CEPH_MDS_MUTATION_H
17
18#include "include/interval_set.h"
19#include "include/elist.h"
20#include "include/filepath.h"
21
22#include "MDSCacheObject.h"
11fdf7f2 23#include "MDSContext.h"
7c673cae
FG
24
25#include "SimpleLock.h"
26#include "Capability.h"
f91f0fd5 27#include "BatchOp.h"
7c673cae
FG
28
29#include "common/TrackedOp.h"
11fdf7f2 30#include "messages/MClientRequest.h"
f67539c2 31#include "messages/MMDSPeerRequest.h"
9f95a23c 32#include "messages/MClientReply.h"
7c673cae
FG
33
34class LogSegment;
7c673cae
FG
35class CInode;
36class CDir;
37class CDentry;
38class Session;
39class ScatterLock;
11fdf7f2 40struct sr_t;
9f95a23c 41struct MDLockCache;
7c673cae
FG
42
43struct MutationImpl : public TrackedOp {
7c673cae 44public:
9f95a23c
TL
45 // -- my pins and auth_pins --
46 struct ObjectState {
47 bool pinned = false;
48 bool auth_pinned = false;
49 mds_rank_t remote_auth_pinned = MDS_RANK_NONE;
50 };
7c673cae 51
7c673cae 52 // held locks
11fdf7f2
TL
53 struct LockOp {
54 enum {
55 RDLOCK = 1,
56 WRLOCK = 2,
57 XLOCK = 4,
58 REMOTE_WRLOCK = 8,
9f95a23c 59 STATE_PIN = 16, // no RW after locked, just pin lock state
11fdf7f2 60 };
9f95a23c 61
11fdf7f2
TL
62 LockOp(SimpleLock *l, unsigned f=0, mds_rank_t t=MDS_RANK_NONE) :
63 lock(l), flags(f), wrlock_target(t) {}
9f95a23c 64
11fdf7f2
TL
65 bool is_rdlock() const { return !!(flags & RDLOCK); }
66 bool is_xlock() const { return !!(flags & XLOCK); }
67 bool is_wrlock() const { return !!(flags & WRLOCK); }
68 void clear_wrlock() const { flags &= ~WRLOCK; }
69 bool is_remote_wrlock() const { return !!(flags & REMOTE_WRLOCK); }
70 void clear_remote_wrlock() const {
71 flags &= ~REMOTE_WRLOCK;
72 wrlock_target = MDS_RANK_NONE;
73 }
9f95a23c
TL
74 bool is_state_pin() const { return !!(flags & STATE_PIN); }
75 bool operator<(const LockOp& r) const {
76 return lock < r.lock;
77 }
78
79 SimpleLock* lock;
80 mutable unsigned flags;
81 mutable mds_rank_t wrlock_target;
11fdf7f2
TL
82 };
83
f67539c2 84 struct LockOpVec : public std::vector<LockOp> {
9f95a23c
TL
85 LockOpVec() {
86 reserve(32);
87 }
88
11fdf7f2
TL
89 void add_rdlock(SimpleLock *lock) {
90 emplace_back(lock, LockOp::RDLOCK);
91 }
92 void erase_rdlock(SimpleLock *lock);
9f95a23c
TL
93 void add_xlock(SimpleLock *lock, int idx=-1) {
94 if (idx >= 0)
95 emplace(cbegin() + idx, lock, LockOp::XLOCK);
96 else
97 emplace_back(lock, LockOp::XLOCK);
11fdf7f2 98 }
9f95a23c
TL
99 void add_wrlock(SimpleLock *lock, int idx=-1) {
100 if (idx >= 0)
101 emplace(cbegin() + idx, lock, LockOp::WRLOCK);
102 else
103 emplace_back(lock, LockOp::WRLOCK);
11fdf7f2
TL
104 }
105 void add_remote_wrlock(SimpleLock *lock, mds_rank_t rank) {
106 ceph_assert(rank != MDS_RANK_NONE);
107 emplace_back(lock, LockOp::REMOTE_WRLOCK, rank);
108 }
9f95a23c
TL
109 void lock_scatter_gather(SimpleLock *lock) {
110 emplace_back(lock, LockOp::WRLOCK | LockOp::STATE_PIN);
11fdf7f2 111 }
9f95a23c 112 void sort_and_merge();
11fdf7f2 113 };
11fdf7f2 114
f67539c2 115 using lock_set = std::set<LockOp>;
9f95a23c
TL
116 using lock_iterator = lock_set::iterator;
117
118 // keep our default values synced with MDRequestParam's
20effc67 119 MutationImpl() : TrackedOp(nullptr, ceph_clock_now()) {}
9f95a23c 120 MutationImpl(OpTracker *tracker, utime_t initiated,
f67539c2 121 const metareqid_t &ri, __u32 att=0, mds_rank_t peer_to=MDS_RANK_NONE)
9f95a23c
TL
122 : TrackedOp(tracker, initiated),
123 reqid(ri), attempt(att),
f67539c2 124 peer_to_mds(peer_to) {}
9f95a23c
TL
125 ~MutationImpl() override {
126 ceph_assert(!locking);
127 ceph_assert(!lock_cache);
128 ceph_assert(num_pins == 0);
129 ceph_assert(num_auth_pins == 0);
130 }
131
132 const ObjectState* find_object_state(MDSCacheObject *obj) const {
133 auto it = object_states.find(obj);
134 return it != object_states.end() ? &it->second : nullptr;
135 }
136
137 bool is_any_remote_auth_pin() const { return num_remote_auth_pins > 0; }
138
139 void disable_lock_cache() {
140 lock_cache_disabled = true;
141 }
142
143 lock_iterator emplace_lock(SimpleLock *l, unsigned f=0, mds_rank_t t=MDS_RANK_NONE) {
144 last_locked = l;
145 return locks.emplace(l, f, t).first;
11fdf7f2 146 }
9f95a23c
TL
147
148 bool is_rdlocked(SimpleLock *lock) const;
149 bool is_wrlocked(SimpleLock *lock) const;
11fdf7f2
TL
150 bool is_xlocked(SimpleLock *lock) const {
151 auto it = locks.find(lock);
152 return it != locks.end() && it->is_xlock();
153 }
11fdf7f2
TL
154 bool is_remote_wrlocked(SimpleLock *lock) const {
155 auto it = locks.find(lock);
156 return it != locks.end() && it->is_remote_wrlock();
157 }
9f95a23c
TL
158 bool is_last_locked(SimpleLock *lock) const {
159 return lock == last_locked;
7c673cae
FG
160 }
161
f67539c2
TL
162 bool is_leader() const { return peer_to_mds == MDS_RANK_NONE; }
163 bool is_peer() const { return peer_to_mds != MDS_RANK_NONE; }
7c673cae
FG
164
165 client_t get_client() const {
166 if (reqid.name.is_client())
167 return client_t(reqid.name.num());
168 return -1;
169 }
170
171 void set_mds_stamp(utime_t t) {
172 mds_stamp = t;
173 }
174 utime_t get_mds_stamp() const {
175 return mds_stamp;
176 }
177 void set_op_stamp(utime_t t) {
178 op_stamp = t;
179 }
180 utime_t get_op_stamp() const {
181 if (op_stamp != utime_t())
182 return op_stamp;
183 return get_mds_stamp();
184 }
185
186 // pin items in cache
9f95a23c
TL
187 void pin(MDSCacheObject *object);
188 void unpin(MDSCacheObject *object);
7c673cae 189 void set_stickydirs(CInode *in);
11fdf7f2 190 void put_stickydirs();
7c673cae
FG
191 void drop_pins();
192
193 void start_locking(SimpleLock *lock, int target=-1);
194 void finish_locking(SimpleLock *lock);
195
196 // auth pins
197 bool is_auth_pinned(MDSCacheObject *object) const;
198 void auth_pin(MDSCacheObject *object);
199 void auth_unpin(MDSCacheObject *object);
200 void drop_local_auth_pins();
9f95a23c
TL
201 void set_remote_auth_pinned(MDSCacheObject* object, mds_rank_t from);
202 void _clear_remote_auth_pinned(ObjectState& stat);
203
f67539c2
TL
204 void add_projected_node(MDSCacheObject* obj) {
205 projected_nodes.insert(obj);
206 }
207 void remove_projected_node(MDSCacheObject* obj) {
208 projected_nodes.erase(obj);
209 }
210 bool is_projected(MDSCacheObject *obj) const {
211 return projected_nodes.count(obj);
212 }
7c673cae
FG
213 void add_updated_lock(ScatterLock *lock);
214 void add_cow_inode(CInode *in);
215 void add_cow_dentry(CDentry *dn);
216 void apply();
217 void cleanup();
218
f67539c2 219 virtual void print(std::ostream &out) const {
7c673cae
FG
220 out << "mutation(" << this << ")";
221 }
222
f67539c2 223 virtual void dump(ceph::Formatter *f) const {}
aee94f69 224 void _dump_op_descriptor(std::ostream& stream) const override;
7c673cae 225
9f95a23c
TL
226 metareqid_t reqid;
227 __u32 attempt = 0; // which attempt for this request
228 LogSegment *ls = nullptr; // the log segment i'm committing to
7c673cae 229
f67539c2
TL
230 // flag mutation as peer
231 mds_rank_t peer_to_mds = MDS_RANK_NONE; // this is a peer request if >= 0.
7c673cae 232
9f95a23c
TL
233 ceph::unordered_map<MDSCacheObject*, ObjectState> object_states;
234 int num_pins = 0;
235 int num_auth_pins = 0;
236 int num_remote_auth_pins = 0;
237 // cache pins (so things don't expire)
238 CInode* stickydiri = nullptr;
7c673cae 239
9f95a23c
TL
240 lock_set locks; // full ordering
241 MDLockCache* lock_cache = nullptr;
242 bool lock_cache_disabled = false;
243 SimpleLock *last_locked = nullptr;
244 // Lock we are currently trying to acquire. If we give up for some reason,
245 // be sure to eval() this.
246 SimpleLock *locking = nullptr;
247 mds_rank_t locking_target_mds = -1;
7c673cae 248
9f95a23c
TL
249 // if this flag is set, do not attempt to acquire further locks.
250 // (useful for wrlock, which may be a moving auth target)
251 enum {
252 SNAP_LOCKED = 1,
253 SNAP2_LOCKED = 2,
254 PATH_LOCKED = 4,
255 ALL_LOCKED = 8,
256 };
257 int locking_state = 0;
7c673cae 258
9f95a23c
TL
259 bool committing = false;
260 bool aborted = false;
261 bool killed = false;
7c673cae 262
9f95a23c 263 // for applying projected inode changes
f67539c2
TL
264 std::set<MDSCacheObject*> projected_nodes;
265 std::list<ScatterLock*> updated_locks;
7c673cae 266
f67539c2
TL
267 std::list<CInode*> dirty_cow_inodes;
268 std::list<std::pair<CDentry*,version_t> > dirty_cow_dentries;
7c673cae 269
9f95a23c
TL
270private:
271 utime_t mds_stamp; ///< mds-local timestamp (real time)
272 utime_t op_stamp; ///< op timestamp (client provided)
273};
7c673cae 274
9f95a23c
TL
275/**
276 * MDRequestImpl: state we track for requests we are currently processing.
277 * mostly information about locks held, so that we can drop them all
278 * when the request is finished or forwarded. see request_*().
279 */
280struct MDRequestImpl : public MutationImpl {
281 // TrackedOp stuff
282 typedef boost::intrusive_ptr<MDRequestImpl> Ref;
7c673cae
FG
283
284 // break rarely-used fields into a separately allocated structure
285 // to save memory for most ops
286 struct More {
9f95a23c
TL
287 More() {}
288
f67539c2
TL
289 int peer_error = 0;
290 std::set<mds_rank_t> peers; // mds nodes that have peer requests to me (implies client_request)
291 std::set<mds_rank_t> waiting_on_peer; // peers i'm waiting for peerreq replies from.
7c673cae
FG
292
293 // for rename/link/unlink
f67539c2
TL
294 std::set<mds_rank_t> witnessed; // nodes who have journaled a RenamePrepare
295 std::map<MDSCacheObject*,version_t> pvmap;
7c673cae 296
f67539c2
TL
297 bool has_journaled_peers = false;
298 bool peer_update_journaled = false;
299 bool peer_rolling_back = false;
7c673cae
FG
300
301 // for rename
f67539c2 302 std::set<mds_rank_t> extra_witnesses; // replica list from srcdn auth (rename)
91327a77 303 mds_rank_t srcdn_auth_mds = MDS_RANK_NONE;
f67539c2 304 ceph::buffer::list inode_import;
91327a77
AA
305 version_t inode_import_v = 0;
306 CInode* rename_inode = nullptr;
307 bool is_freeze_authpin = false;
308 bool is_ambiguous_auth = false;
309 bool is_remote_frozen_authpin = false;
310 bool is_inode_exporter = false;
b3b6e05e 311 bool rdonly_checks = false;
7c673cae 312
f67539c2
TL
313 std::map<client_t, std::pair<Session*, uint64_t> > imported_session_map;
314 std::map<CInode*, std::map<client_t,Capability::Export> > cap_imports;
7c673cae
FG
315
316 // for lock/flock
91327a77 317 bool flock_was_waiting = false;
7c673cae
FG
318
319 // for snaps
91327a77 320 version_t stid = 0;
f67539c2 321 ceph::buffer::list snapidbl;
7c673cae 322
11fdf7f2
TL
323 sr_t *srci_srnode = nullptr;
324 sr_t *desti_srnode = nullptr;
325
f67539c2
TL
326 // called when peer commits or aborts
327 Context *peer_commit = nullptr;
328 ceph::buffer::list rollback_bl;
7c673cae 329
11fdf7f2 330 MDSContext::vec waiting_for_finish;
7c673cae
FG
331
332 // export & fragment
91327a77 333 CDir* export_dir = nullptr;
7c673cae
FG
334 dirfrag_t fragment_base;
335
336 // for internal ops doing lookup
337 filepath filepath1;
338 filepath filepath2;
9f95a23c 339 } *_more = nullptr;
7c673cae
FG
340
341 // ---------------------------------------------------
342 struct Params {
7c673cae 343 // keep these default values synced to MutationImpl's
9f95a23c 344 Params() {}
11fdf7f2
TL
345 const utime_t& get_recv_stamp() const {
346 return initiated;
347 }
348 const utime_t& get_throttle_stamp() const {
349 return throttled;
350 }
351 const utime_t& get_recv_complete_stamp() const {
352 return all_read;
353 }
354 const utime_t& get_dispatch_stamp() const {
355 return dispatched;
356 }
9f95a23c
TL
357 metareqid_t reqid;
358 __u32 attempt = 0;
f67539c2
TL
359 ceph::cref_t<MClientRequest> client_req;
360 ceph::cref_t<Message> triggering_peer_req;
361 mds_rank_t peer_to = MDS_RANK_NONE;
9f95a23c
TL
362 utime_t initiated;
363 utime_t throttled, all_read, dispatched;
364 int internal_op = -1;
7c673cae 365 };
11fdf7f2
TL
366 MDRequestImpl(const Params* params, OpTracker *tracker) :
367 MutationImpl(tracker, params->initiated,
f67539c2 368 params->reqid, params->attempt, params->peer_to),
9f95a23c
TL
369 item_session_request(this), client_request(params->client_req),
370 internal_op(params->internal_op) {}
7c673cae
FG
371 ~MDRequestImpl() override;
372
373 More* more();
374 bool has_more() const;
375 bool has_witnesses();
f67539c2
TL
376 bool peer_did_prepare();
377 bool peer_rolling_back();
7c673cae
FG
378 bool freeze_auth_pin(CInode *inode);
379 void unfreeze_auth_pin(bool clear_inode=false);
380 void set_remote_frozen_auth_pin(CInode *inode);
381 bool can_auth_pin(MDSCacheObject *object);
382 void drop_local_auth_pins();
383 void set_ambiguous_auth(CInode *inode);
384 void clear_ambiguous_auth();
385 const filepath& get_filepath();
386 const filepath& get_filepath2();
387 void set_filepath(const filepath& fp);
388 void set_filepath2(const filepath& fp);
b32b8144 389 bool is_queued_for_replay() const;
9f95a23c 390 int compare_paths();
7c673cae 391
f91f0fd5
TL
392 bool can_batch();
393 bool is_batch_head() {
394 return batch_op_map != nullptr;
395 }
396 std::unique_ptr<BatchOp> release_batch_op();
397
f67539c2 398 void print(std::ostream &out) const override;
aee94f69
TL
399 void dump_with_mds_lock(ceph::Formatter* f) const {
400 return _dump(f, true);
401 }
7c673cae 402
f67539c2
TL
403 ceph::cref_t<MClientRequest> release_client_request();
404 void reset_peer_request(const ceph::cref_t<MMDSPeerRequest>& req=nullptr);
91327a77 405
9f95a23c
TL
406 Session *session = nullptr;
407 elist<MDRequestImpl*>::item item_session_request; // if not on list, op is aborted.
408
f67539c2
TL
409 // -- i am a client (leader) request
410 ceph::cref_t<MClientRequest> client_request; // client request (if any)
9f95a23c
TL
411
412 // tree and depth info of path1 and path2
413 inodeno_t dir_root[2] = {0, 0};
414 int dir_depth[2] = {-1, -1};
415 file_layout_t dir_layout;
416 // store up to two sets of dn vectors, inode pointers, for request path1 and path2.
f67539c2 417 std::vector<CDentry*> dn[2];
9f95a23c
TL
418 CInode *in[2] = {};
419 CDentry *straydn = nullptr;
420 snapid_t snapid = CEPH_NOSNAP;
aee94f69 421 snapid_t snapid_diff_other = CEPH_NOSNAP;
9f95a23c
TL
422
423 CInode *tracei = nullptr;
424 CDentry *tracedn = nullptr;
425
426 inodeno_t alloc_ino = 0, used_prealloc_ino = 0;
427 interval_set<inodeno_t> prealloc_inos;
428
429 int snap_caps = 0;
430 int getattr_caps = 0; ///< caps requested by getattr
431 bool no_early_reply = false;
432 bool did_early_reply = false;
433 bool o_trunc = false; ///< request is an O_TRUNC mutation
434 bool has_completed = false; ///< request has already completed
435
f67539c2 436 ceph::buffer::list reply_extra_bl;
9f95a23c
TL
437
438 // inos we did a embedded cap release on, and may need to eval if we haven't since reissued
f67539c2 439 std::map<vinodeno_t, ceph_seq_t> cap_releases;
9f95a23c 440
f67539c2
TL
441 // -- i am a peer request
442 ceph::cref_t<MMDSPeerRequest> peer_request; // peer request (if one is pending; implies peer == true)
9f95a23c
TL
443
444 // -- i am an internal op
445 int internal_op;
446 Context *internal_op_finish = nullptr;
447 void *internal_op_private = nullptr;
448
449 // indicates how may retries of request have been made
450 int retry = 0;
451
f91f0fd5 452 std::map<int, std::unique_ptr<BatchOp> > *batch_op_map = nullptr;
9f95a23c
TL
453
454 // indicator for vxattr osdmap update
455 bool waited_for_osdmap = false;
456
7c673cae 457protected:
aee94f69
TL
458 void _dump(ceph::Formatter *f) const override {
459 _dump(f, false);
460 }
461 void _dump(ceph::Formatter *f, bool has_mds_lock) const;
462 void _dump_op_descriptor(std::ostream& stream) const override;
7c673cae
FG
463};
464
f67539c2
TL
465struct MDPeerUpdate {
466 MDPeerUpdate(int oo, ceph::buffer::list &rbl) :
e306af50 467 origop(oo) {
f67539c2 468 rollback = std::move(rbl);
7c673cae 469 }
f67539c2 470 ~MDPeerUpdate() {
7c673cae
FG
471 if (waiter)
472 waiter->complete(0);
473 }
9f95a23c 474 int origop;
f67539c2 475 ceph::buffer::list rollback;
9f95a23c 476 Context *waiter = nullptr;
f67539c2
TL
477 std::set<CInode*> olddirs;
478 std::set<CInode*> unlinked;
9f95a23c
TL
479};
480
481struct MDLockCacheItem {
482 MDLockCache *parent = nullptr;
483 elist<MDLockCacheItem*>::item item_lock;
7c673cae
FG
484};
485
9f95a23c
TL
486struct MDLockCache : public MutationImpl {
487 using LockItem = MDLockCacheItem;
488
489 struct DirItem {
490 MDLockCache *parent = nullptr;
491 elist<DirItem*>::item item_dir;
492 };
493
494 MDLockCache(Capability *cap, int op) :
495 MutationImpl(), diri(cap->get_inode()), client_cap(cap), opcode(op) {
496 client_cap->lock_caches.push_back(&item_cap_lock_cache);
497 }
498
499 CInode *get_dir_inode() { return diri; }
500 void set_dir_layout(file_layout_t& layout) {
501 dir_layout = layout;
502 }
503 const file_layout_t& get_dir_layout() const {
504 return dir_layout;
505 }
506
507 void attach_locks();
508 void attach_dirfrags(std::vector<CDir*>&& dfv);
1911f103
TL
509 void detach_locks();
510 void detach_dirfrags();
7c673cae 511
9f95a23c
TL
512 CInode *diri;
513 Capability *client_cap;
514 int opcode;
515 file_layout_t dir_layout;
516
517 elist<MDLockCache*>::item item_cap_lock_cache;
518
519 // link myself to locked locks
520 std::unique_ptr<LockItem[]> items_lock;
521
522 // link myself to auth-pinned dirfrags
523 std::unique_ptr<DirItem[]> items_dir;
524 std::vector<CDir*> auth_pinned_dirfrags;
525
526 int ref = 1;
527 bool invalidating = false;
528};
529
530typedef boost::intrusive_ptr<MutationImpl> MutationRef;
531typedef boost::intrusive_ptr<MDRequestImpl> MDRequestRef;
532
f67539c2 533inline std::ostream& operator<<(std::ostream &out, const MutationImpl &mut)
9f95a23c
TL
534{
535 mut.print(out);
536 return out;
537}
7c673cae 538#endif