]> git.proxmox.com Git - ceph.git/blame - ceph/src/mds/Mutation.h
update sources to 12.2.10
[ceph.git] / ceph / src / mds / Mutation.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#ifndef CEPH_MDS_MUTATION_H
16#define CEPH_MDS_MUTATION_H
17
18#include "include/interval_set.h"
19#include "include/elist.h"
20#include "include/filepath.h"
21
22#include "MDSCacheObject.h"
23
24#include "SimpleLock.h"
25#include "Capability.h"
26
27#include "common/TrackedOp.h"
28
29class LogSegment;
30class Capability;
31class CInode;
32class CDir;
33class CDentry;
34class Session;
35class ScatterLock;
36class MClientRequest;
37class MMDSSlaveRequest;
38
39struct MutationImpl : public TrackedOp {
40 metareqid_t reqid;
41 __u32 attempt = 0; // which attempt for this request
42 LogSegment *ls = nullptr; // the log segment i'm committing to
43
44private:
45 utime_t mds_stamp; ///< mds-local timestamp (real time)
46 utime_t op_stamp; ///< op timestamp (client provided)
47
48public:
49 // flag mutation as slave
50 mds_rank_t slave_to_mds = MDS_RANK_NONE; // this is a slave request if >= 0.
51
52 // -- my pins and locks --
53 // cache pins (so things don't expire)
54 set< MDSCacheObject* > pins;
55 set<CInode*> stickydirs;
56
57 // auth pins
58 map<MDSCacheObject*, mds_rank_t> remote_auth_pins;
59 set< MDSCacheObject* > auth_pins;
60
61 // held locks
62 set< SimpleLock* > rdlocks; // always local.
63 set< SimpleLock* > wrlocks; // always local.
64 map< SimpleLock*, mds_rank_t > remote_wrlocks;
65 set< SimpleLock* > xlocks; // local or remote.
66 set< SimpleLock*, SimpleLock::ptr_lt > locks; // full ordering
67
68 // lock we are currently trying to acquire. if we give up for some reason,
69 // be sure to eval() this.
70 SimpleLock *locking = nullptr;
71 mds_rank_t locking_target_mds = -1;
72
73 // if this flag is set, do not attempt to acquire further locks.
74 // (useful for wrlock, which may be a moving auth target)
75 bool done_locking = false;
76 bool committing = false;
77 bool aborted = false;
78 bool killed = false;
79
80 // for applying projected inode changes
81 list<CInode*> projected_inodes;
82 list<CDir*> projected_fnodes;
83 list<ScatterLock*> updated_locks;
84
85 list<CInode*> dirty_cow_inodes;
86 list<pair<CDentry*,version_t> > dirty_cow_dentries;
87
88 // keep our default values synced with MDRequestParam's
89 MutationImpl() : TrackedOp(nullptr, utime_t()) {}
90 MutationImpl(OpTracker *tracker, utime_t initiated,
91 metareqid_t ri, __u32 att=0, mds_rank_t slave_to=MDS_RANK_NONE)
92 : TrackedOp(tracker, initiated),
93 reqid(ri), attempt(att),
94 slave_to_mds(slave_to) { }
95 ~MutationImpl() override {
96 assert(locking == NULL);
97 assert(pins.empty());
98 assert(auth_pins.empty());
99 assert(xlocks.empty());
100 assert(rdlocks.empty());
101 assert(wrlocks.empty());
102 assert(remote_wrlocks.empty());
103 }
104
105 bool is_master() const { return slave_to_mds == MDS_RANK_NONE; }
106 bool is_slave() const { return slave_to_mds != MDS_RANK_NONE; }
107
108 client_t get_client() const {
109 if (reqid.name.is_client())
110 return client_t(reqid.name.num());
111 return -1;
112 }
113
114 void set_mds_stamp(utime_t t) {
115 mds_stamp = t;
116 }
117 utime_t get_mds_stamp() const {
118 return mds_stamp;
119 }
120 void set_op_stamp(utime_t t) {
121 op_stamp = t;
122 }
123 utime_t get_op_stamp() const {
124 if (op_stamp != utime_t())
125 return op_stamp;
126 return get_mds_stamp();
127 }
128
129 // pin items in cache
130 void pin(MDSCacheObject *o);
131 void unpin(MDSCacheObject *o);
132 void set_stickydirs(CInode *in);
133 void drop_pins();
134
135 void start_locking(SimpleLock *lock, int target=-1);
136 void finish_locking(SimpleLock *lock);
137
138 // auth pins
139 bool is_auth_pinned(MDSCacheObject *object) const;
140 void auth_pin(MDSCacheObject *object);
141 void auth_unpin(MDSCacheObject *object);
142 void drop_local_auth_pins();
143 void add_projected_inode(CInode *in);
144 void pop_and_dirty_projected_inodes();
145 void add_projected_fnode(CDir *dir);
146 void pop_and_dirty_projected_fnodes();
147 void add_updated_lock(ScatterLock *lock);
148 void add_cow_inode(CInode *in);
149 void add_cow_dentry(CDentry *dn);
150 void apply();
151 void cleanup();
152
153 virtual void print(ostream &out) const {
154 out << "mutation(" << this << ")";
155 }
156
157 virtual void dump(Formatter *f) const {}
158 void _dump_op_descriptor_unlocked(ostream& stream) const override;
159};
160
161inline ostream& operator<<(ostream &out, const MutationImpl &mut)
162{
163 mut.print(out);
164 return out;
165}
166
167typedef boost::intrusive_ptr<MutationImpl> MutationRef;
168
169
170
171/** active_request_t
172 * state we track for requests we are currently processing.
173 * mostly information about locks held, so that we can drop them all
174 * the request is finished or forwarded. see request_*().
175 */
176struct MDRequestImpl : public MutationImpl {
177 Session *session;
178 elist<MDRequestImpl*>::item item_session_request; // if not on list, op is aborted.
179
180 // -- i am a client (master) request
181 MClientRequest *client_request; // client request (if any)
182
183 // store up to two sets of dn vectors, inode pointers, for request path1 and path2.
184 vector<CDentry*> dn[2];
185 CDentry *straydn;
186 CInode *in[2];
187 snapid_t snapid;
188
189 CInode *tracei;
190 CDentry *tracedn;
191
192 inodeno_t alloc_ino, used_prealloc_ino;
193 interval_set<inodeno_t> prealloc_inos;
194
b32b8144
FG
195 int snap_caps = 0;
196 int getattr_caps = 0; ///< caps requested by getattr
197 bool no_early_reply = false;
198 bool did_early_reply = false;
199 bool o_trunc = false; ///< request is an O_TRUNC mutation
200 bool has_completed = false; ///< request has already completed
7c673cae
FG
201
202 bufferlist reply_extra_bl;
203
204 // inos we did a embedded cap release on, and may need to eval if we haven't since reissued
205 map<vinodeno_t, ceph_seq_t> cap_releases;
206
207 // -- i am a slave request
208 MMDSSlaveRequest *slave_request; // slave request (if one is pending; implies slave == true)
209
210 // -- i am an internal op
211 int internal_op;
212 Context *internal_op_finish;
213 void *internal_op_private;
214
215 // indicates how may retries of request have been made
216 int retry;
217
218 // indicator for vxattr osdmap update
219 bool waited_for_osdmap;
220
221 // break rarely-used fields into a separately allocated structure
222 // to save memory for most ops
223 struct More {
91327a77 224 int slave_error = 0;
7c673cae
FG
225 set<mds_rank_t> slaves; // mds nodes that have slave requests to me (implies client_request)
226 set<mds_rank_t> waiting_on_slave; // peers i'm waiting for slavereq replies from.
227
228 // for rename/link/unlink
229 set<mds_rank_t> witnessed; // nodes who have journaled a RenamePrepare
230 map<MDSCacheObject*,version_t> pvmap;
231
91327a77
AA
232 bool has_journaled_slaves = false;
233 bool slave_update_journaled = false;
234 bool slave_rolling_back = false;
7c673cae
FG
235
236 // for rename
237 set<mds_rank_t> extra_witnesses; // replica list from srcdn auth (rename)
91327a77 238 mds_rank_t srcdn_auth_mds = MDS_RANK_NONE;
7c673cae 239 bufferlist inode_import;
91327a77
AA
240 version_t inode_import_v = 0;
241 CInode* rename_inode = nullptr;
242 bool is_freeze_authpin = false;
243 bool is_ambiguous_auth = false;
244 bool is_remote_frozen_authpin = false;
245 bool is_inode_exporter = false;
7c673cae 246
28e407b8 247 map<client_t, pair<Session*, uint64_t> > imported_session_map;
7c673cae
FG
248 map<CInode*, map<client_t,Capability::Export> > cap_imports;
249
250 // for lock/flock
91327a77 251 bool flock_was_waiting = false;
7c673cae
FG
252
253 // for snaps
91327a77 254 version_t stid = 0;
7c673cae
FG
255 bufferlist snapidbl;
256
257 // called when slave commits or aborts
91327a77 258 Context *slave_commit = nullptr;
7c673cae
FG
259 bufferlist rollback_bl;
260
261 list<MDSInternalContextBase*> waiting_for_finish;
262
263 // export & fragment
91327a77 264 CDir* export_dir = nullptr;
7c673cae
FG
265 dirfrag_t fragment_base;
266
267 // for internal ops doing lookup
268 filepath filepath1;
269 filepath filepath2;
270
91327a77 271 More() {}
7c673cae
FG
272 } *_more;
273
274
275 // ---------------------------------------------------
276 struct Params {
277 metareqid_t reqid;
278 __u32 attempt;
279 MClientRequest *client_req;
280 class Message *triggering_slave_req;
281 mds_rank_t slave_to;
282 utime_t initiated;
283 utime_t throttled, all_read, dispatched;
284 int internal_op;
285 // keep these default values synced to MutationImpl's
286 Params() : attempt(0), client_req(NULL),
287 triggering_slave_req(NULL), slave_to(MDS_RANK_NONE), internal_op(-1) {}
288 };
289 MDRequestImpl(const Params& params, OpTracker *tracker) :
290 MutationImpl(tracker, params.initiated,
291 params.reqid, params.attempt, params.slave_to),
292 session(NULL), item_session_request(this),
293 client_request(params.client_req), straydn(NULL), snapid(CEPH_NOSNAP),
294 tracei(NULL), tracedn(NULL), alloc_ino(0), used_prealloc_ino(0),
7c673cae
FG
295 slave_request(NULL), internal_op(params.internal_op), internal_op_finish(NULL),
296 internal_op_private(NULL),
297 retry(0),
298 waited_for_osdmap(false), _more(NULL) {
299 in[0] = in[1] = NULL;
300 if (!params.throttled.is_zero())
301 mark_event("throttled", params.throttled);
302 if (!params.all_read.is_zero())
303 mark_event("all_read", params.all_read);
304 if (!params.dispatched.is_zero())
305 mark_event("dispatched", params.dispatched);
306 }
307 ~MDRequestImpl() override;
308
309 More* more();
310 bool has_more() const;
311 bool has_witnesses();
312 bool slave_did_prepare();
313 bool slave_rolling_back();
314 bool did_ino_allocation() const;
315 bool freeze_auth_pin(CInode *inode);
316 void unfreeze_auth_pin(bool clear_inode=false);
317 void set_remote_frozen_auth_pin(CInode *inode);
318 bool can_auth_pin(MDSCacheObject *object);
319 void drop_local_auth_pins();
320 void set_ambiguous_auth(CInode *inode);
321 void clear_ambiguous_auth();
322 const filepath& get_filepath();
323 const filepath& get_filepath2();
324 void set_filepath(const filepath& fp);
325 void set_filepath2(const filepath& fp);
b32b8144 326 bool is_queued_for_replay() const;
7c673cae
FG
327
328 void print(ostream &out) const override;
329 void dump(Formatter *f) const override;
330
91327a77
AA
331 MClientRequest* release_client_request();
332 void reset_slave_request(MMDSSlaveRequest *req=nullptr);
333
7c673cae
FG
334 // TrackedOp stuff
335 typedef boost::intrusive_ptr<MDRequestImpl> Ref;
336protected:
337 void _dump(Formatter *f) const override;
338 void _dump_op_descriptor_unlocked(ostream& stream) const override;
91327a77
AA
339private:
340 class {
341 std::atomic_flag _lock = ATOMIC_FLAG_INIT;
342 public:
343 void lock() {
344 while(_lock.test_and_set(std::memory_order_acquire))
345 ;
346 }
347 void unlock() {
348 _lock.clear(std::memory_order_release);
349 }
350 } mutable msg_lock;
7c673cae
FG
351};
352
353typedef boost::intrusive_ptr<MDRequestImpl> MDRequestRef;
354
355
356struct MDSlaveUpdate {
357 int origop;
358 bufferlist rollback;
359 elist<MDSlaveUpdate*>::item item;
360 Context *waiter;
361 set<CInode*> olddirs;
362 set<CInode*> unlinked;
363 MDSlaveUpdate(int oo, bufferlist &rbl, elist<MDSlaveUpdate*> &list) :
364 origop(oo),
365 item(this),
366 waiter(0) {
367 rollback.claim(rbl);
368 list.push_back(&item);
369 }
370 ~MDSlaveUpdate() {
371 item.remove_myself();
372 if (waiter)
373 waiter->complete(0);
374 }
375};
376
377
378#endif