]>
Commit | Line | Data |
---|---|---|
11fdf7f2 | 1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
7c673cae FG |
2 | // vim: ts=8 sw=2 smarttab |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
7c673cae FG |
14 | #ifndef CEPH_MDCACHE_H |
15 | #define CEPH_MDCACHE_H | |
16 | ||
eafe8130 | 17 | #include <atomic> |
11fdf7f2 | 18 | #include <string_view> |
eafe8130 | 19 | #include <thread> |
94b18763 | 20 | |
a8e16298 | 21 | #include "common/DecayCounter.h" |
9f95a23c | 22 | #include "include/common_fwd.h" |
7c673cae FG |
23 | #include "include/types.h" |
24 | #include "include/filepath.h" | |
25 | #include "include/elist.h" | |
26 | ||
11fdf7f2 TL |
27 | #include "messages/MCacheExpire.h" |
28 | #include "messages/MClientQuota.h" | |
29 | #include "messages/MClientRequest.h" | |
30 | #include "messages/MClientSnap.h" | |
31 | #include "messages/MDentryLink.h" | |
32 | #include "messages/MDentryUnlink.h" | |
33 | #include "messages/MDirUpdate.h" | |
34 | #include "messages/MDiscover.h" | |
35 | #include "messages/MDiscoverReply.h" | |
36 | #include "messages/MGatherCaps.h" | |
37 | #include "messages/MGenericMessage.h" | |
38 | #include "messages/MInodeFileCaps.h" | |
39 | #include "messages/MLock.h" | |
40 | #include "messages/MMDSCacheRejoin.h" | |
41 | #include "messages/MMDSFindIno.h" | |
42 | #include "messages/MMDSFindInoReply.h" | |
43 | #include "messages/MMDSFragmentNotify.h" | |
44 | #include "messages/MMDSFragmentNotifyAck.h" | |
45 | #include "messages/MMDSOpenIno.h" | |
46 | #include "messages/MMDSOpenInoReply.h" | |
47 | #include "messages/MMDSResolve.h" | |
48 | #include "messages/MMDSResolveAck.h" | |
49 | #include "messages/MMDSSlaveRequest.h" | |
50 | #include "messages/MMDSSnapUpdate.h" | |
51 | ||
7c673cae FG |
52 | #include "osdc/Filer.h" |
53 | #include "CInode.h" | |
54 | #include "CDentry.h" | |
55 | #include "CDir.h" | |
56 | #include "include/Context.h" | |
57 | #include "events/EMetaBlob.h" | |
58 | #include "RecoveryQueue.h" | |
59 | #include "StrayManager.h" | |
11fdf7f2 | 60 | #include "OpenFileTable.h" |
7c673cae FG |
61 | #include "MDSContext.h" |
62 | #include "MDSMap.h" | |
63 | #include "Mutation.h" | |
64 | ||
7c673cae FG |
65 | class MDSRank; |
66 | class Session; | |
67 | class Migrator; | |
68 | ||
7c673cae FG |
69 | class Session; |
70 | ||
7c673cae FG |
71 | class ESubtreeMap; |
72 | ||
// MDCache perf counter indices.  Values are consecutive, starting at
// l_mdc_first; l_mdc_last is the one-past-the-end marker.
enum {
  l_mdc_first = 3000,

  // -- stray gauges --
  l_mdc_num_strays,             // inodes currently in stray dentries
  l_mdc_num_strays_delayed,     // stray dentries currently delayed for purge due to refs
  l_mdc_num_strays_enqueuing,   // stray dentries currently being enqueued for purge

  // -- stray totals --
  l_mdc_strays_created,         // dentries ever added to a stray dir
  l_mdc_strays_enqueued,        // dentries passed on to PurgeQueue
  l_mdc_strays_reintegrated,    // strays that have been reintegrated
  l_mdc_strays_migrated,        // strays that have been migrated

  // -- file size recovery --
  l_mdc_num_recovering_processing,   // inode sizes currently being recovered
  l_mdc_num_recovering_enqueued,     // inodes currently waiting to have size recovered
  l_mdc_num_recovering_prioritized,  // inodes waiting with elevated priority for recovery
  l_mdc_recovery_started,            // inodes that ever started size recovery
  l_mdc_recovery_completed,          // inodes that ever completed size recovery

  // -- internal requests --
  l_mdss_ireq_enqueue_scrub,
  l_mdss_ireq_exportdir,
  l_mdss_ireq_flush,
  l_mdss_ireq_fragmentdir,
  l_mdss_ireq_fragstats,
  l_mdss_ireq_inodestats,

  l_mdc_last,
};
111 | ||
9f95a23c TL |
// Bit flags for path_traverse().  Each flag occupies its own bit so that
// several of them can be OR-ed together into a single int.
static constexpr int MDS_TRAVERSE_DISCOVER        = (1 << 0);
static constexpr int MDS_TRAVERSE_PATH_LOCKED     = (1 << 1);
static constexpr int MDS_TRAVERSE_WANT_DENTRY     = (1 << 2);
static constexpr int MDS_TRAVERSE_WANT_AUTH       = (1 << 3);
static constexpr int MDS_TRAVERSE_RDLOCK_SNAP     = (1 << 4);
static constexpr int MDS_TRAVERSE_RDLOCK_SNAP2    = (1 << 5);
static constexpr int MDS_TRAVERSE_WANT_DIRLAYOUT  = (1 << 6);
static constexpr int MDS_TRAVERSE_RDLOCK_PATH     = (1 << 7);
static constexpr int MDS_TRAVERSE_XLOCK_DENTRY    = (1 << 8);
static constexpr int MDS_TRAVERSE_RDLOCK_AUTHLOCK = (1 << 9);
static constexpr int MDS_TRAVERSE_CHECK_LOCKCACHE = (1 << 10);
124 | ||
7c673cae FG |
125 | |
// Bit flags for predirty_journal_parents(); may be OR-ed together.
static constexpr int PREDIRTY_PRIMARY = 1;  // primary dn, adjust nested accounting
static constexpr int PREDIRTY_DIR     = 2;  // update parent dir mtime/size
static constexpr int PREDIRTY_SHALLOW = 4;  // only go to immediate parent (for easier rollback)
130 | ||
131 | class MDCache { | |
132 | public: | |
9f95a23c TL |
133 | typedef std::map<mds_rank_t, ref_t<MCacheExpire>> expiremap; |
134 | ||
91327a77 AA |
135 | using clock = ceph::coarse_mono_clock; |
136 | using time = ceph::coarse_mono_time; | |
137 | ||
9f95a23c TL |
138 | // -- discover -- |
139 | struct discover_info_t { | |
140 | discover_info_t() {} | |
141 | ~discover_info_t() { | |
142 | if (basei) | |
143 | basei->put(MDSCacheObject::PIN_DISCOVERBASE); | |
144 | } | |
145 | void pin_base(CInode *b) { | |
146 | basei = b; | |
147 | basei->get(MDSCacheObject::PIN_DISCOVERBASE); | |
148 | } | |
7c673cae | 149 | |
9f95a23c TL |
150 | ceph_tid_t tid = 0; |
151 | mds_rank_t mds = -1; | |
152 | inodeno_t ino; | |
153 | frag_t frag; | |
154 | snapid_t snap = CEPH_NOSNAP; | |
155 | filepath want_path; | |
156 | CInode *basei = nullptr; | |
157 | bool want_base_dir = false; | |
158 | bool path_locked = false; | |
159 | }; | |
7c673cae | 160 | |
9f95a23c TL |
161 | // [reconnect/rejoin caps] |
162 | struct reconnected_cap_info_t { | |
163 | reconnected_cap_info_t() {} | |
164 | inodeno_t realm_ino = 0; | |
165 | snapid_t snap_follows = 0; | |
166 | int dirty_caps = 0; | |
167 | bool snapflush = 0; | |
168 | }; | |
7c673cae | 169 | |
9f95a23c TL |
170 | // -- find_ino_peer -- |
171 | struct find_ino_peer_info_t { | |
172 | find_ino_peer_info_t() {} | |
173 | inodeno_t ino; | |
174 | ceph_tid_t tid = 0; | |
175 | MDSContext *fin = nullptr; | |
176 | bool path_locked = false; | |
177 | mds_rank_t hint = MDS_RANK_NONE; | |
178 | mds_rank_t checking = MDS_RANK_NONE; | |
179 | set<mds_rank_t> checked; | |
180 | }; | |
7c673cae | 181 | |
9f95a23c TL |
182 | friend class C_MDC_RejoinOpenInoFinish; |
183 | friend class C_MDC_RejoinSessionsOpened; | |
7c673cae | 184 | |
9f95a23c TL |
185 | friend class Locker; |
186 | friend class Migrator; | |
187 | friend class MDBalancer; | |
7c673cae | 188 | |
9f95a23c TL |
189 | // StrayManager needs to be able to remove_inode() from us |
190 | // when it is done purging | |
191 | friend class StrayManager; | |
7c673cae | 192 | |
9f95a23c TL |
193 | explicit MDCache(MDSRank *m, PurgeQueue &purge_queue_); |
194 | ~MDCache(); | |
91327a77 | 195 | |
91327a77 AA |
196 | uint64_t cache_limit_memory(void) { |
197 | return cache_memory_limit; | |
181888fb FG |
198 | } |
199 | double cache_toofull_ratio(void) const { | |
91327a77 | 200 | double memory_reserve = cache_memory_limit*(1.0-cache_reservation); |
9f95a23c | 201 | return fmax(0.0, (cache_size()-memory_reserve)/memory_reserve); |
181888fb FG |
202 | } |
203 | bool cache_toofull(void) const { | |
204 | return cache_toofull_ratio() > 0.0; | |
205 | } | |
206 | uint64_t cache_size(void) const { | |
207 | return mempool::get_pool(mempool::mds_co::id).allocated_bytes(); | |
208 | } | |
209 | bool cache_overfull(void) const { | |
9f95a23c | 210 | return cache_size() > cache_memory_limit*cache_health_threshold; |
181888fb FG |
211 | } |
212 | ||
7c673cae FG |
213 | void advance_stray() { |
214 | stray_index = (stray_index+1)%NUM_STRAY; | |
215 | } | |
216 | ||
f6b5b4d7 TL |
217 | bool get_export_ephemeral_distributed_config(void) const { |
218 | return export_ephemeral_distributed_config; | |
219 | } | |
220 | ||
221 | bool get_export_ephemeral_random_config(void) const { | |
222 | return export_ephemeral_random_config; | |
223 | } | |
224 | ||
7c673cae FG |
225 | /** |
226 | * Call this when you know that a CDentry is ready to be passed | |
227 | * on to StrayManager (i.e. this is a stray you've just created) | |
228 | */ | |
229 | void notify_stray(CDentry *dn) { | |
11fdf7f2 | 230 | ceph_assert(dn->get_dir()->get_inode()->is_stray()); |
a8e16298 TL |
231 | if (dn->state_test(CDentry::STATE_PURGING)) |
232 | return; | |
233 | ||
7c673cae FG |
234 | stray_manager.eval_stray(dn); |
235 | } | |
236 | ||
f6b5b4d7 TL |
237 | mds_rank_t hash_into_rank_bucket(inodeno_t ino); |
238 | ||
7c673cae | 239 | void maybe_eval_stray(CInode *in, bool delay=false); |
31f18b77 FG |
240 | void clear_dirty_bits_for_stray(CInode* diri); |
241 | ||
7c673cae FG |
242 | bool is_readonly() { return readonly; } |
243 | void force_readonly(); | |
244 | ||
7c673cae FG |
245 | static file_layout_t gen_default_file_layout(const MDSMap &mdsmap); |
246 | static file_layout_t gen_default_log_layout(const MDSMap &mdsmap); | |
247 | ||
7c673cae FG |
248 | void register_perfcounters(); |
249 | ||
7c673cae FG |
250 | void touch_client_lease(ClientLease *r, int pool, utime_t ttl) { |
251 | client_leases[pool].push_back(&r->item_lease); | |
252 | r->ttl = ttl; | |
253 | } | |
254 | ||
255 | void notify_stray_removed() | |
256 | { | |
257 | stray_manager.notify_stray_removed(); | |
258 | } | |
259 | ||
260 | void notify_stray_created() | |
261 | { | |
262 | stray_manager.notify_stray_created(); | |
263 | } | |
264 | ||
31f18b77 FG |
265 | void eval_remote(CDentry *dn) |
266 | { | |
267 | stray_manager.eval_remote(dn); | |
268 | } | |
269 | ||
7c673cae FG |
270 | void _send_discover(discover_info_t& dis); |
271 | discover_info_t& _create_discover(mds_rank_t mds) { | |
272 | ceph_tid_t t = ++discover_last_tid; | |
273 | discover_info_t& d = discovers[t]; | |
274 | d.tid = t; | |
275 | d.mds = mds; | |
276 | return d; | |
277 | } | |
278 | ||
11fdf7f2 TL |
279 | void discover_base_ino(inodeno_t want_ino, MDSContext *onfinish, mds_rank_t from=MDS_RANK_NONE); |
280 | void discover_dir_frag(CInode *base, frag_t approx_fg, MDSContext *onfinish, | |
7c673cae | 281 | mds_rank_t from=MDS_RANK_NONE); |
11fdf7f2 | 282 | void discover_path(CInode *base, snapid_t snap, filepath want_path, MDSContext *onfinish, |
9f95a23c | 283 | bool path_locked=false, mds_rank_t from=MDS_RANK_NONE); |
11fdf7f2 | 284 | void discover_path(CDir *base, snapid_t snap, filepath want_path, MDSContext *onfinish, |
9f95a23c | 285 | bool path_locked=false); |
7c673cae FG |
286 | void kick_discovers(mds_rank_t who); // after a failure. |
287 | ||
7c673cae FG |
288 | // adjust subtree auth specification |
289 | // dir->dir_auth | |
290 | // imports/exports/nested_exports | |
291 | // join/split subtrees as appropriate | |
7c673cae | 292 | bool is_subtrees() { return !subtrees.empty(); } |
11fdf7f2 TL |
293 | template<typename T> |
294 | void get_subtrees(T& c) { | |
295 | if constexpr (std::is_same_v<T, std::vector<CDir*>>) | |
296 | c.reserve(c.size() + subtrees.size()); | |
297 | for (const auto& p : subtrees) { | |
298 | c.push_back(p.first); | |
299 | } | |
300 | } | |
28e407b8 | 301 | void adjust_subtree_auth(CDir *root, mds_authority_t auth, bool adjust_pop=true); |
224ce89b WB |
302 | void adjust_subtree_auth(CDir *root, mds_rank_t a, mds_rank_t b=CDIR_AUTH_UNKNOWN) { |
303 | adjust_subtree_auth(root, mds_authority_t(a,b)); | |
7c673cae | 304 | } |
11fdf7f2 TL |
305 | void adjust_bounded_subtree_auth(CDir *dir, const set<CDir*>& bounds, mds_authority_t auth); |
306 | void adjust_bounded_subtree_auth(CDir *dir, const set<CDir*>& bounds, mds_rank_t a) { | |
7c673cae FG |
307 | adjust_bounded_subtree_auth(dir, bounds, mds_authority_t(a, CDIR_AUTH_UNKNOWN)); |
308 | } | |
11fdf7f2 TL |
309 | void adjust_bounded_subtree_auth(CDir *dir, const vector<dirfrag_t>& bounds, const mds_authority_t &auth); |
310 | void adjust_bounded_subtree_auth(CDir *dir, const vector<dirfrag_t>& bounds, mds_rank_t a) { | |
7c673cae FG |
311 | adjust_bounded_subtree_auth(dir, bounds, mds_authority_t(a, CDIR_AUTH_UNKNOWN)); |
312 | } | |
11fdf7f2 | 313 | void map_dirfrag_set(const list<dirfrag_t>& dfs, set<CDir*>& result); |
7c673cae | 314 | void try_subtree_merge(CDir *root); |
28e407b8 | 315 | void try_subtree_merge_at(CDir *root, set<CInode*> *to_eval, bool adjust_pop=true); |
7c673cae FG |
316 | void subtree_merge_writebehind_finish(CInode *in, MutationRef& mut); |
317 | void eval_subtree_root(CInode *diri); | |
318 | CDir *get_subtree_root(CDir *dir); | |
319 | CDir *get_projected_subtree_root(CDir *dir); | |
320 | bool is_leaf_subtree(CDir *dir) { | |
11fdf7f2 | 321 | ceph_assert(subtrees.count(dir)); |
7c673cae FG |
322 | return subtrees[dir].empty(); |
323 | } | |
324 | void remove_subtree(CDir *dir); | |
325 | bool is_subtree(CDir *root) { | |
326 | return subtrees.count(root); | |
327 | } | |
328 | void get_subtree_bounds(CDir *root, set<CDir*>& bounds); | |
329 | void get_wouldbe_subtree_bounds(CDir *root, set<CDir*>& bounds); | |
330 | void verify_subtree_bounds(CDir *root, const set<CDir*>& bounds); | |
331 | void verify_subtree_bounds(CDir *root, const list<dirfrag_t>& bounds); | |
332 | ||
333 | void project_subtree_rename(CInode *diri, CDir *olddir, CDir *newdir); | |
224ce89b | 334 | void adjust_subtree_after_rename(CInode *diri, CDir *olddir, bool pop); |
7c673cae | 335 | |
11fdf7f2 TL |
336 | auto get_auth_subtrees() { |
337 | std::vector<CDir*> c; | |
338 | for (auto& p : subtrees) { | |
339 | auto& root = p.first; | |
340 | if (root->is_auth()) { | |
341 | c.push_back(root); | |
342 | } | |
343 | } | |
344 | return c; | |
345 | } | |
7c673cae | 346 | |
11fdf7f2 TL |
347 | auto get_fullauth_subtrees() { |
348 | std::vector<CDir*> c; | |
349 | for (auto& p : subtrees) { | |
350 | auto& root = p.first; | |
351 | if (root->is_full_dir_auth()) { | |
352 | c.push_back(root); | |
353 | } | |
354 | } | |
355 | return c; | |
356 | } | |
357 | auto num_subtrees_fullauth() const { | |
358 | std::size_t n = 0; | |
359 | for (auto& p : subtrees) { | |
360 | auto& root = p.first; | |
361 | if (root->is_full_dir_auth()) { | |
362 | ++n; | |
363 | } | |
364 | } | |
365 | return n; | |
366 | } | |
7c673cae | 367 | |
11fdf7f2 TL |
368 | auto num_subtrees_fullnonauth() const { |
369 | std::size_t n = 0; | |
370 | for (auto& p : subtrees) { | |
371 | auto& root = p.first; | |
372 | if (root->is_full_dir_nonauth()) { | |
373 | ++n; | |
374 | } | |
375 | } | |
376 | return n; | |
377 | } | |
7c673cae | 378 | |
11fdf7f2 TL |
379 | auto num_subtrees() const { |
380 | return subtrees.size(); | |
381 | } | |
7c673cae | 382 | |
7c673cae FG |
383 | int get_num_client_requests(); |
384 | ||
9f95a23c TL |
385 | MDRequestRef request_start(const cref_t<MClientRequest>& req); |
386 | MDRequestRef request_start_slave(metareqid_t rid, __u32 attempt, const cref_t<Message> &m); | |
7c673cae FG |
387 | MDRequestRef request_start_internal(int op); |
388 | bool have_request(metareqid_t rid) { | |
389 | return active_requests.count(rid); | |
390 | } | |
391 | MDRequestRef request_get(metareqid_t rid); | |
392 | void request_pin_ref(MDRequestRef& r, CInode *ref, vector<CDentry*>& trace); | |
393 | void request_finish(MDRequestRef& mdr); | |
394 | void request_forward(MDRequestRef& mdr, mds_rank_t mds, int port=0); | |
395 | void dispatch_request(MDRequestRef& mdr); | |
396 | void request_drop_foreign_locks(MDRequestRef& mdr); | |
397 | void request_drop_non_rdlocks(MDRequestRef& r); | |
398 | void request_drop_locks(MDRequestRef& r); | |
399 | void request_cleanup(MDRequestRef& r); | |
400 | ||
401 | void request_kill(MDRequestRef& r); // called when session closes | |
402 | ||
403 | // journal/snap helpers | |
404 | CInode *pick_inode_snap(CInode *in, snapid_t follows); | |
405 | CInode *cow_inode(CInode *in, snapid_t last); | |
406 | void journal_cow_dentry(MutationImpl *mut, EMetaBlob *metablob, CDentry *dn, | |
407 | snapid_t follows=CEPH_NOSNAP, | |
408 | CInode **pcow_inode=0, CDentry::linkage_t *dnl=0); | |
409 | void journal_cow_inode(MutationRef& mut, EMetaBlob *metablob, CInode *in, snapid_t follows=CEPH_NOSNAP, | |
410 | CInode **pcow_inode=0); | |
411 | void journal_dirty_inode(MutationImpl *mut, EMetaBlob *metablob, CInode *in, snapid_t follows=CEPH_NOSNAP); | |
412 | ||
413 | void project_rstat_inode_to_frag(CInode *cur, CDir *parent, snapid_t first, | |
414 | int linkunlink, SnapRealm *prealm); | |
94b18763 | 415 | void _project_rstat_inode_to_frag(CInode::mempool_inode & inode, snapid_t ofirst, snapid_t last, |
7c673cae FG |
416 | CDir *parent, int linkunlink, bool update_inode); |
417 | void project_rstat_frag_to_inode(nest_info_t& rstat, nest_info_t& accounted_rstat, | |
418 | snapid_t ofirst, snapid_t last, | |
419 | CInode *pin, bool cow_head); | |
a8e16298 | 420 | void broadcast_quota_to_client(CInode *in, client_t exclude_ct = -1, bool quota_change = false); |
7c673cae FG |
421 | void predirty_journal_parents(MutationRef mut, EMetaBlob *blob, |
422 | CInode *in, CDir *parent, | |
423 | int flags, int linkunlink=0, | |
424 | snapid_t follows=CEPH_NOSNAP); | |
425 | ||
426 | // slaves | |
427 | void add_uncommitted_master(metareqid_t reqid, LogSegment *ls, set<mds_rank_t> &slaves, bool safe=false) { | |
428 | uncommitted_masters[reqid].ls = ls; | |
429 | uncommitted_masters[reqid].slaves = slaves; | |
430 | uncommitted_masters[reqid].safe = safe; | |
431 | } | |
11fdf7f2 | 432 | void wait_for_uncommitted_master(metareqid_t reqid, MDSContext *c) { |
7c673cae FG |
433 | uncommitted_masters[reqid].waiters.push_back(c); |
434 | } | |
435 | bool have_uncommitted_master(metareqid_t reqid, mds_rank_t from) { | |
436 | auto p = uncommitted_masters.find(reqid); | |
437 | return p != uncommitted_masters.end() && p->second.slaves.count(from) > 0; | |
438 | } | |
439 | void log_master_commit(metareqid_t reqid); | |
440 | void logged_master_update(metareqid_t reqid); | |
441 | void _logged_master_commit(metareqid_t reqid); | |
442 | void committed_master_slave(metareqid_t r, mds_rank_t from); | |
443 | void finish_committed_masters(); | |
444 | ||
e306af50 TL |
445 | void add_uncommitted_slave(metareqid_t reqid, LogSegment*, mds_rank_t, MDSlaveUpdate *su=nullptr); |
446 | void wait_for_uncommitted_slave(metareqid_t reqid, MDSContext *c) { | |
447 | uncommitted_slaves.at(reqid).waiters.push_back(c); | |
448 | } | |
449 | void finish_uncommitted_slave(metareqid_t reqid, bool assert_exist=true); | |
450 | MDSlaveUpdate* get_uncommitted_slave(metareqid_t reqid, mds_rank_t master); | |
7c673cae FG |
451 | void _logged_slave_commit(mds_rank_t from, metareqid_t reqid); |
452 | ||
7c673cae FG |
453 | void set_recovery_set(set<mds_rank_t>& s); |
454 | void handle_mds_failure(mds_rank_t who); | |
455 | void handle_mds_recovery(mds_rank_t who); | |
456 | ||
7c673cae FG |
457 | void recalc_auth_bits(bool replay); |
458 | void remove_inode_recursive(CInode *in); | |
459 | ||
460 | bool is_ambiguous_slave_update(metareqid_t reqid, mds_rank_t master) { | |
461 | auto p = ambiguous_slave_updates.find(master); | |
462 | return p != ambiguous_slave_updates.end() && p->second.count(reqid); | |
463 | } | |
464 | void add_ambiguous_slave_update(metareqid_t reqid, mds_rank_t master) { | |
465 | ambiguous_slave_updates[master].insert(reqid); | |
466 | } | |
467 | void remove_ambiguous_slave_update(metareqid_t reqid, mds_rank_t master) { | |
468 | auto p = ambiguous_slave_updates.find(master); | |
469 | auto q = p->second.find(reqid); | |
11fdf7f2 | 470 | ceph_assert(q != p->second.end()); |
7c673cae FG |
471 | p->second.erase(q); |
472 | if (p->second.empty()) | |
473 | ambiguous_slave_updates.erase(p); | |
474 | } | |
475 | ||
476 | void add_rollback(metareqid_t reqid, mds_rank_t master) { | |
11fdf7f2 | 477 | resolve_need_rollback[reqid] = master; |
7c673cae | 478 | } |
e306af50 | 479 | void finish_rollback(metareqid_t reqid, MDRequestRef& mdr); |
7c673cae FG |
480 | |
481 | // ambiguous imports | |
482 | void add_ambiguous_import(dirfrag_t base, const vector<dirfrag_t>& bounds); | |
483 | void add_ambiguous_import(CDir *base, const set<CDir*>& bounds); | |
484 | bool have_ambiguous_import(dirfrag_t base) { | |
485 | return my_ambiguous_imports.count(base); | |
486 | } | |
487 | void get_ambiguous_import_bounds(dirfrag_t base, vector<dirfrag_t>& bounds) { | |
11fdf7f2 | 488 | ceph_assert(my_ambiguous_imports.count(base)); |
7c673cae FG |
489 | bounds = my_ambiguous_imports[base]; |
490 | } | |
491 | void cancel_ambiguous_import(CDir *); | |
492 | void finish_ambiguous_import(dirfrag_t dirino); | |
11fdf7f2 | 493 | void resolve_start(MDSContext *resolve_done_); |
7c673cae | 494 | void send_resolves(); |
7c673cae FG |
495 | void maybe_send_pending_resolves() { |
496 | if (resolves_pending) | |
497 | send_subtree_resolves(); | |
498 | } | |
499 | ||
500 | void _move_subtree_map_bound(dirfrag_t df, dirfrag_t oldparent, dirfrag_t newparent, | |
501 | map<dirfrag_t,vector<dirfrag_t> >& subtrees); | |
502 | ESubtreeMap *create_subtree_map(); | |
503 | ||
7c673cae | 504 | void clean_open_file_lists(); |
11fdf7f2 TL |
505 | void dump_openfiles(Formatter *f); |
506 | bool dump_inode(Formatter *f, uint64_t number); | |
7c673cae | 507 | |
11fdf7f2 | 508 | void rejoin_start(MDSContext *rejoin_done_); |
7c673cae FG |
509 | void rejoin_gather_finish(); |
510 | void rejoin_send_rejoins(); | |
511 | void rejoin_export_caps(inodeno_t ino, client_t client, const cap_reconnect_t& icr, | |
11fdf7f2 | 512 | int target=-1, bool drop_path=false) { |
28e407b8 AA |
513 | auto& ex = cap_exports[ino]; |
514 | ex.first = target; | |
11fdf7f2 TL |
515 | auto &_icr = ex.second[client] = icr; |
516 | if (drop_path) | |
517 | _icr.path.clear(); | |
7c673cae FG |
518 | } |
519 | void rejoin_recovered_caps(inodeno_t ino, client_t client, const cap_reconnect_t& icr, | |
11fdf7f2 TL |
520 | mds_rank_t frommds=MDS_RANK_NONE, bool drop_path=false) { |
521 | auto &_icr = cap_imports[ino][client][frommds] = icr; | |
522 | if (drop_path) | |
523 | _icr.path.clear(); | |
7c673cae | 524 | } |
28e407b8 AA |
525 | void rejoin_recovered_client(client_t client, const entity_inst_t& inst) { |
526 | rejoin_client_map.emplace(client, inst); | |
527 | } | |
11fdf7f2 TL |
528 | bool rejoin_has_cap_reconnect(inodeno_t ino) const { |
529 | return cap_imports.count(ino); | |
530 | } | |
531 | void add_replay_ino_alloc(inodeno_t ino) { | |
532 | cap_imports_missing.insert(ino); // avoid opening ino during cache rejoin | |
533 | } | |
7c673cae FG |
534 | const cap_reconnect_t *get_replay_cap_reconnect(inodeno_t ino, client_t client) { |
535 | if (cap_imports.count(ino) && | |
536 | cap_imports[ino].count(client) && | |
537 | cap_imports[ino][client].count(MDS_RANK_NONE)) { | |
538 | return &cap_imports[ino][client][MDS_RANK_NONE]; | |
539 | } | |
540 | return NULL; | |
541 | } | |
542 | void remove_replay_cap_reconnect(inodeno_t ino, client_t client) { | |
11fdf7f2 TL |
543 | ceph_assert(cap_imports[ino].size() == 1); |
544 | ceph_assert(cap_imports[ino][client].size() == 1); | |
7c673cae FG |
545 | cap_imports.erase(ino); |
546 | } | |
11fdf7f2 | 547 | void wait_replay_cap_reconnect(inodeno_t ino, MDSContext *c) { |
7c673cae FG |
548 | cap_reconnect_waiters[ino].push_back(c); |
549 | } | |
550 | ||
7c673cae FG |
551 | void add_reconnected_cap(client_t client, inodeno_t ino, const cap_reconnect_t& icr) { |
552 | reconnected_cap_info_t &info = reconnected_caps[ino][client]; | |
553 | info.realm_ino = inodeno_t(icr.capinfo.snaprealm); | |
554 | info.snap_follows = icr.snap_follows; | |
555 | } | |
11fdf7f2 | 556 | void set_reconnected_dirty_caps(client_t client, inodeno_t ino, int dirty, bool snapflush) { |
7c673cae FG |
557 | reconnected_cap_info_t &info = reconnected_caps[ino][client]; |
558 | info.dirty_caps |= dirty; | |
11fdf7f2 TL |
559 | if (snapflush) |
560 | info.snapflush = snapflush; | |
7c673cae FG |
561 | } |
562 | void add_reconnected_snaprealm(client_t client, inodeno_t ino, snapid_t seq) { | |
563 | reconnected_snaprealms[ino][client] = seq; | |
564 | } | |
565 | ||
7c673cae | 566 | void rejoin_open_ino_finish(inodeno_t ino, int ret); |
11fdf7f2 | 567 | void rejoin_prefetch_ino_finish(inodeno_t ino, int ret); |
28e407b8 | 568 | void rejoin_open_sessions_finish(map<client_t,pair<Session*,uint64_t> >& session_map); |
7c673cae FG |
569 | bool process_imported_caps(); |
570 | void choose_lock_states_and_reconnect_caps(); | |
571 | void prepare_realm_split(SnapRealm *realm, client_t client, inodeno_t ino, | |
9f95a23c TL |
572 | map<client_t,ref_t<MClientSnap>>& splits); |
573 | void prepare_realm_merge(SnapRealm *realm, SnapRealm *parent_realm, map<client_t,ref_t<MClientSnap>>& splits); | |
574 | void send_snaps(map<client_t,ref_t<MClientSnap>>& splits); | |
7c673cae | 575 | Capability* rejoin_import_cap(CInode *in, client_t client, const cap_reconnect_t& icr, mds_rank_t frommds); |
11fdf7f2 | 576 | void finish_snaprealm_reconnect(client_t client, SnapRealm *realm, snapid_t seq, |
9f95a23c | 577 | map<client_t,ref_t<MClientSnap>>& updates); |
a8e16298 | 578 | Capability* try_reconnect_cap(CInode *in, Session *session); |
7c673cae FG |
579 | void export_remaining_imported_caps(); |
580 | ||
7c673cae FG |
581 | void do_cap_import(Session *session, CInode *in, Capability *cap, |
582 | uint64_t p_cap_id, ceph_seq_t p_seq, ceph_seq_t p_mseq, | |
583 | int peer, int p_flags); | |
584 | void do_delayed_cap_imports(); | |
585 | void rebuild_need_snapflush(CInode *head_in, SnapRealm *realm, client_t client, | |
586 | snapid_t snap_follows); | |
11fdf7f2 | 587 | void open_snaprealms(); |
7c673cae FG |
588 | |
589 | bool open_undef_inodes_dirfrags(); | |
590 | void opened_undef_inode(CInode *in); | |
591 | void opened_undef_dirfrag(CDir *dir) { | |
592 | rejoin_undef_dirfrags.erase(dir); | |
593 | } | |
594 | ||
595 | void reissue_all_caps(); | |
7c673cae | 596 | |
7c673cae FG |
597 | void start_files_to_recover(); |
598 | void do_file_recover(); | |
599 | void queue_file_recover(CInode *in); | |
600 | void _queued_file_recover_cow(CInode *in, MutationRef& mut); | |
601 | ||
92f5a8d4 | 602 | void handle_conf_change(const std::set<std::string>& changed, const MDSMap& mds_map); |
7c673cae FG |
603 | |
604 | // debug | |
605 | void log_stat(); | |
606 | ||
607 | // root inode | |
608 | CInode *get_root() { return root; } | |
609 | CInode *get_myin() { return myin; } | |
610 | ||
7c673cae FG |
611 | size_t get_cache_size() { return lru.lru_get_size(); } |
612 | ||
613 | // trimming | |
a8e16298 | 614 | std::pair<bool, uint64_t> trim(uint64_t count=0); |
9f95a23c | 615 | |
7c673cae FG |
616 | bool trim_non_auth_subtree(CDir *directory); |
617 | void standby_trim_segment(LogSegment *ls); | |
618 | void try_trim_non_auth_subtree(CDir *dir); | |
619 | bool can_trim_non_auth_dirfrag(CDir *dir) { | |
620 | return my_ambiguous_imports.count((dir)->dirfrag()) == 0 && | |
621 | uncommitted_slave_rename_olddir.count(dir->inode) == 0; | |
622 | } | |
623 | ||
624 | /** | |
625 | * For all unreferenced inodes, dirs, dentries below an inode, compose | |
626 | * expiry messages. This is used when giving up all replicas of entities | |
627 | * for an MDS peer in the 'stopping' state, such that the peer can | |
628 | * empty its cache and finish shutting down. | |
629 | * | |
630 | * We have to make sure we're only expiring un-referenced items to | |
631 | * avoid interfering with ongoing stray-movement (we can't distinguish | |
632 | * between the "moving my strays" and "waiting for my cache to empty" | |
633 | * phases within 'stopping') | |
634 | * | |
635 | * @return false if we completed cleanly, true if caller should stop | |
636 | * expiring because we hit something with refs. | |
637 | */ | |
11fdf7f2 | 638 | bool expire_recursive(CInode *in, expiremap& expiremap); |
7c673cae FG |
639 | |
640 | void trim_client_leases(); | |
641 | void check_memory_usage(); | |
642 | ||
7c673cae FG |
643 | void shutdown_start(); |
644 | void shutdown_check(); | |
645 | bool shutdown_pass(); | |
7c673cae | 646 | bool shutdown(); // clear cache (ie at shutdown) |
f64942e4 AA |
647 | bool shutdown_export_strays(); |
648 | void shutdown_export_stray_finish(inodeno_t ino) { | |
649 | if (shutdown_exporting_strays.erase(ino)) | |
650 | shutdown_export_strays(); | |
651 | } | |
7c673cae | 652 | |
7c673cae FG |
653 | // inode_map |
654 | bool have_inode(vinodeno_t vino) { | |
b32b8144 FG |
655 | if (vino.snapid == CEPH_NOSNAP) |
656 | return inode_map.count(vino.ino) ? true : false; | |
657 | else | |
658 | return snap_inode_map.count(vino) ? true : false; | |
7c673cae FG |
659 | } |
660 | bool have_inode(inodeno_t ino, snapid_t snap=CEPH_NOSNAP) { | |
661 | return have_inode(vinodeno_t(ino, snap)); | |
662 | } | |
663 | CInode* get_inode(vinodeno_t vino) { | |
b32b8144 FG |
664 | if (vino.snapid == CEPH_NOSNAP) { |
665 | auto p = inode_map.find(vino.ino); | |
666 | if (p != inode_map.end()) | |
667 | return p->second; | |
668 | } else { | |
669 | auto p = snap_inode_map.find(vino); | |
670 | if (p != snap_inode_map.end()) | |
671 | return p->second; | |
672 | } | |
7c673cae FG |
673 | return NULL; |
674 | } | |
675 | CInode* get_inode(inodeno_t ino, snapid_t s=CEPH_NOSNAP) { | |
676 | return get_inode(vinodeno_t(ino, s)); | |
677 | } | |
11fdf7f2 TL |
678 | CInode* lookup_snap_inode(vinodeno_t vino) { |
679 | auto p = snap_inode_map.lower_bound(vino); | |
680 | if (p != snap_inode_map.end() && | |
681 | p->second->ino() == vino.ino && p->second->first <= vino.snapid) | |
682 | return p->second; | |
683 | return NULL; | |
684 | } | |
7c673cae FG |
685 | |
686 | CDir* get_dirfrag(dirfrag_t df) { | |
687 | CInode *in = get_inode(df.ino); | |
688 | if (!in) | |
689 | return NULL; | |
690 | return in->get_dirfrag(df.frag); | |
691 | } | |
11fdf7f2 | 692 | CDir* get_dirfrag(inodeno_t ino, std::string_view dn) { |
7c673cae FG |
693 | CInode *in = get_inode(ino); |
694 | if (!in) | |
695 | return NULL; | |
696 | frag_t fg = in->pick_dirfrag(dn); | |
697 | return in->get_dirfrag(fg); | |
698 | } | |
699 | CDir* get_force_dirfrag(dirfrag_t df, bool replay) { | |
700 | CInode *diri = get_inode(df.ino); | |
701 | if (!diri) | |
702 | return NULL; | |
703 | CDir *dir = force_dir_fragment(diri, df.frag, replay); | |
704 | if (!dir) | |
705 | dir = diri->get_dirfrag(df.frag); | |
706 | return dir; | |
707 | } | |
708 | ||
11fdf7f2 | 709 | MDSCacheObject *get_object(const MDSCacheObjectInfo &info); |
7c673cae | 710 | |
7c673cae FG |
711 | void add_inode(CInode *in); |
712 | ||
713 | void remove_inode(CInode *in); | |
9f95a23c | 714 | |
7c673cae | 715 | void touch_dentry(CDentry *dn) { |
31f18b77 FG |
716 | if (dn->state_test(CDentry::STATE_BOTTOMLRU)) { |
717 | bottom_lru.lru_midtouch(dn); | |
718 | } else { | |
719 | if (dn->is_auth()) | |
720 | lru.lru_touch(dn); | |
721 | else | |
722 | lru.lru_midtouch(dn); | |
723 | } | |
7c673cae FG |
724 | } |
725 | void touch_dentry_bottom(CDentry *dn) { | |
31f18b77 FG |
726 | if (dn->state_test(CDentry::STATE_BOTTOMLRU)) |
727 | return; | |
7c673cae | 728 | lru.lru_bottouch(dn); |
7c673cae | 729 | } |
7c673cae | 730 | |
7c673cae FG |
731 | // truncate |
732 | void truncate_inode(CInode *in, LogSegment *ls); | |
733 | void _truncate_inode(CInode *in, LogSegment *ls); | |
734 | void truncate_inode_finish(CInode *in, LogSegment *ls); | |
735 | void truncate_inode_logged(CInode *in, MutationRef& mut); | |
736 | ||
737 | void add_recovered_truncate(CInode *in, LogSegment *ls); | |
738 | void remove_recovered_truncate(CInode *in, LogSegment *ls); | |
739 | void start_recovered_truncates(); | |
740 | ||
9f95a23c TL |
741 | // purge unsafe inodes |
742 | void start_purge_inodes(); | |
743 | void purge_inodes(const interval_set<inodeno_t>& i, LogSegment *ls); | |
7c673cae | 744 | |
7c673cae FG |
745 | CDir *get_auth_container(CDir *in); |
746 | CDir *get_export_container(CDir *dir); | |
747 | void find_nested_exports(CDir *dir, set<CDir*>& s); | |
748 | void find_nested_exports_under(CDir *import, CDir *dir, set<CDir*>& s); | |
749 | ||
7c673cae FG |
750 | void init_layouts(); |
751 | void create_unlinked_system_inode(CInode *in, inodeno_t ino, | |
752 | int mode) const; | |
753 | CInode *create_system_inode(inodeno_t ino, int mode); | |
754 | CInode *create_root_inode(); | |
755 | ||
756 | void create_empty_hierarchy(MDSGather *gather); | |
757 | void create_mydir_hierarchy(MDSGather *gather); | |
758 | ||
759 | bool is_open() { return open; } | |
11fdf7f2 | 760 | void wait_for_open(MDSContext *c) { |
7c673cae FG |
761 | waiting_for_open.push_back(c); |
762 | } | |
763 | ||
11fdf7f2 | 764 | void open_root_inode(MDSContext *c); |
7c673cae | 765 | void open_root(); |
11fdf7f2 TL |
766 | void open_mydir_inode(MDSContext *c); |
767 | void open_mydir_frag(MDSContext *c); | |
7c673cae FG |
768 | void populate_mydir(); |
769 | ||
11fdf7f2 | 770 | void _create_system_file(CDir *dir, std::string_view name, CInode *in, MDSContext *fin); |
7c673cae | 771 | void _create_system_file_finish(MutationRef& mut, CDentry *dn, |
11fdf7f2 | 772 | version_t dpv, MDSContext *fin); |
7c673cae | 773 | |
11fdf7f2 | 774 | void open_foreign_mdsdir(inodeno_t ino, MDSContext *c); |
7c673cae FG |
775 | CDir *get_stray_dir(CInode *in); |
776 | CDentry *get_or_create_stray_dentry(CInode *in); | |
777 | ||
7c673cae FG |
778 | /** |
779 | * Find the given dentry (and whether it exists or not), its ancestors, | |
780 | * and get them all into memory and usable on this MDS. This function | |
781 | * makes a best-effort attempt to load everything; if it needs to | |
782 | * go away and do something then it will put the request on a waitlist. | |
783 | * It prefers the mdr, then the req, then the fin. (At least one of these | |
784 | * must be non-null.) | |
785 | * | |
786 | * At least one of the params mdr, req, and fin must be non-null. | |
787 | * | |
788 | * @param mdr The MDRequest associated with the path. Can be null. | |
11fdf7f2 | 789 | * @param cf A MDSContextFactory for waiter building. |
7c673cae | 790 | * @param path The path to traverse to. |
9f95a23c TL |
791 | * |
792 | * @param flags Specifies different lookup behaviors. | |
793 | * By default, path_traverse() forwards the request to the auth MDS if that | |
794 | * is appropriate (ie, if it doesn't know the contents of a directory). | |
795 | * MDS_TRAVERSE_DISCOVER: Instead of forwarding request, path_traverse() | |
796 | * attempts to look up the path from a different MDS (and bring them into | |
797 | * its cache as replicas). | |
798 | * MDS_TRAVERSE_PATH_LOCKED: path_traverse() will proceed when xlocked | |
799 | * dentry is encountered. | |
800 | * MDS_TRAVERSE_WANT_DENTRY: Caller wants tail dentry. Add a null dentry if | |
801 | * tail dentry does not exist. Return 0 even if the tail dentry is null. | |
802 | * MDS_TRAVERSE_WANT_AUTH: Always forward request to auth MDS of target inode | |
803 | * or auth MDS of tail dentry (MDS_TRAVERSE_WANT_DENTRY is set). | |
804 | * | |
7c673cae FG |
805 | * @param pdnvec Data return parameter -- on success, contains a |
806 | * vector of dentries. On failure, is either empty or contains the | |
807 | * full trace of traversable dentries. | |
808 | * @param pin Data return parameter -- if successful, points to the inode | |
809 | * associated with filepath. If unsuccessful, is null. | |
7c673cae FG |
810 | * |
811 | * @returns 0 on success, 1 on "not done yet", 2 on "forwarding", -errno otherwise. | |
812 | * If it returns 1, the requester associated with this call has been placed | |
813 | * on the appropriate waitlist, and it should unwind itself and back out. | |
814 | * If it returns 2 the request has been forwarded, and again the requester | |
815 | * should unwind itself and back out. | |
816 | */ | |
9f95a23c TL |
817 | int path_traverse(MDRequestRef& mdr, MDSContextFactory& cf, |
818 | const filepath& path, int flags, | |
819 | vector<CDentry*> *pdnvec, CInode **pin=nullptr); | |
7c673cae FG |
820 | |
821 | CInode *cache_traverse(const filepath& path); | |
822 | ||
11fdf7f2 | 823 | void open_remote_dirfrag(CInode *diri, frag_t fg, MDSContext *fin); |
7c673cae FG |
824 | CInode *get_dentry_inode(CDentry *dn, MDRequestRef& mdr, bool projected=false); |
825 | ||
826 | bool parallel_fetch(map<inodeno_t,filepath>& pathmap, set<inodeno_t>& missing); | |
827 | bool parallel_fetch_traverse_dir(inodeno_t ino, filepath& path, | |
828 | set<CDir*>& fetch_queue, set<inodeno_t>& missing, | |
829 | C_GatherBuilder &gather_bld); | |
830 | ||
11fdf7f2 | 831 | void open_remote_dentry(CDentry *dn, bool projected, MDSContext *fin, |
7c673cae | 832 | bool want_xlocked=false); |
11fdf7f2 | 833 | void _open_remote_dentry_finish(CDentry *dn, inodeno_t ino, MDSContext *fin, |
7c673cae FG |
834 | bool want_xlocked, int r); |
835 | ||
836 | void make_trace(vector<CDentry*>& trace, CInode *in); | |
837 | ||
7c673cae | 838 | void kick_open_ino_peers(mds_rank_t who); |
11fdf7f2 | 839 | void open_ino(inodeno_t ino, int64_t pool, MDSContext *fin, |
f91f0fd5 TL |
840 | bool want_replica=true, bool want_xlocked=false, |
841 | vector<inode_backpointer_t> *ancestors_hint=nullptr, | |
842 | mds_rank_t auth_hint=MDS_RANK_NONE); | |
7c673cae | 843 | |
9f95a23c TL |
844 | void find_ino_peers(inodeno_t ino, MDSContext *c, |
845 | mds_rank_t hint=MDS_RANK_NONE, bool path_locked=false); | |
7c673cae | 846 | void _do_find_ino_peer(find_ino_peer_info_t& fip); |
9f95a23c TL |
847 | void handle_find_ino(const cref_t<MMDSFindIno> &m); |
848 | void handle_find_ino_reply(const cref_t<MMDSFindInoReply> &m); | |
7c673cae FG |
849 | void kick_find_ino_peers(mds_rank_t who); |
850 | ||
11fdf7f2 TL |
851 | SnapRealm *get_global_snaprealm() const { return global_snaprealm; } |
852 | void create_global_snaprealm(); | |
853 | void do_realm_invalidate_and_update_notify(CInode *in, int snapop, bool notify_clients=true); | |
854 | void send_snap_update(CInode *in, version_t stid, int snap_op); | |
9f95a23c | 855 | void handle_snap_update(const cref_t<MMDSSnapUpdate> &m); |
11fdf7f2 | 856 | void notify_global_snaprealm_update(int snap_op); |
7c673cae FG |
857 | |
858 | // -- stray -- | |
7c673cae FG |
859 | void fetch_backtrace(inodeno_t ino, int64_t pool, bufferlist& bl, Context *fin); |
860 | uint64_t get_num_strays() const { return stray_manager.get_num_strays(); } | |
861 | ||
7c673cae | 862 | // == messages == |
9f95a23c | 863 | void dispatch(const cref_t<Message> &m); |
7c673cae | 864 | |
9f95a23c TL |
865 | void encode_replica_dir(CDir *dir, mds_rank_t to, bufferlist& bl); |
866 | void encode_replica_dentry(CDentry *dn, mds_rank_t to, bufferlist& bl); | |
867 | void encode_replica_inode(CInode *in, mds_rank_t to, bufferlist& bl, | |
b32b8144 | 868 | uint64_t features); |
7c673cae | 869 | |
9f95a23c TL |
870 | void decode_replica_dir(CDir *&dir, bufferlist::const_iterator& p, CInode *diri, mds_rank_t from, MDSContext::vec& finished); |
871 | void decode_replica_dentry(CDentry *&dn, bufferlist::const_iterator& p, CDir *dir, MDSContext::vec& finished); | |
872 | void decode_replica_inode(CInode *&in, bufferlist::const_iterator& p, CDentry *dn, MDSContext::vec& finished); | |
7c673cae | 873 | |
9f95a23c TL |
874 | void encode_replica_stray(CDentry *straydn, mds_rank_t who, bufferlist& bl); |
875 | void decode_replica_stray(CDentry *&straydn, const bufferlist &bl, mds_rank_t from); | |
7c673cae FG |
876 | |
877 | // -- namespace -- | |
9f95a23c TL |
878 | void encode_remote_dentry_link(CDentry::linkage_t *dnl, bufferlist& bl); |
879 | void decode_remote_dentry_link(CDir *dir, CDentry *dn, bufferlist::const_iterator& p); | |
7c673cae FG |
880 | void send_dentry_link(CDentry *dn, MDRequestRef& mdr); |
881 | void send_dentry_unlink(CDentry *dn, CDentry *straydn, MDRequestRef& mdr); | |
a8e16298 | 882 | |
11fdf7f2 | 883 | void wait_for_uncommitted_fragment(dirfrag_t dirfrag, MDSContext *c) { |
e306af50 TL |
884 | uncommitted_fragments.at(dirfrag).waiters.push_back(c); |
885 | } | |
886 | bool is_any_uncommitted_fragment() const { | |
887 | return !uncommitted_fragments.empty(); | |
7c673cae | 888 | } |
f91f0fd5 | 889 | void wait_for_uncommitted_fragments(MDSContext* finisher); |
e306af50 TL |
890 | void rollback_uncommitted_fragments(); |
891 | ||
7c673cae FG |
892 | void split_dir(CDir *dir, int byn); |
893 | void merge_dir(CInode *diri, frag_t fg); | |
7c673cae FG |
894 | |
895 | void find_stale_fragment_freeze(); | |
896 | void fragment_freeze_inc_num_waiters(CDir *dir); | |
897 | bool fragment_are_all_frozen(CDir *dir); | |
898 | int get_num_fragmenting_dirs() { return fragments.size(); } | |
899 | ||
900 | // -- updates -- | |
901 | //int send_inode_updates(CInode *in); | |
902 | //void handle_inode_update(MInodeUpdate *m); | |
903 | ||
904 | int send_dir_updates(CDir *in, bool bcast=false); | |
9f95a23c | 905 | void handle_dir_update(const cref_t<MDirUpdate> &m); |
7c673cae FG |
906 | |
907 | // -- cache expiration -- | |
9f95a23c | 908 | void handle_cache_expire(const cref_t<MCacheExpire> &m); |
7c673cae FG |
909 | void process_delayed_expire(CDir *dir); |
910 | void discard_delayed_expire(CDir *dir); | |
911 | ||
eafe8130 | 912 | // -- mdsmap -- |
f6b5b4d7 | 913 | void handle_mdsmap(const MDSMap &mdsmap, const MDSMap &oldmap); |
eafe8130 | 914 | |
9f95a23c | 915 | int dump_cache() { return dump_cache({}, nullptr); } |
11fdf7f2 | 916 | int dump_cache(std::string_view filename); |
31f18b77 | 917 | int dump_cache(Formatter *f); |
11fdf7f2 | 918 | void dump_tree(CInode *in, const int cur_depth, const int max_depth, Formatter *f); |
7c673cae | 919 | |
f64942e4 | 920 | void cache_status(Formatter *f); |
181888fb | 921 | |
7c673cae FG |
922 | void dump_resolve_status(Formatter *f) const; |
923 | void dump_rejoin_status(Formatter *f) const; | |
924 | ||
925 | // == crap fns == | |
7c673cae | 926 | void show_cache(); |
81eedcae | 927 | void show_subtrees(int dbl=10, bool force_print=false); |
7c673cae FG |
928 | |
929 | CInode *hack_pick_random_inode() { | |
11fdf7f2 | 930 | ceph_assert(!inode_map.empty()); |
7c673cae | 931 | int n = rand() % inode_map.size(); |
b32b8144 | 932 | auto p = inode_map.begin(); |
7c673cae FG |
933 | while (n--) ++p; |
934 | return p->second; | |
935 | } | |
936 | ||
11fdf7f2 | 937 | void flush_dentry(std::string_view path, Context *fin); |
7c673cae FG |
938 | /** |
939 | * Create and start an OP_ENQUEUE_SCRUB | |
940 | */ | |
11fdf7f2 | 941 | void enqueue_scrub(std::string_view path, std::string_view tag, |
7c673cae FG |
942 | bool force, bool recursive, bool repair, |
943 | Formatter *f, Context *fin); | |
944 | void repair_inode_stats(CInode *diri); | |
945 | void repair_dirfrag_stats(CDir *dir); | |
11fdf7f2 | 946 | void upgrade_inode_snaprealm(CInode *in); |
7c673cae | 947 | |
9f95a23c TL |
948 | // my master |
949 | MDSRank *mds; | |
950 | ||
951 | // -- my cache -- | |
952 | LRU lru; // dentry lru for expiring items from cache | |
953 | LRU bottom_lru; // dentries that should be trimmed ASAP | |
954 | ||
955 | DecayRate decayrate; | |
956 | ||
957 | int num_shadow_inodes = 0; | |
958 | ||
959 | int num_inodes_with_caps = 0; | |
960 | ||
961 | unsigned max_dir_commit_size; | |
962 | ||
963 | file_layout_t default_file_layout; | |
964 | file_layout_t default_log_layout; | |
965 | ||
966 | // -- client leases -- | |
967 | static constexpr std::size_t client_lease_pools = 3; | |
968 | std::array<float, client_lease_pools> client_lease_durations{5.0, 30.0, 300.0}; | |
969 | ||
970 | // -- client caps -- | |
971 | uint64_t last_cap_id = 0; | |
972 | ||
973 | map<ceph_tid_t, discover_info_t> discovers; | |
974 | ceph_tid_t discover_last_tid = 0; | |
975 | ||
976 | // waiters | |
977 | map<int, map<inodeno_t, MDSContext::vec > > waiting_for_base_ino; | |
978 | ||
979 | map<inodeno_t,map<client_t, reconnected_cap_info_t> > reconnected_caps; // inode -> client -> snap_follows,realmino | |
980 | map<inodeno_t,map<client_t, snapid_t> > reconnected_snaprealms; // realmino -> client -> realmseq | |
981 | ||
982 | // realm inodes | |
983 | set<CInode*> rejoin_pending_snaprealms; | |
984 | // cap imports. delayed snap parent opens. | |
985 | map<client_t,set<CInode*> > delayed_imported_caps; | |
986 | ||
987 | // subsystems | |
988 | std::unique_ptr<Migrator> migrator; | |
989 | ||
990 | bool did_shutdown_log_cap = false; | |
991 | ||
992 | map<ceph_tid_t, find_ino_peer_info_t> find_ino_peer; | |
993 | ceph_tid_t find_ino_peer_last_tid = 0; | |
994 | ||
995 | // delayed cache expire | |
996 | map<CDir*, expiremap> delayed_expire; // subtree root -> expire msg | |
997 | ||
7c673cae FG |
998 | /* Because exports may fail, this set lets us keep track of inodes that need exporting. */ |
999 | std::set<CInode *> export_pin_queue; | |
eafe8130 | 1000 | std::set<CInode *> export_pin_delayed_queue; |
f6b5b4d7 TL |
1001 | std::set<CInode *> rand_ephemeral_pins; |
1002 | std::set<CInode *> dist_ephemeral_pins; | |
11fdf7f2 TL |
1003 | |
1004 | OpenFileTable open_file_table; | |
eafe8130 | 1005 | |
f6b5b4d7 TL |
1006 | double export_ephemeral_random_max = 0.0; |
1007 | ||
9f95a23c TL |
1008 | protected: |
1009 | // track master requests whose slaves haven't acknowledged commit | |
1010 | struct umaster { | |
1011 | umaster() {} | |
1012 | set<mds_rank_t> slaves; | |
1013 | LogSegment *ls = nullptr; | |
1014 | MDSContext::vec waiters; | |
1015 | bool safe = false; | |
1016 | bool committing = false; | |
1017 | bool recovering = false; | |
1018 | }; | |
1019 | ||
e306af50 TL |
1020 | struct uslave { |
1021 | uslave() {} | |
1022 | mds_rank_t master; | |
1023 | LogSegment *ls = nullptr; | |
1024 | MDSlaveUpdate *su = nullptr; | |
1025 | MDSContext::vec waiters; | |
1026 | }; | |
1027 | ||
9f95a23c TL |
1028 | struct open_ino_info_t { |
1029 | open_ino_info_t() {} | |
1030 | vector<inode_backpointer_t> ancestors; | |
1031 | set<mds_rank_t> checked; | |
1032 | mds_rank_t checking = MDS_RANK_NONE; | |
1033 | mds_rank_t auth_hint = MDS_RANK_NONE; | |
1034 | bool check_peers = true; | |
1035 | bool fetch_backtrace = true; | |
1036 | bool discover = false; | |
1037 | bool want_replica = false; | |
1038 | bool want_xlocked = false; | |
1039 | version_t tid = 0; | |
1040 | int64_t pool = -1; | |
1041 | int last_err = 0; | |
1042 | MDSContext::vec waiters; | |
1043 | }; | |
1044 | ||
1045 | friend struct C_MDC_OpenInoTraverseDir; | |
1046 | friend struct C_MDC_OpenInoParentOpened; | |
1047 | friend struct C_MDC_RetryScanStray; | |
1048 | ||
1049 | friend class C_IO_MDC_OpenInoBacktraceFetched; | |
1050 | friend class C_MDC_Join; | |
1051 | friend class C_MDC_RespondInternalRequest; | |
1052 | ||
1053 | friend class ESlaveUpdate; | |
1054 | friend class ECommitted; | |
1055 | ||
1056 | void set_readonly() { readonly = true; } | |
1057 | ||
1058 | void handle_resolve(const cref_t<MMDSResolve> &m); | |
1059 | void handle_resolve_ack(const cref_t<MMDSResolveAck> &m); | |
1060 | void process_delayed_resolve(); | |
1061 | void discard_delayed_resolve(mds_rank_t who); | |
1062 | void maybe_resolve_finish(); | |
1063 | void disambiguate_my_imports(); | |
1064 | void disambiguate_other_imports(); | |
1065 | void trim_unlinked_inodes(); | |
9f95a23c TL |
1066 | |
1067 | void send_slave_resolves(); | |
1068 | void send_subtree_resolves(); | |
1069 | void maybe_finish_slave_resolve(); | |
1070 | ||
1071 | void rejoin_walk(CDir *dir, const ref_t<MMDSCacheRejoin> &rejoin); | |
1072 | void handle_cache_rejoin(const cref_t<MMDSCacheRejoin> &m); | |
1073 | void handle_cache_rejoin_weak(const cref_t<MMDSCacheRejoin> &m); | |
1074 | CInode* rejoin_invent_inode(inodeno_t ino, snapid_t last); | |
1075 | CDir* rejoin_invent_dirfrag(dirfrag_t df); | |
1076 | void handle_cache_rejoin_strong(const cref_t<MMDSCacheRejoin> &m); | |
1077 | void rejoin_scour_survivor_replicas(mds_rank_t from, const cref_t<MMDSCacheRejoin> &ack, | |
1078 | set<vinodeno_t>& acked_inodes, | |
1079 | set<SimpleLock *>& gather_locks); | |
1080 | void handle_cache_rejoin_ack(const cref_t<MMDSCacheRejoin> &m); | |
1081 | void rejoin_send_acks(); | |
1082 | void rejoin_trim_undef_inodes(); | |
1083 | void maybe_send_pending_rejoins() { | |
1084 | if (rejoins_pending) | |
1085 | rejoin_send_rejoins(); | |
1086 | } | |
1087 | ||
1088 | void touch_inode(CInode *in) { | |
1089 | if (in->get_parent_dn()) | |
1090 | touch_dentry(in->get_projected_parent_dn()); | |
1091 | } | |
1092 | ||
1093 | void inode_remove_replica(CInode *in, mds_rank_t rep, bool rejoin, | |
1094 | set<SimpleLock *>& gather_locks); | |
1095 | void dentry_remove_replica(CDentry *dn, mds_rank_t rep, set<SimpleLock *>& gather_locks); | |
1096 | ||
1097 | void rename_file(CDentry *srcdn, CDentry *destdn); | |
1098 | ||
1099 | void _open_ino_backtrace_fetched(inodeno_t ino, bufferlist& bl, int err); | |
1100 | void _open_ino_parent_opened(inodeno_t ino, int ret); | |
1101 | void _open_ino_traverse_dir(inodeno_t ino, open_ino_info_t& info, int err); | |
1102 | void _open_ino_fetch_dir(inodeno_t ino, const cref_t<MMDSOpenIno> &m, CDir *dir, bool parent); | |
1103 | int open_ino_traverse_dir(inodeno_t ino, const cref_t<MMDSOpenIno> &m, | |
1104 | const vector<inode_backpointer_t>& ancestors, | |
1105 | bool discover, bool want_xlocked, mds_rank_t *hint); | |
1106 | void open_ino_finish(inodeno_t ino, open_ino_info_t& info, int err); | |
1107 | void do_open_ino(inodeno_t ino, open_ino_info_t& info, int err); | |
1108 | void do_open_ino_peer(inodeno_t ino, open_ino_info_t& info); | |
1109 | void handle_open_ino(const cref_t<MMDSOpenIno> &m, int err=0); | |
1110 | void handle_open_ino_reply(const cref_t<MMDSOpenInoReply> &m); | |
1111 | ||
1112 | void scan_stray_dir(dirfrag_t next=dirfrag_t()); | |
1113 | // -- replicas -- | |
1114 | void handle_discover(const cref_t<MDiscover> &dis); | |
1115 | void handle_discover_reply(const cref_t<MDiscoverReply> &m); | |
1116 | void handle_dentry_link(const cref_t<MDentryLink> &m); | |
1117 | void handle_dentry_unlink(const cref_t<MDentryUnlink> &m); | |
1118 | ||
1119 | int dump_cache(std::string_view fn, Formatter *f); | |
1120 | ||
1121 | void flush_dentry_work(MDRequestRef& mdr); | |
1122 | /** | |
1123 | * Resolve path to a dentry and pass it onto the ScrubStack. | |
1124 | * | |
1125 | * TODO: return enough information to the original mdr formatter | |
1126 | * and completion that they can subsequently check the progress of | |
1127 | * this scrub (we won't block them on a whole scrub as it can take a very | |
1128 | * long time) | |
1129 | */ | |
1130 | void enqueue_scrub_work(MDRequestRef& mdr); | |
1131 | void recursive_scrub_finish(const ScrubHeaderRef& header); | |
1132 | void repair_inode_stats_work(MDRequestRef& mdr); | |
1133 | void repair_dirfrag_stats_work(MDRequestRef& mdr); | |
1134 | void upgrade_inode_snaprealm_work(MDRequestRef& mdr); | |
1135 | ||
1136 | ceph::unordered_map<inodeno_t,CInode*> inode_map; // map of head inodes by ino | |
1137 | map<vinodeno_t, CInode*> snap_inode_map; // map of snap inodes by ino | |
1138 | CInode *root = nullptr; // root inode | |
1139 | CInode *myin = nullptr; // .ceph/mds%d dir | |
1140 | ||
1141 | bool readonly = false; | |
1142 | ||
1143 | int stray_index = 0; | |
1144 | ||
1145 | set<CInode*> base_inodes; | |
1146 | ||
1147 | std::unique_ptr<PerfCounters> logger; | |
1148 | ||
1149 | Filer filer; | |
1150 | bool exceeded_size_limit = false; | |
1151 | std::array<xlist<ClientLease*>, client_lease_pools> client_leases{}; | |
1152 | ||
1153 | /* subtree keys and each tree's non-recursive nested subtrees (the "bounds") */ | |
1154 | map<CDir*,set<CDir*> > subtrees; | |
1155 | map<CInode*,list<pair<CDir*,CDir*> > > projected_subtree_renames; // renamed ino -> target dir | |
1156 | ||
1157 | // -- requests -- | |
1158 | ceph::unordered_map<metareqid_t, MDRequestRef> active_requests; | |
1159 | ||
1160 | // -- recovery -- | |
1161 | set<mds_rank_t> recovery_set; | |
1162 | ||
1163 | // [resolve] | |
1164 | // from EImportStart w/o EImportFinish during journal replay | |
1165 | map<dirfrag_t, vector<dirfrag_t> > my_ambiguous_imports; | |
1166 | // from MMDSResolves | |
1167 | map<mds_rank_t, map<dirfrag_t, vector<dirfrag_t> > > other_ambiguous_imports; | |
1168 | ||
9f95a23c TL |
1169 | map<CInode*, int> uncommitted_slave_rename_olddir; // slave: preserve the non-auth dir until seeing commit. |
1170 | map<CInode*, int> uncommitted_slave_unlink; // slave: preserve the unlinked inode until seeing commit. | |
1171 | ||
1172 | map<metareqid_t, umaster> uncommitted_masters; // master: req -> slave set | |
e306af50 | 1173 | map<metareqid_t, uslave> uncommitted_slaves; // slave: preserve the slave req until seeing commit. |
9f95a23c TL |
1174 | |
1175 | set<metareqid_t> pending_masters; | |
1176 | map<int, set<metareqid_t> > ambiguous_slave_updates; | |
1177 | ||
1178 | bool resolves_pending = false; | |
1179 | set<mds_rank_t> resolve_gather; // nodes i need resolves from | |
1180 | set<mds_rank_t> resolve_ack_gather; // nodes i need a resolve_ack from | |
1181 | set<version_t> resolve_snapclient_commits; | |
1182 | map<metareqid_t, mds_rank_t> resolve_need_rollback; // rollbacks i'm writing to the journal | |
1183 | map<mds_rank_t, cref_t<MMDSResolve>> delayed_resolve; | |
1184 | ||
1185 | // [rejoin] | |
1186 | bool rejoins_pending = false; | |
1187 | set<mds_rank_t> rejoin_gather; // nodes from whom i need a rejoin | |
1188 | set<mds_rank_t> rejoin_sent; // nodes i sent a rejoin to | |
1189 | set<mds_rank_t> rejoin_ack_sent; // nodes i sent a rejoin ack to | |
1190 | set<mds_rank_t> rejoin_ack_gather; // nodes from whom i need a rejoin ack | |
1191 | map<mds_rank_t,map<inodeno_t,map<client_t,Capability::Import> > > rejoin_imported_caps; | |
1192 | map<inodeno_t,pair<mds_rank_t,map<client_t,Capability::Export> > > rejoin_slave_exports; | |
1193 | ||
1194 | map<client_t,entity_inst_t> rejoin_client_map; | |
1195 | map<client_t,client_metadata_t> rejoin_client_metadata_map; | |
1196 | map<client_t,pair<Session*,uint64_t> > rejoin_session_map; | |
1197 | ||
1198 | map<inodeno_t,pair<mds_rank_t,map<client_t,cap_reconnect_t> > > cap_exports; // ino -> target, client -> capex | |
1199 | ||
1200 | map<inodeno_t,map<client_t,map<mds_rank_t,cap_reconnect_t> > > cap_imports; // ino -> client -> frommds -> capex | |
1201 | set<inodeno_t> cap_imports_missing; | |
1202 | map<inodeno_t, MDSContext::vec > cap_reconnect_waiters; | |
1203 | int cap_imports_num_opening = 0; | |
1204 | ||
1205 | set<CInode*> rejoin_undef_inodes; | |
1206 | set<CInode*> rejoin_potential_updated_scatterlocks; | |
1207 | set<CDir*> rejoin_undef_dirfrags; | |
1208 | map<mds_rank_t, set<CInode*> > rejoin_unlinked_inodes; | |
1209 | ||
1210 | vector<CInode*> rejoin_recover_q, rejoin_check_q; | |
1211 | list<SimpleLock*> rejoin_eval_locks; | |
1212 | MDSContext::vec rejoin_waiters; | |
1213 | ||
1214 | std::unique_ptr<MDSContext> rejoin_done; | |
1215 | std::unique_ptr<MDSContext> resolve_done; | |
1216 | ||
1217 | ceph_tid_t open_ino_last_tid = 0; | |
1218 | map<inodeno_t,open_ino_info_t> opening_inodes; | |
1219 | ||
1220 | StrayManager stray_manager; | |
1221 | ||
1222 | private: | |
1223 | // -- fragmenting -- | |
1224 | struct ufragment { | |
1225 | ufragment() {} | |
1226 | int bits = 0; | |
1227 | bool committed = false; | |
1228 | LogSegment *ls = nullptr; | |
1229 | MDSContext::vec waiters; | |
1230 | frag_vec_t old_frags; | |
1231 | bufferlist rollback; | |
1232 | }; | |
1233 | ||
1234 | struct fragment_info_t { | |
1235 | fragment_info_t() {} | |
1236 | bool is_fragmenting() { return !resultfrags.empty(); } | |
1237 | uint64_t get_tid() { return mdr ? mdr->reqid.tid : 0; } | |
1238 | int bits; | |
1239 | std::vector<CDir*> dirs; | |
1240 | std::vector<CDir*> resultfrags; | |
1241 | MDRequestRef mdr; | |
1242 | set<mds_rank_t> notify_ack_waiting; | |
1243 | bool finishing = false; | |
1244 | ||
1245 | // for deadlock detection | |
1246 | bool all_frozen = false; | |
1247 | utime_t last_cum_auth_pins_change; | |
1248 | int last_cum_auth_pins = 0; | |
1249 | int num_remote_waiters = 0; // number of remote authpin waiters | |
1250 | }; | |
1251 | ||
1252 | typedef map<dirfrag_t,fragment_info_t>::iterator fragment_info_iterator; | |
1253 | ||
1254 | friend class EFragment; | |
1255 | friend class C_MDC_FragmentFrozen; | |
1256 | friend class C_MDC_FragmentMarking; | |
1257 | friend class C_MDC_FragmentPrep; | |
1258 | friend class C_MDC_FragmentStore; | |
1259 | friend class C_MDC_FragmentCommit; | |
1260 | friend class C_IO_MDC_FragmentPurgeOld; | |
1261 | ||
1262 | // -- subtrees -- | |
1263 | static const unsigned int SUBTREES_COUNT_THRESHOLD = 5; | |
1264 | static const unsigned int SUBTREES_DEPTH_THRESHOLD = 5; | |
1265 | ||
1266 | CInode *get_stray() { | |
1267 | return strays[stray_index]; | |
1268 | } | |
1269 | ||
1270 | void identify_files_to_recover(); | |
1271 | ||
1272 | std::pair<bool, uint64_t> trim_lru(uint64_t count, expiremap& expiremap); | |
1273 | bool trim_dentry(CDentry *dn, expiremap& expiremap); | |
1274 | void trim_dirfrag(CDir *dir, CDir *con, expiremap& expiremap); | |
1275 | bool trim_inode(CDentry *dn, CInode *in, CDir *con, expiremap&); | |
1276 | void send_expire_messages(expiremap& expiremap); | |
1277 | void trim_non_auth(); // trim out trimmable non-auth items | |
1278 | ||
1279 | void adjust_dir_fragments(CInode *diri, frag_t basefrag, int bits, | |
1280 | std::vector<CDir*>* frags, MDSContext::vec& waiters, bool replay); | |
1281 | void adjust_dir_fragments(CInode *diri, | |
1282 | const std::vector<CDir*>& srcfrags, | |
1283 | frag_t basefrag, int bits, | |
1284 | std::vector<CDir*>* resultfrags, | |
1285 | MDSContext::vec& waiters, | |
1286 | bool replay); | |
1287 | CDir *force_dir_fragment(CInode *diri, frag_t fg, bool replay=true); | |
1288 | void get_force_dirfrag_bound_set(const vector<dirfrag_t>& dfs, set<CDir*>& bounds); | |
1289 | ||
1290 | bool can_fragment(CInode *diri, const std::vector<CDir*>& dirs); | |
1291 | void fragment_freeze_dirs(const std::vector<CDir*>& dirs); | |
1292 | void fragment_mark_and_complete(MDRequestRef& mdr); | |
1293 | void fragment_frozen(MDRequestRef& mdr, int r); | |
1294 | void fragment_unmark_unfreeze_dirs(const std::vector<CDir*>& dirs); | |
1295 | void fragment_drop_locks(fragment_info_t &info); | |
1296 | void fragment_maybe_finish(const fragment_info_iterator& it); | |
1297 | void dispatch_fragment_dir(MDRequestRef& mdr); | |
1298 | void _fragment_logged(MDRequestRef& mdr); | |
1299 | void _fragment_stored(MDRequestRef& mdr); | |
1300 | void _fragment_committed(dirfrag_t f, const MDRequestRef& mdr); | |
1301 | void _fragment_old_purged(dirfrag_t f, int bits, const MDRequestRef& mdr); | |
1302 | ||
1303 | void handle_fragment_notify(const cref_t<MMDSFragmentNotify> &m); | |
1304 | void handle_fragment_notify_ack(const cref_t<MMDSFragmentNotifyAck> &m); | |
1305 | ||
1306 | void add_uncommitted_fragment(dirfrag_t basedirfrag, int bits, const frag_vec_t& old_frag, | |
1307 | LogSegment *ls, bufferlist *rollback=NULL); | |
1308 | void finish_uncommitted_fragment(dirfrag_t basedirfrag, int op); | |
1309 | void rollback_uncommitted_fragment(dirfrag_t basedirfrag, frag_vec_t&& old_frags); | |
1310 | ||
1311 | uint64_t cache_memory_limit; | |
1312 | double cache_reservation; | |
1313 | double cache_health_threshold; | |
9f95a23c TL |
1314 | std::array<CInode *, NUM_STRAY> strays{}; // my stray dir |
1315 | ||
f6b5b4d7 TL |
1316 | bool export_ephemeral_distributed_config; |
1317 | bool export_ephemeral_random_config; | |
1318 | ||
9f95a23c TL |
1319 | // File size recovery |
1320 | RecoveryQueue recovery_queue; | |
1321 | ||
1322 | // shutdown | |
1323 | set<inodeno_t> shutdown_exporting_strays; | |
1324 | pair<dirfrag_t, string> shutdown_export_next; | |
1325 | ||
1326 | bool opening_root = false, open = false; | |
1327 | MDSContext::vec waiting_for_open; | |
1328 | ||
1329 | // -- snaprealms -- | |
1330 | SnapRealm *global_snaprealm = nullptr; | |
1331 | ||
1332 | map<dirfrag_t, ufragment> uncommitted_fragments; | |
1333 | ||
1334 | map<dirfrag_t,fragment_info_t> fragments; | |
1335 | ||
1336 | DecayCounter trim_counter; | |
1337 | ||
eafe8130 TL |
1338 | std::thread upkeeper; |
1339 | ceph::mutex upkeep_mutex = ceph::make_mutex("MDCache::upkeep_mutex"); | |
1340 | ceph::condition_variable upkeep_cvar; | |
1341 | time upkeep_last_trim = time::min(); | |
92f5a8d4 | 1342 | time upkeep_last_release = time::min(); |
eafe8130 | 1343 | std::atomic<bool> upkeep_trim_shutdown{false}; |
7c673cae FG |
1344 | }; |
1345 | ||
1346 | class C_MDS_RetryRequest : public MDSInternalContext { | |
1347 | MDCache *cache; | |
1348 | MDRequestRef mdr; | |
1349 | public: | |
1350 | C_MDS_RetryRequest(MDCache *c, MDRequestRef& r); | |
1351 | void finish(int r) override; | |
1352 | }; | |
1353 | ||
1354 | #endif |