1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation.  See file COPYING.
 */
17 #ifndef CEPH_MDCACHE_H
18 #define CEPH_MDCACHE_H
20 #include <boost/utility/string_view.hpp>
22 #include "include/types.h"
23 #include "include/filepath.h"
24 #include "include/elist.h"
26 #include "osdc/Filer.h"
30 #include "include/Context.h"
31 #include "events/EMetaBlob.h"
32 #include "RecoveryQueue.h"
33 #include "StrayManager.h"
34 #include "MDSContext.h"
38 #include "messages/MClientRequest.h"
39 #include "messages/MMDSSlaveRequest.h"
52 class MMDSCacheRejoin
;
61 struct MMDSFindInoReply
;
63 struct MMDSOpenInoReply
;
67 class MMDSSlaveRequest
;
70 class MMDSFragmentNotify
;
76 // How many inodes currently in stray dentries
78 // How many stray dentries are currently delayed for purge due to refs
79 l_mdc_num_strays_delayed
,
80 // How many stray dentries are currently being enqueued for purge
81 l_mdc_num_strays_enqueuing
,
83 // How many dentries have ever been added to stray dir
85 // How many dentries have been passed on to PurgeQueue
86 l_mdc_strays_enqueued
,
87 // How many strays have been reintegrated?
88 l_mdc_strays_reintegrated
,
89 // How many strays have been migrated?
90 l_mdc_strays_migrated
,
92 // How many inode sizes currently being recovered
93 l_mdc_num_recovering_processing
,
94 // How many inodes currently waiting to have size recovered
95 l_mdc_num_recovering_enqueued
,
96 // How many inodes waiting with elevated priority for recovery
97 l_mdc_num_recovering_prioritized
,
98 // How many inodes ever started size recovery
99 l_mdc_recovery_started
,
100 // How many inodes ever completed size recovery
101 l_mdc_recovery_completed
,
103 l_mdss_ireq_enqueue_scrub
,
104 l_mdss_ireq_exportdir
,
106 l_mdss_ireq_fragmentdir
,
107 l_mdss_ireq_fragstats
,
108 l_mdss_ireq_inodestats
,
114 // flags for predirty_journal_parents()
115 static const int PREDIRTY_PRIMARY
= 1; // primary dn, adjust nested accounting
116 static const int PREDIRTY_DIR
= 2; // update parent dir mtime/size
117 static const int PREDIRTY_SHALLOW
= 4; // only go to immediate parent (for easier rollback)
121 using clock
= ceph::coarse_mono_clock
;
122 using time
= ceph::coarse_mono_time
;
128 LRU lru
; // dentry lru for expiring items from cache
129 LRU bottom_lru
; // dentries that should be trimmed ASAP
131 ceph::unordered_map
<inodeno_t
,CInode
*> inode_map
; // map of head inodes by ino
132 map
<vinodeno_t
, CInode
*> snap_inode_map
; // map of snap inodes by ino
133 CInode
*root
; // root inode
134 CInode
*myin
; // .ceph/mds%d dir
137 void set_readonly() { readonly
= true; }
139 CInode
*strays
[NUM_STRAY
]; // my stray dir
142 CInode
*get_stray() {
143 return strays
[stray_index
];
146 set
<CInode
*> base_inodes
;
148 std::unique_ptr
<PerfCounters
> logger
;
152 bool exceeded_size_limit
;
155 uint64_t cache_inode_limit
;
156 uint64_t cache_memory_limit
;
157 double cache_reservation
;
158 double cache_health_threshold
;
161 uint64_t cache_limit_inodes(void) {
162 return cache_inode_limit
;
164 uint64_t cache_limit_memory(void) {
165 return cache_memory_limit
;
167 double cache_toofull_ratio(void) const {
168 double inode_reserve
= cache_inode_limit
*(1.0-cache_reservation
);
169 double memory_reserve
= cache_memory_limit
*(1.0-cache_reservation
);
170 return fmax(0.0, fmax((cache_size()-memory_reserve
)/memory_reserve
, cache_inode_limit
== 0 ? 0.0 : (CInode::count()-inode_reserve
)/inode_reserve
));
172 bool cache_toofull(void) const {
173 return cache_toofull_ratio() > 0.0;
175 uint64_t cache_size(void) const {
176 return mempool::get_pool(mempool::mds_co::id
).allocated_bytes();
178 bool cache_overfull(void) const {
179 return (cache_inode_limit
> 0 && CInode::count() > cache_inode_limit
*cache_health_threshold
) || (cache_size() > cache_memory_limit
*cache_health_threshold
);
182 void advance_stray() {
183 stray_index
= (stray_index
+1)%NUM_STRAY
;
186 void activate_stray_manager();
189 * Call this when you know that a CDentry is ready to be passed
190 * on to StrayManager (i.e. this is a stray you've just created)
192 void notify_stray(CDentry
*dn
) {
193 assert(dn
->get_dir()->get_inode()->is_stray());
194 stray_manager
.eval_stray(dn
);
197 void maybe_eval_stray(CInode
*in
, bool delay
=false);
198 void clear_dirty_bits_for_stray(CInode
* diri
);
200 bool is_readonly() { return readonly
; }
201 void force_readonly();
205 int num_shadow_inodes
;
207 int num_inodes_with_caps
;
209 unsigned max_dir_commit_size
;
211 static file_layout_t
gen_default_file_layout(const MDSMap
&mdsmap
);
212 static file_layout_t
gen_default_log_layout(const MDSMap
&mdsmap
);
214 file_layout_t default_file_layout
;
215 file_layout_t default_log_layout
;
217 void register_perfcounters();
219 // -- client leases --
221 static const int client_lease_pools
= 3;
222 float client_lease_durations
[client_lease_pools
];
224 xlist
<ClientLease
*> client_leases
[client_lease_pools
];
226 void touch_client_lease(ClientLease
*r
, int pool
, utime_t ttl
) {
227 client_leases
[pool
].push_back(&r
->item_lease
);
231 void notify_stray_removed()
233 stray_manager
.notify_stray_removed();
236 void notify_stray_created()
238 stray_manager
.notify_stray_created();
241 void eval_remote(CDentry
*dn
)
243 stray_manager
.eval_remote(dn
);
247 uint64_t last_cap_id
;
252 struct discover_info_t
{
264 tid(0), mds(-1), snap(CEPH_NOSNAP
), basei(NULL
),
265 want_base_dir(false), want_xlocked(false) {}
268 basei
->put(MDSCacheObject::PIN_DISCOVERBASE
);
270 void pin_base(CInode
*b
) {
272 basei
->get(MDSCacheObject::PIN_DISCOVERBASE
);
276 map
<ceph_tid_t
, discover_info_t
> discovers
;
277 ceph_tid_t discover_last_tid
;
279 void _send_discover(discover_info_t
& dis
);
280 discover_info_t
& _create_discover(mds_rank_t mds
) {
281 ceph_tid_t t
= ++discover_last_tid
;
282 discover_info_t
& d
= discovers
[t
];
289 map
<int, map
<inodeno_t
, list
<MDSInternalContextBase
*> > > waiting_for_base_ino
;
291 void discover_base_ino(inodeno_t want_ino
, MDSInternalContextBase
*onfinish
, mds_rank_t from
=MDS_RANK_NONE
);
292 void discover_dir_frag(CInode
*base
, frag_t approx_fg
, MDSInternalContextBase
*onfinish
,
293 mds_rank_t from
=MDS_RANK_NONE
);
294 void discover_path(CInode
*base
, snapid_t snap
, filepath want_path
, MDSInternalContextBase
*onfinish
,
295 bool want_xlocked
=false, mds_rank_t from
=MDS_RANK_NONE
);
296 void discover_path(CDir
*base
, snapid_t snap
, filepath want_path
, MDSInternalContextBase
*onfinish
,
297 bool want_xlocked
=false);
298 void kick_discovers(mds_rank_t who
); // after a failure.
303 /* subtree keys and each tree's non-recursive nested subtrees (the "bounds") */
304 map
<CDir
*,set
<CDir
*> > subtrees
;
305 map
<CInode
*,list
<pair
<CDir
*,CDir
*> > > projected_subtree_renames
; // renamed ino -> target dir
307 // adjust subtree auth specification
309 // imports/exports/nested_exports
310 // join/split subtrees as appropriate
312 bool is_subtrees() { return !subtrees
.empty(); }
313 void list_subtrees(list
<CDir
*>& ls
);
314 void adjust_subtree_auth(CDir
*root
, mds_authority_t auth
, bool adjust_pop
=true);
315 void adjust_subtree_auth(CDir
*root
, mds_rank_t a
, mds_rank_t b
=CDIR_AUTH_UNKNOWN
) {
316 adjust_subtree_auth(root
, mds_authority_t(a
,b
));
318 void adjust_bounded_subtree_auth(CDir
*dir
, set
<CDir
*>& bounds
, mds_authority_t auth
);
319 void adjust_bounded_subtree_auth(CDir
*dir
, set
<CDir
*>& bounds
, mds_rank_t a
) {
320 adjust_bounded_subtree_auth(dir
, bounds
, mds_authority_t(a
, CDIR_AUTH_UNKNOWN
));
322 void adjust_bounded_subtree_auth(CDir
*dir
, vector
<dirfrag_t
>& bounds
, mds_authority_t auth
);
323 void adjust_bounded_subtree_auth(CDir
*dir
, vector
<dirfrag_t
>& bounds
, mds_rank_t a
) {
324 adjust_bounded_subtree_auth(dir
, bounds
, mds_authority_t(a
, CDIR_AUTH_UNKNOWN
));
326 void map_dirfrag_set(list
<dirfrag_t
>& dfs
, set
<CDir
*>& result
);
327 void try_subtree_merge(CDir
*root
);
328 void try_subtree_merge_at(CDir
*root
, set
<CInode
*> *to_eval
, bool adjust_pop
=true);
329 void subtree_merge_writebehind_finish(CInode
*in
, MutationRef
& mut
);
330 void eval_subtree_root(CInode
*diri
);
331 CDir
*get_subtree_root(CDir
*dir
);
332 CDir
*get_projected_subtree_root(CDir
*dir
);
333 bool is_leaf_subtree(CDir
*dir
) {
334 assert(subtrees
.count(dir
));
335 return subtrees
[dir
].empty();
337 void remove_subtree(CDir
*dir
);
338 bool is_subtree(CDir
*root
) {
339 return subtrees
.count(root
);
341 void get_subtree_bounds(CDir
*root
, set
<CDir
*>& bounds
);
342 void get_wouldbe_subtree_bounds(CDir
*root
, set
<CDir
*>& bounds
);
343 void verify_subtree_bounds(CDir
*root
, const set
<CDir
*>& bounds
);
344 void verify_subtree_bounds(CDir
*root
, const list
<dirfrag_t
>& bounds
);
346 void project_subtree_rename(CInode
*diri
, CDir
*olddir
, CDir
*newdir
);
347 void adjust_subtree_after_rename(CInode
*diri
, CDir
*olddir
, bool pop
);
349 void get_auth_subtrees(set
<CDir
*>& s
);
350 void get_fullauth_subtrees(set
<CDir
*>& s
);
353 int num_subtrees_fullauth();
354 int num_subtrees_fullnonauth();
358 // delayed cache expire
359 map
<CDir
*, map
<mds_rank_t
, MCacheExpire
*> > delayed_expire
; // subtree root -> expire msg
363 ceph::unordered_map
<metareqid_t
, MDRequestRef
> active_requests
;
366 int get_num_client_requests();
368 MDRequestRef
request_start(MClientRequest
*req
);
369 MDRequestRef
request_start_slave(metareqid_t rid
, __u32 attempt
, Message
*m
);
370 MDRequestRef
request_start_internal(int op
);
371 bool have_request(metareqid_t rid
) {
372 return active_requests
.count(rid
);
374 MDRequestRef
request_get(metareqid_t rid
);
375 void request_pin_ref(MDRequestRef
& r
, CInode
*ref
, vector
<CDentry
*>& trace
);
376 void request_finish(MDRequestRef
& mdr
);
377 void request_forward(MDRequestRef
& mdr
, mds_rank_t mds
, int port
=0);
378 void dispatch_request(MDRequestRef
& mdr
);
379 void request_drop_foreign_locks(MDRequestRef
& mdr
);
380 void request_drop_non_rdlocks(MDRequestRef
& r
);
381 void request_drop_locks(MDRequestRef
& r
);
382 void request_cleanup(MDRequestRef
& r
);
384 void request_kill(MDRequestRef
& r
); // called when session closes
386 // journal/snap helpers
387 CInode
*pick_inode_snap(CInode
*in
, snapid_t follows
);
388 CInode
*cow_inode(CInode
*in
, snapid_t last
);
389 void journal_cow_dentry(MutationImpl
*mut
, EMetaBlob
*metablob
, CDentry
*dn
,
390 snapid_t follows
=CEPH_NOSNAP
,
391 CInode
**pcow_inode
=0, CDentry::linkage_t
*dnl
=0);
392 void journal_cow_inode(MutationRef
& mut
, EMetaBlob
*metablob
, CInode
*in
, snapid_t follows
=CEPH_NOSNAP
,
393 CInode
**pcow_inode
=0);
394 void journal_dirty_inode(MutationImpl
*mut
, EMetaBlob
*metablob
, CInode
*in
, snapid_t follows
=CEPH_NOSNAP
);
396 void project_rstat_inode_to_frag(CInode
*cur
, CDir
*parent
, snapid_t first
,
397 int linkunlink
, SnapRealm
*prealm
);
398 void _project_rstat_inode_to_frag(CInode::mempool_inode
& inode
, snapid_t ofirst
, snapid_t last
,
399 CDir
*parent
, int linkunlink
, bool update_inode
);
400 void project_rstat_frag_to_inode(nest_info_t
& rstat
, nest_info_t
& accounted_rstat
,
401 snapid_t ofirst
, snapid_t last
,
402 CInode
*pin
, bool cow_head
);
403 void broadcast_quota_to_client(CInode
*in
, client_t exclude_ct
= -1);
404 void predirty_journal_parents(MutationRef mut
, EMetaBlob
*blob
,
405 CInode
*in
, CDir
*parent
,
406 int flags
, int linkunlink
=0,
407 snapid_t follows
=CEPH_NOSNAP
);
410 void add_uncommitted_master(metareqid_t reqid
, LogSegment
*ls
, set
<mds_rank_t
> &slaves
, bool safe
=false) {
411 uncommitted_masters
[reqid
].ls
= ls
;
412 uncommitted_masters
[reqid
].slaves
= slaves
;
413 uncommitted_masters
[reqid
].safe
= safe
;
415 void wait_for_uncommitted_master(metareqid_t reqid
, MDSInternalContextBase
*c
) {
416 uncommitted_masters
[reqid
].waiters
.push_back(c
);
418 bool have_uncommitted_master(metareqid_t reqid
, mds_rank_t from
) {
419 auto p
= uncommitted_masters
.find(reqid
);
420 return p
!= uncommitted_masters
.end() && p
->second
.slaves
.count(from
) > 0;
422 void log_master_commit(metareqid_t reqid
);
423 void logged_master_update(metareqid_t reqid
);
424 void _logged_master_commit(metareqid_t reqid
);
425 void committed_master_slave(metareqid_t r
, mds_rank_t from
);
426 void finish_committed_masters();
428 void _logged_slave_commit(mds_rank_t from
, metareqid_t reqid
);
432 set
<mds_rank_t
> recovery_set
;
435 void set_recovery_set(set
<mds_rank_t
>& s
);
436 void handle_mds_failure(mds_rank_t who
);
437 void handle_mds_recovery(mds_rank_t who
);
441 // from EImportStart w/o EImportFinish during journal replay
442 map
<dirfrag_t
, vector
<dirfrag_t
> > my_ambiguous_imports
;
444 map
<mds_rank_t
, map
<dirfrag_t
, vector
<dirfrag_t
> > > other_ambiguous_imports
;
446 map
<mds_rank_t
, map
<metareqid_t
, MDSlaveUpdate
*> > uncommitted_slave_updates
; // slave: for replay.
447 map
<CInode
*, int> uncommitted_slave_rename_olddir
; // slave: preserve the non-auth dir until seeing commit.
448 map
<CInode
*, int> uncommitted_slave_unlink
; // slave: preserve the unlinked inode until seeing commit.
450 // track master requests whose slaves haven't acknowledged commit
452 set
<mds_rank_t
> slaves
;
454 list
<MDSInternalContextBase
*> waiters
;
458 umaster() : ls(NULL
), safe(false), committing(false), recovering(false) {}
460 map
<metareqid_t
, umaster
> uncommitted_masters
; // master: req -> slave set
462 set
<metareqid_t
> pending_masters
;
463 map
<int, set
<metareqid_t
> > ambiguous_slave_updates
;
465 friend class ESlaveUpdate
;
466 friend class ECommitted
;
468 bool resolves_pending
;
469 set
<mds_rank_t
> resolve_gather
; // nodes i need resolves from
470 set
<mds_rank_t
> resolve_ack_gather
; // nodes i need a resolve_ack from
471 map
<metareqid_t
, mds_rank_t
> need_resolve_rollback
; // rollbacks i'm writing to the journal
472 map
<mds_rank_t
, MMDSResolve
*> delayed_resolve
;
474 void handle_resolve(MMDSResolve
*m
);
475 void handle_resolve_ack(MMDSResolveAck
*m
);
476 void process_delayed_resolve();
477 void discard_delayed_resolve(mds_rank_t who
);
478 void maybe_resolve_finish();
479 void disambiguate_my_imports();
480 void disambiguate_other_imports();
481 void trim_unlinked_inodes();
482 void add_uncommitted_slave_update(metareqid_t reqid
, mds_rank_t master
, MDSlaveUpdate
*);
483 void finish_uncommitted_slave_update(metareqid_t reqid
, mds_rank_t master
);
484 MDSlaveUpdate
* get_uncommitted_slave_update(metareqid_t reqid
, mds_rank_t master
);
486 void recalc_auth_bits(bool replay
);
487 void remove_inode_recursive(CInode
*in
);
489 bool is_ambiguous_slave_update(metareqid_t reqid
, mds_rank_t master
) {
490 auto p
= ambiguous_slave_updates
.find(master
);
491 return p
!= ambiguous_slave_updates
.end() && p
->second
.count(reqid
);
493 void add_ambiguous_slave_update(metareqid_t reqid
, mds_rank_t master
) {
494 ambiguous_slave_updates
[master
].insert(reqid
);
496 void remove_ambiguous_slave_update(metareqid_t reqid
, mds_rank_t master
) {
497 auto p
= ambiguous_slave_updates
.find(master
);
498 auto q
= p
->second
.find(reqid
);
499 assert(q
!= p
->second
.end());
501 if (p
->second
.empty())
502 ambiguous_slave_updates
.erase(p
);
505 void add_rollback(metareqid_t reqid
, mds_rank_t master
) {
506 need_resolve_rollback
[reqid
] = master
;
508 void finish_rollback(metareqid_t reqid
);
511 void add_ambiguous_import(dirfrag_t base
, const vector
<dirfrag_t
>& bounds
);
512 void add_ambiguous_import(CDir
*base
, const set
<CDir
*>& bounds
);
513 bool have_ambiguous_import(dirfrag_t base
) {
514 return my_ambiguous_imports
.count(base
);
516 void get_ambiguous_import_bounds(dirfrag_t base
, vector
<dirfrag_t
>& bounds
) {
517 assert(my_ambiguous_imports
.count(base
));
518 bounds
= my_ambiguous_imports
[base
];
520 void cancel_ambiguous_import(CDir
*);
521 void finish_ambiguous_import(dirfrag_t dirino
);
522 void resolve_start(MDSInternalContext
*resolve_done_
);
523 void send_resolves();
524 void send_slave_resolves();
525 void send_subtree_resolves();
526 void maybe_send_pending_resolves() {
527 if (resolves_pending
)
528 send_subtree_resolves();
531 void _move_subtree_map_bound(dirfrag_t df
, dirfrag_t oldparent
, dirfrag_t newparent
,
532 map
<dirfrag_t
,vector
<dirfrag_t
> >& subtrees
);
533 ESubtreeMap
*create_subtree_map();
536 void clean_open_file_lists();
540 bool rejoins_pending
;
541 set
<mds_rank_t
> rejoin_gather
; // nodes from whom i need a rejoin
542 set
<mds_rank_t
> rejoin_sent
; // nodes i sent a rejoin to
543 set
<mds_rank_t
> rejoin_ack_sent
; // nodes i sent a rejoin to
544 set
<mds_rank_t
> rejoin_ack_gather
; // nodes from whom i need a rejoin ack
545 map
<mds_rank_t
,map
<inodeno_t
,map
<client_t
,Capability::Import
> > > rejoin_imported_caps
;
546 map
<inodeno_t
,pair
<mds_rank_t
,map
<client_t
,Capability::Export
> > > rejoin_slave_exports
;
547 map
<client_t
,entity_inst_t
> rejoin_client_map
;
548 map
<client_t
,pair
<Session
*,uint64_t> > rejoin_session_map
;
550 map
<inodeno_t
,pair
<mds_rank_t
,map
<client_t
,cap_reconnect_t
> > > cap_exports
; // ino -> target, client -> capex
552 map
<inodeno_t
,map
<client_t
,map
<mds_rank_t
,cap_reconnect_t
> > > cap_imports
; // ino -> client -> frommds -> capex
553 set
<inodeno_t
> cap_imports_missing
;
554 map
<inodeno_t
, list
<MDSInternalContextBase
*> > cap_reconnect_waiters
;
555 int cap_imports_num_opening
;
557 set
<CInode
*> rejoin_undef_inodes
;
558 set
<CInode
*> rejoin_potential_updated_scatterlocks
;
559 set
<CDir
*> rejoin_undef_dirfrags
;
560 map
<mds_rank_t
, set
<CInode
*> > rejoin_unlinked_inodes
;
562 vector
<CInode
*> rejoin_recover_q
, rejoin_check_q
;
563 list
<SimpleLock
*> rejoin_eval_locks
;
564 list
<MDSInternalContextBase
*> rejoin_waiters
;
566 void rejoin_walk(CDir
*dir
, MMDSCacheRejoin
*rejoin
);
567 void handle_cache_rejoin(MMDSCacheRejoin
*m
);
568 void handle_cache_rejoin_weak(MMDSCacheRejoin
*m
);
569 CInode
* rejoin_invent_inode(inodeno_t ino
, snapid_t last
);
570 CDir
* rejoin_invent_dirfrag(dirfrag_t df
);
571 void handle_cache_rejoin_strong(MMDSCacheRejoin
*m
);
572 void rejoin_scour_survivor_replicas(mds_rank_t from
, MMDSCacheRejoin
*ack
,
573 set
<vinodeno_t
>& acked_inodes
,
574 set
<SimpleLock
*>& gather_locks
);
575 void handle_cache_rejoin_ack(MMDSCacheRejoin
*m
);
576 void rejoin_send_acks();
577 void rejoin_trim_undef_inodes();
578 void maybe_send_pending_rejoins() {
580 rejoin_send_rejoins();
582 std::unique_ptr
<MDSInternalContext
> rejoin_done
;
583 std::unique_ptr
<MDSInternalContext
> resolve_done
;
585 void rejoin_start(MDSInternalContext
*rejoin_done_
);
586 void rejoin_gather_finish();
587 void rejoin_send_rejoins();
588 void rejoin_export_caps(inodeno_t ino
, client_t client
, const cap_reconnect_t
& icr
,
590 auto& ex
= cap_exports
[ino
];
592 ex
.second
[client
] = icr
;
594 void rejoin_recovered_caps(inodeno_t ino
, client_t client
, const cap_reconnect_t
& icr
,
595 mds_rank_t frommds
=MDS_RANK_NONE
) {
596 cap_imports
[ino
][client
][frommds
] = icr
;
598 void rejoin_recovered_client(client_t client
, const entity_inst_t
& inst
) {
599 rejoin_client_map
.emplace(client
, inst
);
601 const cap_reconnect_t
*get_replay_cap_reconnect(inodeno_t ino
, client_t client
) {
602 if (cap_imports
.count(ino
) &&
603 cap_imports
[ino
].count(client
) &&
604 cap_imports
[ino
][client
].count(MDS_RANK_NONE
)) {
605 return &cap_imports
[ino
][client
][MDS_RANK_NONE
];
609 void remove_replay_cap_reconnect(inodeno_t ino
, client_t client
) {
610 assert(cap_imports
[ino
].size() == 1);
611 assert(cap_imports
[ino
][client
].size() == 1);
612 cap_imports
.erase(ino
);
614 void wait_replay_cap_reconnect(inodeno_t ino
, MDSInternalContextBase
*c
) {
615 cap_reconnect_waiters
[ino
].push_back(c
);
618 // [reconnect/rejoin caps]
619 struct reconnected_cap_info_t
{
621 snapid_t snap_follows
;
623 reconnected_cap_info_t() :
624 realm_ino(0), snap_follows(0), dirty_caps(0) {}
626 map
<inodeno_t
,map
<client_t
, reconnected_cap_info_t
> > reconnected_caps
; // inode -> client -> snap_follows,realmino
627 map
<inodeno_t
,map
<client_t
, snapid_t
> > reconnected_snaprealms
; // realmino -> client -> realmseq
629 void add_reconnected_cap(client_t client
, inodeno_t ino
, const cap_reconnect_t
& icr
) {
630 reconnected_cap_info_t
&info
= reconnected_caps
[ino
][client
];
631 info
.realm_ino
= inodeno_t(icr
.capinfo
.snaprealm
);
632 info
.snap_follows
= icr
.snap_follows
;
634 void set_reconnected_dirty_caps(client_t client
, inodeno_t ino
, int dirty
) {
635 reconnected_cap_info_t
&info
= reconnected_caps
[ino
][client
];
636 info
.dirty_caps
|= dirty
;
638 void add_reconnected_snaprealm(client_t client
, inodeno_t ino
, snapid_t seq
) {
639 reconnected_snaprealms
[ino
][client
] = seq
;
642 friend class C_MDC_RejoinOpenInoFinish
;
643 friend class C_MDC_RejoinSessionsOpened
;
644 void rejoin_open_ino_finish(inodeno_t ino
, int ret
);
645 void rejoin_open_sessions_finish(map
<client_t
,pair
<Session
*,uint64_t> >& session_map
);
646 bool process_imported_caps();
647 void choose_lock_states_and_reconnect_caps();
648 void prepare_realm_split(SnapRealm
*realm
, client_t client
, inodeno_t ino
,
649 map
<client_t
,MClientSnap
*>& splits
);
650 void do_realm_invalidate_and_update_notify(CInode
*in
, int snapop
, bool nosend
=false);
651 void send_snaps(map
<client_t
,MClientSnap
*>& splits
);
652 Capability
* rejoin_import_cap(CInode
*in
, client_t client
, const cap_reconnect_t
& icr
, mds_rank_t frommds
);
653 void finish_snaprealm_reconnect(client_t client
, SnapRealm
*realm
, snapid_t seq
);
654 void try_reconnect_cap(CInode
*in
, Session
*session
);
655 void export_remaining_imported_caps();
657 // cap imports. delayed snap parent opens.
658 // realm inode -> client -> cap inodes needing to split to this realm
659 map
<CInode
*,set
<CInode
*> > missing_snap_parents
;
660 map
<client_t
,set
<CInode
*> > delayed_imported_caps
;
662 void do_cap_import(Session
*session
, CInode
*in
, Capability
*cap
,
663 uint64_t p_cap_id
, ceph_seq_t p_seq
, ceph_seq_t p_mseq
,
664 int peer
, int p_flags
);
665 void do_delayed_cap_imports();
666 void rebuild_need_snapflush(CInode
*head_in
, SnapRealm
*realm
, client_t client
,
667 snapid_t snap_follows
);
668 void check_realm_past_parents(SnapRealm
*realm
, bool reconnect
);
669 void open_snap_parents();
671 bool open_undef_inodes_dirfrags();
672 void opened_undef_inode(CInode
*in
);
673 void opened_undef_dirfrag(CDir
*dir
) {
674 rejoin_undef_dirfrags
.erase(dir
);
677 void reissue_all_caps();
681 friend class Migrator
;
682 friend class MDBalancer
;
684 // StrayManager needs to be able to remove_inode() from us
685 // when it is done purging
686 friend class StrayManager
;
688 // File size recovery
690 RecoveryQueue recovery_queue
;
691 void identify_files_to_recover();
693 void start_files_to_recover();
694 void do_file_recover();
695 void queue_file_recover(CInode
*in
);
696 void _queued_file_recover_cow(CInode
*in
, MutationRef
& mut
);
699 std::unique_ptr
<Migrator
> migrator
;
702 explicit MDCache(MDSRank
*m
, PurgeQueue
&purge_queue_
);
704 void handle_conf_change(const struct md_config_t
*conf
,
705 const std::set
<std::string
> &changed
,
706 const MDSMap
&mds_map
);
712 CInode
*get_root() { return root
; }
713 CInode
*get_myin() { return myin
; }
715 size_t get_cache_size() { return lru
.lru_get_size(); }
718 bool trim(uint64_t count
=0);
720 void trim_lru(uint64_t count
, map
<mds_rank_t
, MCacheExpire
*>& expiremap
);
721 bool trim_dentry(CDentry
*dn
, map
<mds_rank_t
, MCacheExpire
*>& expiremap
);
722 void trim_dirfrag(CDir
*dir
, CDir
*con
,
723 map
<mds_rank_t
, MCacheExpire
*>& expiremap
);
724 bool trim_inode(CDentry
*dn
, CInode
*in
, CDir
*con
,
725 map
<mds_rank_t
,class MCacheExpire
*>& expiremap
);
726 void send_expire_messages(map
<mds_rank_t
, MCacheExpire
*>& expiremap
);
727 void trim_non_auth(); // trim out trimmable non-auth items
729 bool trim_non_auth_subtree(CDir
*directory
);
730 void standby_trim_segment(LogSegment
*ls
);
731 void try_trim_non_auth_subtree(CDir
*dir
);
732 bool can_trim_non_auth_dirfrag(CDir
*dir
) {
733 return my_ambiguous_imports
.count((dir
)->dirfrag()) == 0 &&
734 uncommitted_slave_rename_olddir
.count(dir
->inode
) == 0;
738 * For all unreferenced inodes, dirs, dentries below an inode, compose
739 * expiry messages. This is used when giving up all replicas of entities
740 * for an MDS peer in the 'stopping' state, such that the peer can
741 * empty its cache and finish shutting down.
743 * We have to make sure we're only expiring un-referenced items to
744 * avoid interfering with ongoing stray-movement (we can't distinguish
745 * between the "moving my strays" and "waiting for my cache to empty"
746 * phases within 'stopping')
748 * @return false if we completed cleanly, true if caller should stop
749 * expiring because we hit something with refs.
751 bool expire_recursive(
753 std::map
<mds_rank_t
, MCacheExpire
*>& expiremap
);
755 void trim_client_leases();
756 void check_memory_usage();
758 time last_recall_state
;
762 set
<inodeno_t
> shutdown_exported_strays
;
764 void shutdown_start();
765 void shutdown_check();
766 bool shutdown_pass();
767 bool shutdown_export_strays();
bool shutdown(); // clear cache (ie at shutdown)
770 bool did_shutdown_log_cap
;
773 bool have_inode(vinodeno_t vino
) {
774 if (vino
.snapid
== CEPH_NOSNAP
)
775 return inode_map
.count(vino
.ino
) ? true : false;
777 return snap_inode_map
.count(vino
) ? true : false;
779 bool have_inode(inodeno_t ino
, snapid_t snap
=CEPH_NOSNAP
) {
780 return have_inode(vinodeno_t(ino
, snap
));
782 CInode
* get_inode(vinodeno_t vino
) {
783 if (vino
.snapid
== CEPH_NOSNAP
) {
784 auto p
= inode_map
.find(vino
.ino
);
785 if (p
!= inode_map
.end())
788 auto p
= snap_inode_map
.find(vino
);
789 if (p
!= snap_inode_map
.end())
794 CInode
* get_inode(inodeno_t ino
, snapid_t s
=CEPH_NOSNAP
) {
795 return get_inode(vinodeno_t(ino
, s
));
798 CDir
* get_dirfrag(dirfrag_t df
) {
799 CInode
*in
= get_inode(df
.ino
);
802 return in
->get_dirfrag(df
.frag
);
804 CDir
* get_dirfrag(inodeno_t ino
, boost::string_view dn
) {
805 CInode
*in
= get_inode(ino
);
808 frag_t fg
= in
->pick_dirfrag(dn
);
809 return in
->get_dirfrag(fg
);
811 CDir
* get_force_dirfrag(dirfrag_t df
, bool replay
) {
812 CInode
*diri
= get_inode(df
.ino
);
815 CDir
*dir
= force_dir_fragment(diri
, df
.frag
, replay
);
817 dir
= diri
->get_dirfrag(df
.frag
);
821 MDSCacheObject
*get_object(MDSCacheObjectInfo
&info
);
826 void add_inode(CInode
*in
);
828 void remove_inode(CInode
*in
);
830 void touch_inode(CInode
*in
) {
831 if (in
->get_parent_dn())
832 touch_dentry(in
->get_projected_parent_dn());
835 void touch_dentry(CDentry
*dn
) {
836 if (dn
->state_test(CDentry::STATE_BOTTOMLRU
)) {
837 bottom_lru
.lru_midtouch(dn
);
842 lru
.lru_midtouch(dn
);
845 void touch_dentry_bottom(CDentry
*dn
) {
846 if (dn
->state_test(CDentry::STATE_BOTTOMLRU
))
848 lru
.lru_bottouch(dn
);
852 void inode_remove_replica(CInode
*in
, mds_rank_t rep
, bool rejoin
,
853 set
<SimpleLock
*>& gather_locks
);
854 void dentry_remove_replica(CDentry
*dn
, mds_rank_t rep
, set
<SimpleLock
*>& gather_locks
);
856 void rename_file(CDentry
*srcdn
, CDentry
*destdn
);
860 void truncate_inode(CInode
*in
, LogSegment
*ls
);
861 void _truncate_inode(CInode
*in
, LogSegment
*ls
);
862 void truncate_inode_finish(CInode
*in
, LogSegment
*ls
);
863 void truncate_inode_logged(CInode
*in
, MutationRef
& mut
);
865 void add_recovered_truncate(CInode
*in
, LogSegment
*ls
);
866 void remove_recovered_truncate(CInode
*in
, LogSegment
*ls
);
867 void start_recovered_truncates();
871 CDir
*get_auth_container(CDir
*in
);
872 CDir
*get_export_container(CDir
*dir
);
873 void find_nested_exports(CDir
*dir
, set
<CDir
*>& s
);
874 void find_nested_exports_under(CDir
*import
, CDir
*dir
, set
<CDir
*>& s
);
878 bool opening_root
, open
;
879 list
<MDSInternalContextBase
*> waiting_for_open
;
883 void create_unlinked_system_inode(CInode
*in
, inodeno_t ino
,
885 CInode
*create_system_inode(inodeno_t ino
, int mode
);
886 CInode
*create_root_inode();
888 void create_empty_hierarchy(MDSGather
*gather
);
889 void create_mydir_hierarchy(MDSGather
*gather
);
891 bool is_open() { return open
; }
892 void wait_for_open(MDSInternalContextBase
*c
) {
893 waiting_for_open
.push_back(c
);
896 void open_root_inode(MDSInternalContextBase
*c
);
898 void open_mydir_inode(MDSInternalContextBase
*c
);
899 void open_mydir_frag(MDSInternalContextBase
*c
);
900 void populate_mydir();
902 void _create_system_file(CDir
*dir
, const char *name
, CInode
*in
, MDSInternalContextBase
*fin
);
903 void _create_system_file_finish(MutationRef
& mut
, CDentry
*dn
,
904 version_t dpv
, MDSInternalContextBase
*fin
);
906 void open_foreign_mdsdir(inodeno_t ino
, MDSInternalContextBase
*c
);
907 CDir
*get_stray_dir(CInode
*in
);
908 CDentry
*get_or_create_stray_dentry(CInode
*in
);
910 MDSInternalContextBase
*_get_waiter(MDRequestRef
& mdr
, Message
*req
, MDSInternalContextBase
*fin
);
913 * Find the given dentry (and whether it exists or not), its ancestors,
914 * and get them all into memory and usable on this MDS. This function
915 * makes a best-effort attempt to load everything; if it needs to
916 * go away and do something then it will put the request on a waitlist.
917 * It prefers the mdr, then the req, then the fin. (At least one of these
920 * At least one of the params mdr, req, and fin must be non-null.
922 * @param mdr The MDRequest associated with the path. Can be null.
923 * @param req The Message associated with the path. Can be null.
924 * @param fin The Context associated with the path. Can be null.
925 * @param path The path to traverse to.
926 * @param pdnvec Data return parameter -- on success, contains a
927 * vector of dentries. On failure, is either empty or contains the
928 * full trace of traversable dentries.
929 * @param pin Data return parameter -- if successful, points to the inode
930 * associated with filepath. If unsuccessful, is null.
931 * @param onfail Specifies different lookup failure behaviors. If set to
932 * MDS_TRAVERSE_DISCOVERXLOCK, path_traverse will succeed on null
933 * dentries (instead of returning -ENOENT). If set to
934 * MDS_TRAVERSE_FORWARD, it will forward the request to the auth
935 * MDS if that becomes appropriate (ie, if it doesn't know the contents
936 * of a directory). If set to MDS_TRAVERSE_DISCOVER, it
937 * will attempt to look up the path from a different MDS (and bring them
938 * into its cache as replicas).
940 * @returns 0 on success, 1 on "not done yet", 2 on "forwarding", -errno otherwise.
941 * If it returns 1, the requester associated with this call has been placed
942 * on the appropriate waitlist, and it should unwind itself and back out.
943 * If it returns 2 the request has been forwarded, and again the requester
944 * should unwind itself and back out.
946 int path_traverse(MDRequestRef
& mdr
, Message
*req
, MDSInternalContextBase
*fin
, const filepath
& path
,
947 vector
<CDentry
*> *pdnvec
, CInode
**pin
, int onfail
);
949 CInode
*cache_traverse(const filepath
& path
);
951 void open_remote_dirfrag(CInode
*diri
, frag_t fg
, MDSInternalContextBase
*fin
);
952 CInode
*get_dentry_inode(CDentry
*dn
, MDRequestRef
& mdr
, bool projected
=false);
  // Fetch a batch of ino->path mappings; inos that could not be resolved
  // are reported via @missing.
  bool parallel_fetch(map<inodeno_t,filepath>& pathmap, set<inodeno_t>& missing);
  // Helper for parallel_fetch(): walk one path, queueing dirs to fetch
  // into @fetch_queue and collecting unresolvable inos in @missing.
  bool parallel_fetch_traverse_dir(inodeno_t ino, filepath& path,
				   set<CDir*>& fetch_queue, set<inodeno_t>& missing,
				   C_GatherBuilder &gather_bld);

  // Open the inode referenced by remote dentry @dn, then complete @fin.
  void open_remote_dentry(CDentry *dn, bool projected, MDSInternalContextBase *fin,
			  bool want_xlocked=false);
  // Completion callback for open_remote_dentry(); @r is the open result.
  void _open_remote_dentry_finish(CDentry *dn, inodeno_t ino, MDSInternalContextBase *fin,
				  bool want_xlocked, int r);

  // Build the dentry trace from the root down to @in into @trace.
  void make_trace(vector<CDentry*>& trace, CInode *in);
  // Per-ino state for an in-flight open_ino() lookup.
  // NOTE(review): several member declarations (checking, check_peers,
  // discover, want_replica, want_xlocked, tid, pool, ...) and the tail of
  // the constructor initializer list are elided in this view of the file;
  // the initializer list below references members declared in the elided
  // portion.
  struct open_ino_info_t {
    vector<inode_backpointer_t> ancestors;  // backtrace ancestry gathered so far
    set<mds_rank_t> checked;                // ranks already queried
    mds_rank_t auth_hint;
    bool fetch_backtrace;
    list<MDSInternalContextBase*> waiters;  // contexts to complete when the lookup finishes
    open_ino_info_t() : checking(MDS_RANK_NONE), auth_hint(MDS_RANK_NONE),
			check_peers(true), fetch_backtrace(true), discover(false),
			want_replica(false), want_xlocked(false), tid(0), pool(-1),
  // Last tid handed out for an open_ino operation.
  ceph_tid_t open_ino_last_tid;
  // In-flight open_ino lookups, keyed by ino.
  map<inodeno_t,open_ino_info_t> opening_inodes;
  // Internal steps of the open_ino state machine (declarations only).
  void _open_ino_backtrace_fetched(inodeno_t ino, bufferlist& bl, int err);
  void _open_ino_parent_opened(inodeno_t ino, int ret);
  void _open_ino_traverse_dir(inodeno_t ino, open_ino_info_t& info, int err);
  void _open_ino_fetch_dir(inodeno_t ino, MMDSOpenIno *m, CDir *dir, bool parent);
  int open_ino_traverse_dir(inodeno_t ino, MMDSOpenIno *m,
			    vector<inode_backpointer_t>& ancestors,
			    bool discover, bool want_xlocked, mds_rank_t *hint);
  void open_ino_finish(inodeno_t ino, open_ino_info_t& info, int err);
  void do_open_ino(inodeno_t ino, open_ino_info_t& info, int err);
  void do_open_ino_peer(inodeno_t ino, open_ino_info_t& info);
  void handle_open_ino(MMDSOpenIno *m, int err=0);
  void handle_open_ino_reply(MMDSOpenInoReply *m);
  friend class C_IO_MDC_OpenInoBacktraceFetched;
  friend struct C_MDC_OpenInoTraverseDir;
  friend struct C_MDC_OpenInoParentOpened;

  // NOTE(review): one or two lines (likely an access specifier) are elided
  // here in this view of the file.
  // Re-drive open_ino peer queries after rank @who becomes available.
  void kick_open_ino_peers(mds_rank_t who);
  // Public entry point: locate and open inode @ino (optionally hinting
  // its data @pool), completing @fin when done.
  void open_ino(inodeno_t ino, int64_t pool, MDSInternalContextBase *fin,
		bool want_replica=true, bool want_xlocked=false);
  // -- find_ino_peer --
  // Per-query state for a find_ino_peers() broadcast.
  // NOTE(review): the 'tid', 'ino' and 'hint' member declarations are
  // elided in this view of the file; they are initialized by the
  // constructor below.
  struct find_ino_peer_info_t {
    MDSInternalContextBase *fin;   // waiter to complete when the ino is located
    mds_rank_t checking;           // rank currently being queried
    set<mds_rank_t> checked;       // ranks already queried
    find_ino_peer_info_t() : tid(0), fin(NULL), hint(MDS_RANK_NONE), checking(MDS_RANK_NONE) {}
  };

  // In-flight peer queries, keyed by tid.
  map<ceph_tid_t, find_ino_peer_info_t> find_ino_peer;
  // Last tid handed out for a find_ino_peers query.
  ceph_tid_t find_ino_peer_last_tid;

  // Ask other ranks where inode @ino lives, completing @c when answered.
  void find_ino_peers(inodeno_t ino, MDSInternalContextBase *c, mds_rank_t hint=MDS_RANK_NONE);
  void _do_find_ino_peer(find_ino_peer_info_t& fip);
  void handle_find_ino(MMDSFindIno *m);
  void handle_find_ino_reply(MMDSFindInoReply *m);
  // Re-drive pending queries after rank @who becomes available.
  void kick_find_ino_peers(mds_rank_t who);
  // -- snaprealms --
  void snaprealm_create(MDRequestRef& mdr, CInode *in);
  void _snaprealm_create_finish(MDRequestRef& mdr, MutationRef& mut, CInode *in);

  // -- stray dentries / backtraces --
  // Read the backtrace object for @ino from pool @pool into @bl,
  // completing @fin when done.
  void fetch_backtrace(inodeno_t ino, int64_t pool, bufferlist& bl, Context *fin);
  // Number of inodes currently held in stray dentries.
  uint64_t get_num_strays() const { return stray_manager.get_num_strays(); }
  // (Re)scan the stray directory, starting from dirfrag @next.
  void scan_stray_dir(dirfrag_t next=dirfrag_t());
  StrayManager stray_manager;
  friend struct C_MDC_RetryScanStray;
  friend class C_IO_MDC_FetchedBacktrace;
  // -- messaging entry point --
  void dispatch(Message *m);

  // -- discover (cross-MDS metadata lookup) handlers --
  void handle_discover(MDiscover *dis);
  void handle_discover_reply(MDiscoverReply *m);
  friend class C_MDC_Join;
  // -- replication: encode cache objects for another rank --
  void replicate_dir(CDir *dir, mds_rank_t to, bufferlist& bl);
  void replicate_dentry(CDentry *dn, mds_rank_t to, bufferlist& bl);
  // NOTE(review): the trailing parameter(s) of replicate_inode are elided
  // in this view of the file.
  void replicate_inode(CInode *in, mds_rank_t to, bufferlist& bl,

  // Decode replicas received from @from, queueing waiters in @finished.
  CDir* add_replica_dir(bufferlist::iterator& p, CInode *diri, mds_rank_t from,
			list<MDSInternalContextBase*>& finished);
  CDentry *add_replica_dentry(bufferlist::iterator& p, CDir *dir,
			      list<MDSInternalContextBase*>& finished);
  CInode *add_replica_inode(bufferlist::iterator& p, CDentry *dn,
			    list<MDSInternalContextBase*>& finished);

  void replicate_stray(CDentry *straydn, mds_rank_t who, bufferlist& bl);
  CDentry *add_replica_stray(bufferlist &bl, mds_rank_t from);

  // -- namespace change notifications to replicas --
  void send_dentry_link(CDentry *dn, MDRequestRef& mdr);
  void send_dentry_unlink(CDentry *dn, CDentry *straydn, MDRequestRef& mdr);
  void handle_dentry_link(MDentryLink *m);
  void handle_dentry_unlink(MDentryUnlink *m);
  // -- fragmenting --
  // Rollback state for a fragment operation that is journaled but not yet
  // committed.
  // NOTE(review): the 'struct ufragment {' opening and its leading members
  // (bits, committed, ls -- referenced by the constructor below) are elided
  // in this view of the file.
    list<MDSInternalContextBase*> waiters;  // contexts waiting on this op
    list<frag_t> old_frags;                 // pre-fragment frag set, kept for rollback
    bufferlist rollback;                    // encoded rollback payload
    ufragment() : bits(0), committed(false), ls(NULL) {}
  };
  // Uncommitted fragment ops, keyed by base dirfrag.
  map<dirfrag_t, ufragment> uncommitted_fragments;

  // Live state of an in-progress fragment operation.
  // NOTE(review): the leading members (bits, all_frozen, ... -- referenced
  // by the constructor below) are elided in this view of the file.
  struct fragment_info_t {
    list<CDir*> resultfrags;
    // for deadlock detection
    utime_t last_cum_auth_pins_change;
    int last_cum_auth_pins;
    int num_remote_waiters;	// number of remote authpin waiters
    fragment_info_t() : bits(0), all_frozen(false), last_cum_auth_pins(0), num_remote_waiters(0) {}
    bool is_fragmenting() { return !resultfrags.empty(); }
  };
  // In-flight fragment ops, keyed by base dirfrag.
  map<dirfrag_t,fragment_info_t> fragments;
  // Split/merge @diri's dirfrags at @basefrag by @bits (positive = split,
  // negative = merge -- TODO confirm against definition).
  void adjust_dir_fragments(CInode *diri, frag_t basefrag, int bits,
			    list<CDir*>& frags, list<MDSInternalContextBase*>& waiters,
			    bool replay);
  // NOTE(review): the final parameter(s) of this overload are elided in
  // this view of the file.
  void adjust_dir_fragments(CInode *diri,
			    list<CDir*>& srcfrags,
			    frag_t basefrag, int bits,
			    list<CDir*>& resultfrags,
			    list<MDSInternalContextBase*>& waiters,

  CDir *force_dir_fragment(CInode *diri, frag_t fg, bool replay=true);
  void get_force_dirfrag_bound_set(vector<dirfrag_t>& dfs, set<CDir*>& bounds);

  // Fragment operation pipeline (freeze -> mark -> log -> store -> commit).
  bool can_fragment(CInode *diri, list<CDir*>& dirs);
  void fragment_freeze_dirs(list<CDir*>& dirs);
  void fragment_mark_and_complete(MDRequestRef& mdr);
  void fragment_frozen(MDRequestRef& mdr, int r);
  void fragment_unmark_unfreeze_dirs(list<CDir*>& dirs);
  void dispatch_fragment_dir(MDRequestRef& mdr);
  void _fragment_logged(MDRequestRef& mdr);
  void _fragment_stored(MDRequestRef& mdr);
  void _fragment_committed(dirfrag_t f, list<CDir*>& resultfrags);
  void _fragment_finish(dirfrag_t f, list<CDir*>& resultfrags);

  friend class EFragment;
  friend class C_MDC_FragmentFrozen;
  friend class C_MDC_FragmentMarking;
  friend class C_MDC_FragmentPrep;
  friend class C_MDC_FragmentStore;
  friend class C_MDC_FragmentCommit;
  friend class C_IO_MDC_FragmentFinish;

  void handle_fragment_notify(MMDSFragmentNotify *m);

  // Track an uncommitted fragment op so it can be rolled back on failure.
  void add_uncommitted_fragment(dirfrag_t basedirfrag, int bits, list<frag_t>& old_frag,
				LogSegment *ls, bufferlist *rollback=NULL);
  void finish_uncommitted_fragment(dirfrag_t basedirfrag, int op);
  void rollback_uncommitted_fragment(dirfrag_t basedirfrag, list<frag_t>& old_frags);

  // Queue @c to run once the uncommitted fragment op on @dirfrag resolves.
  // Precondition (asserted): such an op exists.
  void wait_for_uncommitted_fragment(dirfrag_t dirfrag, MDSInternalContextBase *c) {
    assert(uncommitted_fragments.count(dirfrag));
    uncommitted_fragments[dirfrag].waiters.push_back(c);
  }

  void split_dir(CDir *dir, int byn);
  void merge_dir(CInode *diri, frag_t fg);
  void rollback_uncommitted_fragments();

  // Deadlock/staleness detection for fragment freezes.
  void find_stale_fragment_freeze();
  void fragment_freeze_inc_num_waiters(CDir *dir);
  bool fragment_are_all_frozen(CDir *dir);
  int get_num_fragmenting_dirs() { return fragments.size(); }
  // -- updates --
  //int send_inode_updates(CInode *in);
  //void handle_inode_update(MInodeUpdate *m);

  int send_dir_updates(CDir *in, bool bcast=false);
  void handle_dir_update(MDirUpdate *m);

  // -- cache expiration --
  void handle_cache_expire(MCacheExpire *m);
  // Delayed-expire queue management for @dir.
  void process_delayed_expire(CDir *dir);
  void discard_delayed_expire(CDir *dir);
  // Dump cache contents to file @fn and/or Formatter @f, optionally
  // restricted to the subtree under @dump_root.
  // NOTE(review): the tail of this overload's parameter list is elided in
  // this view of the file.
  int dump_cache(boost::string_view fn, Formatter *f,
		 boost::string_view dump_root = "",

  // Convenience overloads forwarding to the full dump_cache() above.
  int dump_cache() { return dump_cache(NULL, NULL); }
  int dump_cache(boost::string_view filename);
  int dump_cache(Formatter *f);
  int dump_cache(boost::string_view dump_root, int depth, Formatter *f);

  int cache_status(Formatter *f);

  void dump_resolve_status(Formatter *f) const;
  void dump_rejoin_status(Formatter *f) const;

  // hack: debug-print the subtree map at debug level @dbl
  void show_subtrees(int dbl=10);
  // hack: pick an arbitrary cached inode (uniformly over inode_map).
  // Precondition (asserted): the cache is non-empty.
  CInode *hack_pick_random_inode() {
    assert(!inode_map.empty());
    int n = rand() % inode_map.size();
    auto p = inode_map.begin();
    // NOTE(review): the remainder of this function body is elided in this
    // view of the file.

  void flush_dentry_work(MDRequestRef& mdr);
  /**
   * Resolve path to a dentry and pass it onto the ScrubStack.
   *
   * TODO: return enough information to the original mdr formatter
   * and completion that they can subsequently check the progress of
   * this scrub (we won't block them on a whole scrub as it can take a very
   * long time)
   */
  void enqueue_scrub_work(MDRequestRef& mdr);
  void repair_inode_stats_work(MDRequestRef& mdr);
  void repair_dirfrag_stats_work(MDRequestRef& mdr);
  friend class C_MDC_RepairDirfragStats;

  // Flush the journal entries pinning the dentry at @path, then call @fin.
  void flush_dentry(boost::string_view path, Context *fin);
  /**
   * Create and start an OP_ENQUEUE_SCRUB
   */
  void enqueue_scrub(boost::string_view path, boost::string_view tag,
		     bool force, bool recursive, bool repair,
		     Formatter *f, Context *fin);
  void repair_inode_stats(CInode *diri);
  void repair_dirfrag_stats(CDir *dir);

  /* Because exports may fail, this set lets us keep track of inodes that need exporting. */
  std::set<CInode *> export_pin_queue;
1227 class C_MDS_RetryRequest
: public MDSInternalContext
{
1231 C_MDS_RetryRequest(MDCache
*c
, MDRequestRef
& r
);
1232 void finish(int r
) override
;