]> git.proxmox.com Git - ceph.git/blob - ceph/src/mds/MDCache.h
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / mds / MDCache.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15
16
17 #ifndef CEPH_MDCACHE_H
18 #define CEPH_MDCACHE_H
19
20 #include "include/types.h"
21 #include "include/filepath.h"
22 #include "include/elist.h"
23
24 #include "osdc/Filer.h"
25 #include "CInode.h"
26 #include "CDentry.h"
27 #include "CDir.h"
28 #include "include/Context.h"
29 #include "events/EMetaBlob.h"
30 #include "RecoveryQueue.h"
31 #include "StrayManager.h"
32 #include "MDSContext.h"
33 #include "MDSMap.h"
34 #include "Mutation.h"
35
36 #include "messages/MClientRequest.h"
37 #include "messages/MMDSSlaveRequest.h"
38
39 class PerfCounters;
40
41 class MDSRank;
42 class Session;
43 class Migrator;
44
45 class Message;
46 class Session;
47
48 class MMDSResolve;
49 class MMDSResolveAck;
50 class MMDSCacheRejoin;
51 class MDiscover;
52 class MDiscoverReply;
53 class MCacheExpire;
54 class MDirUpdate;
55 class MDentryLink;
56 class MDentryUnlink;
57 class MLock;
58 struct MMDSFindIno;
59 struct MMDSFindInoReply;
60 struct MMDSOpenIno;
61 struct MMDSOpenInoReply;
62
63 class Message;
64 class MClientRequest;
65 class MMDSSlaveRequest;
66 struct MClientSnap;
67
68 class MMDSFragmentNotify;
69
70 class ESubtreeMap;
71
// Identifiers for the MDCache perf counters, numbered consecutively from
// l_mdc_first.  NOTE(review): l_mdc_first / l_mdc_last look like exclusive
// range sentinels rather than counters themselves -- confirm against the
// code that registers them.
enum {
  l_mdc_first = 3000,
  // Number of inodes currently held in stray dentries
  l_mdc_num_strays,
  // Number of stray dentries whose purge is currently delayed because of refs
  l_mdc_num_strays_delayed,
  // Number of stray dentries currently being enqueued for purge
  l_mdc_num_strays_enqueuing,

  // Total number of dentries ever added to a stray dir
  l_mdc_strays_created,
  // Total number of dentries passed on to the PurgeQueue
  l_mdc_strays_enqueued,
  // Total number of strays that have been reintegrated
  l_mdc_strays_reintegrated,
  // Total number of strays that have been migrated
  l_mdc_strays_migrated,

  // Number of inodes whose size is currently being recovered
  l_mdc_num_recovering_processing,
  // Number of inodes currently waiting to have their size recovered
  l_mdc_num_recovering_enqueued,
  // Number of inodes waiting for recovery with elevated priority
  l_mdc_num_recovering_prioritized,
  // Total number of inodes that ever started size recovery
  l_mdc_recovery_started,
  // Total number of inodes that ever completed size recovery
  l_mdc_recovery_completed,

  l_mdc_last,
};
103
104
// Flags for the `flags` argument of predirty_journal_parents().
// The values are distinct single bits (1, 2, 4); presumably they are meant to
// be combined with bitwise OR -- confirm against the callers.
static const int PREDIRTY_PRIMARY = 1; // primary dn, adjust nested accounting
static const int PREDIRTY_DIR = 2; // update parent dir mtime/size
static const int PREDIRTY_SHALLOW = 4; // only go to immediate parent (for easier rollback)
109
110 class MDCache {
111 public:
112 // my master
113 MDSRank *mds;
114
115 // -- my cache --
116 LRU lru; // dentry lru for expiring items from cache
117 protected:
118 ceph::unordered_map<vinodeno_t,CInode*> inode_map; // map of inodes by ino
119 CInode *root; // root inode
120 CInode *myin; // .ceph/mds%d dir
121
122 bool readonly;
123 void set_readonly() { readonly = true; }
124
125 CInode *strays[NUM_STRAY]; // my stray dir
126 int stray_index;
127
128 CInode *get_stray() {
129 return strays[stray_index];
130 }
131
132 set<CInode*> base_inodes;
133
134 std::unique_ptr<PerfCounters> logger;
135
136 Filer filer;
137
138 bool exceeded_size_limit;
139
140 public:
141 void advance_stray() {
142 stray_index = (stray_index+1)%NUM_STRAY;
143 }
144
145 void activate_stray_manager();
146
147 /**
148 * Call this when you know that a CDentry is ready to be passed
149 * on to StrayManager (i.e. this is a stray you've just created)
150 */
151 void notify_stray(CDentry *dn) {
152 assert(dn->get_dir()->get_inode()->is_stray());
153 stray_manager.eval_stray(dn);
154 }
155
156 void maybe_eval_stray(CInode *in, bool delay=false);
157 bool is_readonly() { return readonly; }
158 void force_readonly();
159
160 DecayRate decayrate;
161
162 int num_inodes_with_caps;
163
164 unsigned max_dir_commit_size;
165
166 static file_layout_t gen_default_file_layout(const MDSMap &mdsmap);
167 static file_layout_t gen_default_log_layout(const MDSMap &mdsmap);
168
169 file_layout_t default_file_layout;
170 file_layout_t default_log_layout;
171
172 void register_perfcounters();
173
174 // -- client leases --
175 public:
176 static const int client_lease_pools = 3;
177 float client_lease_durations[client_lease_pools];
178 protected:
179 xlist<ClientLease*> client_leases[client_lease_pools];
180 public:
181 void touch_client_lease(ClientLease *r, int pool, utime_t ttl) {
182 client_leases[pool].push_back(&r->item_lease);
183 r->ttl = ttl;
184 }
185
186 void notify_stray_removed()
187 {
188 stray_manager.notify_stray_removed();
189 }
190
191 void notify_stray_created()
192 {
193 stray_manager.notify_stray_created();
194 }
195
196 // -- client caps --
197 uint64_t last_cap_id;
198
199
200
201 // -- discover --
202 struct discover_info_t {
203 ceph_tid_t tid;
204 mds_rank_t mds;
205 inodeno_t ino;
206 frag_t frag;
207 snapid_t snap;
208 filepath want_path;
209 MDSCacheObject *base;
210 bool want_base_dir;
211 bool want_xlocked;
212
213 discover_info_t() :
214 tid(0), mds(-1), snap(CEPH_NOSNAP), base(NULL),
215 want_base_dir(false), want_xlocked(false) {}
216 ~discover_info_t() {
217 if (base)
218 base->put(MDSCacheObject::PIN_DISCOVERBASE);
219 }
220 void pin_base(MDSCacheObject *b) {
221 base = b;
222 base->get(MDSCacheObject::PIN_DISCOVERBASE);
223 }
224 };
225
226 map<ceph_tid_t, discover_info_t> discovers;
227 ceph_tid_t discover_last_tid;
228
229 void _send_discover(discover_info_t& dis);
230 discover_info_t& _create_discover(mds_rank_t mds) {
231 ceph_tid_t t = ++discover_last_tid;
232 discover_info_t& d = discovers[t];
233 d.tid = t;
234 d.mds = mds;
235 return d;
236 }
237
238 // waiters
239 map<int, map<inodeno_t, list<MDSInternalContextBase*> > > waiting_for_base_ino;
240
241 void discover_base_ino(inodeno_t want_ino, MDSInternalContextBase *onfinish, mds_rank_t from=MDS_RANK_NONE);
242 void discover_dir_frag(CInode *base, frag_t approx_fg, MDSInternalContextBase *onfinish,
243 mds_rank_t from=MDS_RANK_NONE);
244 void discover_path(CInode *base, snapid_t snap, filepath want_path, MDSInternalContextBase *onfinish,
245 bool want_xlocked=false, mds_rank_t from=MDS_RANK_NONE);
246 void discover_path(CDir *base, snapid_t snap, filepath want_path, MDSInternalContextBase *onfinish,
247 bool want_xlocked=false);
248 void kick_discovers(mds_rank_t who); // after a failure.
249
250
251 // -- subtrees --
252 protected:
253 /* subtree keys and each tree's non-recursive nested subtrees (the "bounds") */
254 map<CDir*,set<CDir*> > subtrees;
255 map<CInode*,list<pair<CDir*,CDir*> > > projected_subtree_renames; // renamed ino -> target dir
256
257 // adjust subtree auth specification
258 // dir->dir_auth
259 // imports/exports/nested_exports
260 // join/split subtrees as appropriate
261 public:
262 bool is_subtrees() { return !subtrees.empty(); }
263 void list_subtrees(list<CDir*>& ls);
264 void adjust_subtree_auth(CDir *root, mds_authority_t auth, bool do_eval=true);
265 void adjust_subtree_auth(CDir *root, mds_rank_t a, mds_rank_t b=CDIR_AUTH_UNKNOWN, bool do_eval=true) {
266 adjust_subtree_auth(root, mds_authority_t(a,b), do_eval);
267 }
268 void adjust_bounded_subtree_auth(CDir *dir, set<CDir*>& bounds, mds_authority_t auth);
269 void adjust_bounded_subtree_auth(CDir *dir, set<CDir*>& bounds, mds_rank_t a) {
270 adjust_bounded_subtree_auth(dir, bounds, mds_authority_t(a, CDIR_AUTH_UNKNOWN));
271 }
272 void adjust_bounded_subtree_auth(CDir *dir, vector<dirfrag_t>& bounds, mds_authority_t auth);
273 void adjust_bounded_subtree_auth(CDir *dir, vector<dirfrag_t>& bounds, mds_rank_t a) {
274 adjust_bounded_subtree_auth(dir, bounds, mds_authority_t(a, CDIR_AUTH_UNKNOWN));
275 }
276 void map_dirfrag_set(list<dirfrag_t>& dfs, set<CDir*>& result);
277 void try_subtree_merge(CDir *root);
278 void try_subtree_merge_at(CDir *root, bool do_eval=true);
279 void subtree_merge_writebehind_finish(CInode *in, MutationRef& mut);
280 void eval_subtree_root(CInode *diri);
281 CDir *get_subtree_root(CDir *dir);
282 CDir *get_projected_subtree_root(CDir *dir);
283 bool is_leaf_subtree(CDir *dir) {
284 assert(subtrees.count(dir));
285 return subtrees[dir].empty();
286 }
287 void remove_subtree(CDir *dir);
288 bool is_subtree(CDir *root) {
289 return subtrees.count(root);
290 }
291 void get_subtree_bounds(CDir *root, set<CDir*>& bounds);
292 void get_wouldbe_subtree_bounds(CDir *root, set<CDir*>& bounds);
293 void verify_subtree_bounds(CDir *root, const set<CDir*>& bounds);
294 void verify_subtree_bounds(CDir *root, const list<dirfrag_t>& bounds);
295
296 void project_subtree_rename(CInode *diri, CDir *olddir, CDir *newdir);
297 void adjust_subtree_after_rename(CInode *diri, CDir *olddir,
298 bool pop, bool imported = false);
299
300 void get_auth_subtrees(set<CDir*>& s);
301 void get_fullauth_subtrees(set<CDir*>& s);
302
303 int num_subtrees();
304 int num_subtrees_fullauth();
305 int num_subtrees_fullnonauth();
306
307
308 protected:
309 // delayed cache expire
310 map<CDir*, map<mds_rank_t, MCacheExpire*> > delayed_expire; // subtree root -> expire msg
311
312
313 // -- requests --
314 ceph::unordered_map<metareqid_t, MDRequestRef> active_requests;
315
316 public:
317 int get_num_client_requests();
318
319 MDRequestRef request_start(MClientRequest *req);
320 MDRequestRef request_start_slave(metareqid_t rid, __u32 attempt, Message *m);
321 MDRequestRef request_start_internal(int op);
322 bool have_request(metareqid_t rid) {
323 return active_requests.count(rid);
324 }
325 MDRequestRef request_get(metareqid_t rid);
326 void request_pin_ref(MDRequestRef& r, CInode *ref, vector<CDentry*>& trace);
327 void request_finish(MDRequestRef& mdr);
328 void request_forward(MDRequestRef& mdr, mds_rank_t mds, int port=0);
329 void dispatch_request(MDRequestRef& mdr);
330 void request_drop_foreign_locks(MDRequestRef& mdr);
331 void request_drop_non_rdlocks(MDRequestRef& r);
332 void request_drop_locks(MDRequestRef& r);
333 void request_cleanup(MDRequestRef& r);
334
335 void request_kill(MDRequestRef& r); // called when session closes
336
337 // journal/snap helpers
338 CInode *pick_inode_snap(CInode *in, snapid_t follows);
339 CInode *cow_inode(CInode *in, snapid_t last);
340 void journal_cow_dentry(MutationImpl *mut, EMetaBlob *metablob, CDentry *dn,
341 snapid_t follows=CEPH_NOSNAP,
342 CInode **pcow_inode=0, CDentry::linkage_t *dnl=0);
343 void journal_cow_inode(MutationRef& mut, EMetaBlob *metablob, CInode *in, snapid_t follows=CEPH_NOSNAP,
344 CInode **pcow_inode=0);
345 void journal_dirty_inode(MutationImpl *mut, EMetaBlob *metablob, CInode *in, snapid_t follows=CEPH_NOSNAP);
346
347 void project_rstat_inode_to_frag(CInode *cur, CDir *parent, snapid_t first,
348 int linkunlink, SnapRealm *prealm);
349 void _project_rstat_inode_to_frag(inode_t& inode, snapid_t ofirst, snapid_t last,
350 CDir *parent, int linkunlink, bool update_inode);
351 void project_rstat_frag_to_inode(nest_info_t& rstat, nest_info_t& accounted_rstat,
352 snapid_t ofirst, snapid_t last,
353 CInode *pin, bool cow_head);
354 void broadcast_quota_to_client(CInode *in);
355 void predirty_journal_parents(MutationRef mut, EMetaBlob *blob,
356 CInode *in, CDir *parent,
357 int flags, int linkunlink=0,
358 snapid_t follows=CEPH_NOSNAP);
359
360 // slaves
361 void add_uncommitted_master(metareqid_t reqid, LogSegment *ls, set<mds_rank_t> &slaves, bool safe=false) {
362 uncommitted_masters[reqid].ls = ls;
363 uncommitted_masters[reqid].slaves = slaves;
364 uncommitted_masters[reqid].safe = safe;
365 }
366 void wait_for_uncommitted_master(metareqid_t reqid, MDSInternalContextBase *c) {
367 uncommitted_masters[reqid].waiters.push_back(c);
368 }
369 bool have_uncommitted_master(metareqid_t reqid, mds_rank_t from) {
370 auto p = uncommitted_masters.find(reqid);
371 return p != uncommitted_masters.end() && p->second.slaves.count(from) > 0;
372 }
373 void log_master_commit(metareqid_t reqid);
374 void logged_master_update(metareqid_t reqid);
375 void _logged_master_commit(metareqid_t reqid);
376 void committed_master_slave(metareqid_t r, mds_rank_t from);
377 void finish_committed_masters();
378
379 void _logged_slave_commit(mds_rank_t from, metareqid_t reqid);
380
381 // -- recovery --
382 protected:
383 set<mds_rank_t> recovery_set;
384
385 public:
386 void set_recovery_set(set<mds_rank_t>& s);
387 void handle_mds_failure(mds_rank_t who);
388 void handle_mds_recovery(mds_rank_t who);
389
390 protected:
391 // [resolve]
392 // from EImportStart w/o EImportFinish during journal replay
393 map<dirfrag_t, vector<dirfrag_t> > my_ambiguous_imports;
394 // from MMDSResolves
395 map<mds_rank_t, map<dirfrag_t, vector<dirfrag_t> > > other_ambiguous_imports;
396
397 map<mds_rank_t, map<metareqid_t, MDSlaveUpdate*> > uncommitted_slave_updates; // slave: for replay.
398 map<CInode*, int> uncommitted_slave_rename_olddir; // slave: preserve the non-auth dir until seeing commit.
399 map<CInode*, int> uncommitted_slave_unlink; // slave: preserve the unlinked inode until seeing commit.
400
401 // track master requests whose slaves haven't acknowledged commit
402 struct umaster {
403 set<mds_rank_t> slaves;
404 LogSegment *ls;
405 list<MDSInternalContextBase*> waiters;
406 bool safe;
407 bool committing;
408 bool recovering;
409 umaster() : ls(NULL), safe(false), committing(false), recovering(false) {}
410 };
411 map<metareqid_t, umaster> uncommitted_masters; // master: req -> slave set
412
413 set<metareqid_t> pending_masters;
414 map<int, set<metareqid_t> > ambiguous_slave_updates;
415
416 friend class ESlaveUpdate;
417 friend class ECommitted;
418
419 bool resolves_pending;
420 set<mds_rank_t> resolve_gather; // nodes i need resolves from
421 set<mds_rank_t> resolve_ack_gather; // nodes i need a resolve_ack from
422 map<metareqid_t, mds_rank_t> need_resolve_rollback; // rollbacks i'm writing to the journal
423 map<mds_rank_t, MMDSResolve*> delayed_resolve;
424
425 void handle_resolve(MMDSResolve *m);
426 void handle_resolve_ack(MMDSResolveAck *m);
427 void process_delayed_resolve();
428 void discard_delayed_resolve(mds_rank_t who);
429 void maybe_resolve_finish();
430 void disambiguate_my_imports();
431 void disambiguate_other_imports();
432 void trim_unlinked_inodes();
433 void add_uncommitted_slave_update(metareqid_t reqid, mds_rank_t master, MDSlaveUpdate*);
434 void finish_uncommitted_slave_update(metareqid_t reqid, mds_rank_t master);
435 MDSlaveUpdate* get_uncommitted_slave_update(metareqid_t reqid, mds_rank_t master);
436 public:
437 void recalc_auth_bits(bool replay);
438 void remove_inode_recursive(CInode *in);
439
440 bool is_ambiguous_slave_update(metareqid_t reqid, mds_rank_t master) {
441 auto p = ambiguous_slave_updates.find(master);
442 return p != ambiguous_slave_updates.end() && p->second.count(reqid);
443 }
444 void add_ambiguous_slave_update(metareqid_t reqid, mds_rank_t master) {
445 ambiguous_slave_updates[master].insert(reqid);
446 }
447 void remove_ambiguous_slave_update(metareqid_t reqid, mds_rank_t master) {
448 auto p = ambiguous_slave_updates.find(master);
449 auto q = p->second.find(reqid);
450 assert(q != p->second.end());
451 p->second.erase(q);
452 if (p->second.empty())
453 ambiguous_slave_updates.erase(p);
454 }
455
456 void add_rollback(metareqid_t reqid, mds_rank_t master) {
457 need_resolve_rollback[reqid] = master;
458 }
459 void finish_rollback(metareqid_t reqid);
460
461 // ambiguous imports
462 void add_ambiguous_import(dirfrag_t base, const vector<dirfrag_t>& bounds);
463 void add_ambiguous_import(CDir *base, const set<CDir*>& bounds);
464 bool have_ambiguous_import(dirfrag_t base) {
465 return my_ambiguous_imports.count(base);
466 }
467 void get_ambiguous_import_bounds(dirfrag_t base, vector<dirfrag_t>& bounds) {
468 assert(my_ambiguous_imports.count(base));
469 bounds = my_ambiguous_imports[base];
470 }
471 void cancel_ambiguous_import(CDir *);
472 void finish_ambiguous_import(dirfrag_t dirino);
473 void resolve_start(MDSInternalContext *resolve_done_);
474 void send_resolves();
475 void send_slave_resolves();
476 void send_subtree_resolves();
477 void maybe_send_pending_resolves() {
478 if (resolves_pending)
479 send_subtree_resolves();
480 }
481
482 void _move_subtree_map_bound(dirfrag_t df, dirfrag_t oldparent, dirfrag_t newparent,
483 map<dirfrag_t,vector<dirfrag_t> >& subtrees);
484 ESubtreeMap *create_subtree_map();
485
486
487 void clean_open_file_lists();
488
489 protected:
490 // [rejoin]
491 bool rejoins_pending;
492 set<mds_rank_t> rejoin_gather; // nodes from whom i need a rejoin
493 set<mds_rank_t> rejoin_sent; // nodes i sent a rejoin to
494 set<mds_rank_t> rejoin_ack_gather; // nodes from whom i need a rejoin ack
495 map<mds_rank_t,map<inodeno_t,map<client_t,Capability::Import> > > rejoin_imported_caps;
496 map<inodeno_t,pair<mds_rank_t,map<client_t,Capability::Export> > > rejoin_slave_exports;
497 map<client_t,entity_inst_t> rejoin_client_map;
498
499 map<inodeno_t,map<client_t,cap_reconnect_t> > cap_exports; // ino -> client -> capex
500 map<inodeno_t,mds_rank_t> cap_export_targets; // ino -> auth mds
501
502 map<inodeno_t,map<client_t,map<mds_rank_t,cap_reconnect_t> > > cap_imports; // ino -> client -> frommds -> capex
503 set<inodeno_t> cap_imports_missing;
504 map<inodeno_t, list<MDSInternalContextBase*> > cap_reconnect_waiters;
505 int cap_imports_num_opening;
506
507 set<CInode*> rejoin_undef_inodes;
508 set<CInode*> rejoin_potential_updated_scatterlocks;
509 set<CDir*> rejoin_undef_dirfrags;
510 map<mds_rank_t, set<CInode*> > rejoin_unlinked_inodes;
511
512 vector<CInode*> rejoin_recover_q, rejoin_check_q;
513 list<SimpleLock*> rejoin_eval_locks;
514 list<MDSInternalContextBase*> rejoin_waiters;
515
516 void rejoin_walk(CDir *dir, MMDSCacheRejoin *rejoin);
517 void handle_cache_rejoin(MMDSCacheRejoin *m);
518 void handle_cache_rejoin_weak(MMDSCacheRejoin *m);
519 CInode* rejoin_invent_inode(inodeno_t ino, snapid_t last);
520 CDir* rejoin_invent_dirfrag(dirfrag_t df);
521 void handle_cache_rejoin_strong(MMDSCacheRejoin *m);
522 void rejoin_scour_survivor_replicas(mds_rank_t from, MMDSCacheRejoin *ack,
523 set<vinodeno_t>& acked_inodes,
524 set<SimpleLock *>& gather_locks);
525 void handle_cache_rejoin_ack(MMDSCacheRejoin *m);
526 void rejoin_send_acks();
527 void rejoin_trim_undef_inodes();
528 void maybe_send_pending_rejoins() {
529 if (rejoins_pending)
530 rejoin_send_rejoins();
531 }
532 std::unique_ptr<MDSInternalContext> rejoin_done;
533 std::unique_ptr<MDSInternalContext> resolve_done;
534 public:
535 void rejoin_start(MDSInternalContext *rejoin_done_);
536 void rejoin_gather_finish();
537 void rejoin_send_rejoins();
538 void rejoin_export_caps(inodeno_t ino, client_t client, const cap_reconnect_t& icr,
539 int target=-1) {
540 cap_exports[ino][client] = icr;
541 cap_export_targets[ino] = target;
542 }
543 void rejoin_recovered_caps(inodeno_t ino, client_t client, const cap_reconnect_t& icr,
544 mds_rank_t frommds=MDS_RANK_NONE) {
545 cap_imports[ino][client][frommds] = icr;
546 }
547 const cap_reconnect_t *get_replay_cap_reconnect(inodeno_t ino, client_t client) {
548 if (cap_imports.count(ino) &&
549 cap_imports[ino].count(client) &&
550 cap_imports[ino][client].count(MDS_RANK_NONE)) {
551 return &cap_imports[ino][client][MDS_RANK_NONE];
552 }
553 return NULL;
554 }
555 void remove_replay_cap_reconnect(inodeno_t ino, client_t client) {
556 assert(cap_imports[ino].size() == 1);
557 assert(cap_imports[ino][client].size() == 1);
558 cap_imports.erase(ino);
559 }
560 void wait_replay_cap_reconnect(inodeno_t ino, MDSInternalContextBase *c) {
561 cap_reconnect_waiters[ino].push_back(c);
562 }
563
564 // [reconnect/rejoin caps]
565 struct reconnected_cap_info_t {
566 inodeno_t realm_ino;
567 snapid_t snap_follows;
568 int dirty_caps;
569 reconnected_cap_info_t() :
570 realm_ino(0), snap_follows(0), dirty_caps(0) {}
571 };
572 map<inodeno_t,map<client_t, reconnected_cap_info_t> > reconnected_caps; // inode -> client -> snap_follows,realmino
573 map<inodeno_t,map<client_t, snapid_t> > reconnected_snaprealms; // realmino -> client -> realmseq
574
575 void add_reconnected_cap(client_t client, inodeno_t ino, const cap_reconnect_t& icr) {
576 reconnected_cap_info_t &info = reconnected_caps[ino][client];
577 info.realm_ino = inodeno_t(icr.capinfo.snaprealm);
578 info.snap_follows = icr.snap_follows;
579 }
580 void set_reconnected_dirty_caps(client_t client, inodeno_t ino, int dirty) {
581 reconnected_cap_info_t &info = reconnected_caps[ino][client];
582 info.dirty_caps |= dirty;
583 }
584 void add_reconnected_snaprealm(client_t client, inodeno_t ino, snapid_t seq) {
585 reconnected_snaprealms[ino][client] = seq;
586 }
587
588 friend class C_MDC_RejoinOpenInoFinish;
589 friend class C_MDC_RejoinSessionsOpened;
590 void rejoin_open_ino_finish(inodeno_t ino, int ret);
591 void rejoin_open_sessions_finish(map<client_t,entity_inst_t> client_map,
592 map<client_t,uint64_t>& sseqmap);
593 bool process_imported_caps();
594 void choose_lock_states_and_reconnect_caps();
595 void prepare_realm_split(SnapRealm *realm, client_t client, inodeno_t ino,
596 map<client_t,MClientSnap*>& splits);
597 void do_realm_invalidate_and_update_notify(CInode *in, int snapop, bool nosend=false);
598 void send_snaps(map<client_t,MClientSnap*>& splits);
599 Capability* rejoin_import_cap(CInode *in, client_t client, const cap_reconnect_t& icr, mds_rank_t frommds);
600 void finish_snaprealm_reconnect(client_t client, SnapRealm *realm, snapid_t seq);
601 void try_reconnect_cap(CInode *in, Session *session);
602 void export_remaining_imported_caps();
603
604 // cap imports. delayed snap parent opens.
605 // realm inode -> client -> cap inodes needing to split to this realm
606 map<CInode*,set<CInode*> > missing_snap_parents;
607 map<client_t,set<CInode*> > delayed_imported_caps;
608
609 void do_cap_import(Session *session, CInode *in, Capability *cap,
610 uint64_t p_cap_id, ceph_seq_t p_seq, ceph_seq_t p_mseq,
611 int peer, int p_flags);
612 void do_delayed_cap_imports();
613 void rebuild_need_snapflush(CInode *head_in, SnapRealm *realm, client_t client,
614 snapid_t snap_follows);
615 void check_realm_past_parents(SnapRealm *realm, bool reconnect);
616 void open_snap_parents();
617
618 bool open_undef_inodes_dirfrags();
619 void opened_undef_inode(CInode *in);
620 void opened_undef_dirfrag(CDir *dir) {
621 rejoin_undef_dirfrags.erase(dir);
622 }
623
624 void reissue_all_caps();
625
626
627 friend class Locker;
628 friend class Migrator;
629 friend class MDBalancer;
630
631 // StrayManager needs to be able to remove_inode() from us
632 // when it is done purging
633 friend class StrayManager;
634
635 // File size recovery
636 private:
637 RecoveryQueue recovery_queue;
638 void identify_files_to_recover();
639 public:
640 void start_files_to_recover();
641 void do_file_recover();
642 void queue_file_recover(CInode *in);
643 void _queued_file_recover_cow(CInode *in, MutationRef& mut);
644
645 // subsystems
646 std::unique_ptr<Migrator> migrator;
647
648 public:
649 explicit MDCache(MDSRank *m, PurgeQueue &purge_queue_);
650 ~MDCache();
651
652 // debug
653 void log_stat();
654
655 // root inode
656 CInode *get_root() { return root; }
657 CInode *get_myin() { return myin; }
658
659 // cache
660 void set_cache_size(size_t max) { lru.lru_set_max(max); }
661 size_t get_cache_size() { return lru.lru_get_size(); }
662
663 // trimming
664 bool trim(int max=-1, int count=-1); // trim cache
665 bool trim_dentry(CDentry *dn, map<mds_rank_t, MCacheExpire*>& expiremap);
666 void trim_dirfrag(CDir *dir, CDir *con,
667 map<mds_rank_t, MCacheExpire*>& expiremap);
668 bool trim_inode(CDentry *dn, CInode *in, CDir *con,
669 map<mds_rank_t,class MCacheExpire*>& expiremap);
670 void send_expire_messages(map<mds_rank_t, MCacheExpire*>& expiremap);
671 void trim_non_auth(); // trim out trimmable non-auth items
672 bool trim_non_auth_subtree(CDir *directory);
673 void standby_trim_segment(LogSegment *ls);
674 void try_trim_non_auth_subtree(CDir *dir);
675 bool can_trim_non_auth_dirfrag(CDir *dir) {
676 return my_ambiguous_imports.count((dir)->dirfrag()) == 0 &&
677 uncommitted_slave_rename_olddir.count(dir->inode) == 0;
678 }
679
680 /**
681 * For all unreferenced inodes, dirs, dentries below an inode, compose
682 * expiry messages. This is used when giving up all replicas of entities
683 * for an MDS peer in the 'stopping' state, such that the peer can
684 * empty its cache and finish shutting down.
685 *
686 * We have to make sure we're only expiring un-referenced items to
687 * avoid interfering with ongoing stray-movement (we can't distinguish
688 * between the "moving my strays" and "waiting for my cache to empty"
689 * phases within 'stopping')
690 *
691 * @return false if we completed cleanly, true if caller should stop
692 * expiring because we hit something with refs.
693 */
694 bool expire_recursive(
695 CInode *in,
696 std::map<mds_rank_t, MCacheExpire*>& expiremap);
697
698 void trim_client_leases();
699 void check_memory_usage();
700
701 utime_t last_recall_state;
702
703 // shutdown
704 private:
705 set<inodeno_t> shutdown_exported_strays;
706 public:
void shutdown_start();
void shutdown_check();
bool shutdown_pass();
bool shutdown_export_strays();
bool shutdown(); // clear cache (i.e. at shutdown)
712
713 bool did_shutdown_log_cap;
714
715 // inode_map
716 bool have_inode(vinodeno_t vino) {
717 return inode_map.count(vino) ? true:false;
718 }
719 bool have_inode(inodeno_t ino, snapid_t snap=CEPH_NOSNAP) {
720 return have_inode(vinodeno_t(ino, snap));
721 }
722 CInode* get_inode(vinodeno_t vino) {
723 if (have_inode(vino))
724 return inode_map[vino];
725 return NULL;
726 }
727 CInode* get_inode(inodeno_t ino, snapid_t s=CEPH_NOSNAP) {
728 return get_inode(vinodeno_t(ino, s));
729 }
730
731 CDir* get_dirfrag(dirfrag_t df) {
732 CInode *in = get_inode(df.ino);
733 if (!in)
734 return NULL;
735 return in->get_dirfrag(df.frag);
736 }
737 CDir* get_dirfrag(inodeno_t ino, const string& dn) {
738 CInode *in = get_inode(ino);
739 if (!in)
740 return NULL;
741 frag_t fg = in->pick_dirfrag(dn);
742 return in->get_dirfrag(fg);
743 }
744 CDir* get_force_dirfrag(dirfrag_t df, bool replay) {
745 CInode *diri = get_inode(df.ino);
746 if (!diri)
747 return NULL;
748 CDir *dir = force_dir_fragment(diri, df.frag, replay);
749 if (!dir)
750 dir = diri->get_dirfrag(df.frag);
751 return dir;
752 }
753
754 MDSCacheObject *get_object(MDSCacheObjectInfo &info);
755
756
757
758 public:
759 void add_inode(CInode *in);
760
761 void remove_inode(CInode *in);
762 protected:
763 void touch_inode(CInode *in) {
764 if (in->get_parent_dn())
765 touch_dentry(in->get_projected_parent_dn());
766 }
767 public:
768 void touch_dentry(CDentry *dn) {
769 // touch ancestors
770 if (dn->get_dir()->get_inode()->get_projected_parent_dn())
771 touch_dentry(dn->get_dir()->get_inode()->get_projected_parent_dn());
772
773 // touch me
774 if (dn->is_auth())
775 lru.lru_touch(dn);
776 else
777 lru.lru_midtouch(dn);
778 }
779 void touch_dentry_bottom(CDentry *dn) {
780 lru.lru_bottouch(dn);
781 if (dn->get_projected_linkage()->is_primary() &&
782 dn->get_dir()->inode->is_stray()) {
783 CInode *in = dn->get_projected_linkage()->get_inode();
784 if (in->has_dirfrags()) {
785 list<CDir*> ls;
786 in->get_dirfrags(ls);
787 for (list<CDir*>::iterator p = ls.begin(); p != ls.end(); ++p)
788 (*p)->touch_dentries_bottom();
789 }
790 }
791 }
792 protected:
793
794 void inode_remove_replica(CInode *in, mds_rank_t rep, bool rejoin,
795 set<SimpleLock *>& gather_locks);
796 void dentry_remove_replica(CDentry *dn, mds_rank_t rep, set<SimpleLock *>& gather_locks);
797
798 void rename_file(CDentry *srcdn, CDentry *destdn);
799
800 public:
801 // truncate
802 void truncate_inode(CInode *in, LogSegment *ls);
803 void _truncate_inode(CInode *in, LogSegment *ls);
804 void truncate_inode_finish(CInode *in, LogSegment *ls);
805 void truncate_inode_logged(CInode *in, MutationRef& mut);
806
807 void add_recovered_truncate(CInode *in, LogSegment *ls);
808 void remove_recovered_truncate(CInode *in, LogSegment *ls);
809 void start_recovered_truncates();
810
811
812 public:
813 CDir *get_auth_container(CDir *in);
814 CDir *get_export_container(CDir *dir);
815 void find_nested_exports(CDir *dir, set<CDir*>& s);
816 void find_nested_exports_under(CDir *import, CDir *dir, set<CDir*>& s);
817
818
819 private:
820 bool opening_root, open;
821 list<MDSInternalContextBase*> waiting_for_open;
822
823 public:
824 void init_layouts();
825 void create_unlinked_system_inode(CInode *in, inodeno_t ino,
826 int mode) const;
827 CInode *create_system_inode(inodeno_t ino, int mode);
828 CInode *create_root_inode();
829
830 void create_empty_hierarchy(MDSGather *gather);
831 void create_mydir_hierarchy(MDSGather *gather);
832
833 bool is_open() { return open; }
834 void wait_for_open(MDSInternalContextBase *c) {
835 waiting_for_open.push_back(c);
836 }
837
838 void open_root_inode(MDSInternalContextBase *c);
839 void open_root();
840 void open_mydir_inode(MDSInternalContextBase *c);
841 void populate_mydir();
842
843 void _create_system_file(CDir *dir, const char *name, CInode *in, MDSInternalContextBase *fin);
844 void _create_system_file_finish(MutationRef& mut, CDentry *dn,
845 version_t dpv, MDSInternalContextBase *fin);
846
847 void open_foreign_mdsdir(inodeno_t ino, MDSInternalContextBase *c);
848 CDir *get_stray_dir(CInode *in);
849 CDentry *get_or_create_stray_dentry(CInode *in);
850
851 MDSInternalContextBase *_get_waiter(MDRequestRef& mdr, Message *req, MDSInternalContextBase *fin);
852
853 /**
854 * Find the given dentry (and whether it exists or not), its ancestors,
855 * and get them all into memory and usable on this MDS. This function
856 * makes a best-effort attempt to load everything; if it needs to
857 * go away and do something then it will put the request on a waitlist.
 * It prefers the mdr, then the req, then the fin; at least one of these
 * three parameters must be non-null.
862 *
863 * @param mdr The MDRequest associated with the path. Can be null.
864 * @param req The Message associated with the path. Can be null.
865 * @param fin The Context associated with the path. Can be null.
866 * @param path The path to traverse to.
867 * @param pdnvec Data return parameter -- on success, contains a
868 * vector of dentries. On failure, is either empty or contains the
869 * full trace of traversable dentries.
870 * @param pin Data return parameter -- if successful, points to the inode
871 * associated with filepath. If unsuccessful, is null.
872 * @param onfail Specifies different lookup failure behaviors. If set to
873 * MDS_TRAVERSE_DISCOVERXLOCK, path_traverse will succeed on null
874 * dentries (instead of returning -ENOENT). If set to
875 * MDS_TRAVERSE_FORWARD, it will forward the request to the auth
876 * MDS if that becomes appropriate (ie, if it doesn't know the contents
877 * of a directory). If set to MDS_TRAVERSE_DISCOVER, it
878 * will attempt to look up the path from a different MDS (and bring them
879 * into its cache as replicas).
880 *
881 * @returns 0 on success, 1 on "not done yet", 2 on "forwarding", -errno otherwise.
882 * If it returns 1, the requester associated with this call has been placed
883 * on the appropriate waitlist, and it should unwind itself and back out.
884 * If it returns 2 the request has been forwarded, and again the requester
885 * should unwind itself and back out.
886 */
887 int path_traverse(MDRequestRef& mdr, Message *req, MDSInternalContextBase *fin, const filepath& path,
888 vector<CDentry*> *pdnvec, CInode **pin, int onfail);
889
890 CInode *cache_traverse(const filepath& path);
891
892 void open_remote_dirfrag(CInode *diri, frag_t fg, MDSInternalContextBase *fin);
893 CInode *get_dentry_inode(CDentry *dn, MDRequestRef& mdr, bool projected=false);
894
895 bool parallel_fetch(map<inodeno_t,filepath>& pathmap, set<inodeno_t>& missing);
896 bool parallel_fetch_traverse_dir(inodeno_t ino, filepath& path,
897 set<CDir*>& fetch_queue, set<inodeno_t>& missing,
898 C_GatherBuilder &gather_bld);
899
900 void open_remote_dentry(CDentry *dn, bool projected, MDSInternalContextBase *fin,
901 bool want_xlocked=false);
902 void _open_remote_dentry_finish(CDentry *dn, inodeno_t ino, MDSInternalContextBase *fin,
903 bool want_xlocked, int r);
904
905 void make_trace(vector<CDentry*>& trace, CInode *in);
906
907 protected:
908 struct open_ino_info_t {
909 vector<inode_backpointer_t> ancestors;
910 set<mds_rank_t> checked;
911 mds_rank_t checking;
912 mds_rank_t auth_hint;
913 bool check_peers;
914 bool fetch_backtrace;
915 bool discover;
916 bool want_replica;
917 bool want_xlocked;
918 version_t tid;
919 int64_t pool;
920 int last_err;
921 list<MDSInternalContextBase*> waiters;
922 open_ino_info_t() : checking(MDS_RANK_NONE), auth_hint(MDS_RANK_NONE),
923 check_peers(true), fetch_backtrace(true), discover(false),
924 want_replica(false), want_xlocked(false), tid(0), pool(-1),
925 last_err(0) {}
926 };
927 ceph_tid_t open_ino_last_tid;
928 map<inodeno_t,open_ino_info_t> opening_inodes;
929
930 void _open_ino_backtrace_fetched(inodeno_t ino, bufferlist& bl, int err);
931 void _open_ino_parent_opened(inodeno_t ino, int ret);
932 void _open_ino_traverse_dir(inodeno_t ino, open_ino_info_t& info, int err);
933 void _open_ino_fetch_dir(inodeno_t ino, MMDSOpenIno *m, CDir *dir, bool parent);
934 int open_ino_traverse_dir(inodeno_t ino, MMDSOpenIno *m,
935 vector<inode_backpointer_t>& ancestors,
936 bool discover, bool want_xlocked, mds_rank_t *hint);
937 void open_ino_finish(inodeno_t ino, open_ino_info_t& info, int err);
938 void do_open_ino(inodeno_t ino, open_ino_info_t& info, int err);
939 void do_open_ino_peer(inodeno_t ino, open_ino_info_t& info);
940 void handle_open_ino(MMDSOpenIno *m, int err=0);
941 void handle_open_ino_reply(MMDSOpenInoReply *m);
942 friend class C_IO_MDC_OpenInoBacktraceFetched;
943 friend struct C_MDC_OpenInoTraverseDir;
944 friend struct C_MDC_OpenInoParentOpened;
945
946 public:
947 void kick_open_ino_peers(mds_rank_t who);
948 void open_ino(inodeno_t ino, int64_t pool, MDSInternalContextBase *fin,
949 bool want_replica=true, bool want_xlocked=false);
950
951 // -- find_ino_peer --
952 struct find_ino_peer_info_t {
953 inodeno_t ino;
954 ceph_tid_t tid;
955 MDSInternalContextBase *fin;
956 mds_rank_t hint;
957 mds_rank_t checking;
958 set<mds_rank_t> checked;
959
960 find_ino_peer_info_t() : tid(0), fin(NULL), hint(MDS_RANK_NONE), checking(MDS_RANK_NONE) {}
961 };
962
963 map<ceph_tid_t, find_ino_peer_info_t> find_ino_peer;
964 ceph_tid_t find_ino_peer_last_tid;
965
966 void find_ino_peers(inodeno_t ino, MDSInternalContextBase *c, mds_rank_t hint=MDS_RANK_NONE);
967 void _do_find_ino_peer(find_ino_peer_info_t& fip);
968 void handle_find_ino(MMDSFindIno *m);
969 void handle_find_ino_reply(MMDSFindInoReply *m);
970 void kick_find_ino_peers(mds_rank_t who);
971
972 // -- snaprealms --
973 public:
974 void snaprealm_create(MDRequestRef& mdr, CInode *in);
975 void _snaprealm_create_finish(MDRequestRef& mdr, MutationRef& mut, CInode *in);
976
977 // -- stray --
978 public:
979 void eval_remote(CDentry *dn);
980 void fetch_backtrace(inodeno_t ino, int64_t pool, bufferlist& bl, Context *fin);
981 uint64_t get_num_strays() const { return stray_manager.get_num_strays(); }
982
983 protected:
984 void scan_stray_dir(dirfrag_t next=dirfrag_t());
985 StrayManager stray_manager;
986 friend struct C_MDC_RetryScanStray;
987 friend class C_IO_MDC_FetchedBacktrace;
988
989 // == messages ==
990 public:
991 void dispatch(Message *m);
992
993 protected:
994 // -- replicas --
995 void handle_discover(MDiscover *dis);
996 void handle_discover_reply(MDiscoverReply *m);
997 friend class C_MDC_Join;
998
999 public:
1000 void replicate_dir(CDir *dir, mds_rank_t to, bufferlist& bl) {
1001 dirfrag_t df = dir->dirfrag();
1002 ::encode(df, bl);
1003 dir->encode_replica(to, bl);
1004 }
1005 void replicate_dentry(CDentry *dn, mds_rank_t to, bufferlist& bl) {
1006 ::encode(dn->name, bl);
1007 ::encode(dn->last, bl);
1008 dn->encode_replica(to, bl);
1009 }
1010 void replicate_inode(CInode *in, mds_rank_t to, bufferlist& bl,
1011 uint64_t features) {
1012 ::encode(in->inode.ino, bl); // bleh, minor assymetry here
1013 ::encode(in->last, bl);
1014 in->encode_replica(to, bl, features);
1015 }
1016
1017 CDir* add_replica_dir(bufferlist::iterator& p, CInode *diri, mds_rank_t from, list<MDSInternalContextBase*>& finished);
1018 CDir* forge_replica_dir(CInode *diri, frag_t fg, mds_rank_t from);
1019 CDentry *add_replica_dentry(bufferlist::iterator& p, CDir *dir, list<MDSInternalContextBase*>& finished);
1020 CInode *add_replica_inode(bufferlist::iterator& p, CDentry *dn, list<MDSInternalContextBase*>& finished);
1021
1022 void replicate_stray(CDentry *straydn, mds_rank_t who, bufferlist& bl);
1023 CDentry *add_replica_stray(bufferlist &bl, mds_rank_t from);
1024
1025 // -- namespace --
1026 public:
1027 void send_dentry_link(CDentry *dn, MDRequestRef& mdr);
1028 void send_dentry_unlink(CDentry *dn, CDentry *straydn, MDRequestRef& mdr);
1029 protected:
1030 void handle_dentry_link(MDentryLink *m);
1031 void handle_dentry_unlink(MDentryUnlink *m);
1032
1033
1034 // -- fragmenting --
1035 private:
1036 struct ufragment {
1037 int bits;
1038 bool committed;
1039 LogSegment *ls;
1040 list<MDSInternalContextBase*> waiters;
1041 list<frag_t> old_frags;
1042 bufferlist rollback;
1043 ufragment() : bits(0), committed(false), ls(NULL) {}
1044 };
1045 map<dirfrag_t, ufragment> uncommitted_fragments;
1046
1047 struct fragment_info_t {
1048 int bits;
1049 list<CDir*> dirs;
1050 list<CDir*> resultfrags;
1051 MDRequestRef mdr;
1052 // for deadlock detection
1053 bool all_frozen;
1054 utime_t last_cum_auth_pins_change;
1055 int last_cum_auth_pins;
1056 int num_remote_waiters; // number of remote authpin waiters
1057 fragment_info_t() : bits(0), all_frozen(false), last_cum_auth_pins(0), num_remote_waiters(0) {}
1058 bool is_fragmenting() { return !resultfrags.empty(); }
1059 };
1060 map<dirfrag_t,fragment_info_t> fragments;
1061
1062 void adjust_dir_fragments(CInode *diri, frag_t basefrag, int bits,
1063 list<CDir*>& frags, list<MDSInternalContextBase*>& waiters, bool replay);
1064 void adjust_dir_fragments(CInode *diri,
1065 list<CDir*>& srcfrags,
1066 frag_t basefrag, int bits,
1067 list<CDir*>& resultfrags,
1068 list<MDSInternalContextBase*>& waiters,
1069 bool replay);
1070 CDir *force_dir_fragment(CInode *diri, frag_t fg, bool replay=true);
1071 void get_force_dirfrag_bound_set(vector<dirfrag_t>& dfs, set<CDir*>& bounds);
1072
1073 bool can_fragment(CInode *diri, list<CDir*>& dirs);
1074 void fragment_freeze_dirs(list<CDir*>& dirs);
1075 void fragment_mark_and_complete(MDRequestRef& mdr);
1076 void fragment_frozen(MDRequestRef& mdr, int r);
1077 void fragment_unmark_unfreeze_dirs(list<CDir*>& dirs);
1078 void dispatch_fragment_dir(MDRequestRef& mdr);
1079 void _fragment_logged(MDRequestRef& mdr);
1080 void _fragment_stored(MDRequestRef& mdr);
1081 void _fragment_committed(dirfrag_t f, list<CDir*>& resultfrags);
1082 void _fragment_finish(dirfrag_t f, list<CDir*>& resultfrags);
1083
1084 friend class EFragment;
1085 friend class C_MDC_FragmentFrozen;
1086 friend class C_MDC_FragmentMarking;
1087 friend class C_MDC_FragmentPrep;
1088 friend class C_MDC_FragmentStore;
1089 friend class C_MDC_FragmentCommit;
1090 friend class C_IO_MDC_FragmentFinish;
1091
1092 void handle_fragment_notify(MMDSFragmentNotify *m);
1093
1094 void add_uncommitted_fragment(dirfrag_t basedirfrag, int bits, list<frag_t>& old_frag,
1095 LogSegment *ls, bufferlist *rollback=NULL);
1096 void finish_uncommitted_fragment(dirfrag_t basedirfrag, int op);
1097 void rollback_uncommitted_fragment(dirfrag_t basedirfrag, list<frag_t>& old_frags);
1098 public:
1099 void wait_for_uncommitted_fragment(dirfrag_t dirfrag, MDSInternalContextBase *c) {
1100 assert(uncommitted_fragments.count(dirfrag));
1101 uncommitted_fragments[dirfrag].waiters.push_back(c);
1102 }
1103 void split_dir(CDir *dir, int byn);
1104 void merge_dir(CInode *diri, frag_t fg);
1105 void rollback_uncommitted_fragments();
1106
1107 void find_stale_fragment_freeze();
1108 void fragment_freeze_inc_num_waiters(CDir *dir);
1109 bool fragment_are_all_frozen(CDir *dir);
1110 int get_num_fragmenting_dirs() { return fragments.size(); }
1111
1112 // -- updates --
1113 //int send_inode_updates(CInode *in);
1114 //void handle_inode_update(MInodeUpdate *m);
1115
1116 int send_dir_updates(CDir *in, bool bcast=false);
1117 void handle_dir_update(MDirUpdate *m);
1118
1119 // -- cache expiration --
1120 void handle_cache_expire(MCacheExpire *m);
1121 void process_delayed_expire(CDir *dir);
1122 void discard_delayed_expire(CDir *dir);
1123
1124 protected:
1125 void dump_cache(const char *fn, Formatter *f,
1126 const std::string& dump_root = "",
1127 int depth = -1);
1128 public:
1129 void dump_cache() {dump_cache(NULL, NULL);}
1130 void dump_cache(const std::string &filename);
1131 void dump_cache(Formatter *f);
1132 void dump_cache(const std::string& dump_root, int depth, Formatter *f);
1133
1134 void dump_resolve_status(Formatter *f) const;
1135 void dump_rejoin_status(Formatter *f) const;
1136
1137 // == crap fns ==
1138 public:
// Debug output helpers; `dbl` defaults to 10.
// NOTE(review): `dbl` is presumably a debug level.
void show_cache();
void show_subtrees(int dbl = 10);
1141
1142 CInode *hack_pick_random_inode() {
1143 assert(!inode_map.empty());
1144 int n = rand() % inode_map.size();
1145 ceph::unordered_map<vinodeno_t,CInode*>::iterator p = inode_map.begin();
1146 while (n--) ++p;
1147 return p->second;
1148 }
1149
1150 protected:
1151 void flush_dentry_work(MDRequestRef& mdr);
1152 /**
1153 * Resolve path to a dentry and pass it onto the ScrubStack.
1154 *
1155 * TODO: return enough information to the original mdr formatter
1156 * and completion that they can subsequeuntly check the progress of
1157 * this scrub (we won't block them on a whole scrub as it can take a very
1158 * long time)
1159 */
1160 void enqueue_scrub_work(MDRequestRef& mdr);
1161 void repair_inode_stats_work(MDRequestRef& mdr);
1162 void repair_dirfrag_stats_work(MDRequestRef& mdr);
1163 friend class C_MDC_RepairDirfragStats;
1164 public:
1165 void flush_dentry(const string& path, Context *fin);
1166 /**
1167 * Create and start an OP_ENQUEUE_SCRUB
1168 */
1169 void enqueue_scrub(const string& path, const std::string &tag,
1170 bool force, bool recursive, bool repair,
1171 Formatter *f, Context *fin);
1172 void repair_inode_stats(CInode *diri);
1173 void repair_dirfrag_stats(CDir *dir);
1174
1175 public:
1176 /* Because exports may fail, this set lets us keep track of inodes that need exporting. */
1177 std::set<CInode *> export_pin_queue;
1178 };
1179
1180 class C_MDS_RetryRequest : public MDSInternalContext {
1181 MDCache *cache;
1182 MDRequestRef mdr;
1183 public:
1184 C_MDS_RetryRequest(MDCache *c, MDRequestRef& r);
1185 void finish(int r) override;
1186 };
1187
1188 #endif