]> git.proxmox.com Git - ceph.git/blame - ceph/src/mds/CDir.h
import 15.2.0 Octopus source
[ceph.git] / ceph / src / mds / CDir.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15
7c673cae
FG
16#ifndef CEPH_CDIR_H
17#define CEPH_CDIR_H
18
7c673cae 19#include <iosfwd>
7c673cae 20#include <list>
7c673cae 21#include <map>
94b18763 22#include <set>
7c673cae 23#include <string>
11fdf7f2 24#include <string_view>
7c673cae 25
94b18763
FG
26#include "common/bloom_filter.hpp"
27#include "common/config.h"
28#include "include/buffer_fwd.h"
29#include "include/counter.h"
30#include "include/types.h"
7c673cae
FG
31
32#include "CInode.h"
94b18763 33#include "MDSCacheObject.h"
11fdf7f2
TL
34#include "MDSContext.h"
35#include "cephfs_features.h"
36#include "SessionMap.h"
37#include "messages/MClientReply.h"
7c673cae
FG
38
39class CDentry;
40class MDCache;
41
42struct ObjectOperation;
43
44ostream& operator<<(ostream& out, const class CDir& dir);
7c673cae 45
9f95a23c 46class CDir : public MDSCacheObject, public Counter<CDir> {
7c673cae 47public:
181888fb 48 MEMPOOL_CLASS_HELPERS();
9f95a23c
TL
49
50 typedef mempool::mds_co::map<dentry_key_t, CDentry*> dentry_key_map;
51 typedef mempool::mds_co::set<dentry_key_t> dentry_key_set;
52
53 // -- freezing --
54 struct freeze_tree_state_t {
55 CDir *dir; // freezing/frozen tree root
56 int auth_pins = 0;
57 bool frozen = false;
58 freeze_tree_state_t(CDir *d) : dir(d) {}
59 };
60
61 class scrub_info_t {
62 public:
63 MEMPOOL_CLASS_HELPERS();
64 struct scrub_stamps {
65 version_t version;
66 utime_t time;
67 scrub_stamps() : version(0) {}
68 void operator=(const scrub_stamps &o) {
69 version = o.version;
70 time = o.time;
71 }
72 };
73
74 scrub_info_t() :
75 directory_scrubbing(false),
76 need_scrub_local(false),
77 last_scrub_dirty(false),
78 pending_scrub_error(false) {}
79
80 /// inodes we contain with dirty scrub stamps
81 dentry_key_map dirty_scrub_stamps; // TODO: make use of this!
82
83 scrub_stamps recursive_start; // when we last started a recursive scrub
84 scrub_stamps last_recursive; // when we last finished a recursive scrub
85 scrub_stamps last_local; // when we last did a local scrub
86
87 bool directory_scrubbing; /// safety check
88 bool need_scrub_local;
89 bool last_scrub_dirty; /// is scrub info dirty or is it flushed to fnode?
90 bool pending_scrub_error;
91
92 /// these are lists of children in each stage of scrubbing
93 dentry_key_set directories_to_scrub;
94 dentry_key_set directories_scrubbing;
95 dentry_key_set directories_scrubbed;
96 dentry_key_set others_to_scrub;
97 dentry_key_set others_scrubbing;
98 dentry_key_set others_scrubbed;
99
100 ScrubHeaderRefConst header;
101 };
102
7c673cae
FG
103 // -- pins --
// Pin ids declared by CDir (ids 6 and 8 are not used in this list).
// constexpr static members are implicitly inline in C++17, so they can be
// bound to references (ODR-used) without a separate out-of-class definition.
static constexpr int PIN_DNWAITER    = 1;
static constexpr int PIN_INOWAITER   = 2;
static constexpr int PIN_CHILD       = 3;
static constexpr int PIN_FROZEN      = 4;
static constexpr int PIN_SUBTREE     = 5;
static constexpr int PIN_IMPORTING   = 7;
static constexpr int PIN_IMPORTBOUND = 9;
static constexpr int PIN_EXPORTBOUND = 10;
static constexpr int PIN_STICKY      = 11;
static constexpr int PIN_SUBTREETEMP = 12; // used by MDCache::trim_non_auth()
7c673cae
FG
114
115 // -- state --
9f95a23c
TL
// One bit per state flag. constexpr static members are implicitly inline in
// C++17, so they are safe to ODR-use without an out-of-class definition.
static constexpr unsigned STATE_COMPLETE     = (1u<< 0);  // the complete contents are in cache
static constexpr unsigned STATE_FROZENTREE   = (1u<< 1);  // root of tree (bounded by exports)
static constexpr unsigned STATE_FREEZINGTREE = (1u<< 2);  // in process of freezing
static constexpr unsigned STATE_FROZENDIR    = (1u<< 3);
static constexpr unsigned STATE_FREEZINGDIR  = (1u<< 4);
static constexpr unsigned STATE_COMMITTING   = (1u<< 5);  // mid-commit
static constexpr unsigned STATE_FETCHING     = (1u<< 6);  // currently fetching
static constexpr unsigned STATE_CREATING     = (1u<< 7);
static constexpr unsigned STATE_IMPORTBOUND  = (1u<< 8);
static constexpr unsigned STATE_EXPORTBOUND  = (1u<< 9);
static constexpr unsigned STATE_EXPORTING    = (1u<<10);
static constexpr unsigned STATE_IMPORTING    = (1u<<11);
static constexpr unsigned STATE_FRAGMENTING  = (1u<<12);
static constexpr unsigned STATE_STICKY       = (1u<<13);  // sticky pin due to inode stickydirs
static constexpr unsigned STATE_DNPINNEDFRAG = (1u<<14);  // dir is refragmenting
static constexpr unsigned STATE_ASSIMRSTAT   = (1u<<15);  // assimilating inode->frag rstats
static constexpr unsigned STATE_DIRTYDFT     = (1u<<16);  // dirty dirfragtree
static constexpr unsigned STATE_BADFRAG      = (1u<<17);  // bad dirfrag
static constexpr unsigned STATE_TRACKEDBYOFT = (1u<<18);  // tracked by open file table
static constexpr unsigned STATE_AUXSUBTREE   = (1u<<19);  // no subtree merge

// common states
static constexpr unsigned STATE_CLEAN = 0;
7c673cae
FG
139
140 // these state bits are preserved by an import/export
141 // ...except if the directory is hashed, in which case none of them are!
142 static const unsigned MASK_STATE_EXPORTED =
143 (STATE_COMPLETE|STATE_DIRTY|STATE_DIRTYDFT|STATE_BADFRAG);
144 static const unsigned MASK_STATE_IMPORT_KEPT =
145 (
11fdf7f2
TL
146 STATE_IMPORTING |
147 STATE_IMPORTBOUND |
148 STATE_EXPORTBOUND |
149 STATE_FROZENTREE |
150 STATE_STICKY |
151 STATE_TRACKEDBYOFT);
7c673cae 152 static const unsigned MASK_STATE_EXPORT_KEPT =
11fdf7f2
TL
153 (STATE_EXPORTING |
154 STATE_IMPORTBOUND |
155 STATE_EXPORTBOUND |
156 STATE_FROZENTREE |
157 STATE_FROZENDIR |
158 STATE_STICKY |
159 STATE_TRACKEDBYOFT);
7c673cae 160 static const unsigned MASK_STATE_FRAGMENT_KEPT =
11fdf7f2 161 (STATE_DIRTY |
7c673cae
FG
162 STATE_EXPORTBOUND |
163 STATE_IMPORTBOUND |
164 STATE_AUXSUBTREE |
165 STATE_REJOINUNDEF);
166
167 // -- rep spec --
// Values stored in dir_rep; is_rep() treats anything other than REP_NONE as
// "replicated". constexpr (implicitly inline in C++17) so ODR-use is safe.
static constexpr int REP_NONE = 0;
static constexpr int REP_ALL  = 1;
static constexpr int REP_LIST = 2;

static constexpr unsigned EXPORT_NONCE = 1;
173
7c673cae
FG
174 // -- wait masks --
// Wait-mask bits. The shifts are carried out in uint64_t (not int) so that a
// future bit at position 31 or above cannot overflow; constexpr makes the
// members implicitly inline (C++17) and therefore safe to ODR-use.
static constexpr uint64_t WAIT_DENTRY   = uint64_t{1} << 0;  // wait for item to be in cache
static constexpr uint64_t WAIT_COMPLETE = uint64_t{1} << 1;  // wait for complete dir contents
static constexpr uint64_t WAIT_FROZEN   = uint64_t{1} << 2;  // auth pins removed
static constexpr uint64_t WAIT_CREATED  = uint64_t{1} << 3;  // new dirfrag is logged

static constexpr int WAIT_DNLOCK_OFFSET = 4;

// All 64 bits set.
static constexpr uint64_t WAIT_ANY_MASK = ~uint64_t{0};
7c673cae
FG
183 static const uint64_t WAIT_ATSUBTREEROOT = (WAIT_SINGLEAUTH);
184
11fdf7f2
TL
185 // -- dump flags --
// Bit flags for dump(); each bit names one section of the output.
// constexpr (implicitly inline in C++17) so the flags are safe to ODR-use.
static constexpr int DUMP_PATH             = (1 << 0);
static constexpr int DUMP_DIRFRAG          = (1 << 1);
static constexpr int DUMP_SNAPID_FIRST     = (1 << 2);
static constexpr int DUMP_VERSIONS         = (1 << 3);
static constexpr int DUMP_REP              = (1 << 4);
static constexpr int DUMP_DIR_AUTH         = (1 << 5);
static constexpr int DUMP_STATES           = (1 << 6);
static constexpr int DUMP_MDS_CACHE_OBJECT = (1 << 7);
static constexpr int DUMP_ITEMS            = (1 << 8);
static constexpr int DUMP_ALL              = (-1);
// Everything except DUMP_ITEMS.
static constexpr int DUMP_DEFAULT          = DUMP_ALL & (~DUMP_ITEMS);
7c673cae 197
9f95a23c 198 CDir(CInode *in, frag_t fg, MDCache *mdcache, bool auth);
7c673cae 199
9f95a23c
TL
200 std::string_view pin_name(int p) const override {
201 switch (p) {
202 case PIN_DNWAITER: return "dnwaiter";
203 case PIN_INOWAITER: return "inowaiter";
204 case PIN_CHILD: return "child";
205 case PIN_FROZEN: return "frozen";
206 case PIN_SUBTREE: return "subtree";
207 case PIN_IMPORTING: return "importing";
208 case PIN_IMPORTBOUND: return "importbound";
209 case PIN_EXPORTBOUND: return "exportbound";
210 case PIN_STICKY: return "sticky";
211 case PIN_SUBTREETEMP: return "subtreetemp";
212 default: return generic_pin_name(p);
213 }
214 }
7c673cae
FG
215
216 bool is_lt(const MDSCacheObject *r) const override {
217 return dirfrag() < (static_cast<const CDir*>(r))->dirfrag();
218 }
219
7c673cae
FG
220 void resync_accounted_fragstat();
221 void resync_accounted_rstat();
222 void assimilate_dirty_rstat_inodes();
223 void assimilate_dirty_rstat_inodes_finish(MutationRef& mut, EMetaBlob *blob);
224
1adf2230
AA
225 void mark_exporting() {
226 state_set(CDir::STATE_EXPORTING);
227 inode->num_exporting_dirs++;
228 }
229 void clear_exporting() {
230 state_clear(CDir::STATE_EXPORTING);
231 inode->num_exporting_dirs--;
232 }
233
7c673cae
FG
234 version_t get_version() const { return fnode.version; }
235 void set_version(version_t v) {
11fdf7f2 236 ceph_assert(projected_fnode.empty());
7c673cae
FG
237 projected_version = fnode.version = v;
238 }
239 version_t get_projected_version() const { return projected_version; }
240
241 const fnode_t *get_projected_fnode() const {
242 if (projected_fnode.empty())
243 return &fnode;
244 else
94b18763 245 return &projected_fnode.back();
7c673cae
FG
246 }
247
248 fnode_t *get_projected_fnode() {
249 if (projected_fnode.empty())
250 return &fnode;
251 else
94b18763 252 return &projected_fnode.back();
7c673cae
FG
253 }
254 fnode_t *project_fnode();
255
256 void pop_and_dirty_projected_fnode(LogSegment *ls);
257 bool is_projected() const { return !projected_fnode.empty(); }
258 version_t pre_dirty(version_t min=0);
259 void _mark_dirty(LogSegment *ls);
260 void _set_dirty_flag() {
261 if (!state_test(STATE_DIRTY)) {
262 state_set(STATE_DIRTY);
263 get(PIN_DIRTY);
264 }
265 }
266 void mark_dirty(version_t pv, LogSegment *ls);
267 void mark_clean();
268
269 bool is_new() { return item_new.is_on_list(); }
270 void mark_new(LogSegment *ls);
271
272 bool is_bad() { return state_test(STATE_BADFRAG); }
7c673cae 273
7c673cae
FG
274 /**
275 * Call to start this CDir on a new scrub.
276 * @pre It is not currently scrubbing
277 * @pre The CDir is marked complete.
278 * @post It has set up its internal scrubbing state.
279 */
280 void scrub_initialize(const ScrubHeaderRefConst& header);
281 /**
282 * Get the next dentry to scrub. Gives you a CDentry* and its meaning. This
283 * function will give you all directory-representing dentries before any
284 * others.
285 * 0: success, you should scrub this CDentry right now
286 * EAGAIN: is currently fetching the next CDentry into memory for you.
287 * It will activate your callback when done; try again when it does!
288 * ENOENT: there are no remaining dentries to scrub
289 * <0: There was an unexpected error
290 *
11fdf7f2 291 * @param cb An MDSContext which will be activated only if
7c673cae
FG
292 * we return EAGAIN via rcode, or else ignored
293 * @param dnout CDentry * which you should next scrub, or NULL
294 * @returns a value as described above
295 */
11fdf7f2 296 int scrub_dentry_next(MDSContext *cb, CDentry **dnout);
7c673cae
FG
297 /**
298 * Get the currently scrubbing dentries.
299 * @returns a vector holding every CDentry * which has been returned
300 * from scrub_dentry_next() but not sent back via scrub_dentry_finished().
301 */
9f95a23c 302 std::vector<CDentry*> scrub_dentries_scrubbing();
7c673cae
FG
303 /**
304 * Report to the CDir that a CDentry has been scrubbed. Call this
305 * for every CDentry returned from scrub_dentry_next().
306 * @param dn The CDentry which has been scrubbed.
307 */
308 void scrub_dentry_finished(CDentry *dn);
309 /**
310 * Call this once all CDentries have been scrubbed, according to
311 * scrub_dentry_next's listing. It finalizes the scrub statistics.
312 */
313 void scrub_finished();
314 /**
315 * Tell the CDir to do a local scrub of itself.
316 * @pre The CDir is_complete().
317 * @returns true if the rstats and directory contents match, false otherwise.
318 */
319 bool scrub_local();
7c673cae
FG
320
321 const scrub_info_t *scrub_info() const {
322 if (!scrub_infop) {
323 scrub_info_create();
324 }
325 return scrub_infop.get();
326 }
327
7c673cae
FG
328 // -- accessors --
329 inodeno_t ino() const { return inode->ino(); } // deprecate me?
330 frag_t get_frag() const { return frag; }
331 dirfrag_t dirfrag() const { return dirfrag_t(inode->ino(), frag); }
332
333 CInode *get_inode() { return inode; }
334 const CInode *get_inode() const { return inode; }
335 CDir *get_parent_dir() { return inode->get_parent_dir(); }
336
94b18763
FG
337 dentry_key_map::iterator begin() { return items.begin(); }
338 dentry_key_map::iterator end() { return items.end(); }
339 dentry_key_map::iterator lower_bound(dentry_key_t key) { return items.lower_bound(key); }
7c673cae
FG
340
341 unsigned get_num_head_items() const { return num_head_items; }
342 unsigned get_num_head_null() const { return num_head_null; }
343 unsigned get_num_snap_items() const { return num_snap_items; }
344 unsigned get_num_snap_null() const { return num_snap_null; }
345 unsigned get_num_any() const { return num_head_items + num_head_null + num_snap_items + num_snap_null; }
346
347 bool check_rstats(bool scrub=false);
348
349 void inc_num_dirty() { num_dirty++; }
350 void dec_num_dirty() {
11fdf7f2 351 ceph_assert(num_dirty > 0);
7c673cae
FG
352 num_dirty--;
353 }
354 int get_num_dirty() const {
355 return num_dirty;
356 }
357
11fdf7f2
TL
358 void adjust_num_inodes_with_caps(int d);
359
7c673cae
FG
360 int64_t get_frag_size() const {
361 return get_projected_fnode()->fragstat.size();
362 }
363
364 // -- dentries and inodes --
11fdf7f2
TL
365 CDentry* lookup_exact_snap(std::string_view dname, snapid_t last);
366 CDentry* lookup(std::string_view n, snapid_t snap=CEPH_NOSNAP);
7c673cae 367
11fdf7f2 368 CDentry* add_null_dentry(std::string_view dname,
7c673cae 369 snapid_t first=2, snapid_t last=CEPH_NOSNAP);
11fdf7f2 370 CDentry* add_primary_dentry(std::string_view dname, CInode *in,
7c673cae 371 snapid_t first=2, snapid_t last=CEPH_NOSNAP);
11fdf7f2 372 CDentry* add_remote_dentry(std::string_view dname, inodeno_t ino, unsigned char d_type,
7c673cae
FG
373 snapid_t first=2, snapid_t last=CEPH_NOSNAP);
374 void remove_dentry( CDentry *dn ); // delete dentry
375 void link_remote_inode( CDentry *dn, inodeno_t ino, unsigned char d_type);
376 void link_remote_inode( CDentry *dn, CInode *in );
377 void link_primary_inode( CDentry *dn, CInode *in );
31f18b77 378 void unlink_inode(CDentry *dn, bool adjust_lru=true);
7c673cae
FG
379 void try_remove_unlinked_dn(CDentry *dn);
380
381 void add_to_bloom(CDentry *dn);
11fdf7f2 382 bool is_in_bloom(std::string_view name);
7c673cae
FG
383 bool has_bloom() { return (bloom ? true : false); }
384 void remove_bloom() {
385 bloom.reset();
386 }
9f95a23c 387
7c673cae
FG
388 void try_remove_dentries_for_stray();
389 bool try_trim_snap_dentry(CDentry *dn, const std::set<snapid_t>& snaps);
390
9f95a23c
TL
391 void split(int bits, std::vector<CDir*>* subs, MDSContext::vec& waiters, bool replay);
392 void merge(const std::vector<CDir*>& subs, MDSContext::vec& waiters, bool replay);
7c673cae
FG
393
394 bool should_split() const {
11fdf7f2 395 return (int)get_frag_size() > g_conf()->mds_bal_split_size;
7c673cae
FG
396 }
397 bool should_split_fast() const;
398 bool should_merge() const {
11fdf7f2 399 return (int)get_frag_size() < g_conf()->mds_bal_merge_size;
7c673cae
FG
400 }
401
7c673cae
FG
402 mds_authority_t authority() const override;
403 mds_authority_t get_dir_auth() const { return dir_auth; }
11fdf7f2 404 void set_dir_auth(const mds_authority_t &a);
7c673cae
FG
405 void set_dir_auth(mds_rank_t a) { set_dir_auth(mds_authority_t(a, CDIR_AUTH_UNKNOWN)); }
406 bool is_ambiguous_dir_auth() const {
407 return dir_auth.second != CDIR_AUTH_UNKNOWN;
408 }
409 bool is_full_dir_auth() const {
410 return is_auth() && !is_ambiguous_dir_auth();
411 }
412 bool is_full_dir_nonauth() const {
413 return !is_auth() && !is_ambiguous_dir_auth();
414 }
415
416 bool is_subtree_root() const {
417 return dir_auth != CDIR_AUTH_DEFAULT;
418 }
419
420 bool contains(CDir *x); // true if we are x or an ancestor of x
421
7c673cae
FG
422 // for giving to clients
423 void get_dist_spec(std::set<mds_rank_t>& ls, mds_rank_t auth) {
424 if (is_rep()) {
425 list_replicas(ls);
426 if (!ls.empty())
427 ls.insert(auth);
428 }
429 }
7c673cae 430
11fdf7f2 431 static void encode_dirstat(bufferlist& bl, const session_info_t& info, const DirStat& ds);
7c673cae
FG
432
// Append the base fields of this dirfrag to bl, in this order:
// first, fnode, dir_rep, dir_rep_by.
// _decode_base() reads the same fields in the same order, so the two must be
// changed together.
// NOTE(review): ENCODE_START(1, 1, bl) presumably records struct version 1 /
// compat version 1 -- confirm against the encoding macros.
433 void _encode_base(bufferlist& bl) {
9f95a23c 434 ENCODE_START(1, 1, bl);
11fdf7f2
TL
435 encode(first, bl);
436 encode(fnode, bl);
437 encode(dir_rep, bl);
438 encode(dir_rep_by, bl);
9f95a23c 439 ENCODE_FINISH(bl);
7c673cae 440 }
// Read the base fields of this dirfrag from p, in this order:
// first, fnode, dir_rep, dir_rep_by.
// The order mirrors _encode_base(); keep the two in sync.
// NOTE(review): DECODE_START(1, p) presumably states the newest struct
// version this code understands -- confirm against the encoding macros.
11fdf7f2 441 void _decode_base(bufferlist::const_iterator& p) {
9f95a23c 442 DECODE_START(1, p);
11fdf7f2
TL
443 decode(first, p);
444 decode(fnode, p);
445 decode(dir_rep, p);
446 decode(dir_rep_by, p);
9f95a23c 447 DECODE_FINISH(p);
7c673cae 448 }
7c673cae
FG
449
450 // -- state --
451 bool is_complete() { return state & STATE_COMPLETE; }
452 bool is_exporting() { return state & STATE_EXPORTING; }
453 bool is_importing() { return state & STATE_IMPORTING; }
454 bool is_dirty_dft() { return state & STATE_DIRTYDFT; }
455
456 int get_dir_rep() const { return dir_rep; }
457 bool is_rep() const {
458 if (dir_rep == REP_NONE) return false;
459 return true;
460 }
461
462 // -- fetch --
463 object_t get_ondisk_object() {
464 return file_object_t(ino(), frag);
465 }
11fdf7f2
TL
466 void fetch(MDSContext *c, bool ignore_authpinnability=false);
467 void fetch(MDSContext *c, std::string_view want_dn, bool ignore_authpinnability=false);
468 void fetch(MDSContext *c, const std::set<dentry_key_t>& keys);
7c673cae 469
7c673cae
FG
470#if 0 // unused?
471 void wait_for_commit(Context *c, version_t v=0);
472#endif
473 void commit_to(version_t want);
11fdf7f2 474 void commit(version_t want, MDSContext *c,
7c673cae
FG
475 bool ignore_authpinnability=false, int op_prio=-1);
476
477 // -- dirtyness --
478 version_t get_committing_version() const { return committing_version; }
479 version_t get_committed_version() const { return committed_version; }
480 void set_committed_version(version_t v) { committed_version = v; }
481
482 void mark_complete();
483
7c673cae
FG
484 // -- reference counting --
485 void first_get() override;
486 void last_put() override;
487
11fdf7f2 488 bool is_waiting_for_dentry(std::string_view dname, snapid_t snap) {
7c673cae
FG
489 return waiting_on_dentry.count(string_snap_t(dname, snap));
490 }
11fdf7f2
TL
491 void add_dentry_waiter(std::string_view dentry, snapid_t snap, MDSContext *c);
492 void take_dentry_waiting(std::string_view dentry, snapid_t first, snapid_t last, MDSContext::vec& ls);
493 void take_sub_waiting(MDSContext::vec& ls); // dentry or ino
7c673cae 494
11fdf7f2
TL
495 void add_waiter(uint64_t mask, MDSContext *c) override;
496 void take_waiting(uint64_t mask, MDSContext::vec& ls) override; // may include dentry waiters
7c673cae 497 void finish_waiting(uint64_t mask, int result = 0); // ditto
7c673cae
FG
498
499 // -- import/export --
500 void encode_export(bufferlist& bl);
11fdf7f2 501 void finish_export();
7c673cae
FG
502 void abort_export() {
503 put(PIN_TEMPEXPORTING);
504 }
11fdf7f2
TL
505 void decode_import(bufferlist::const_iterator& blp, LogSegment *ls);
506 void abort_import();
7c673cae
FG
507
508 // -- auth pins --
91327a77 509 bool can_auth_pin(int *err_ret=nullptr) const override;
7c673cae 510 int get_auth_pins() const { return auth_pins; }
7c673cae
FG
511 int get_dir_auth_pins() const { return dir_auth_pins; }
512 void auth_pin(void *who) override;
513 void auth_unpin(void *who) override;
514
11fdf7f2 515 void adjust_nested_auth_pins(int dirinc, void *by);
7c673cae
FG
516 void verify_fragstat();
517
11fdf7f2
TL
518 void _walk_tree(std::function<bool(CDir*)> cb);
519
7c673cae
FG
520 bool freeze_tree();
521 void _freeze_tree();
522 void unfreeze_tree();
11fdf7f2 523 void adjust_freeze_after_rename(CDir *dir);
7c673cae
FG
524
525 bool freeze_dir();
526 void _freeze_dir();
527 void unfreeze_dir();
528
529 void maybe_finish_freeze();
530
11fdf7f2
TL
531 pair<bool,bool> is_freezing_or_frozen_tree() const {
532 if (freeze_tree_state) {
533 if (freeze_tree_state->frozen)
534 return make_pair(false, true);
535 return make_pair(true, false);
536 }
537 return make_pair(false, false);
538 }
91327a77
AA
539
540 bool is_freezing() const override { return is_freezing_dir() || is_freezing_tree(); }
541 bool is_freezing_tree() const {
542 if (!num_freezing_trees)
543 return false;
544 return is_freezing_or_frozen_tree().first;
545 }
7c673cae
FG
546 bool is_freezing_tree_root() const { return state & STATE_FREEZINGTREE; }
547 bool is_freezing_dir() const { return state & STATE_FREEZINGDIR; }
548
549 bool is_frozen() const override { return is_frozen_dir() || is_frozen_tree(); }
91327a77
AA
550 bool is_frozen_tree() const {
551 if (!num_frozen_trees)
552 return false;
553 return is_freezing_or_frozen_tree().second;
554 }
7c673cae
FG
555 bool is_frozen_tree_root() const { return state & STATE_FROZENTREE; }
556 bool is_frozen_dir() const { return state & STATE_FROZENDIR; }
11fdf7f2 557
7c673cae
FG
558 bool is_freezeable(bool freezing=false) const {
559 // no nested auth pins.
11fdf7f2
TL
560 if (auth_pins - (freezing ? 1 : 0) > 0 ||
561 (freeze_tree_state && freeze_tree_state->auth_pins != auth_pins))
7c673cae
FG
562 return false;
563
564 // inode must not be frozen.
565 if (!is_subtree_root() && inode->is_frozen())
566 return false;
567
568 return true;
569 }
11fdf7f2 570
7c673cae 571 bool is_freezeable_dir(bool freezing=false) const {
11fdf7f2 572 if ((auth_pins - freezing) > 0 || dir_auth_pins > 0)
7c673cae
FG
573 return false;
574
575 // if not subtree root, inode must not be frozen (tree--frozen_dir is okay).
576 if (!is_subtree_root() && inode->is_frozen() && !inode->is_frozen_dir())
577 return false;
578
579 return true;
580 }
581
9f95a23c
TL
582 bool is_any_freezing_or_frozen_inode() const {
583 return num_frozen_inodes || !freezing_inodes.empty();
584 }
585 bool is_auth_pinned_by_lock_cache() const {
586 return frozen_inode_suppressed;
587 }
588 void disable_frozen_inode() {
589 ceph_assert(num_frozen_inodes == 0);
590 frozen_inode_suppressed++;
591 }
592 void enable_frozen_inode();
593
7c673cae
FG
594 ostream& print_db_line_prefix(ostream& out) override;
595 void print(ostream& out) override;
11fdf7f2
TL
596 void dump(Formatter *f, int flags = DUMP_DEFAULT) const;
597 void dump_load(Formatter *f);
9f95a23c
TL
598
599 // context
600 MDCache *cache;
601
602 CInode *inode; // my inode
603 frag_t frag; // my frag
604
605 fnode_t fnode;
606 snapid_t first = 2;
607 mempool::mds_co::compact_map<snapid_t,old_rstat_t> dirty_old_rstat; // [value.first,key]
608
609 // my inodes with dirty rstat data
610 elist<CInode*> dirty_rstat_inodes;
611
612 elist<CDentry*> dirty_dentries;
613 elist<CDir*>::item item_dirty, item_new;
614
615 // lock caches that auth-pin me
616 elist<MDLockCache::DirItem*> lock_caches_with_auth_pins;
617
618 // all dirfrags within freezing/frozen tree reference the 'state'
619 std::shared_ptr<freeze_tree_state_t> freeze_tree_state;
620
621protected:
622 // friends
623 friend class Migrator;
624 friend class CInode;
625 friend class MDCache;
626 friend class MDiscover;
627 friend class MDBalancer;
628
629 friend class CDirDiscover;
630 friend class CDirExport;
631 friend class C_IO_Dir_TMAP_Fetched;
632 friend class C_IO_Dir_OMAP_Fetched;
633 friend class C_IO_Dir_OMAP_FetchedMore;
634 friend class C_IO_Dir_Committed;
635
636 void _omap_fetch(MDSContext *fin, const std::set<dentry_key_t>& keys);
637 void _omap_fetch_more(
638 bufferlist& hdrbl, std::map<std::string, bufferlist>& omap,
639 MDSContext *fin);
640 CDentry *_load_dentry(
641 std::string_view key,
642 std::string_view dname,
643 snapid_t last,
644 bufferlist &bl,
645 int pos,
646 const std::set<snapid_t> *snaps,
647 bool *force_dirty);
648
649 /**
650 * Mark this fragment as BADFRAG (common part of go_bad and go_bad_dentry)
651 */
652 void _go_bad();
653
654 /**
655 * Go bad due to a damaged dentry (register with damagetable and go BADFRAG)
656 */
657 void go_bad_dentry(snapid_t last, std::string_view dname);
658
659 /**
660 * Go bad due to a damaged header (register with damagetable and go BADFRAG)
661 */
662 void go_bad(bool complete);
663
664 void _omap_fetched(bufferlist& hdrbl, std::map<std::string, bufferlist>& omap,
665 bool complete, int r);
666
667 // -- commit --
668 void _commit(version_t want, int op_prio);
669 void _omap_commit(int op_prio);
670 void _encode_dentry(CDentry *dn, bufferlist& bl, const std::set<snapid_t> *snaps);
671 void _committed(int r, version_t v);
672
673 version_t projected_version = 0;
674 mempool::mds_co::list<fnode_t> projected_fnode;
675
676 std::unique_ptr<scrub_info_t> scrub_infop;
677
678 // contents of this directory
679 dentry_key_map items; // non-null AND null
680 unsigned num_head_items = 0;
681 unsigned num_head_null = 0;
682 unsigned num_snap_items = 0;
683 unsigned num_snap_null = 0;
684
685 int num_dirty = 0;
686
687 int num_inodes_with_caps = 0;
688
689 // state
690 version_t committing_version = 0;
691 version_t committed_version = 0;
692
693 mempool::mds_co::compact_set<mempool::mds_co::string> stale_items;
694
695 // lock nesting, freeze
696 static int num_frozen_trees;
697 static int num_freezing_trees;
698
699 // freezing/frozen inodes in this dirfrag
700 int num_frozen_inodes = 0;
701 int frozen_inode_suppressed = 0;
702 elist<CInode*> freezing_inodes;
703
704 int dir_auth_pins = 0;
705
706 // cache control (defined for authority; hints for replicas)
707 __s32 dir_rep;
708 mempool::mds_co::compact_set<__s32> dir_rep_by; // if dir_rep == REP_LIST
709
710 // popularity
711 dirfrag_load_vec_t pop_me;
712 dirfrag_load_vec_t pop_nested;
713 dirfrag_load_vec_t pop_auth_subtree;
714 dirfrag_load_vec_t pop_auth_subtree_nested;
715
716 ceph::coarse_mono_time last_popularity_sample = ceph::coarse_mono_clock::zero();
717
718 load_spread_t pop_spread;
719
720 elist<CInode*> pop_lru_subdirs;
721
722 std::unique_ptr<bloom_filter> bloom; // XXX not part of mempool::mds_co
723 /* If you set up the bloom filter, you must keep it accurate!
724 * It's deleted when you mark_complete() and is deliberately not serialized.*/
725
726 mempool::mds_co::compact_set<mempool::mds_co::string> wanted_items;
727 mempool::mds_co::compact_map<version_t, MDSContext::vec_alloc<mempool::mds_co::pool_allocator> > waiting_for_commit;
728
729 // -- waiters --
730 mempool::mds_co::compact_map< string_snap_t, MDSContext::vec_alloc<mempool::mds_co::pool_allocator> > waiting_on_dentry; // FIXME string_snap_t not in mempool
731
732private:
733 friend ostream& operator<<(ostream& out, const class CDir& dir);
734
735 void log_mark_dirty();
736
737 /**
738 * Create a scrub_info_t struct for the scrub_infop pointer.
739 */
740 void scrub_info_create() const;
741 /**
742 * Delete the scrub_infop if it's not got any useful data.
743 */
744 void scrub_maybe_delete_info();
745 /**
746 * Check the given set (presumably one of those in scrub_info_t) for the
747 * next key to scrub and look it up (or fail!).
748 */
749 int _next_dentry_on_set(dentry_key_set &dns, bool missing_okay,
750 MDSContext *cb, CDentry **dnout);
751
752 void link_inode_work( CDentry *dn, CInode *in );
753 void unlink_inode_work( CDentry *dn );
754 void remove_null_dentries();
755 void purge_stale_snap_data(const std::set<snapid_t>& snaps);
756
757 void prepare_new_fragment(bool replay);
758 void prepare_old_fragment(map<string_snap_t, MDSContext::vec >& dentry_waiters, bool replay);
759 void steal_dentry(CDentry *dn); // from another dir. used by merge/split.
760 void finish_old_fragment(MDSContext::vec& waiters, bool replay);
761 void init_fragment_pins();
762 std::string get_path() const;
763
764 // -- authority --
765 /*
766 * normal: <parent,unknown> !subtree_root
767 * delegation: <mds,unknown> subtree_root
768 * ambiguous: <mds1,mds2> subtree_root
769 * <parent,mds2> subtree_root
770 */
771 mds_authority_t dir_auth;
7c673cae
FG
772};
773
774#endif