]> git.proxmox.com Git - ceph.git/blame - ceph/src/mds/CDir.h
import quincy beta 17.1.0
[ceph.git] / ceph / src / mds / CDir.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15
7c673cae
FG
16#ifndef CEPH_CDIR_H
17#define CEPH_CDIR_H
18
7c673cae 19#include <iosfwd>
7c673cae 20#include <list>
7c673cae 21#include <map>
94b18763 22#include <set>
7c673cae 23#include <string>
11fdf7f2 24#include <string_view>
7c673cae 25
94b18763
FG
26#include "common/bloom_filter.hpp"
27#include "common/config.h"
28#include "include/buffer_fwd.h"
29#include "include/counter.h"
30#include "include/types.h"
7c673cae
FG
31
32#include "CInode.h"
94b18763 33#include "MDSCacheObject.h"
11fdf7f2
TL
34#include "MDSContext.h"
35#include "cephfs_features.h"
36#include "SessionMap.h"
37#include "messages/MClientReply.h"
7c673cae
FG
38
39class CDentry;
40class MDCache;
41
f67539c2 42std::ostream& operator<<(std::ostream& out, const class CDir& dir);
7c673cae 43
9f95a23c 44class CDir : public MDSCacheObject, public Counter<CDir> {
7c673cae 45public:
181888fb 46 MEMPOOL_CLASS_HELPERS();
9f95a23c
TL
47
48 typedef mempool::mds_co::map<dentry_key_t, CDentry*> dentry_key_map;
49 typedef mempool::mds_co::set<dentry_key_t> dentry_key_set;
50
f67539c2
TL
51 using fnode_ptr = std::shared_ptr<fnode_t>;
52 using fnode_const_ptr = std::shared_ptr<const fnode_t>;
53
54 template <typename ...Args>
55 static fnode_ptr allocate_fnode(Args && ...args) {
56 static mempool::mds_co::pool_allocator<fnode_t> allocator;
57 return std::allocate_shared<fnode_t>(allocator, std::forward<Args>(args)...);
58 }
59
60 struct dentry_commit_item {
20effc67 61 std::string key;
f67539c2
TL
62 snapid_t first;
63 bool is_remote = false;
64
65 inodeno_t ino;
66 unsigned char d_type;
67 mempool::mds_co::string alternate_name;
68
69 bool snaprealm = false;
70 sr_t srnode;
71
72 mempool::mds_co::string symlink;
73 uint64_t features;
74 uint64_t dft_len;
75 CInode::inode_const_ptr inode;
76 CInode::xattr_map_const_ptr xattrs;
77 CInode::old_inode_map_const_ptr old_inodes;
78 snapid_t oldest_snap;
79 damage_flags_t damage_flags;
80 };
81
9f95a23c
TL
82 // -- freezing --
83 struct freeze_tree_state_t {
84 CDir *dir; // freezing/frozen tree root
85 int auth_pins = 0;
86 bool frozen = false;
87 freeze_tree_state_t(CDir *d) : dir(d) {}
88 };
89
90 class scrub_info_t {
91 public:
92 MEMPOOL_CLASS_HELPERS();
93 struct scrub_stamps {
f67539c2 94 version_t version = 0;
9f95a23c 95 utime_t time;
9f95a23c
TL
96 };
97
f67539c2 98 scrub_info_t() {}
9f95a23c 99
9f95a23c
TL
100 scrub_stamps last_recursive; // when we last finished a recursive scrub
101 scrub_stamps last_local; // when we last did a local scrub
102
f67539c2
TL
103 bool directory_scrubbing = false; /// safety check
104 bool last_scrub_dirty = false; /// is scrub info dirty or is it flushed to fnode?
9f95a23c 105
f67539c2 106 ScrubHeaderRef header;
9f95a23c
TL
107 };
108
7c673cae
FG
109 // -- pins --
110 static const int PIN_DNWAITER = 1;
111 static const int PIN_INOWAITER = 2;
112 static const int PIN_CHILD = 3;
113 static const int PIN_FROZEN = 4;
114 static const int PIN_SUBTREE = 5;
115 static const int PIN_IMPORTING = 7;
116 static const int PIN_IMPORTBOUND = 9;
117 static const int PIN_EXPORTBOUND = 10;
118 static const int PIN_STICKY = 11;
119 static const int PIN_SUBTREETEMP = 12; // used by MDCache::trim_non_auth()
7c673cae
FG
120
121 // -- state --
9f95a23c
TL
122 static const unsigned STATE_COMPLETE = (1<< 0); // the complete contents are in cache
123 static const unsigned STATE_FROZENTREE = (1<< 1); // root of tree (bounded by exports)
124 static const unsigned STATE_FREEZINGTREE = (1<< 2); // in process of freezing
11fdf7f2
TL
125 static const unsigned STATE_FROZENDIR = (1<< 3);
126 static const unsigned STATE_FREEZINGDIR = (1<< 4);
9f95a23c
TL
127 static const unsigned STATE_COMMITTING = (1<< 5); // mid-commit
128 static const unsigned STATE_FETCHING = (1<< 6); // currently fetching
11fdf7f2
TL
129 static const unsigned STATE_CREATING = (1<< 7);
130 static const unsigned STATE_IMPORTBOUND = (1<< 8);
131 static const unsigned STATE_EXPORTBOUND = (1<< 9);
132 static const unsigned STATE_EXPORTING = (1<<10);
133 static const unsigned STATE_IMPORTING = (1<<11);
134 static const unsigned STATE_FRAGMENTING = (1<<12);
135 static const unsigned STATE_STICKY = (1<<13); // sticky pin due to inode stickydirs
136 static const unsigned STATE_DNPINNEDFRAG = (1<<14); // dir is refragmenting
137 static const unsigned STATE_ASSIMRSTAT = (1<<15); // assimilating inode->frag rstats
138 static const unsigned STATE_DIRTYDFT = (1<<16); // dirty dirfragtree
139 static const unsigned STATE_BADFRAG = (1<<17); // bad dirfrag
140 static const unsigned STATE_TRACKEDBYOFT = (1<<18); // tracked by open file table
141 static const unsigned STATE_AUXSUBTREE = (1<<19); // no subtree merge
7c673cae
FG
142
143 // common states
144 static const unsigned STATE_CLEAN = 0;
7c673cae
FG
145
146 // these state bits are preserved by an import/export
147 // ...except if the directory is hashed, in which case none of them are!
148 static const unsigned MASK_STATE_EXPORTED =
149 (STATE_COMPLETE|STATE_DIRTY|STATE_DIRTYDFT|STATE_BADFRAG);
150 static const unsigned MASK_STATE_IMPORT_KEPT =
151 (
11fdf7f2
TL
152 STATE_IMPORTING |
153 STATE_IMPORTBOUND |
154 STATE_EXPORTBOUND |
155 STATE_FROZENTREE |
156 STATE_STICKY |
157 STATE_TRACKEDBYOFT);
7c673cae 158 static const unsigned MASK_STATE_EXPORT_KEPT =
11fdf7f2
TL
159 (STATE_EXPORTING |
160 STATE_IMPORTBOUND |
161 STATE_EXPORTBOUND |
162 STATE_FROZENTREE |
163 STATE_FROZENDIR |
164 STATE_STICKY |
165 STATE_TRACKEDBYOFT);
7c673cae 166 static const unsigned MASK_STATE_FRAGMENT_KEPT =
11fdf7f2 167 (STATE_DIRTY |
7c673cae
FG
168 STATE_EXPORTBOUND |
169 STATE_IMPORTBOUND |
170 STATE_AUXSUBTREE |
171 STATE_REJOINUNDEF);
172
173 // -- rep spec --
174 static const int REP_NONE = 0;
175 static const int REP_ALL = 1;
176 static const int REP_LIST = 2;
177
7c673cae
FG
178 static const unsigned EXPORT_NONCE = 1;
179
7c673cae
FG
180 // -- wait masks --
181 static const uint64_t WAIT_DENTRY = (1<<0); // wait for item to be in cache
182 static const uint64_t WAIT_COMPLETE = (1<<1); // wait for complete dir contents
183 static const uint64_t WAIT_FROZEN = (1<<2); // auth pins removed
184 static const uint64_t WAIT_CREATED = (1<<3); // new dirfrag is logged
185
186 static const int WAIT_DNLOCK_OFFSET = 4;
187
188 static const uint64_t WAIT_ANY_MASK = (uint64_t)(-1);
7c673cae
FG
189 static const uint64_t WAIT_ATSUBTREEROOT = (WAIT_SINGLEAUTH);
190
11fdf7f2
TL
191 // -- dump flags --
192 static const int DUMP_PATH = (1 << 0);
193 static const int DUMP_DIRFRAG = (1 << 1);
194 static const int DUMP_SNAPID_FIRST = (1 << 2);
195 static const int DUMP_VERSIONS = (1 << 3);
196 static const int DUMP_REP = (1 << 4);
197 static const int DUMP_DIR_AUTH = (1 << 5);
198 static const int DUMP_STATES = (1 << 6);
199 static const int DUMP_MDS_CACHE_OBJECT = (1 << 7);
200 static const int DUMP_ITEMS = (1 << 8);
201 static const int DUMP_ALL = (-1);
9f95a23c 202 static const int DUMP_DEFAULT = DUMP_ALL & (~DUMP_ITEMS);
7c673cae 203
f67539c2 204 CDir(CInode *in, frag_t fg, MDCache *mdc, bool auth);
7c673cae 205
9f95a23c
TL
206 std::string_view pin_name(int p) const override {
207 switch (p) {
208 case PIN_DNWAITER: return "dnwaiter";
209 case PIN_INOWAITER: return "inowaiter";
210 case PIN_CHILD: return "child";
211 case PIN_FROZEN: return "frozen";
212 case PIN_SUBTREE: return "subtree";
213 case PIN_IMPORTING: return "importing";
214 case PIN_IMPORTBOUND: return "importbound";
215 case PIN_EXPORTBOUND: return "exportbound";
216 case PIN_STICKY: return "sticky";
217 case PIN_SUBTREETEMP: return "subtreetemp";
218 default: return generic_pin_name(p);
219 }
220 }
7c673cae
FG
221
222 bool is_lt(const MDSCacheObject *r) const override {
223 return dirfrag() < (static_cast<const CDir*>(r))->dirfrag();
224 }
225
7c673cae
FG
226 void resync_accounted_fragstat();
227 void resync_accounted_rstat();
f67539c2
TL
228 void assimilate_dirty_rstat_inodes(MutationRef& mut);
229 void assimilate_dirty_rstat_inodes_finish(EMetaBlob *blob);
7c673cae 230
1adf2230
AA
231 void mark_exporting() {
232 state_set(CDir::STATE_EXPORTING);
233 inode->num_exporting_dirs++;
234 }
235 void clear_exporting() {
236 state_clear(CDir::STATE_EXPORTING);
237 inode->num_exporting_dirs--;
238 }
239
f67539c2
TL
240 version_t get_version() const { return fnode->version; }
241 void update_projected_version() {
11fdf7f2 242 ceph_assert(projected_fnode.empty());
f67539c2 243 projected_version = fnode->version;
7c673cae
FG
244 }
245 version_t get_projected_version() const { return projected_version; }
246
f67539c2
TL
247 void reset_fnode(fnode_const_ptr&& ptr) {
248 fnode = std::move(ptr);
249 }
250
251 const fnode_const_ptr& get_fnode() const {
252 return fnode;
7c673cae
FG
253 }
254
f67539c2
TL
255 // only used for updating newly allocated CDir
256 fnode_t* _get_fnode() {
257 if (fnode == empty_fnode)
258 reset_fnode(allocate_fnode());
259 return const_cast<fnode_t*>(fnode.get());
260 }
261
262 const fnode_const_ptr& get_projected_fnode() const {
7c673cae 263 if (projected_fnode.empty())
f67539c2 264 return fnode;
7c673cae 265 else
f67539c2 266 return projected_fnode.back();
7c673cae 267 }
7c673cae 268
f67539c2
TL
269 // fnode should have already been projected in caller's context
270 fnode_t* _get_projected_fnode() {
271 ceph_assert(!projected_fnode.empty());
272 return const_cast<fnode_t*>(projected_fnode.back().get());
273 }
274
275 fnode_ptr project_fnode(const MutationRef& mut);
276
277 void pop_and_dirty_projected_fnode(LogSegment *ls, const MutationRef& mut);
7c673cae
FG
278 bool is_projected() const { return !projected_fnode.empty(); }
279 version_t pre_dirty(version_t min=0);
280 void _mark_dirty(LogSegment *ls);
281 void _set_dirty_flag() {
282 if (!state_test(STATE_DIRTY)) {
283 state_set(STATE_DIRTY);
284 get(PIN_DIRTY);
285 }
286 }
f67539c2 287 void mark_dirty(LogSegment *ls, version_t pv=0);
7c673cae
FG
288 void mark_clean();
289
290 bool is_new() { return item_new.is_on_list(); }
291 void mark_new(LogSegment *ls);
292
293 bool is_bad() { return state_test(STATE_BADFRAG); }
7c673cae 294
7c673cae
FG
295 /**
296 * Call to start this CDir on a new scrub.
297 * @pre It is not currently scrubbing
298 * @pre The CDir is marked complete.
299 * @post It has set up its internal scrubbing state.
300 */
f67539c2
TL
301 void scrub_initialize(const ScrubHeaderRef& header);
302 const ScrubHeaderRef& get_scrub_header() {
303 static const ScrubHeaderRef nullref;
304 return scrub_infop ? scrub_infop->header : nullref;
305 }
306
307 bool scrub_is_in_progress() const {
308 return (scrub_infop && scrub_infop->directory_scrubbing);
309 }
310
7c673cae
FG
311 /**
312 * Call this once all CDentries have been scrubbed, according to
313 * scrub_dentry_next's listing. It finalizes the scrub statistics.
314 */
315 void scrub_finished();
f67539c2
TL
316
317 void scrub_aborted();
7c673cae
FG
318 /**
319 * Tell the CDir to do a local scrub of itself.
320 * @pre The CDir is_complete().
321 * @returns true if the rstats and directory contents match, false otherwise.
322 */
323 bool scrub_local();
7c673cae
FG
324
325 const scrub_info_t *scrub_info() const {
f67539c2 326 if (!scrub_infop)
7c673cae 327 scrub_info_create();
7c673cae
FG
328 return scrub_infop.get();
329 }
330
7c673cae
FG
331 // -- accessors --
332 inodeno_t ino() const { return inode->ino(); } // deprecate me?
333 frag_t get_frag() const { return frag; }
334 dirfrag_t dirfrag() const { return dirfrag_t(inode->ino(), frag); }
335
336 CInode *get_inode() { return inode; }
337 const CInode *get_inode() const { return inode; }
338 CDir *get_parent_dir() { return inode->get_parent_dir(); }
339
94b18763
FG
340 dentry_key_map::iterator begin() { return items.begin(); }
341 dentry_key_map::iterator end() { return items.end(); }
342 dentry_key_map::iterator lower_bound(dentry_key_t key) { return items.lower_bound(key); }
7c673cae
FG
343
344 unsigned get_num_head_items() const { return num_head_items; }
345 unsigned get_num_head_null() const { return num_head_null; }
346 unsigned get_num_snap_items() const { return num_snap_items; }
347 unsigned get_num_snap_null() const { return num_snap_null; }
348 unsigned get_num_any() const { return num_head_items + num_head_null + num_snap_items + num_snap_null; }
349
350 bool check_rstats(bool scrub=false);
351
352 void inc_num_dirty() { num_dirty++; }
353 void dec_num_dirty() {
11fdf7f2 354 ceph_assert(num_dirty > 0);
7c673cae
FG
355 num_dirty--;
356 }
357 int get_num_dirty() const {
358 return num_dirty;
359 }
360
11fdf7f2
TL
361 void adjust_num_inodes_with_caps(int d);
362
7c673cae
FG
363 int64_t get_frag_size() const {
364 return get_projected_fnode()->fragstat.size();
365 }
366
367 // -- dentries and inodes --
11fdf7f2
TL
368 CDentry* lookup_exact_snap(std::string_view dname, snapid_t last);
369 CDentry* lookup(std::string_view n, snapid_t snap=CEPH_NOSNAP);
7c673cae 370
11fdf7f2 371 CDentry* add_null_dentry(std::string_view dname,
7c673cae 372 snapid_t first=2, snapid_t last=CEPH_NOSNAP);
f67539c2 373 CDentry* add_primary_dentry(std::string_view dname, CInode *in, mempool::mds_co::string alternate_name,
7c673cae 374 snapid_t first=2, snapid_t last=CEPH_NOSNAP);
11fdf7f2 375 CDentry* add_remote_dentry(std::string_view dname, inodeno_t ino, unsigned char d_type,
f67539c2 376 mempool::mds_co::string alternate_name,
7c673cae
FG
377 snapid_t first=2, snapid_t last=CEPH_NOSNAP);
378 void remove_dentry( CDentry *dn ); // delete dentry
379 void link_remote_inode( CDentry *dn, inodeno_t ino, unsigned char d_type);
380 void link_remote_inode( CDentry *dn, CInode *in );
381 void link_primary_inode( CDentry *dn, CInode *in );
31f18b77 382 void unlink_inode(CDentry *dn, bool adjust_lru=true);
7c673cae
FG
383 void try_remove_unlinked_dn(CDentry *dn);
384
385 void add_to_bloom(CDentry *dn);
11fdf7f2 386 bool is_in_bloom(std::string_view name);
7c673cae
FG
387 bool has_bloom() { return (bloom ? true : false); }
388 void remove_bloom() {
389 bloom.reset();
390 }
9f95a23c 391
7c673cae
FG
392 void try_remove_dentries_for_stray();
393 bool try_trim_snap_dentry(CDentry *dn, const std::set<snapid_t>& snaps);
394
9f95a23c
TL
395 void split(int bits, std::vector<CDir*>* subs, MDSContext::vec& waiters, bool replay);
396 void merge(const std::vector<CDir*>& subs, MDSContext::vec& waiters, bool replay);
7c673cae
FG
397
398 bool should_split() const {
f67539c2
TL
399 return g_conf()->mds_bal_split_size > 0 &&
400 (int)get_frag_size() > g_conf()->mds_bal_split_size;
7c673cae
FG
401 }
402 bool should_split_fast() const;
f67539c2 403 bool should_merge() const;
7c673cae 404
7c673cae
FG
405 mds_authority_t authority() const override;
406 mds_authority_t get_dir_auth() const { return dir_auth; }
11fdf7f2 407 void set_dir_auth(const mds_authority_t &a);
7c673cae
FG
408 void set_dir_auth(mds_rank_t a) { set_dir_auth(mds_authority_t(a, CDIR_AUTH_UNKNOWN)); }
409 bool is_ambiguous_dir_auth() const {
410 return dir_auth.second != CDIR_AUTH_UNKNOWN;
411 }
412 bool is_full_dir_auth() const {
413 return is_auth() && !is_ambiguous_dir_auth();
414 }
415 bool is_full_dir_nonauth() const {
416 return !is_auth() && !is_ambiguous_dir_auth();
417 }
418
419 bool is_subtree_root() const {
420 return dir_auth != CDIR_AUTH_DEFAULT;
421 }
422
423 bool contains(CDir *x); // true if we are x or an ancestor of x
424
7c673cae
FG
425 // for giving to clients
426 void get_dist_spec(std::set<mds_rank_t>& ls, mds_rank_t auth) {
f91f0fd5 427 if (is_auth()) {
7c673cae
FG
428 list_replicas(ls);
429 if (!ls.empty())
430 ls.insert(auth);
431 }
432 }
7c673cae 433
f67539c2 434 static void encode_dirstat(ceph::buffer::list& bl, const session_info_t& info, const DirStat& ds);
7c673cae 435
// Serialize this dirfrag's base state into bl: first, *fnode, dir_rep and
// dir_rep_by, in that order, between ENCODE_START(1, 1, ...) and
// ENCODE_FINISH. _decode_base reads the same fields back in the same order,
// so the two must be changed together.
f67539c2 436 void _encode_base(ceph::buffer::list& bl) {
9f95a23c 437 ENCODE_START(1, 1, bl);
11fdf7f2 438 encode(first, bl);
f67539c2 439 encode(*fnode, bl);
11fdf7f2
TL
440 encode(dir_rep, bl);
441 encode(dir_rep_by, bl);
9f95a23c 442 ENCODE_FINISH(bl);
7c673cae 443 }
f67539c2 444 void _decode_base(ceph::buffer::list::const_iterator& p) {
9f95a23c 445 DECODE_START(1, p);
11fdf7f2 446 decode(first, p);
f67539c2
TL
447 {
448 auto _fnode = allocate_fnode();
449 decode(*_fnode, p);
450 reset_fnode(std::move(_fnode));
451 }
11fdf7f2
TL
452 decode(dir_rep, p);
453 decode(dir_rep_by, p);
9f95a23c 454 DECODE_FINISH(p);
7c673cae 455 }
7c673cae
FG
456
457 // -- state --
458 bool is_complete() { return state & STATE_COMPLETE; }
459 bool is_exporting() { return state & STATE_EXPORTING; }
460 bool is_importing() { return state & STATE_IMPORTING; }
461 bool is_dirty_dft() { return state & STATE_DIRTYDFT; }
462
463 int get_dir_rep() const { return dir_rep; }
464 bool is_rep() const {
465 if (dir_rep == REP_NONE) return false;
466 return true;
467 }
f67539c2 468 bool can_rep() const;
7c673cae
FG
469
470 // -- fetch --
471 object_t get_ondisk_object() {
472 return file_object_t(ino(), frag);
473 }
11fdf7f2
TL
474 void fetch(MDSContext *c, bool ignore_authpinnability=false);
475 void fetch(MDSContext *c, std::string_view want_dn, bool ignore_authpinnability=false);
476 void fetch(MDSContext *c, const std::set<dentry_key_t>& keys);
7c673cae 477
7c673cae
FG
478#if 0 // unused?
479 void wait_for_commit(Context *c, version_t v=0);
480#endif
481 void commit_to(version_t want);
11fdf7f2 482 void commit(version_t want, MDSContext *c,
7c673cae
FG
483 bool ignore_authpinnability=false, int op_prio=-1);
484
485 // -- dirtyness --
486 version_t get_committing_version() const { return committing_version; }
487 version_t get_committed_version() const { return committed_version; }
488 void set_committed_version(version_t v) { committed_version = v; }
489
490 void mark_complete();
491
7c673cae
FG
492 // -- reference counting --
493 void first_get() override;
494 void last_put() override;
495
11fdf7f2 496 bool is_waiting_for_dentry(std::string_view dname, snapid_t snap) {
7c673cae
FG
497 return waiting_on_dentry.count(string_snap_t(dname, snap));
498 }
11fdf7f2
TL
499 void add_dentry_waiter(std::string_view dentry, snapid_t snap, MDSContext *c);
500 void take_dentry_waiting(std::string_view dentry, snapid_t first, snapid_t last, MDSContext::vec& ls);
501 void take_sub_waiting(MDSContext::vec& ls); // dentry or ino
7c673cae 502
11fdf7f2
TL
503 void add_waiter(uint64_t mask, MDSContext *c) override;
504 void take_waiting(uint64_t mask, MDSContext::vec& ls) override; // may include dentry waiters
7c673cae 505 void finish_waiting(uint64_t mask, int result = 0); // ditto
7c673cae
FG
506
507 // -- import/export --
f67539c2
TL
508 mds_rank_t get_export_pin(bool inherit=true) const;
509 bool is_exportable(mds_rank_t dest) const;
510
511 void encode_export(ceph::buffer::list& bl);
11fdf7f2 512 void finish_export();
7c673cae
FG
513 void abort_export() {
514 put(PIN_TEMPEXPORTING);
515 }
f67539c2 516 void decode_import(ceph::buffer::list::const_iterator& blp, LogSegment *ls);
11fdf7f2 517 void abort_import();
7c673cae
FG
518
519 // -- auth pins --
91327a77 520 bool can_auth_pin(int *err_ret=nullptr) const override;
7c673cae 521 int get_auth_pins() const { return auth_pins; }
7c673cae
FG
522 int get_dir_auth_pins() const { return dir_auth_pins; }
523 void auth_pin(void *who) override;
524 void auth_unpin(void *who) override;
525
11fdf7f2 526 void adjust_nested_auth_pins(int dirinc, void *by);
7c673cae
FG
527 void verify_fragstat();
528
11fdf7f2
TL
529 void _walk_tree(std::function<bool(CDir*)> cb);
530
7c673cae
FG
531 bool freeze_tree();
532 void _freeze_tree();
533 void unfreeze_tree();
11fdf7f2 534 void adjust_freeze_after_rename(CDir *dir);
7c673cae
FG
535
536 bool freeze_dir();
537 void _freeze_dir();
538 void unfreeze_dir();
539
540 void maybe_finish_freeze();
541
f67539c2 542 std::pair<bool,bool> is_freezing_or_frozen_tree() const {
11fdf7f2
TL
543 if (freeze_tree_state) {
544 if (freeze_tree_state->frozen)
f67539c2
TL
545 return std::make_pair(false, true);
546 return std::make_pair(true, false);
11fdf7f2 547 }
f67539c2 548 return std::make_pair(false, false);
11fdf7f2 549 }
91327a77
AA
550
551 bool is_freezing() const override { return is_freezing_dir() || is_freezing_tree(); }
552 bool is_freezing_tree() const {
553 if (!num_freezing_trees)
554 return false;
555 return is_freezing_or_frozen_tree().first;
556 }
7c673cae
FG
557 bool is_freezing_tree_root() const { return state & STATE_FREEZINGTREE; }
558 bool is_freezing_dir() const { return state & STATE_FREEZINGDIR; }
559
560 bool is_frozen() const override { return is_frozen_dir() || is_frozen_tree(); }
91327a77
AA
561 bool is_frozen_tree() const {
562 if (!num_frozen_trees)
563 return false;
564 return is_freezing_or_frozen_tree().second;
565 }
7c673cae
FG
566 bool is_frozen_tree_root() const { return state & STATE_FROZENTREE; }
567 bool is_frozen_dir() const { return state & STATE_FROZENDIR; }
11fdf7f2 568
7c673cae
FG
569 bool is_freezeable(bool freezing=false) const {
570 // no nested auth pins.
11fdf7f2
TL
571 if (auth_pins - (freezing ? 1 : 0) > 0 ||
572 (freeze_tree_state && freeze_tree_state->auth_pins != auth_pins))
7c673cae
FG
573 return false;
574
575 // inode must not be frozen.
576 if (!is_subtree_root() && inode->is_frozen())
577 return false;
578
579 return true;
580 }
11fdf7f2 581
7c673cae 582 bool is_freezeable_dir(bool freezing=false) const {
11fdf7f2 583 if ((auth_pins - freezing) > 0 || dir_auth_pins > 0)
7c673cae
FG
584 return false;
585
586 // if not subtree root, inode must not be frozen (tree--frozen_dir is okay).
587 if (!is_subtree_root() && inode->is_frozen() && !inode->is_frozen_dir())
588 return false;
589
590 return true;
591 }
592
9f95a23c
TL
593 bool is_any_freezing_or_frozen_inode() const {
594 return num_frozen_inodes || !freezing_inodes.empty();
595 }
596 bool is_auth_pinned_by_lock_cache() const {
597 return frozen_inode_suppressed;
598 }
599 void disable_frozen_inode() {
600 ceph_assert(num_frozen_inodes == 0);
601 frozen_inode_suppressed++;
602 }
603 void enable_frozen_inode();
604
f67539c2
TL
605 std::ostream& print_db_line_prefix(std::ostream& out) override;
606 void print(std::ostream& out) override;
607 void dump(ceph::Formatter *f, int flags = DUMP_DEFAULT) const;
608 void dump_load(ceph::Formatter *f);
9f95a23c
TL
609
610 // context
f67539c2 611 MDCache *mdcache;
9f95a23c
TL
612
613 CInode *inode; // my inode
614 frag_t frag; // my frag
615
9f95a23c
TL
616 snapid_t first = 2;
617 mempool::mds_co::compact_map<snapid_t,old_rstat_t> dirty_old_rstat; // [value.first,key]
618
619 // my inodes with dirty rstat data
620 elist<CInode*> dirty_rstat_inodes;
621
622 elist<CDentry*> dirty_dentries;
623 elist<CDir*>::item item_dirty, item_new;
624
625 // lock caches that auth-pin me
626 elist<MDLockCache::DirItem*> lock_caches_with_auth_pins;
627
628 // all dirfrags within freezing/frozen tree reference the 'state'
629 std::shared_ptr<freeze_tree_state_t> freeze_tree_state;
630
631protected:
632 // friends
633 friend class Migrator;
634 friend class CInode;
635 friend class MDCache;
636 friend class MDiscover;
637 friend class MDBalancer;
638
639 friend class CDirDiscover;
640 friend class CDirExport;
641 friend class C_IO_Dir_TMAP_Fetched;
642 friend class C_IO_Dir_OMAP_Fetched;
643 friend class C_IO_Dir_OMAP_FetchedMore;
644 friend class C_IO_Dir_Committed;
f67539c2 645 friend class C_IO_Dir_Commit_Ops;
9f95a23c
TL
646
647 void _omap_fetch(MDSContext *fin, const std::set<dentry_key_t>& keys);
f67539c2 648 void _omap_fetch_more(version_t omap_version, bufferlist& hdrbl,
20effc67 649 std::map<std::string, bufferlist>& omap, MDSContext *fin);
9f95a23c
TL
650 CDentry *_load_dentry(
651 std::string_view key,
652 std::string_view dname,
653 snapid_t last,
f67539c2 654 ceph::buffer::list &bl,
9f95a23c
TL
655 int pos,
656 const std::set<snapid_t> *snaps,
f91f0fd5 657 double rand_threshold,
9f95a23c
TL
658 bool *force_dirty);
659
9f95a23c
TL
660 /**
661 * Go bad due to a damaged dentry (register with damagetable and go BADFRAG)
662 */
663 void go_bad_dentry(snapid_t last, std::string_view dname);
664
665 /**
666 * Go bad due to a damaged header (register with damagetable and go BADFRAG)
667 */
668 void go_bad(bool complete);
669
f67539c2 670 void _omap_fetched(ceph::buffer::list& hdrbl, std::map<std::string, ceph::buffer::list>& omap,
9f95a23c
TL
671 bool complete, int r);
672
673 // -- commit --
674 void _commit(version_t want, int op_prio);
f67539c2 675 void _omap_commit_ops(int r, int op_prio, int64_t metapool, version_t version, bool _new,
20effc67
TL
676 std::vector<dentry_commit_item> &to_set, bufferlist &dfts,
677 std::vector<std::string> &to_remove,
f67539c2
TL
678 mempool::mds_co::compact_set<mempool::mds_co::string> &_stale);
679 void _encode_primary_inode_base(dentry_commit_item &item, bufferlist &dfts,
680 bufferlist &bl);
9f95a23c 681 void _omap_commit(int op_prio);
f67539c2 682 void _parse_dentry(CDentry *dn, dentry_commit_item &item,
20effc67 683 const std::set<snapid_t> *snaps, bufferlist &bl);
9f95a23c
TL
684 void _committed(int r, version_t v);
685
f67539c2
TL
686 static fnode_const_ptr empty_fnode;
687 // fnode is a pointer to constant fnode_t, the constant fnode_t can be shared
688 // by CDir and log events. To update fnode, read-copy-update should be used.
689
690 fnode_const_ptr fnode = empty_fnode;
691
9f95a23c 692 version_t projected_version = 0;
f67539c2 693 mempool::mds_co::list<fnode_const_ptr> projected_fnode;
9f95a23c
TL
694
695 std::unique_ptr<scrub_info_t> scrub_infop;
696
697 // contents of this directory
698 dentry_key_map items; // non-null AND null
699 unsigned num_head_items = 0;
700 unsigned num_head_null = 0;
701 unsigned num_snap_items = 0;
702 unsigned num_snap_null = 0;
703
704 int num_dirty = 0;
705
706 int num_inodes_with_caps = 0;
707
708 // state
709 version_t committing_version = 0;
710 version_t committed_version = 0;
711
712 mempool::mds_co::compact_set<mempool::mds_co::string> stale_items;
713
714 // lock nesting, freeze
715 static int num_frozen_trees;
716 static int num_freezing_trees;
717
718 // freezing/frozen inodes in this dirfrag
719 int num_frozen_inodes = 0;
720 int frozen_inode_suppressed = 0;
721 elist<CInode*> freezing_inodes;
722
723 int dir_auth_pins = 0;
724
725 // cache control (defined for authority; hints for replicas)
726 __s32 dir_rep;
727 mempool::mds_co::compact_set<__s32> dir_rep_by; // if dir_rep == REP_LIST
728
729 // popularity
730 dirfrag_load_vec_t pop_me;
731 dirfrag_load_vec_t pop_nested;
732 dirfrag_load_vec_t pop_auth_subtree;
733 dirfrag_load_vec_t pop_auth_subtree_nested;
734
735 ceph::coarse_mono_time last_popularity_sample = ceph::coarse_mono_clock::zero();
736
737 load_spread_t pop_spread;
738
739 elist<CInode*> pop_lru_subdirs;
740
741 std::unique_ptr<bloom_filter> bloom; // XXX not part of mempool::mds_co
742 /* If you set up the bloom filter, you must keep it accurate!
743 * It's deleted when you mark_complete() and is deliberately not serialized.*/
744
745 mempool::mds_co::compact_set<mempool::mds_co::string> wanted_items;
746 mempool::mds_co::compact_map<version_t, MDSContext::vec_alloc<mempool::mds_co::pool_allocator> > waiting_for_commit;
747
748 // -- waiters --
749 mempool::mds_co::compact_map< string_snap_t, MDSContext::vec_alloc<mempool::mds_co::pool_allocator> > waiting_on_dentry; // FIXME string_snap_t not in mempool
750
751private:
f67539c2 752 friend std::ostream& operator<<(std::ostream& out, const class CDir& dir);
9f95a23c
TL
753
754 void log_mark_dirty();
755
756 /**
757 * Create a scrub_info_t struct for the scrub_infop pointer.
758 */
759 void scrub_info_create() const;
760 /**
761 * Delete the scrub_infop if it's not got any useful data.
762 */
763 void scrub_maybe_delete_info();
9f95a23c
TL
764
765 void link_inode_work( CDentry *dn, CInode *in );
766 void unlink_inode_work( CDentry *dn );
767 void remove_null_dentries();
768 void purge_stale_snap_data(const std::set<snapid_t>& snaps);
769
770 void prepare_new_fragment(bool replay);
f67539c2 771 void prepare_old_fragment(std::map<string_snap_t, MDSContext::vec >& dentry_waiters, bool replay);
9f95a23c
TL
772 void steal_dentry(CDentry *dn); // from another dir. used by merge/split.
773 void finish_old_fragment(MDSContext::vec& waiters, bool replay);
774 void init_fragment_pins();
775 std::string get_path() const;
776
777 // -- authority --
778 /*
779 * normal: <parent,unknown> !subtree_root
780 * delegation: <mds,unknown> subtree_root
781 * ambiguous: <mds1,mds2> subtree_root
782 * <parent,mds2> subtree_root
783 */
784 mds_authority_t dir_auth;
7c673cae
FG
785};
786
787#endif