]> git.proxmox.com Git - ceph.git/blame - ceph/src/mds/CDir.h
update download target update for octopus release
[ceph.git] / ceph / src / mds / CDir.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15
16
17#ifndef CEPH_CDIR_H
18#define CEPH_CDIR_H
19
7c673cae 20#include <iosfwd>
7c673cae 21#include <list>
7c673cae 22#include <map>
94b18763 23#include <set>
7c673cae 24#include <string>
11fdf7f2 25#include <string_view>
7c673cae 26
94b18763
FG
27#include "common/bloom_filter.hpp"
28#include "common/config.h"
29#include "include/buffer_fwd.h"
30#include "include/counter.h"
31#include "include/types.h"
7c673cae
FG
32
33#include "CInode.h"
94b18763 34#include "MDSCacheObject.h"
11fdf7f2
TL
35#include "MDSContext.h"
36#include "cephfs_features.h"
37#include "SessionMap.h"
38#include "messages/MClientReply.h"
7c673cae
FG
39
40class CDentry;
41class MDCache;
42
43struct ObjectOperation;
44
45ostream& operator<<(ostream& out, const class CDir& dir);
46class CDir : public MDSCacheObject, public Counter<CDir> {
11fdf7f2
TL
47 using time = ceph::coarse_mono_time;
48 using clock = ceph::coarse_mono_clock;
49
7c673cae
FG
50 friend ostream& operator<<(ostream& out, const class CDir& dir);
51
52public:
181888fb 53 MEMPOOL_CLASS_HELPERS();
7c673cae
FG
54 // -- pins --
55 static const int PIN_DNWAITER = 1;
56 static const int PIN_INOWAITER = 2;
57 static const int PIN_CHILD = 3;
58 static const int PIN_FROZEN = 4;
59 static const int PIN_SUBTREE = 5;
60 static const int PIN_IMPORTING = 7;
61 static const int PIN_IMPORTBOUND = 9;
62 static const int PIN_EXPORTBOUND = 10;
63 static const int PIN_STICKY = 11;
64 static const int PIN_SUBTREETEMP = 12; // used by MDCache::trim_non_auth()
11fdf7f2 65 std::string_view pin_name(int p) const override {
7c673cae
FG
66 switch (p) {
67 case PIN_DNWAITER: return "dnwaiter";
68 case PIN_INOWAITER: return "inowaiter";
69 case PIN_CHILD: return "child";
70 case PIN_FROZEN: return "frozen";
71 case PIN_SUBTREE: return "subtree";
72 case PIN_IMPORTING: return "importing";
73 case PIN_IMPORTBOUND: return "importbound";
74 case PIN_EXPORTBOUND: return "exportbound";
75 case PIN_STICKY: return "sticky";
76 case PIN_SUBTREETEMP: return "subtreetemp";
77 default: return generic_pin_name(p);
78 }
79 }
80
81 // -- state --
11fdf7f2
TL
82 static const unsigned STATE_COMPLETE = (1<< 0); // the complete contents are in cache
83 static const unsigned STATE_FROZENTREE = (1<< 1); // root of tree (bounded by exports)
84 static const unsigned STATE_FREEZINGTREE = (1<< 2); // in process of freezing
85 static const unsigned STATE_FROZENDIR = (1<< 3);
86 static const unsigned STATE_FREEZINGDIR = (1<< 4);
87 static const unsigned STATE_COMMITTING = (1<< 5); // mid-commit
88 static const unsigned STATE_FETCHING = (1<< 6); // currently fetching
89 static const unsigned STATE_CREATING = (1<< 7);
90 static const unsigned STATE_IMPORTBOUND = (1<< 8);
91 static const unsigned STATE_EXPORTBOUND = (1<< 9);
92 static const unsigned STATE_EXPORTING = (1<<10);
93 static const unsigned STATE_IMPORTING = (1<<11);
94 static const unsigned STATE_FRAGMENTING = (1<<12);
95 static const unsigned STATE_STICKY = (1<<13); // sticky pin due to inode stickydirs
96 static const unsigned STATE_DNPINNEDFRAG = (1<<14); // dir is refragmenting
97 static const unsigned STATE_ASSIMRSTAT = (1<<15); // assimilating inode->frag rstats
98 static const unsigned STATE_DIRTYDFT = (1<<16); // dirty dirfragtree
99 static const unsigned STATE_BADFRAG = (1<<17); // bad dirfrag
100 static const unsigned STATE_TRACKEDBYOFT = (1<<18); // tracked by open file table
101 static const unsigned STATE_AUXSUBTREE = (1<<19); // no subtree merge
7c673cae
FG
102
103 // common states
104 static const unsigned STATE_CLEAN = 0;
7c673cae
FG
105
106 // these state bits are preserved by an import/export
107 // ...except if the directory is hashed, in which case none of them are!
108 static const unsigned MASK_STATE_EXPORTED =
109 (STATE_COMPLETE|STATE_DIRTY|STATE_DIRTYDFT|STATE_BADFRAG);
110 static const unsigned MASK_STATE_IMPORT_KEPT =
111 (
11fdf7f2
TL
112 STATE_IMPORTING |
113 STATE_IMPORTBOUND |
114 STATE_EXPORTBOUND |
115 STATE_FROZENTREE |
116 STATE_STICKY |
117 STATE_TRACKEDBYOFT);
7c673cae 118 static const unsigned MASK_STATE_EXPORT_KEPT =
11fdf7f2
TL
119 (STATE_EXPORTING |
120 STATE_IMPORTBOUND |
121 STATE_EXPORTBOUND |
122 STATE_FROZENTREE |
123 STATE_FROZENDIR |
124 STATE_STICKY |
125 STATE_TRACKEDBYOFT);
7c673cae 126 static const unsigned MASK_STATE_FRAGMENT_KEPT =
11fdf7f2 127 (STATE_DIRTY |
7c673cae
FG
128 STATE_EXPORTBOUND |
129 STATE_IMPORTBOUND |
130 STATE_AUXSUBTREE |
131 STATE_REJOINUNDEF);
132
133 // -- rep spec --
134 static const int REP_NONE = 0;
135 static const int REP_ALL = 1;
136 static const int REP_LIST = 2;
137
138
139 static const unsigned EXPORT_NONCE = 1;
140
141
142 // -- wait masks --
143 static const uint64_t WAIT_DENTRY = (1<<0); // wait for item to be in cache
144 static const uint64_t WAIT_COMPLETE = (1<<1); // wait for complete dir contents
145 static const uint64_t WAIT_FROZEN = (1<<2); // auth pins removed
146 static const uint64_t WAIT_CREATED = (1<<3); // new dirfrag is logged
147
148 static const int WAIT_DNLOCK_OFFSET = 4;
149
150 static const uint64_t WAIT_ANY_MASK = (uint64_t)(-1);
7c673cae
FG
151 static const uint64_t WAIT_ATSUBTREEROOT = (WAIT_SINGLEAUTH);
152
11fdf7f2
TL
153 // -- dump flags --
154 static const int DUMP_PATH = (1 << 0);
155 static const int DUMP_DIRFRAG = (1 << 1);
156 static const int DUMP_SNAPID_FIRST = (1 << 2);
157 static const int DUMP_VERSIONS = (1 << 3);
158 static const int DUMP_REP = (1 << 4);
159 static const int DUMP_DIR_AUTH = (1 << 5);
160 static const int DUMP_STATES = (1 << 6);
161 static const int DUMP_MDS_CACHE_OBJECT = (1 << 7);
162 static const int DUMP_ITEMS = (1 << 8);
163 static const int DUMP_ALL = (-1);
164 static const int DUMP_DEFAULT = DUMP_ALL & (~DUMP_ITEMS);
7c673cae
FG
165
166 public:
167 // context
168 MDCache *cache;
169
170 CInode *inode; // my inode
171 frag_t frag; // my frag
172
173 bool is_lt(const MDSCacheObject *r) const override {
174 return dirfrag() < (static_cast<const CDir*>(r))->dirfrag();
175 }
176
177 fnode_t fnode;
178 snapid_t first;
94b18763 179 mempool::mds_co::compact_map<snapid_t,old_rstat_t> dirty_old_rstat; // [value.first,key]
7c673cae
FG
180
181 // my inodes with dirty rstat data
182 elist<CInode*> dirty_rstat_inodes;
183
184 void resync_accounted_fragstat();
185 void resync_accounted_rstat();
186 void assimilate_dirty_rstat_inodes();
187 void assimilate_dirty_rstat_inodes_finish(MutationRef& mut, EMetaBlob *blob);
188
1adf2230
AA
189 void mark_exporting() {
190 state_set(CDir::STATE_EXPORTING);
191 inode->num_exporting_dirs++;
192 }
193 void clear_exporting() {
194 state_clear(CDir::STATE_EXPORTING);
195 inode->num_exporting_dirs--;
196 }
197
7c673cae
FG
198protected:
199 version_t projected_version;
94b18763 200 mempool::mds_co::list<fnode_t> projected_fnode;
7c673cae
FG
201
202public:
b32b8144 203 elist<CDentry*> dirty_dentries;
7c673cae
FG
204 elist<CDir*>::item item_dirty, item_new;
205
7c673cae
FG
206public:
207 version_t get_version() const { return fnode.version; }
208 void set_version(version_t v) {
11fdf7f2 209 ceph_assert(projected_fnode.empty());
7c673cae
FG
210 projected_version = fnode.version = v;
211 }
212 version_t get_projected_version() const { return projected_version; }
213
214 const fnode_t *get_projected_fnode() const {
215 if (projected_fnode.empty())
216 return &fnode;
217 else
94b18763 218 return &projected_fnode.back();
7c673cae
FG
219 }
220
221 fnode_t *get_projected_fnode() {
222 if (projected_fnode.empty())
223 return &fnode;
224 else
94b18763 225 return &projected_fnode.back();
7c673cae
FG
226 }
227 fnode_t *project_fnode();
228
229 void pop_and_dirty_projected_fnode(LogSegment *ls);
230 bool is_projected() const { return !projected_fnode.empty(); }
231 version_t pre_dirty(version_t min=0);
232 void _mark_dirty(LogSegment *ls);
233 void _set_dirty_flag() {
234 if (!state_test(STATE_DIRTY)) {
235 state_set(STATE_DIRTY);
236 get(PIN_DIRTY);
237 }
238 }
239 void mark_dirty(version_t pv, LogSegment *ls);
240 void mark_clean();
241
242 bool is_new() { return item_new.is_on_list(); }
243 void mark_new(LogSegment *ls);
244
245 bool is_bad() { return state_test(STATE_BADFRAG); }
246private:
247 void log_mark_dirty();
248
249public:
94b18763
FG
250 typedef mempool::mds_co::map<dentry_key_t, CDentry*> dentry_key_map;
251 typedef mempool::mds_co::set<dentry_key_t> dentry_key_set;
7c673cae
FG
252
253 class scrub_info_t {
254 public:
255 /// inodes we contain with dirty scrub stamps
94b18763 256 dentry_key_map dirty_scrub_stamps; // TODO: make use of this!
7c673cae
FG
257 struct scrub_stamps {
258 version_t version;
259 utime_t time;
260 scrub_stamps() : version(0) {}
261 void operator=(const scrub_stamps &o) {
262 version = o.version;
263 time = o.time;
264 }
265 };
266
267 scrub_stamps recursive_start; // when we last started a recursive scrub
268 scrub_stamps last_recursive; // when we last finished a recursive scrub
269 scrub_stamps last_local; // when we last did a local scrub
270
271 bool directory_scrubbing; /// safety check
272 bool need_scrub_local;
273 bool last_scrub_dirty; /// is scrub info dirty or is it flushed to fnode?
274 bool pending_scrub_error;
275
276 /// these are lists of children in each stage of scrubbing
94b18763
FG
277 dentry_key_set directories_to_scrub;
278 dentry_key_set directories_scrubbing;
279 dentry_key_set directories_scrubbed;
280 dentry_key_set others_to_scrub;
281 dentry_key_set others_scrubbing;
282 dentry_key_set others_scrubbed;
7c673cae
FG
283
284 ScrubHeaderRefConst header;
285
286 scrub_info_t() :
287 directory_scrubbing(false),
288 need_scrub_local(false),
289 last_scrub_dirty(false),
290 pending_scrub_error(false) {}
291 };
292 /**
293 * Call to start this CDir on a new scrub.
294 * @pre It is not currently scrubbing
295 * @pre The CDir is marked complete.
296 * @post It has set up its internal scrubbing state.
297 */
298 void scrub_initialize(const ScrubHeaderRefConst& header);
299 /**
300 * Get the next dentry to scrub. Gives you a CDentry* and its meaning. This
301 * function will give you all directory-representing dentries before any
302 * others.
303 * 0: success, you should scrub this CDentry right now
304 * EAGAIN: is currently fetching the next CDentry into memory for you.
305 * It will activate your callback when done; try again when it does!
306 * ENOENT: there are no remaining dentries to scrub
307 * <0: There was an unexpected error
308 *
11fdf7f2 309 * @param cb An MDSContext which will be activated only if
7c673cae
FG
310 * we return EAGAIN via rcode, or else ignored
311 * @param dnout CDentry * which you should next scrub, or NULL
312 * @returns a value as described above
313 */
11fdf7f2 314 int scrub_dentry_next(MDSContext *cb, CDentry **dnout);
7c673cae
FG
315 /**
316 * Get the currently scrubbing dentries. When returned, the passed-in
317 * list will be filled with all CDentry * which have been returned
318 * from scrub_dentry_next() but not sent back via scrub_dentry_finished().
319 */
94b18763 320 void scrub_dentries_scrubbing(std::list<CDentry*> *out_dentries);
7c673cae
FG
321 /**
322 * Report to the CDir that a CDentry has been scrubbed. Call this
323 * for every CDentry returned from scrub_dentry_next().
324 * @param dn The CDentry which has been scrubbed.
325 */
326 void scrub_dentry_finished(CDentry *dn);
327 /**
328 * Call this once all CDentries have been scrubbed, according to
329 * scrub_dentry_next's listing. It finalizes the scrub statistics.
330 */
331 void scrub_finished();
332 /**
333 * Tell the CDir to do a local scrub of itself.
334 * @pre The CDir is_complete().
335 * @returns true if the rstats and directory contents match, false otherwise.
336 */
337 bool scrub_local();
338private:
339 /**
340 * Create a scrub_info_t struct for the scrub_infop pointer.
341 */
342 void scrub_info_create() const;
343 /**
344 * Delete the scrub_infop if it's not got any useful data.
345 */
346 void scrub_maybe_delete_info();
347 /**
348 * Check the given set (presumably one of those in scrub_info_t) for the
349 * next key to scrub and look it up (or fail!).
350 */
94b18763 351 int _next_dentry_on_set(dentry_key_set &dns, bool missing_okay,
11fdf7f2 352 MDSContext *cb, CDentry **dnout);
7c673cae
FG
353
354
355protected:
94b18763 356 std::unique_ptr<scrub_info_t> scrub_infop; // FIXME not in mempool
7c673cae
FG
357
358 // contents of this directory
94b18763 359 dentry_key_map items; // non-null AND null
7c673cae
FG
360 unsigned num_head_items;
361 unsigned num_head_null;
362 unsigned num_snap_items;
363 unsigned num_snap_null;
364
365 int num_dirty;
366
11fdf7f2
TL
367 int num_inodes_with_caps = 0;
368
7c673cae
FG
369 // state
370 version_t committing_version;
371 version_t committed_version;
372
94b18763 373 mempool::mds_co::compact_set<mempool::mds_co::string> stale_items;
7c673cae
FG
374
375 // lock nesting, freeze
376 static int num_frozen_trees;
377 static int num_freezing_trees;
378
379 int dir_auth_pins;
7c673cae
FG
380
381 // cache control (defined for authority; hints for replicas)
382 __s32 dir_rep;
94b18763 383 mempool::mds_co::compact_set<__s32> dir_rep_by; // if dir_rep == REP_LIST
7c673cae
FG
384
385 // popularity
386 dirfrag_load_vec_t pop_me;
387 dirfrag_load_vec_t pop_nested;
388 dirfrag_load_vec_t pop_auth_subtree;
389 dirfrag_load_vec_t pop_auth_subtree_nested;
390
11fdf7f2 391 time last_popularity_sample = clock::zero();
7c673cae
FG
392
393 load_spread_t pop_spread;
394
28e407b8
AA
395 elist<CInode*> pop_lru_subdirs;
396
7c673cae
FG
397 // and to provide density
398 int num_dentries_nested;
399 int num_dentries_auth_subtree;
400 int num_dentries_auth_subtree_nested;
401
402
403 // friends
404 friend class Migrator;
405 friend class CInode;
406 friend class MDCache;
407 friend class MDiscover;
408 friend class MDBalancer;
409
410 friend class CDirDiscover;
411 friend class CDirExport;
412 friend class C_IO_Dir_TMAP_Fetched;
413 friend class C_IO_Dir_OMAP_Fetched;
414 friend class C_IO_Dir_OMAP_FetchedMore;
415 friend class C_IO_Dir_Committed;
416
94b18763 417 std::unique_ptr<bloom_filter> bloom; // XXX not part of mempool::mds_co
7c673cae
FG
418 /* If you set up the bloom filter, you must keep it accurate!
419 * It's deleted when you mark_complete() and is deliberately not serialized.*/
420
421 public:
422 CDir(CInode *in, frag_t fg, MDCache *mdcache, bool auth);
423
424 const scrub_info_t *scrub_info() const {
425 if (!scrub_infop) {
426 scrub_info_create();
427 }
428 return scrub_infop.get();
429 }
430
431
432 // -- accessors --
433 inodeno_t ino() const { return inode->ino(); } // deprecate me?
434 frag_t get_frag() const { return frag; }
435 dirfrag_t dirfrag() const { return dirfrag_t(inode->ino(), frag); }
436
437 CInode *get_inode() { return inode; }
438 const CInode *get_inode() const { return inode; }
439 CDir *get_parent_dir() { return inode->get_parent_dir(); }
440
94b18763
FG
441 dentry_key_map::iterator begin() { return items.begin(); }
442 dentry_key_map::iterator end() { return items.end(); }
443 dentry_key_map::iterator lower_bound(dentry_key_t key) { return items.lower_bound(key); }
7c673cae
FG
444
445 unsigned get_num_head_items() const { return num_head_items; }
446 unsigned get_num_head_null() const { return num_head_null; }
447 unsigned get_num_snap_items() const { return num_snap_items; }
448 unsigned get_num_snap_null() const { return num_snap_null; }
449 unsigned get_num_any() const { return num_head_items + num_head_null + num_snap_items + num_snap_null; }
450
451 bool check_rstats(bool scrub=false);
452
453 void inc_num_dirty() { num_dirty++; }
454 void dec_num_dirty() {
11fdf7f2 455 ceph_assert(num_dirty > 0);
7c673cae
FG
456 num_dirty--;
457 }
458 int get_num_dirty() const {
459 return num_dirty;
460 }
461
11fdf7f2
TL
462 void adjust_num_inodes_with_caps(int d);
463
7c673cae
FG
464 int64_t get_frag_size() const {
465 return get_projected_fnode()->fragstat.size();
466 }
467
468 // -- dentries and inodes --
469 public:
11fdf7f2
TL
470 CDentry* lookup_exact_snap(std::string_view dname, snapid_t last);
471 CDentry* lookup(std::string_view n, snapid_t snap=CEPH_NOSNAP);
7c673cae 472
11fdf7f2 473 CDentry* add_null_dentry(std::string_view dname,
7c673cae 474 snapid_t first=2, snapid_t last=CEPH_NOSNAP);
11fdf7f2 475 CDentry* add_primary_dentry(std::string_view dname, CInode *in,
7c673cae 476 snapid_t first=2, snapid_t last=CEPH_NOSNAP);
11fdf7f2 477 CDentry* add_remote_dentry(std::string_view dname, inodeno_t ino, unsigned char d_type,
7c673cae
FG
478 snapid_t first=2, snapid_t last=CEPH_NOSNAP);
479 void remove_dentry( CDentry *dn ); // delete dentry
480 void link_remote_inode( CDentry *dn, inodeno_t ino, unsigned char d_type);
481 void link_remote_inode( CDentry *dn, CInode *in );
482 void link_primary_inode( CDentry *dn, CInode *in );
31f18b77 483 void unlink_inode(CDentry *dn, bool adjust_lru=true);
7c673cae
FG
484 void try_remove_unlinked_dn(CDentry *dn);
485
486 void add_to_bloom(CDentry *dn);
11fdf7f2 487 bool is_in_bloom(std::string_view name);
7c673cae
FG
488 bool has_bloom() { return (bloom ? true : false); }
489 void remove_bloom() {
490 bloom.reset();
491 }
492private:
493 void link_inode_work( CDentry *dn, CInode *in );
494 void unlink_inode_work( CDentry *dn );
495 void remove_null_dentries();
496 void purge_stale_snap_data(const std::set<snapid_t>& snaps);
497public:
7c673cae
FG
498 void try_remove_dentries_for_stray();
499 bool try_trim_snap_dentry(CDentry *dn, const std::set<snapid_t>& snaps);
500
501
502public:
11fdf7f2
TL
503 void split(int bits, std::list<CDir*>& subs, MDSContext::vec& waiters, bool replay);
504 void merge(std::list<CDir*>& subs, MDSContext::vec& waiters, bool replay);
7c673cae
FG
505
506 bool should_split() const {
11fdf7f2 507 return (int)get_frag_size() > g_conf()->mds_bal_split_size;
7c673cae
FG
508 }
509 bool should_split_fast() const;
510 bool should_merge() const {
11fdf7f2 511 return (int)get_frag_size() < g_conf()->mds_bal_merge_size;
7c673cae
FG
512 }
513
514private:
515 void prepare_new_fragment(bool replay);
11fdf7f2 516 void prepare_old_fragment(map<string_snap_t, MDSContext::vec >& dentry_waiters, bool replay);
7c673cae 517 void steal_dentry(CDentry *dn); // from another dir. used by merge/split.
11fdf7f2 518 void finish_old_fragment(MDSContext::vec& waiters, bool replay);
7c673cae
FG
519 void init_fragment_pins();
520
521
522 // -- authority --
523 /*
524 * normal: <parent,unknown> !subtree_root
525 * delegation: <mds,unknown> subtree_root
526 * ambiguous: <mds1,mds2> subtree_root
527 * <parent,mds2> subtree_root
528 */
529 mds_authority_t dir_auth;
530
531 std::string get_path() const;
532
533 public:
534 mds_authority_t authority() const override;
535 mds_authority_t get_dir_auth() const { return dir_auth; }
11fdf7f2 536 void set_dir_auth(const mds_authority_t &a);
7c673cae
FG
537 void set_dir_auth(mds_rank_t a) { set_dir_auth(mds_authority_t(a, CDIR_AUTH_UNKNOWN)); }
538 bool is_ambiguous_dir_auth() const {
539 return dir_auth.second != CDIR_AUTH_UNKNOWN;
540 }
541 bool is_full_dir_auth() const {
542 return is_auth() && !is_ambiguous_dir_auth();
543 }
544 bool is_full_dir_nonauth() const {
545 return !is_auth() && !is_ambiguous_dir_auth();
546 }
547
548 bool is_subtree_root() const {
549 return dir_auth != CDIR_AUTH_DEFAULT;
550 }
551
552 bool contains(CDir *x); // true if we are x or an ancestor of x
553
554
555 // for giving to clients
556 void get_dist_spec(std::set<mds_rank_t>& ls, mds_rank_t auth) {
557 if (is_rep()) {
558 list_replicas(ls);
559 if (!ls.empty())
560 ls.insert(auth);
561 }
562 }
7c673cae 563
11fdf7f2 564 static void encode_dirstat(bufferlist& bl, const session_info_t& info, const DirStat& ds);
7c673cae
FG
565
566 void _encode_base(bufferlist& bl) {
11fdf7f2
TL
567 encode(first, bl);
568 encode(fnode, bl);
569 encode(dir_rep, bl);
570 encode(dir_rep_by, bl);
7c673cae 571 }
11fdf7f2
TL
572 void _decode_base(bufferlist::const_iterator& p) {
573 decode(first, p);
574 decode(fnode, p);
575 decode(dir_rep, p);
576 decode(dir_rep_by, p);
7c673cae
FG
577 }
578 void encode_replica(mds_rank_t who, bufferlist& bl) {
579 __u32 nonce = add_replica(who);
11fdf7f2 580 encode(nonce, bl);
7c673cae
FG
581 _encode_base(bl);
582 }
11fdf7f2 583 void decode_replica(bufferlist::const_iterator& p) {
7c673cae 584 __u32 nonce;
11fdf7f2 585 decode(nonce, p);
7c673cae
FG
586 replica_nonce = nonce;
587 _decode_base(p);
588 }
589
590
591
592 // -- state --
593 bool is_complete() { return state & STATE_COMPLETE; }
594 bool is_exporting() { return state & STATE_EXPORTING; }
595 bool is_importing() { return state & STATE_IMPORTING; }
596 bool is_dirty_dft() { return state & STATE_DIRTYDFT; }
597
598 int get_dir_rep() const { return dir_rep; }
599 bool is_rep() const {
600 if (dir_rep == REP_NONE) return false;
601 return true;
602 }
603
604 // -- fetch --
605 object_t get_ondisk_object() {
606 return file_object_t(ino(), frag);
607 }
11fdf7f2
TL
608 void fetch(MDSContext *c, bool ignore_authpinnability=false);
609 void fetch(MDSContext *c, std::string_view want_dn, bool ignore_authpinnability=false);
610 void fetch(MDSContext *c, const std::set<dentry_key_t>& keys);
7c673cae 611protected:
94b18763 612 mempool::mds_co::compact_set<mempool::mds_co::string> wanted_items;
7c673cae 613
11fdf7f2 614 void _omap_fetch(MDSContext *fin, const std::set<dentry_key_t>& keys);
7c673cae
FG
615 void _omap_fetch_more(
616 bufferlist& hdrbl, std::map<std::string, bufferlist>& omap,
11fdf7f2 617 MDSContext *fin);
7c673cae 618 CDentry *_load_dentry(
11fdf7f2
TL
619 std::string_view key,
620 std::string_view dname,
7c673cae
FG
621 snapid_t last,
622 bufferlist &bl,
623 int pos,
624 const std::set<snapid_t> *snaps,
28e407b8 625 bool *force_dirty);
7c673cae
FG
626
627 /**
628 * Mark this fragment as BADFRAG (common part of go_bad and go_bad_dentry)
629 */
630 void _go_bad();
631
632 /**
633 * Go bad due to a damaged dentry (register with damagetable and go BADFRAG)
634 */
11fdf7f2 635 void go_bad_dentry(snapid_t last, std::string_view dname);
7c673cae
FG
636
637 /**
638 * Go bad due to a damaged header (register with damagetable and go BADFRAG)
639 */
640 void go_bad(bool complete);
641
642 void _omap_fetched(bufferlist& hdrbl, std::map<std::string, bufferlist>& omap,
643 bool complete, int r);
644
645 // -- commit --
11fdf7f2 646 mempool::mds_co::compact_map<version_t, MDSContext::vec_alloc<mempool::mds_co::pool_allocator> > waiting_for_commit;
7c673cae
FG
647 void _commit(version_t want, int op_prio);
648 void _omap_commit(int op_prio);
649 void _encode_dentry(CDentry *dn, bufferlist& bl, const std::set<snapid_t> *snaps);
650 void _committed(int r, version_t v);
651public:
652#if 0 // unused?
653 void wait_for_commit(Context *c, version_t v=0);
654#endif
655 void commit_to(version_t want);
11fdf7f2 656 void commit(version_t want, MDSContext *c,
7c673cae
FG
657 bool ignore_authpinnability=false, int op_prio=-1);
658
659 // -- dirtyness --
660 version_t get_committing_version() const { return committing_version; }
661 version_t get_committed_version() const { return committed_version; }
662 void set_committed_version(version_t v) { committed_version = v; }
663
664 void mark_complete();
665
666
667 // -- reference counting --
668 void first_get() override;
669 void last_put() override;
670
7c673cae
FG
671 // -- waiters --
672protected:
11fdf7f2 673 mempool::mds_co::compact_map< string_snap_t, MDSContext::vec_alloc<mempool::mds_co::pool_allocator> > waiting_on_dentry; // FIXME string_snap_t not in mempool
7c673cae
FG
674
675public:
11fdf7f2 676 bool is_waiting_for_dentry(std::string_view dname, snapid_t snap) {
7c673cae
FG
677 return waiting_on_dentry.count(string_snap_t(dname, snap));
678 }
11fdf7f2
TL
679 void add_dentry_waiter(std::string_view dentry, snapid_t snap, MDSContext *c);
680 void take_dentry_waiting(std::string_view dentry, snapid_t first, snapid_t last, MDSContext::vec& ls);
681 void take_sub_waiting(MDSContext::vec& ls); // dentry or ino
7c673cae 682
11fdf7f2
TL
683 void add_waiter(uint64_t mask, MDSContext *c) override;
684 void take_waiting(uint64_t mask, MDSContext::vec& ls) override; // may include dentry waiters
7c673cae
FG
685 void finish_waiting(uint64_t mask, int result = 0); // ditto
686
687
688 // -- import/export --
689 void encode_export(bufferlist& bl);
11fdf7f2 690 void finish_export();
7c673cae
FG
691 void abort_export() {
692 put(PIN_TEMPEXPORTING);
693 }
11fdf7f2
TL
694 void decode_import(bufferlist::const_iterator& blp, LogSegment *ls);
695 void abort_import();
7c673cae
FG
696
697 // -- auth pins --
91327a77 698 bool can_auth_pin(int *err_ret=nullptr) const override;
7c673cae 699 int get_auth_pins() const { return auth_pins; }
7c673cae
FG
700 int get_dir_auth_pins() const { return dir_auth_pins; }
701 void auth_pin(void *who) override;
702 void auth_unpin(void *who) override;
703
11fdf7f2 704 void adjust_nested_auth_pins(int dirinc, void *by);
7c673cae
FG
705 void verify_fragstat();
706
707 // -- freezing --
11fdf7f2
TL
708 struct freeze_tree_state_t {
709 CDir *dir; // freezing/frozen tree root
710 int auth_pins = 0;
711 bool frozen = false;
712 freeze_tree_state_t(CDir *d) : dir(d) {}
713 };
714 // all dirfrags within freezing/frozen tree reference the 'state'
715 std::shared_ptr<freeze_tree_state_t> freeze_tree_state;
716
717 void _walk_tree(std::function<bool(CDir*)> cb);
718
7c673cae
FG
719 bool freeze_tree();
720 void _freeze_tree();
721 void unfreeze_tree();
11fdf7f2 722 void adjust_freeze_after_rename(CDir *dir);
7c673cae
FG
723
724 bool freeze_dir();
725 void _freeze_dir();
726 void unfreeze_dir();
727
728 void maybe_finish_freeze();
729
11fdf7f2
TL
730 pair<bool,bool> is_freezing_or_frozen_tree() const {
731 if (freeze_tree_state) {
732 if (freeze_tree_state->frozen)
733 return make_pair(false, true);
734 return make_pair(true, false);
735 }
736 return make_pair(false, false);
737 }
91327a77
AA
738
739 bool is_freezing() const override { return is_freezing_dir() || is_freezing_tree(); }
740 bool is_freezing_tree() const {
741 if (!num_freezing_trees)
742 return false;
743 return is_freezing_or_frozen_tree().first;
744 }
7c673cae
FG
745 bool is_freezing_tree_root() const { return state & STATE_FREEZINGTREE; }
746 bool is_freezing_dir() const { return state & STATE_FREEZINGDIR; }
747
748 bool is_frozen() const override { return is_frozen_dir() || is_frozen_tree(); }
91327a77
AA
749 bool is_frozen_tree() const {
750 if (!num_frozen_trees)
751 return false;
752 return is_freezing_or_frozen_tree().second;
753 }
7c673cae
FG
754 bool is_frozen_tree_root() const { return state & STATE_FROZENTREE; }
755 bool is_frozen_dir() const { return state & STATE_FROZENDIR; }
11fdf7f2 756
7c673cae
FG
757 bool is_freezeable(bool freezing=false) const {
758 // no nested auth pins.
11fdf7f2
TL
759 if (auth_pins - (freezing ? 1 : 0) > 0 ||
760 (freeze_tree_state && freeze_tree_state->auth_pins != auth_pins))
7c673cae
FG
761 return false;
762
763 // inode must not be frozen.
764 if (!is_subtree_root() && inode->is_frozen())
765 return false;
766
767 return true;
768 }
11fdf7f2 769
7c673cae 770 bool is_freezeable_dir(bool freezing=false) const {
11fdf7f2 771 if ((auth_pins - freezing) > 0 || dir_auth_pins > 0)
7c673cae
FG
772 return false;
773
774 // if not subtree root, inode must not be frozen (tree--frozen_dir is okay).
775 if (!is_subtree_root() && inode->is_frozen() && !inode->is_frozen_dir())
776 return false;
777
778 return true;
779 }
780
7c673cae
FG
781 ostream& print_db_line_prefix(ostream& out) override;
782 void print(ostream& out) override;
11fdf7f2
TL
783 void dump(Formatter *f, int flags = DUMP_DEFAULT) const;
784 void dump_load(Formatter *f);
7c673cae
FG
785};
786
787#endif