]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | ||
7c673cae FG |
16 | #ifndef CEPH_CDIR_H |
17 | #define CEPH_CDIR_H | |
18 | ||
7c673cae | 19 | #include <iosfwd> |
7c673cae | 20 | #include <list> |
7c673cae | 21 | #include <map> |
94b18763 | 22 | #include <set> |
7c673cae | 23 | #include <string> |
11fdf7f2 | 24 | #include <string_view> |
7c673cae | 25 | |
94b18763 FG |
26 | #include "common/bloom_filter.hpp" |
27 | #include "common/config.h" | |
28 | #include "include/buffer_fwd.h" | |
29 | #include "include/counter.h" | |
30 | #include "include/types.h" | |
7c673cae FG |
31 | |
32 | #include "CInode.h" | |
94b18763 | 33 | #include "MDSCacheObject.h" |
11fdf7f2 TL |
34 | #include "MDSContext.h" |
35 | #include "cephfs_features.h" | |
36 | #include "SessionMap.h" | |
37 | #include "messages/MClientReply.h" | |
7c673cae FG |
38 | |
39 | class CDentry; | |
40 | class MDCache; | |
41 | ||
f67539c2 | 42 | std::ostream& operator<<(std::ostream& out, const class CDir& dir); |
7c673cae | 43 | |
9f95a23c | 44 | class CDir : public MDSCacheObject, public Counter<CDir> { |
7c673cae | 45 | public: |
181888fb | 46 | MEMPOOL_CLASS_HELPERS(); |
9f95a23c TL |
47 | |
48 | typedef mempool::mds_co::map<dentry_key_t, CDentry*> dentry_key_map; | |
49 | typedef mempool::mds_co::set<dentry_key_t> dentry_key_set; | |
50 | ||
f67539c2 TL |
51 | using fnode_ptr = std::shared_ptr<fnode_t>; |
52 | using fnode_const_ptr = std::shared_ptr<const fnode_t>; | |
53 | ||
54 | template <typename ...Args> | |
55 | static fnode_ptr allocate_fnode(Args && ...args) { | |
56 | static mempool::mds_co::pool_allocator<fnode_t> allocator; | |
57 | return std::allocate_shared<fnode_t>(allocator, std::forward<Args>(args)...); | |
58 | } | |
59 | ||
60 | struct dentry_commit_item { | |
20effc67 | 61 | std::string key; |
f67539c2 TL |
62 | snapid_t first; |
63 | bool is_remote = false; | |
64 | ||
65 | inodeno_t ino; | |
66 | unsigned char d_type; | |
67 | mempool::mds_co::string alternate_name; | |
68 | ||
69 | bool snaprealm = false; | |
70 | sr_t srnode; | |
71 | ||
72 | mempool::mds_co::string symlink; | |
73 | uint64_t features; | |
74 | uint64_t dft_len; | |
75 | CInode::inode_const_ptr inode; | |
76 | CInode::xattr_map_const_ptr xattrs; | |
77 | CInode::old_inode_map_const_ptr old_inodes; | |
78 | snapid_t oldest_snap; | |
79 | damage_flags_t damage_flags; | |
80 | }; | |
81 | ||
9f95a23c TL |
82 | // -- freezing -- |
83 | struct freeze_tree_state_t { | |
84 | CDir *dir; // freezing/frozen tree root | |
85 | int auth_pins = 0; | |
86 | bool frozen = false; | |
87 | freeze_tree_state_t(CDir *d) : dir(d) {} | |
88 | }; | |
89 | ||
90 | class scrub_info_t { | |
91 | public: | |
92 | MEMPOOL_CLASS_HELPERS(); | |
93 | struct scrub_stamps { | |
f67539c2 | 94 | version_t version = 0; |
9f95a23c | 95 | utime_t time; |
9f95a23c TL |
96 | }; |
97 | ||
f67539c2 | 98 | scrub_info_t() {} |
9f95a23c | 99 | |
9f95a23c TL |
100 | scrub_stamps last_recursive; // when we last finished a recursive scrub |
101 | scrub_stamps last_local; // when we last did a local scrub | |
102 | ||
f67539c2 TL |
103 | bool directory_scrubbing = false; /// safety check |
104 | bool last_scrub_dirty = false; /// is scrub info dirty or is it flushed to fnode? | |
9f95a23c | 105 | |
f67539c2 | 106 | ScrubHeaderRef header; |
9f95a23c TL |
107 | }; |
108 | ||
7c673cae FG |
109 | // -- pins -- |
110 | static const int PIN_DNWAITER = 1; | |
111 | static const int PIN_INOWAITER = 2; | |
112 | static const int PIN_CHILD = 3; | |
113 | static const int PIN_FROZEN = 4; | |
114 | static const int PIN_SUBTREE = 5; | |
115 | static const int PIN_IMPORTING = 7; | |
116 | static const int PIN_IMPORTBOUND = 9; | |
117 | static const int PIN_EXPORTBOUND = 10; | |
118 | static const int PIN_STICKY = 11; | |
119 | static const int PIN_SUBTREETEMP = 12; // used by MDCache::trim_non_auth() | |
7c673cae FG |
120 | |
121 | // -- state -- | |
9f95a23c TL |
122 | static const unsigned STATE_COMPLETE = (1<< 0); // the complete contents are in cache |
123 | static const unsigned STATE_FROZENTREE = (1<< 1); // root of tree (bounded by exports) | |
124 | static const unsigned STATE_FREEZINGTREE = (1<< 2); // in process of freezing | |
11fdf7f2 TL |
125 | static const unsigned STATE_FROZENDIR = (1<< 3); |
126 | static const unsigned STATE_FREEZINGDIR = (1<< 4); | |
9f95a23c TL |
127 | static const unsigned STATE_COMMITTING = (1<< 5); // mid-commit |
128 | static const unsigned STATE_FETCHING = (1<< 6); // currently fetching | |
11fdf7f2 TL |
129 | static const unsigned STATE_CREATING = (1<< 7); |
130 | static const unsigned STATE_IMPORTBOUND = (1<< 8); | |
131 | static const unsigned STATE_EXPORTBOUND = (1<< 9); | |
132 | static const unsigned STATE_EXPORTING = (1<<10); | |
133 | static const unsigned STATE_IMPORTING = (1<<11); | |
134 | static const unsigned STATE_FRAGMENTING = (1<<12); | |
135 | static const unsigned STATE_STICKY = (1<<13); // sticky pin due to inode stickydirs | |
136 | static const unsigned STATE_DNPINNEDFRAG = (1<<14); // dir is refragmenting | |
137 | static const unsigned STATE_ASSIMRSTAT = (1<<15); // assimilating inode->frag rstats | |
138 | static const unsigned STATE_DIRTYDFT = (1<<16); // dirty dirfragtree | |
139 | static const unsigned STATE_BADFRAG = (1<<17); // bad dirfrag | |
140 | static const unsigned STATE_TRACKEDBYOFT = (1<<18); // tracked by open file table | |
141 | static const unsigned STATE_AUXSUBTREE = (1<<19); // no subtree merge | |
7c673cae FG |
142 | |
143 | // common states | |
144 | static const unsigned STATE_CLEAN = 0; | |
7c673cae FG |
145 | |
146 | // these state bits are preserved by an import/export | |
147 | // ...except if the directory is hashed, in which case none of them are! | |
148 | static const unsigned MASK_STATE_EXPORTED = | |
149 | (STATE_COMPLETE|STATE_DIRTY|STATE_DIRTYDFT|STATE_BADFRAG); | |
150 | static const unsigned MASK_STATE_IMPORT_KEPT = | |
151 | ( | |
11fdf7f2 TL |
152 | STATE_IMPORTING | |
153 | STATE_IMPORTBOUND | | |
154 | STATE_EXPORTBOUND | | |
155 | STATE_FROZENTREE | | |
156 | STATE_STICKY | | |
157 | STATE_TRACKEDBYOFT); | |
7c673cae | 158 | static const unsigned MASK_STATE_EXPORT_KEPT = |
11fdf7f2 TL |
159 | (STATE_EXPORTING | |
160 | STATE_IMPORTBOUND | | |
161 | STATE_EXPORTBOUND | | |
162 | STATE_FROZENTREE | | |
163 | STATE_FROZENDIR | | |
164 | STATE_STICKY | | |
165 | STATE_TRACKEDBYOFT); | |
7c673cae | 166 | static const unsigned MASK_STATE_FRAGMENT_KEPT = |
11fdf7f2 | 167 | (STATE_DIRTY | |
7c673cae FG |
168 | STATE_EXPORTBOUND | |
169 | STATE_IMPORTBOUND | | |
170 | STATE_AUXSUBTREE | | |
171 | STATE_REJOINUNDEF); | |
172 | ||
173 | // -- rep spec -- | |
174 | static const int REP_NONE = 0; | |
175 | static const int REP_ALL = 1; | |
176 | static const int REP_LIST = 2; | |
177 | ||
7c673cae FG |
178 | static const unsigned EXPORT_NONCE = 1; |
179 | ||
7c673cae FG |
180 | // -- wait masks -- |
181 | static const uint64_t WAIT_DENTRY = (1<<0); // wait for item to be in cache | |
182 | static const uint64_t WAIT_COMPLETE = (1<<1); // wait for complete dir contents | |
183 | static const uint64_t WAIT_FROZEN = (1<<2); // auth pins removed | |
184 | static const uint64_t WAIT_CREATED = (1<<3); // new dirfrag is logged | |
185 | ||
186 | static const int WAIT_DNLOCK_OFFSET = 4; | |
187 | ||
188 | static const uint64_t WAIT_ANY_MASK = (uint64_t)(-1); | |
7c673cae FG |
189 | static const uint64_t WAIT_ATSUBTREEROOT = (WAIT_SINGLEAUTH); |
190 | ||
11fdf7f2 TL |
191 | // -- dump flags -- |
192 | static const int DUMP_PATH = (1 << 0); | |
193 | static const int DUMP_DIRFRAG = (1 << 1); | |
194 | static const int DUMP_SNAPID_FIRST = (1 << 2); | |
195 | static const int DUMP_VERSIONS = (1 << 3); | |
196 | static const int DUMP_REP = (1 << 4); | |
197 | static const int DUMP_DIR_AUTH = (1 << 5); | |
198 | static const int DUMP_STATES = (1 << 6); | |
199 | static const int DUMP_MDS_CACHE_OBJECT = (1 << 7); | |
200 | static const int DUMP_ITEMS = (1 << 8); | |
201 | static const int DUMP_ALL = (-1); | |
9f95a23c | 202 | static const int DUMP_DEFAULT = DUMP_ALL & (~DUMP_ITEMS); |
7c673cae | 203 | |
f67539c2 | 204 | CDir(CInode *in, frag_t fg, MDCache *mdc, bool auth); |
7c673cae | 205 | |
9f95a23c TL |
206 | std::string_view pin_name(int p) const override { |
207 | switch (p) { | |
208 | case PIN_DNWAITER: return "dnwaiter"; | |
209 | case PIN_INOWAITER: return "inowaiter"; | |
210 | case PIN_CHILD: return "child"; | |
211 | case PIN_FROZEN: return "frozen"; | |
212 | case PIN_SUBTREE: return "subtree"; | |
213 | case PIN_IMPORTING: return "importing"; | |
214 | case PIN_IMPORTBOUND: return "importbound"; | |
215 | case PIN_EXPORTBOUND: return "exportbound"; | |
216 | case PIN_STICKY: return "sticky"; | |
217 | case PIN_SUBTREETEMP: return "subtreetemp"; | |
218 | default: return generic_pin_name(p); | |
219 | } | |
220 | } | |
7c673cae FG |
221 | |
222 | bool is_lt(const MDSCacheObject *r) const override { | |
223 | return dirfrag() < (static_cast<const CDir*>(r))->dirfrag(); | |
224 | } | |
225 | ||
7c673cae FG |
226 | void resync_accounted_fragstat(); |
227 | void resync_accounted_rstat(); | |
f67539c2 TL |
228 | void assimilate_dirty_rstat_inodes(MutationRef& mut); |
229 | void assimilate_dirty_rstat_inodes_finish(EMetaBlob *blob); | |
7c673cae | 230 | |
1adf2230 AA |
231 | void mark_exporting() { |
232 | state_set(CDir::STATE_EXPORTING); | |
233 | inode->num_exporting_dirs++; | |
234 | } | |
235 | void clear_exporting() { | |
236 | state_clear(CDir::STATE_EXPORTING); | |
237 | inode->num_exporting_dirs--; | |
238 | } | |
239 | ||
f67539c2 TL |
240 | version_t get_version() const { return fnode->version; } |
241 | void update_projected_version() { | |
11fdf7f2 | 242 | ceph_assert(projected_fnode.empty()); |
f67539c2 | 243 | projected_version = fnode->version; |
7c673cae FG |
244 | } |
245 | version_t get_projected_version() const { return projected_version; } | |
246 | ||
f67539c2 TL |
247 | void reset_fnode(fnode_const_ptr&& ptr) { |
248 | fnode = std::move(ptr); | |
249 | } | |
2a845540 | 250 | void set_fresh_fnode(fnode_const_ptr&& ptr); |
f67539c2 TL |
251 | |
252 | const fnode_const_ptr& get_fnode() const { | |
253 | return fnode; | |
7c673cae FG |
254 | } |
255 | ||
f67539c2 TL |
256 | // only used for updating newly allocated CDir |
257 | fnode_t* _get_fnode() { | |
258 | if (fnode == empty_fnode) | |
259 | reset_fnode(allocate_fnode()); | |
260 | return const_cast<fnode_t*>(fnode.get()); | |
261 | } | |
262 | ||
263 | const fnode_const_ptr& get_projected_fnode() const { | |
7c673cae | 264 | if (projected_fnode.empty()) |
f67539c2 | 265 | return fnode; |
7c673cae | 266 | else |
f67539c2 | 267 | return projected_fnode.back(); |
7c673cae | 268 | } |
7c673cae | 269 | |
f67539c2 TL |
270 | // fnode should have already been projected in caller's context |
271 | fnode_t* _get_projected_fnode() { | |
272 | ceph_assert(!projected_fnode.empty()); | |
273 | return const_cast<fnode_t*>(projected_fnode.back().get()); | |
274 | } | |
275 | ||
276 | fnode_ptr project_fnode(const MutationRef& mut); | |
277 | ||
278 | void pop_and_dirty_projected_fnode(LogSegment *ls, const MutationRef& mut); | |
7c673cae FG |
279 | bool is_projected() const { return !projected_fnode.empty(); } |
280 | version_t pre_dirty(version_t min=0); | |
281 | void _mark_dirty(LogSegment *ls); | |
282 | void _set_dirty_flag() { | |
283 | if (!state_test(STATE_DIRTY)) { | |
284 | state_set(STATE_DIRTY); | |
285 | get(PIN_DIRTY); | |
286 | } | |
287 | } | |
f67539c2 | 288 | void mark_dirty(LogSegment *ls, version_t pv=0); |
7c673cae FG |
289 | void mark_clean(); |
290 | ||
291 | bool is_new() { return item_new.is_on_list(); } | |
292 | void mark_new(LogSegment *ls); | |
293 | ||
294 | bool is_bad() { return state_test(STATE_BADFRAG); } | |
7c673cae | 295 | |
7c673cae FG |
296 | /** |
297 | * Call to start this CDir on a new scrub. | |
298 | * @pre It is not currently scrubbing | |
299 | * @pre The CDir is marked complete. | |
300 | * @post It has set up its internal scrubbing state. | |
301 | */ | |
f67539c2 TL |
302 | void scrub_initialize(const ScrubHeaderRef& header); |
303 | const ScrubHeaderRef& get_scrub_header() { | |
304 | static const ScrubHeaderRef nullref; | |
305 | return scrub_infop ? scrub_infop->header : nullref; | |
306 | } | |
307 | ||
308 | bool scrub_is_in_progress() const { | |
309 | return (scrub_infop && scrub_infop->directory_scrubbing); | |
310 | } | |
311 | ||
7c673cae FG |
312 | /** |
313 | * Call this once all CDentries have been scrubbed, according to | |
314 | * scrub_dentry_next's listing. It finalizes the scrub statistics. | |
315 | */ | |
316 | void scrub_finished(); | |
f67539c2 TL |
317 | |
318 | void scrub_aborted(); | |
7c673cae FG |
319 | /** |
320 | * Tell the CDir to do a local scrub of itself. | |
321 | * @pre The CDir is_complete(). | |
322 | * @returns true if the rstats and directory contents match, false otherwise. | |
323 | */ | |
324 | bool scrub_local(); | |
7c673cae | 325 | |
1e59de90 TL |
326 | /** |
327 | * Go bad due to a damaged dentry (register with damagetable and go BADFRAG) | |
328 | */ | |
329 | void go_bad_dentry(snapid_t last, std::string_view dname); | |
330 | ||
7c673cae | 331 | const scrub_info_t *scrub_info() const { |
f67539c2 | 332 | if (!scrub_infop) |
7c673cae | 333 | scrub_info_create(); |
7c673cae FG |
334 | return scrub_infop.get(); |
335 | } | |
336 | ||
7c673cae FG |
337 | // -- accessors -- |
338 | inodeno_t ino() const { return inode->ino(); } // deprecate me? | |
339 | frag_t get_frag() const { return frag; } | |
340 | dirfrag_t dirfrag() const { return dirfrag_t(inode->ino(), frag); } | |
341 | ||
342 | CInode *get_inode() { return inode; } | |
343 | const CInode *get_inode() const { return inode; } | |
344 | CDir *get_parent_dir() { return inode->get_parent_dir(); } | |
345 | ||
94b18763 FG |
346 | dentry_key_map::iterator begin() { return items.begin(); } |
347 | dentry_key_map::iterator end() { return items.end(); } | |
348 | dentry_key_map::iterator lower_bound(dentry_key_t key) { return items.lower_bound(key); } | |
7c673cae FG |
349 | |
350 | unsigned get_num_head_items() const { return num_head_items; } | |
351 | unsigned get_num_head_null() const { return num_head_null; } | |
352 | unsigned get_num_snap_items() const { return num_snap_items; } | |
353 | unsigned get_num_snap_null() const { return num_snap_null; } | |
354 | unsigned get_num_any() const { return num_head_items + num_head_null + num_snap_items + num_snap_null; } | |
355 | ||
356 | bool check_rstats(bool scrub=false); | |
357 | ||
358 | void inc_num_dirty() { num_dirty++; } | |
359 | void dec_num_dirty() { | |
11fdf7f2 | 360 | ceph_assert(num_dirty > 0); |
7c673cae FG |
361 | num_dirty--; |
362 | } | |
363 | int get_num_dirty() const { | |
364 | return num_dirty; | |
365 | } | |
366 | ||
11fdf7f2 TL |
367 | void adjust_num_inodes_with_caps(int d); |
368 | ||
7c673cae FG |
369 | int64_t get_frag_size() const { |
370 | return get_projected_fnode()->fragstat.size(); | |
371 | } | |
372 | ||
373 | // -- dentries and inodes -- | |
11fdf7f2 TL |
374 | CDentry* lookup_exact_snap(std::string_view dname, snapid_t last); |
375 | CDentry* lookup(std::string_view n, snapid_t snap=CEPH_NOSNAP); | |
7c673cae | 376 | |
1e59de90 | 377 | void adjust_dentry_lru(CDentry *dn); |
11fdf7f2 | 378 | CDentry* add_null_dentry(std::string_view dname, |
7c673cae | 379 | snapid_t first=2, snapid_t last=CEPH_NOSNAP); |
f67539c2 | 380 | CDentry* add_primary_dentry(std::string_view dname, CInode *in, mempool::mds_co::string alternate_name, |
7c673cae | 381 | snapid_t first=2, snapid_t last=CEPH_NOSNAP); |
11fdf7f2 | 382 | CDentry* add_remote_dentry(std::string_view dname, inodeno_t ino, unsigned char d_type, |
f67539c2 | 383 | mempool::mds_co::string alternate_name, |
7c673cae FG |
384 | snapid_t first=2, snapid_t last=CEPH_NOSNAP); |
385 | void remove_dentry( CDentry *dn ); // delete dentry | |
386 | void link_remote_inode( CDentry *dn, inodeno_t ino, unsigned char d_type); | |
387 | void link_remote_inode( CDentry *dn, CInode *in ); | |
388 | void link_primary_inode( CDentry *dn, CInode *in ); | |
31f18b77 | 389 | void unlink_inode(CDentry *dn, bool adjust_lru=true); |
7c673cae FG |
390 | void try_remove_unlinked_dn(CDentry *dn); |
391 | ||
392 | void add_to_bloom(CDentry *dn); | |
11fdf7f2 | 393 | bool is_in_bloom(std::string_view name); |
7c673cae FG |
394 | bool has_bloom() { return (bloom ? true : false); } |
395 | void remove_bloom() { | |
396 | bloom.reset(); | |
397 | } | |
9f95a23c | 398 | |
7c673cae FG |
399 | void try_remove_dentries_for_stray(); |
400 | bool try_trim_snap_dentry(CDentry *dn, const std::set<snapid_t>& snaps); | |
401 | ||
9f95a23c TL |
402 | void split(int bits, std::vector<CDir*>* subs, MDSContext::vec& waiters, bool replay); |
403 | void merge(const std::vector<CDir*>& subs, MDSContext::vec& waiters, bool replay); | |
7c673cae FG |
404 | |
405 | bool should_split() const { | |
f67539c2 | 406 | return g_conf()->mds_bal_split_size > 0 && |
39ae355f | 407 | ((int)get_frag_size() + (int)get_num_snap_items()) > g_conf()->mds_bal_split_size; |
7c673cae FG |
408 | } |
409 | bool should_split_fast() const; | |
f67539c2 | 410 | bool should_merge() const; |
7c673cae | 411 | |
7c673cae FG |
412 | mds_authority_t authority() const override; |
413 | mds_authority_t get_dir_auth() const { return dir_auth; } | |
11fdf7f2 | 414 | void set_dir_auth(const mds_authority_t &a); |
7c673cae FG |
415 | void set_dir_auth(mds_rank_t a) { set_dir_auth(mds_authority_t(a, CDIR_AUTH_UNKNOWN)); } |
416 | bool is_ambiguous_dir_auth() const { | |
417 | return dir_auth.second != CDIR_AUTH_UNKNOWN; | |
418 | } | |
419 | bool is_full_dir_auth() const { | |
420 | return is_auth() && !is_ambiguous_dir_auth(); | |
421 | } | |
422 | bool is_full_dir_nonauth() const { | |
423 | return !is_auth() && !is_ambiguous_dir_auth(); | |
424 | } | |
425 | ||
426 | bool is_subtree_root() const { | |
427 | return dir_auth != CDIR_AUTH_DEFAULT; | |
428 | } | |
429 | ||
430 | bool contains(CDir *x); // true if we are x or an ancestor of x | |
431 | ||
7c673cae FG |
432 | // for giving to clients |
433 | void get_dist_spec(std::set<mds_rank_t>& ls, mds_rank_t auth) { | |
f91f0fd5 | 434 | if (is_auth()) { |
7c673cae FG |
435 | list_replicas(ls); |
436 | if (!ls.empty()) | |
437 | ls.insert(auth); | |
438 | } | |
439 | } | |
7c673cae | 440 | |
f67539c2 | 441 | static void encode_dirstat(ceph::buffer::list& bl, const session_info_t& info, const DirStat& ds); |
7c673cae | 442 | |
f67539c2 | 443 | void _encode_base(ceph::buffer::list& bl) { |
9f95a23c | 444 | ENCODE_START(1, 1, bl); |
11fdf7f2 | 445 | encode(first, bl); |
f67539c2 | 446 | encode(*fnode, bl); |
11fdf7f2 TL |
447 | encode(dir_rep, bl); |
448 | encode(dir_rep_by, bl); | |
9f95a23c | 449 | ENCODE_FINISH(bl); |
7c673cae | 450 | } |
f67539c2 | 451 | void _decode_base(ceph::buffer::list::const_iterator& p) { |
9f95a23c | 452 | DECODE_START(1, p); |
11fdf7f2 | 453 | decode(first, p); |
f67539c2 TL |
454 | { |
455 | auto _fnode = allocate_fnode(); | |
456 | decode(*_fnode, p); | |
457 | reset_fnode(std::move(_fnode)); | |
458 | } | |
11fdf7f2 TL |
459 | decode(dir_rep, p); |
460 | decode(dir_rep_by, p); | |
9f95a23c | 461 | DECODE_FINISH(p); |
7c673cae | 462 | } |
7c673cae FG |
463 | |
464 | // -- state -- | |
465 | bool is_complete() { return state & STATE_COMPLETE; } | |
466 | bool is_exporting() { return state & STATE_EXPORTING; } | |
467 | bool is_importing() { return state & STATE_IMPORTING; } | |
468 | bool is_dirty_dft() { return state & STATE_DIRTYDFT; } | |
469 | ||
470 | int get_dir_rep() const { return dir_rep; } | |
471 | bool is_rep() const { | |
472 | if (dir_rep == REP_NONE) return false; | |
473 | return true; | |
474 | } | |
f67539c2 | 475 | bool can_rep() const; |
7c673cae FG |
476 | |
477 | // -- fetch -- | |
478 | object_t get_ondisk_object() { | |
479 | return file_object_t(ino(), frag); | |
480 | } | |
1e59de90 TL |
481 | void fetch(std::string_view dname, snapid_t last, |
482 | MDSContext *c, bool ignore_authpinnability=false); | |
483 | void fetch(MDSContext *c, bool ignore_authpinnability=false) { | |
484 | fetch("", CEPH_NOSNAP, c, ignore_authpinnability); | |
485 | } | |
486 | void fetch_keys(const std::vector<dentry_key_t>& keys, MDSContext *c); | |
7c673cae | 487 | |
7c673cae FG |
488 | #if 0 // unused? |
489 | void wait_for_commit(Context *c, version_t v=0); | |
490 | #endif | |
491 | void commit_to(version_t want); | |
11fdf7f2 | 492 | void commit(version_t want, MDSContext *c, |
7c673cae FG |
493 | bool ignore_authpinnability=false, int op_prio=-1); |
494 | ||
495 | // -- dirtyness -- | |
496 | version_t get_committing_version() const { return committing_version; } | |
497 | version_t get_committed_version() const { return committed_version; } | |
498 | void set_committed_version(version_t v) { committed_version = v; } | |
499 | ||
500 | void mark_complete(); | |
501 | ||
7c673cae FG |
502 | // -- reference counting -- |
503 | void first_get() override; | |
504 | void last_put() override; | |
505 | ||
11fdf7f2 | 506 | bool is_waiting_for_dentry(std::string_view dname, snapid_t snap) { |
7c673cae FG |
507 | return waiting_on_dentry.count(string_snap_t(dname, snap)); |
508 | } | |
11fdf7f2 TL |
509 | void add_dentry_waiter(std::string_view dentry, snapid_t snap, MDSContext *c); |
510 | void take_dentry_waiting(std::string_view dentry, snapid_t first, snapid_t last, MDSContext::vec& ls); | |
7c673cae | 511 | |
11fdf7f2 TL |
512 | void add_waiter(uint64_t mask, MDSContext *c) override; |
513 | void take_waiting(uint64_t mask, MDSContext::vec& ls) override; // may include dentry waiters | |
7c673cae | 514 | void finish_waiting(uint64_t mask, int result = 0); // ditto |
7c673cae FG |
515 | |
516 | // -- import/export -- | |
f67539c2 TL |
517 | mds_rank_t get_export_pin(bool inherit=true) const; |
518 | bool is_exportable(mds_rank_t dest) const; | |
519 | ||
520 | void encode_export(ceph::buffer::list& bl); | |
11fdf7f2 | 521 | void finish_export(); |
7c673cae FG |
522 | void abort_export() { |
523 | put(PIN_TEMPEXPORTING); | |
524 | } | |
f67539c2 | 525 | void decode_import(ceph::buffer::list::const_iterator& blp, LogSegment *ls); |
11fdf7f2 | 526 | void abort_import(); |
7c673cae FG |
527 | |
528 | // -- auth pins -- | |
91327a77 | 529 | bool can_auth_pin(int *err_ret=nullptr) const override; |
7c673cae | 530 | int get_auth_pins() const { return auth_pins; } |
7c673cae FG |
531 | int get_dir_auth_pins() const { return dir_auth_pins; } |
532 | void auth_pin(void *who) override; | |
533 | void auth_unpin(void *who) override; | |
534 | ||
11fdf7f2 | 535 | void adjust_nested_auth_pins(int dirinc, void *by); |
7c673cae FG |
536 | void verify_fragstat(); |
537 | ||
11fdf7f2 TL |
538 | void _walk_tree(std::function<bool(CDir*)> cb); |
539 | ||
7c673cae FG |
540 | bool freeze_tree(); |
541 | void _freeze_tree(); | |
542 | void unfreeze_tree(); | |
11fdf7f2 | 543 | void adjust_freeze_after_rename(CDir *dir); |
7c673cae FG |
544 | |
545 | bool freeze_dir(); | |
546 | void _freeze_dir(); | |
547 | void unfreeze_dir(); | |
548 | ||
549 | void maybe_finish_freeze(); | |
550 | ||
f67539c2 | 551 | std::pair<bool,bool> is_freezing_or_frozen_tree() const { |
11fdf7f2 TL |
552 | if (freeze_tree_state) { |
553 | if (freeze_tree_state->frozen) | |
f67539c2 TL |
554 | return std::make_pair(false, true); |
555 | return std::make_pair(true, false); | |
11fdf7f2 | 556 | } |
f67539c2 | 557 | return std::make_pair(false, false); |
11fdf7f2 | 558 | } |
91327a77 AA |
559 | |
560 | bool is_freezing() const override { return is_freezing_dir() || is_freezing_tree(); } | |
561 | bool is_freezing_tree() const { | |
562 | if (!num_freezing_trees) | |
563 | return false; | |
564 | return is_freezing_or_frozen_tree().first; | |
565 | } | |
7c673cae FG |
566 | bool is_freezing_tree_root() const { return state & STATE_FREEZINGTREE; } |
567 | bool is_freezing_dir() const { return state & STATE_FREEZINGDIR; } | |
568 | ||
569 | bool is_frozen() const override { return is_frozen_dir() || is_frozen_tree(); } | |
91327a77 AA |
570 | bool is_frozen_tree() const { |
571 | if (!num_frozen_trees) | |
572 | return false; | |
573 | return is_freezing_or_frozen_tree().second; | |
574 | } | |
7c673cae FG |
575 | bool is_frozen_tree_root() const { return state & STATE_FROZENTREE; } |
576 | bool is_frozen_dir() const { return state & STATE_FROZENDIR; } | |
11fdf7f2 | 577 | |
7c673cae FG |
578 | bool is_freezeable(bool freezing=false) const { |
579 | // no nested auth pins. | |
11fdf7f2 TL |
580 | if (auth_pins - (freezing ? 1 : 0) > 0 || |
581 | (freeze_tree_state && freeze_tree_state->auth_pins != auth_pins)) | |
7c673cae FG |
582 | return false; |
583 | ||
584 | // inode must not be frozen. | |
585 | if (!is_subtree_root() && inode->is_frozen()) | |
586 | return false; | |
587 | ||
588 | return true; | |
589 | } | |
11fdf7f2 | 590 | |
7c673cae | 591 | bool is_freezeable_dir(bool freezing=false) const { |
11fdf7f2 | 592 | if ((auth_pins - freezing) > 0 || dir_auth_pins > 0) |
7c673cae FG |
593 | return false; |
594 | ||
595 | // if not subtree root, inode must not be frozen (tree--frozen_dir is okay). | |
596 | if (!is_subtree_root() && inode->is_frozen() && !inode->is_frozen_dir()) | |
597 | return false; | |
598 | ||
599 | return true; | |
600 | } | |
601 | ||
9f95a23c TL |
602 | bool is_any_freezing_or_frozen_inode() const { |
603 | return num_frozen_inodes || !freezing_inodes.empty(); | |
604 | } | |
605 | bool is_auth_pinned_by_lock_cache() const { | |
606 | return frozen_inode_suppressed; | |
607 | } | |
608 | void disable_frozen_inode() { | |
609 | ceph_assert(num_frozen_inodes == 0); | |
610 | frozen_inode_suppressed++; | |
611 | } | |
612 | void enable_frozen_inode(); | |
613 | ||
aee94f69 TL |
614 | std::ostream& print_db_line_prefix(std::ostream& out) const override; |
615 | void print(std::ostream& out) const override; | |
f67539c2 TL |
616 | void dump(ceph::Formatter *f, int flags = DUMP_DEFAULT) const; |
617 | void dump_load(ceph::Formatter *f); | |
9f95a23c TL |
618 | |
619 | // context | |
f67539c2 | 620 | MDCache *mdcache; |
9f95a23c TL |
621 | |
622 | CInode *inode; // my inode | |
623 | frag_t frag; // my frag | |
624 | ||
9f95a23c TL |
625 | snapid_t first = 2; |
626 | mempool::mds_co::compact_map<snapid_t,old_rstat_t> dirty_old_rstat; // [value.first,key] | |
627 | ||
628 | // my inodes with dirty rstat data | |
629 | elist<CInode*> dirty_rstat_inodes; | |
630 | ||
631 | elist<CDentry*> dirty_dentries; | |
632 | elist<CDir*>::item item_dirty, item_new; | |
633 | ||
634 | // lock caches that auth-pin me | |
635 | elist<MDLockCache::DirItem*> lock_caches_with_auth_pins; | |
636 | ||
637 | // all dirfrags within freezing/frozen tree reference the 'state' | |
638 | std::shared_ptr<freeze_tree_state_t> freeze_tree_state; | |
639 | ||
640 | protected: | |
641 | // friends | |
642 | friend class Migrator; | |
643 | friend class CInode; | |
644 | friend class MDCache; | |
645 | friend class MDiscover; | |
646 | friend class MDBalancer; | |
647 | ||
648 | friend class CDirDiscover; | |
649 | friend class CDirExport; | |
650 | friend class C_IO_Dir_TMAP_Fetched; | |
651 | friend class C_IO_Dir_OMAP_Fetched; | |
652 | friend class C_IO_Dir_OMAP_FetchedMore; | |
653 | friend class C_IO_Dir_Committed; | |
f67539c2 | 654 | friend class C_IO_Dir_Commit_Ops; |
9f95a23c | 655 | |
1e59de90 | 656 | void _omap_fetch(std::set<std::string> *keys, MDSContext *fin=nullptr); |
f67539c2 | 657 | void _omap_fetch_more(version_t omap_version, bufferlist& hdrbl, |
20effc67 | 658 | std::map<std::string, bufferlist>& omap, MDSContext *fin); |
9f95a23c TL |
659 | CDentry *_load_dentry( |
660 | std::string_view key, | |
661 | std::string_view dname, | |
662 | snapid_t last, | |
f67539c2 | 663 | ceph::buffer::list &bl, |
9f95a23c TL |
664 | int pos, |
665 | const std::set<snapid_t> *snaps, | |
f91f0fd5 | 666 | double rand_threshold, |
9f95a23c TL |
667 | bool *force_dirty); |
668 | ||
9f95a23c TL |
669 | /** |
670 | * Go bad due to a damaged header (register with damagetable and go BADFRAG) | |
671 | */ | |
672 | void go_bad(bool complete); | |
673 | ||
f67539c2 | 674 | void _omap_fetched(ceph::buffer::list& hdrbl, std::map<std::string, ceph::buffer::list>& omap, |
1e59de90 | 675 | bool complete, const std::set<std::string>& keys, int r); |
9f95a23c TL |
676 | |
677 | // -- commit -- | |
678 | void _commit(version_t want, int op_prio); | |
f67539c2 | 679 | void _omap_commit_ops(int r, int op_prio, int64_t metapool, version_t version, bool _new, |
20effc67 TL |
680 | std::vector<dentry_commit_item> &to_set, bufferlist &dfts, |
681 | std::vector<std::string> &to_remove, | |
f67539c2 TL |
682 | mempool::mds_co::compact_set<mempool::mds_co::string> &_stale); |
683 | void _encode_primary_inode_base(dentry_commit_item &item, bufferlist &dfts, | |
684 | bufferlist &bl); | |
9f95a23c | 685 | void _omap_commit(int op_prio); |
f67539c2 | 686 | void _parse_dentry(CDentry *dn, dentry_commit_item &item, |
20effc67 | 687 | const std::set<snapid_t> *snaps, bufferlist &bl); |
9f95a23c TL |
688 | void _committed(int r, version_t v); |
689 | ||
f67539c2 TL |
690 | static fnode_const_ptr empty_fnode; |
691 | // fnode is a pointer to constant fnode_t, the constant fnode_t can be shared | |
692 | // by CDir and log events. To update fnode, read-copy-update should be used. | |
693 | ||
694 | fnode_const_ptr fnode = empty_fnode; | |
695 | ||
9f95a23c | 696 | version_t projected_version = 0; |
f67539c2 | 697 | mempool::mds_co::list<fnode_const_ptr> projected_fnode; |
9f95a23c TL |
698 | |
699 | std::unique_ptr<scrub_info_t> scrub_infop; | |
700 | ||
701 | // contents of this directory | |
702 | dentry_key_map items; // non-null AND null | |
703 | unsigned num_head_items = 0; | |
704 | unsigned num_head_null = 0; | |
705 | unsigned num_snap_items = 0; | |
706 | unsigned num_snap_null = 0; | |
707 | ||
708 | int num_dirty = 0; | |
709 | ||
710 | int num_inodes_with_caps = 0; | |
711 | ||
712 | // state | |
713 | version_t committing_version = 0; | |
714 | version_t committed_version = 0; | |
715 | ||
716 | mempool::mds_co::compact_set<mempool::mds_co::string> stale_items; | |
717 | ||
718 | // lock nesting, freeze | |
719 | static int num_frozen_trees; | |
720 | static int num_freezing_trees; | |
721 | ||
722 | // freezing/frozen inodes in this dirfrag | |
723 | int num_frozen_inodes = 0; | |
724 | int frozen_inode_suppressed = 0; | |
725 | elist<CInode*> freezing_inodes; | |
726 | ||
727 | int dir_auth_pins = 0; | |
728 | ||
729 | // cache control (defined for authority; hints for replicas) | |
730 | __s32 dir_rep; | |
731 | mempool::mds_co::compact_set<__s32> dir_rep_by; // if dir_rep == REP_LIST | |
732 | ||
733 | // popularity | |
734 | dirfrag_load_vec_t pop_me; | |
735 | dirfrag_load_vec_t pop_nested; | |
736 | dirfrag_load_vec_t pop_auth_subtree; | |
737 | dirfrag_load_vec_t pop_auth_subtree_nested; | |
738 | ||
739 | ceph::coarse_mono_time last_popularity_sample = ceph::coarse_mono_clock::zero(); | |
740 | ||
9f95a23c TL |
741 | elist<CInode*> pop_lru_subdirs; |
742 | ||
743 | std::unique_ptr<bloom_filter> bloom; // XXX not part of mempool::mds_co | |
744 | /* If you set up the bloom filter, you must keep it accurate! | |
745 | * It's deleted when you mark_complete() and is deliberately not serialized.*/ | |
746 | ||
9f95a23c TL |
747 | mempool::mds_co::compact_map<version_t, MDSContext::vec_alloc<mempool::mds_co::pool_allocator> > waiting_for_commit; |
748 | ||
749 | // -- waiters -- | |
1e59de90 | 750 | mempool::mds_co::map< string_snap_t, MDSContext::vec_alloc<mempool::mds_co::pool_allocator> > waiting_on_dentry; // FIXME string_snap_t not in mempool |
9f95a23c TL |
751 | |
752 | private: | |
f67539c2 | 753 | friend std::ostream& operator<<(std::ostream& out, const class CDir& dir); |
9f95a23c TL |
754 | |
755 | void log_mark_dirty(); | |
756 | ||
757 | /** | |
758 | * Create a scrub_info_t struct for the scrub_infop pointer. | |
759 | */ | |
760 | void scrub_info_create() const; | |
761 | /** | |
762 | * Delete the scrub_infop if it's not got any useful data. | |
763 | */ | |
764 | void scrub_maybe_delete_info(); | |
9f95a23c TL |
765 | |
766 | void link_inode_work( CDentry *dn, CInode *in ); | |
767 | void unlink_inode_work( CDentry *dn ); | |
768 | void remove_null_dentries(); | |
9f95a23c TL |
769 | |
770 | void prepare_new_fragment(bool replay); | |
f67539c2 | 771 | void prepare_old_fragment(std::map<string_snap_t, MDSContext::vec >& dentry_waiters, bool replay); |
9f95a23c TL |
772 | void steal_dentry(CDentry *dn); // from another dir. used by merge/split. |
773 | void finish_old_fragment(MDSContext::vec& waiters, bool replay); | |
774 | void init_fragment_pins(); | |
775 | std::string get_path() const; | |
776 | ||
777 | // -- authority -- | |
778 | /* | |
779 | * normal: <parent,unknown> !subtree_root | |
780 | * delegation: <mds,unknown> subtree_root | |
781 | * ambiguous: <mds1,mds2> subtree_root | |
782 | * <parent,mds2> subtree_root | |
783 | */ | |
784 | mds_authority_t dir_auth; | |
7c673cae FG |
785 | }; |
786 | ||
787 | #endif |