]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | ||
16 | ||
17 | #ifndef CEPH_CDIR_H | |
18 | #define CEPH_CDIR_H | |
19 | ||
7c673cae | 20 | #include <iosfwd> |
7c673cae | 21 | #include <list> |
7c673cae | 22 | #include <map> |
94b18763 | 23 | #include <set> |
7c673cae | 24 | #include <string> |
11fdf7f2 | 25 | #include <string_view> |
7c673cae | 26 | |
94b18763 FG |
27 | #include "common/bloom_filter.hpp" |
28 | #include "common/config.h" | |
29 | #include "include/buffer_fwd.h" | |
30 | #include "include/counter.h" | |
31 | #include "include/types.h" | |
7c673cae FG |
32 | |
33 | #include "CInode.h" | |
94b18763 | 34 | #include "MDSCacheObject.h" |
11fdf7f2 TL |
35 | #include "MDSContext.h" |
36 | #include "cephfs_features.h" | |
37 | #include "SessionMap.h" | |
38 | #include "messages/MClientReply.h" | |
7c673cae FG |
39 | |
40 | class CDentry; | |
41 | class MDCache; | |
42 | ||
43 | struct ObjectOperation; | |
44 | ||
45 | ostream& operator<<(ostream& out, const class CDir& dir); | |
46 | class CDir : public MDSCacheObject, public Counter<CDir> { | |
11fdf7f2 TL |
47 | using time = ceph::coarse_mono_time; |
48 | using clock = ceph::coarse_mono_clock; | |
49 | ||
7c673cae FG |
50 | friend ostream& operator<<(ostream& out, const class CDir& dir); |
51 | ||
52 | public: | |
181888fb | 53 | MEMPOOL_CLASS_HELPERS(); |
7c673cae FG |
54 | // -- pins -- |
55 | static const int PIN_DNWAITER = 1; | |
56 | static const int PIN_INOWAITER = 2; | |
57 | static const int PIN_CHILD = 3; | |
58 | static const int PIN_FROZEN = 4; | |
59 | static const int PIN_SUBTREE = 5; | |
60 | static const int PIN_IMPORTING = 7; | |
61 | static const int PIN_IMPORTBOUND = 9; | |
62 | static const int PIN_EXPORTBOUND = 10; | |
63 | static const int PIN_STICKY = 11; | |
64 | static const int PIN_SUBTREETEMP = 12; // used by MDCache::trim_non_auth() | |
11fdf7f2 | 65 | std::string_view pin_name(int p) const override { |
7c673cae FG |
66 | switch (p) { |
67 | case PIN_DNWAITER: return "dnwaiter"; | |
68 | case PIN_INOWAITER: return "inowaiter"; | |
69 | case PIN_CHILD: return "child"; | |
70 | case PIN_FROZEN: return "frozen"; | |
71 | case PIN_SUBTREE: return "subtree"; | |
72 | case PIN_IMPORTING: return "importing"; | |
73 | case PIN_IMPORTBOUND: return "importbound"; | |
74 | case PIN_EXPORTBOUND: return "exportbound"; | |
75 | case PIN_STICKY: return "sticky"; | |
76 | case PIN_SUBTREETEMP: return "subtreetemp"; | |
77 | default: return generic_pin_name(p); | |
78 | } | |
79 | } | |
80 | ||
81 | // -- state -- | |
11fdf7f2 TL |
82 | static const unsigned STATE_COMPLETE = (1<< 0); // the complete contents are in cache |
83 | static const unsigned STATE_FROZENTREE = (1<< 1); // root of tree (bounded by exports) | |
84 | static const unsigned STATE_FREEZINGTREE = (1<< 2); // in process of freezing | |
85 | static const unsigned STATE_FROZENDIR = (1<< 3); | |
86 | static const unsigned STATE_FREEZINGDIR = (1<< 4); | |
87 | static const unsigned STATE_COMMITTING = (1<< 5); // mid-commit | |
88 | static const unsigned STATE_FETCHING = (1<< 6); // currently fetching |
89 | static const unsigned STATE_CREATING = (1<< 7); | |
90 | static const unsigned STATE_IMPORTBOUND = (1<< 8); | |
91 | static const unsigned STATE_EXPORTBOUND = (1<< 9); | |
92 | static const unsigned STATE_EXPORTING = (1<<10); | |
93 | static const unsigned STATE_IMPORTING = (1<<11); | |
94 | static const unsigned STATE_FRAGMENTING = (1<<12); | |
95 | static const unsigned STATE_STICKY = (1<<13); // sticky pin due to inode stickydirs | |
96 | static const unsigned STATE_DNPINNEDFRAG = (1<<14); // dir is refragmenting | |
97 | static const unsigned STATE_ASSIMRSTAT = (1<<15); // assimilating inode->frag rstats | |
98 | static const unsigned STATE_DIRTYDFT = (1<<16); // dirty dirfragtree | |
99 | static const unsigned STATE_BADFRAG = (1<<17); // bad dirfrag | |
100 | static const unsigned STATE_TRACKEDBYOFT = (1<<18); // tracked by open file table | |
101 | static const unsigned STATE_AUXSUBTREE = (1<<19); // no subtree merge | |
7c673cae FG |
102 | |
103 | // common states | |
104 | static const unsigned STATE_CLEAN = 0; | |
7c673cae FG |
105 | |
106 | // these state bits are preserved by an import/export | |
107 | // ...except if the directory is hashed, in which case none of them are! | |
108 | static const unsigned MASK_STATE_EXPORTED = | |
109 | (STATE_COMPLETE|STATE_DIRTY|STATE_DIRTYDFT|STATE_BADFRAG); | |
110 | static const unsigned MASK_STATE_IMPORT_KEPT = | |
111 | ( | |
11fdf7f2 TL |
112 | STATE_IMPORTING | |
113 | STATE_IMPORTBOUND | | |
114 | STATE_EXPORTBOUND | | |
115 | STATE_FROZENTREE | | |
116 | STATE_STICKY | | |
117 | STATE_TRACKEDBYOFT); | |
7c673cae | 118 | static const unsigned MASK_STATE_EXPORT_KEPT = |
11fdf7f2 TL |
119 | (STATE_EXPORTING | |
120 | STATE_IMPORTBOUND | | |
121 | STATE_EXPORTBOUND | | |
122 | STATE_FROZENTREE | | |
123 | STATE_FROZENDIR | | |
124 | STATE_STICKY | | |
125 | STATE_TRACKEDBYOFT); | |
7c673cae | 126 | static const unsigned MASK_STATE_FRAGMENT_KEPT = |
11fdf7f2 | 127 | (STATE_DIRTY | |
7c673cae FG |
128 | STATE_EXPORTBOUND | |
129 | STATE_IMPORTBOUND | | |
130 | STATE_AUXSUBTREE | | |
131 | STATE_REJOINUNDEF); | |
132 | ||
133 | // -- rep spec -- | |
134 | static const int REP_NONE = 0; | |
135 | static const int REP_ALL = 1; | |
136 | static const int REP_LIST = 2; | |
137 | ||
138 | ||
139 | static const unsigned EXPORT_NONCE = 1; | |
140 | ||
141 | ||
142 | // -- wait masks -- | |
143 | static const uint64_t WAIT_DENTRY = (1<<0); // wait for item to be in cache | |
144 | static const uint64_t WAIT_COMPLETE = (1<<1); // wait for complete dir contents | |
145 | static const uint64_t WAIT_FROZEN = (1<<2); // auth pins removed | |
146 | static const uint64_t WAIT_CREATED = (1<<3); // new dirfrag is logged | |
147 | ||
148 | static const int WAIT_DNLOCK_OFFSET = 4; | |
149 | ||
150 | static const uint64_t WAIT_ANY_MASK = (uint64_t)(-1); | |
7c673cae FG |
151 | static const uint64_t WAIT_ATSUBTREEROOT = (WAIT_SINGLEAUTH); |
152 | ||
11fdf7f2 TL |
153 | // -- dump flags -- |
154 | static const int DUMP_PATH = (1 << 0); | |
155 | static const int DUMP_DIRFRAG = (1 << 1); | |
156 | static const int DUMP_SNAPID_FIRST = (1 << 2); | |
157 | static const int DUMP_VERSIONS = (1 << 3); | |
158 | static const int DUMP_REP = (1 << 4); | |
159 | static const int DUMP_DIR_AUTH = (1 << 5); | |
160 | static const int DUMP_STATES = (1 << 6); | |
161 | static const int DUMP_MDS_CACHE_OBJECT = (1 << 7); | |
162 | static const int DUMP_ITEMS = (1 << 8); | |
163 | static const int DUMP_ALL = (-1); | |
164 | static const int DUMP_DEFAULT = DUMP_ALL & (~DUMP_ITEMS); | |
7c673cae FG |
165 | |
166 | public: | |
167 | // context | |
168 | MDCache *cache; | |
169 | ||
170 | CInode *inode; // my inode | |
171 | frag_t frag; // my frag | |
172 | ||
173 | bool is_lt(const MDSCacheObject *r) const override { | |
174 | return dirfrag() < (static_cast<const CDir*>(r))->dirfrag(); | |
175 | } | |
176 | ||
177 | fnode_t fnode; | |
178 | snapid_t first; | |
94b18763 | 179 | mempool::mds_co::compact_map<snapid_t,old_rstat_t> dirty_old_rstat; // [value.first,key] |
7c673cae FG |
180 | |
181 | // my inodes with dirty rstat data | |
182 | elist<CInode*> dirty_rstat_inodes; | |
183 | ||
184 | void resync_accounted_fragstat(); | |
185 | void resync_accounted_rstat(); | |
186 | void assimilate_dirty_rstat_inodes(); | |
187 | void assimilate_dirty_rstat_inodes_finish(MutationRef& mut, EMetaBlob *blob); | |
188 | ||
1adf2230 AA |
189 | void mark_exporting() { |
190 | state_set(CDir::STATE_EXPORTING); | |
191 | inode->num_exporting_dirs++; | |
192 | } | |
193 | void clear_exporting() { | |
194 | state_clear(CDir::STATE_EXPORTING); | |
195 | inode->num_exporting_dirs--; | |
196 | } | |
197 | ||
7c673cae FG |
198 | protected: |
199 | version_t projected_version; | |
94b18763 | 200 | mempool::mds_co::list<fnode_t> projected_fnode; |
7c673cae FG |
201 | |
202 | public: | |
b32b8144 | 203 | elist<CDentry*> dirty_dentries; |
7c673cae FG |
204 | elist<CDir*>::item item_dirty, item_new; |
205 | ||
7c673cae FG |
206 | public: |
207 | version_t get_version() const { return fnode.version; } | |
208 | void set_version(version_t v) { | |
11fdf7f2 | 209 | ceph_assert(projected_fnode.empty()); |
7c673cae FG |
210 | projected_version = fnode.version = v; |
211 | } | |
212 | version_t get_projected_version() const { return projected_version; } | |
213 | ||
214 | const fnode_t *get_projected_fnode() const { | |
215 | if (projected_fnode.empty()) | |
216 | return &fnode; | |
217 | else | |
94b18763 | 218 | return &projected_fnode.back(); |
7c673cae FG |
219 | } |
220 | ||
221 | fnode_t *get_projected_fnode() { | |
222 | if (projected_fnode.empty()) | |
223 | return &fnode; | |
224 | else | |
94b18763 | 225 | return &projected_fnode.back(); |
7c673cae FG |
226 | } |
227 | fnode_t *project_fnode(); | |
228 | ||
229 | void pop_and_dirty_projected_fnode(LogSegment *ls); | |
230 | bool is_projected() const { return !projected_fnode.empty(); } | |
231 | version_t pre_dirty(version_t min=0); | |
232 | void _mark_dirty(LogSegment *ls); | |
233 | void _set_dirty_flag() { | |
234 | if (!state_test(STATE_DIRTY)) { | |
235 | state_set(STATE_DIRTY); | |
236 | get(PIN_DIRTY); | |
237 | } | |
238 | } | |
239 | void mark_dirty(version_t pv, LogSegment *ls); | |
240 | void mark_clean(); | |
241 | ||
242 | bool is_new() { return item_new.is_on_list(); } | |
243 | void mark_new(LogSegment *ls); | |
244 | ||
245 | bool is_bad() { return state_test(STATE_BADFRAG); } | |
246 | private: | |
247 | void log_mark_dirty(); | |
248 | ||
249 | public: | |
94b18763 FG |
250 | typedef mempool::mds_co::map<dentry_key_t, CDentry*> dentry_key_map; |
251 | typedef mempool::mds_co::set<dentry_key_t> dentry_key_set; | |
7c673cae FG |
252 | |
253 | class scrub_info_t { | |
254 | public: | |
255 | /// inodes we contain with dirty scrub stamps | |
94b18763 | 256 | dentry_key_map dirty_scrub_stamps; // TODO: make use of this! |
7c673cae FG |
257 | struct scrub_stamps { |
258 | version_t version; | |
259 | utime_t time; | |
260 | scrub_stamps() : version(0) {} | |
261 | void operator=(const scrub_stamps &o) { | |
262 | version = o.version; | |
263 | time = o.time; | |
264 | } | |
265 | }; | |
266 | ||
267 | scrub_stamps recursive_start; // when we last started a recursive scrub | |
268 | scrub_stamps last_recursive; // when we last finished a recursive scrub | |
269 | scrub_stamps last_local; // when we last did a local scrub | |
270 | ||
271 | bool directory_scrubbing; /// safety check | |
272 | bool need_scrub_local; | |
273 | bool last_scrub_dirty; /// is scrub info dirty or is it flushed to fnode? | |
274 | bool pending_scrub_error; | |
275 | ||
276 | /// these are lists of children in each stage of scrubbing | |
94b18763 FG |
277 | dentry_key_set directories_to_scrub; |
278 | dentry_key_set directories_scrubbing; | |
279 | dentry_key_set directories_scrubbed; | |
280 | dentry_key_set others_to_scrub; | |
281 | dentry_key_set others_scrubbing; | |
282 | dentry_key_set others_scrubbed; | |
7c673cae FG |
283 | |
284 | ScrubHeaderRefConst header; | |
285 | ||
286 | scrub_info_t() : | |
287 | directory_scrubbing(false), | |
288 | need_scrub_local(false), | |
289 | last_scrub_dirty(false), | |
290 | pending_scrub_error(false) {} | |
291 | }; | |
292 | /** | |
293 | * Call to start this CDir on a new scrub. | |
294 | * @pre It is not currently scrubbing | |
295 | * @pre The CDir is marked complete. | |
296 | * @post It has set up its internal scrubbing state. | |
297 | */ | |
298 | void scrub_initialize(const ScrubHeaderRefConst& header); | |
299 | /** | |
300 | * Get the next dentry to scrub. Gives you a CDentry* and its meaning. This | |
301 | * function will give you all directory-representing dentries before any | |
302 | * others. | |
303 | * 0: success, you should scrub this CDentry right now | |
304 | * EAGAIN: is currently fetching the next CDentry into memory for you. | |
305 | * It will activate your callback when done; try again when it does! | |
306 | * ENOENT: there are no remaining dentries to scrub | |
307 | * <0: There was an unexpected error | |
308 | * | |
11fdf7f2 | 309 | * @param cb An MDSContext which will be activated only if |
7c673cae FG |
310 | * this call returns EAGAIN; otherwise it is ignored |
311 | * @param dnout CDentry * which you should next scrub, or NULL | |
312 | * @returns a value as described above | |
313 | */ | |
11fdf7f2 | 314 | int scrub_dentry_next(MDSContext *cb, CDentry **dnout); |
7c673cae FG |
315 | /** |
316 | * Get the currently scrubbing dentries. When returned, the passed-in | |
317 | * list will be filled with all CDentry * which have been returned | |
318 | * from scrub_dentry_next() but not sent back via scrub_dentry_finished(). | |
319 | */ | |
94b18763 | 320 | void scrub_dentries_scrubbing(std::list<CDentry*> *out_dentries); |
7c673cae FG |
321 | /** |
322 | * Report to the CDir that a CDentry has been scrubbed. Call this | |
323 | * for every CDentry returned from scrub_dentry_next(). | |
324 | * @param dn The CDentry which has been scrubbed. | |
325 | */ | |
326 | void scrub_dentry_finished(CDentry *dn); | |
327 | /** | |
328 | * Call this once all CDentries have been scrubbed, according to | |
329 | * scrub_dentry_next's listing. It finalizes the scrub statistics. | |
330 | */ | |
331 | void scrub_finished(); | |
332 | /** | |
333 | * Tell the CDir to do a local scrub of itself. | |
334 | * @pre The CDir is_complete(). | |
335 | * @returns true if the rstats and directory contents match, false otherwise. | |
336 | */ | |
337 | bool scrub_local(); | |
338 | private: | |
339 | /** | |
340 | * Create a scrub_info_t struct for the scrub_infop pointer. | |
341 | */ | |
342 | void scrub_info_create() const; | |
343 | /** | |
344 | * Delete the scrub_infop if it holds no useful data. | |
345 | */ | |
346 | void scrub_maybe_delete_info(); | |
347 | /** | |
348 | * Check the given set (presumably one of those in scrub_info_t) for the | |
349 | * next key to scrub and look it up (or fail!). | |
350 | */ | |
94b18763 | 351 | int _next_dentry_on_set(dentry_key_set &dns, bool missing_okay, |
11fdf7f2 | 352 | MDSContext *cb, CDentry **dnout); |
7c673cae FG |
353 | |
354 | ||
355 | protected: | |
94b18763 | 356 | std::unique_ptr<scrub_info_t> scrub_infop; // FIXME not in mempool |
7c673cae FG |
357 | |
358 | // contents of this directory | |
94b18763 | 359 | dentry_key_map items; // non-null AND null |
7c673cae FG |
360 | unsigned num_head_items; |
361 | unsigned num_head_null; | |
362 | unsigned num_snap_items; | |
363 | unsigned num_snap_null; | |
364 | ||
365 | int num_dirty; | |
366 | ||
11fdf7f2 TL |
367 | int num_inodes_with_caps = 0; |
368 | ||
7c673cae FG |
369 | // state |
370 | version_t committing_version; | |
371 | version_t committed_version; | |
372 | ||
94b18763 | 373 | mempool::mds_co::compact_set<mempool::mds_co::string> stale_items; |
7c673cae FG |
374 | |
375 | // lock nesting, freeze | |
376 | static int num_frozen_trees; | |
377 | static int num_freezing_trees; | |
378 | ||
379 | int dir_auth_pins; | |
7c673cae FG |
380 | |
381 | // cache control (defined for authority; hints for replicas) | |
382 | __s32 dir_rep; | |
94b18763 | 383 | mempool::mds_co::compact_set<__s32> dir_rep_by; // if dir_rep == REP_LIST |
7c673cae FG |
384 | |
385 | // popularity | |
386 | dirfrag_load_vec_t pop_me; | |
387 | dirfrag_load_vec_t pop_nested; | |
388 | dirfrag_load_vec_t pop_auth_subtree; | |
389 | dirfrag_load_vec_t pop_auth_subtree_nested; | |
390 | ||
11fdf7f2 | 391 | time last_popularity_sample = clock::zero(); |
7c673cae FG |
392 | |
393 | load_spread_t pop_spread; | |
394 | ||
28e407b8 AA |
395 | elist<CInode*> pop_lru_subdirs; |
396 | ||
7c673cae FG |
397 | // and to provide density |
398 | int num_dentries_nested; | |
399 | int num_dentries_auth_subtree; | |
400 | int num_dentries_auth_subtree_nested; | |
401 | ||
402 | ||
403 | // friends | |
404 | friend class Migrator; | |
405 | friend class CInode; | |
406 | friend class MDCache; | |
407 | friend class MDiscover; | |
408 | friend class MDBalancer; | |
409 | ||
410 | friend class CDirDiscover; | |
411 | friend class CDirExport; | |
412 | friend class C_IO_Dir_TMAP_Fetched; | |
413 | friend class C_IO_Dir_OMAP_Fetched; | |
414 | friend class C_IO_Dir_OMAP_FetchedMore; | |
415 | friend class C_IO_Dir_Committed; | |
416 | ||
94b18763 | 417 | std::unique_ptr<bloom_filter> bloom; // XXX not part of mempool::mds_co |
7c673cae FG |
418 | /* If you set up the bloom filter, you must keep it accurate! |
419 | * It's deleted when you mark_complete() and is deliberately not serialized.*/ | |
420 | ||
421 | public: | |
422 | CDir(CInode *in, frag_t fg, MDCache *mdcache, bool auth); | |
423 | ||
424 | const scrub_info_t *scrub_info() const { | |
425 | if (!scrub_infop) { | |
426 | scrub_info_create(); | |
427 | } | |
428 | return scrub_infop.get(); | |
429 | } | |
430 | ||
431 | ||
432 | // -- accessors -- | |
433 | inodeno_t ino() const { return inode->ino(); } // deprecate me? | |
434 | frag_t get_frag() const { return frag; } | |
435 | dirfrag_t dirfrag() const { return dirfrag_t(inode->ino(), frag); } | |
436 | ||
437 | CInode *get_inode() { return inode; } | |
438 | const CInode *get_inode() const { return inode; } | |
439 | CDir *get_parent_dir() { return inode->get_parent_dir(); } | |
440 | ||
94b18763 FG |
441 | dentry_key_map::iterator begin() { return items.begin(); } |
442 | dentry_key_map::iterator end() { return items.end(); } | |
443 | dentry_key_map::iterator lower_bound(dentry_key_t key) { return items.lower_bound(key); } | |
7c673cae FG |
444 | |
445 | unsigned get_num_head_items() const { return num_head_items; } | |
446 | unsigned get_num_head_null() const { return num_head_null; } | |
447 | unsigned get_num_snap_items() const { return num_snap_items; } | |
448 | unsigned get_num_snap_null() const { return num_snap_null; } | |
449 | unsigned get_num_any() const { return num_head_items + num_head_null + num_snap_items + num_snap_null; } | |
450 | ||
451 | bool check_rstats(bool scrub=false); | |
452 | ||
453 | void inc_num_dirty() { num_dirty++; } | |
454 | void dec_num_dirty() { | |
11fdf7f2 | 455 | ceph_assert(num_dirty > 0); |
7c673cae FG |
456 | num_dirty--; |
457 | } | |
458 | int get_num_dirty() const { | |
459 | return num_dirty; | |
460 | } | |
461 | ||
11fdf7f2 TL |
462 | void adjust_num_inodes_with_caps(int d); |
463 | ||
7c673cae FG |
464 | int64_t get_frag_size() const { |
465 | return get_projected_fnode()->fragstat.size(); | |
466 | } | |
467 | ||
468 | // -- dentries and inodes -- | |
469 | public: | |
11fdf7f2 TL |
470 | CDentry* lookup_exact_snap(std::string_view dname, snapid_t last); |
471 | CDentry* lookup(std::string_view n, snapid_t snap=CEPH_NOSNAP); | |
7c673cae | 472 | |
11fdf7f2 | 473 | CDentry* add_null_dentry(std::string_view dname, |
7c673cae | 474 | snapid_t first=2, snapid_t last=CEPH_NOSNAP); |
11fdf7f2 | 475 | CDentry* add_primary_dentry(std::string_view dname, CInode *in, |
7c673cae | 476 | snapid_t first=2, snapid_t last=CEPH_NOSNAP); |
11fdf7f2 | 477 | CDentry* add_remote_dentry(std::string_view dname, inodeno_t ino, unsigned char d_type, |
7c673cae FG |
478 | snapid_t first=2, snapid_t last=CEPH_NOSNAP); |
479 | void remove_dentry( CDentry *dn ); // delete dentry | |
480 | void link_remote_inode( CDentry *dn, inodeno_t ino, unsigned char d_type); | |
481 | void link_remote_inode( CDentry *dn, CInode *in ); | |
482 | void link_primary_inode( CDentry *dn, CInode *in ); | |
31f18b77 | 483 | void unlink_inode(CDentry *dn, bool adjust_lru=true); |
7c673cae FG |
484 | void try_remove_unlinked_dn(CDentry *dn); |
485 | ||
486 | void add_to_bloom(CDentry *dn); | |
11fdf7f2 | 487 | bool is_in_bloom(std::string_view name); |
7c673cae FG |
488 | bool has_bloom() { return (bloom ? true : false); } |
489 | void remove_bloom() { | |
490 | bloom.reset(); | |
491 | } | |
492 | private: | |
493 | void link_inode_work( CDentry *dn, CInode *in ); | |
494 | void unlink_inode_work( CDentry *dn ); | |
495 | void remove_null_dentries(); | |
496 | void purge_stale_snap_data(const std::set<snapid_t>& snaps); | |
497 | public: | |
7c673cae FG |
498 | void try_remove_dentries_for_stray(); |
499 | bool try_trim_snap_dentry(CDentry *dn, const std::set<snapid_t>& snaps); | |
500 | ||
501 | ||
502 | public: | |
11fdf7f2 TL |
503 | void split(int bits, std::list<CDir*>& subs, MDSContext::vec& waiters, bool replay); |
504 | void merge(std::list<CDir*>& subs, MDSContext::vec& waiters, bool replay); | |
7c673cae FG |
505 | |
506 | bool should_split() const { | |
11fdf7f2 | 507 | return (int)get_frag_size() > g_conf()->mds_bal_split_size; |
7c673cae FG |
508 | } |
509 | bool should_split_fast() const; | |
510 | bool should_merge() const { | |
11fdf7f2 | 511 | return (int)get_frag_size() < g_conf()->mds_bal_merge_size; |
7c673cae FG |
512 | } |
513 | ||
514 | private: | |
515 | void prepare_new_fragment(bool replay); | |
11fdf7f2 | 516 | void prepare_old_fragment(map<string_snap_t, MDSContext::vec >& dentry_waiters, bool replay); |
7c673cae | 517 | void steal_dentry(CDentry *dn); // from another dir. used by merge/split. |
11fdf7f2 | 518 | void finish_old_fragment(MDSContext::vec& waiters, bool replay); |
7c673cae FG |
519 | void init_fragment_pins(); |
520 | ||
521 | ||
522 | // -- authority -- | |
523 | /* | |
524 | * normal: <parent,unknown> !subtree_root | |
525 | * delegation: <mds,unknown> subtree_root | |
526 | * ambiguous: <mds1,mds2> subtree_root | |
527 | * <parent,mds2> subtree_root | |
528 | */ | |
529 | mds_authority_t dir_auth; | |
530 | ||
531 | std::string get_path() const; | |
532 | ||
533 | public: | |
534 | mds_authority_t authority() const override; | |
535 | mds_authority_t get_dir_auth() const { return dir_auth; } | |
11fdf7f2 | 536 | void set_dir_auth(const mds_authority_t &a); |
7c673cae FG |
537 | void set_dir_auth(mds_rank_t a) { set_dir_auth(mds_authority_t(a, CDIR_AUTH_UNKNOWN)); } |
538 | bool is_ambiguous_dir_auth() const { | |
539 | return dir_auth.second != CDIR_AUTH_UNKNOWN; | |
540 | } | |
541 | bool is_full_dir_auth() const { | |
542 | return is_auth() && !is_ambiguous_dir_auth(); | |
543 | } | |
544 | bool is_full_dir_nonauth() const { | |
545 | return !is_auth() && !is_ambiguous_dir_auth(); | |
546 | } | |
547 | ||
548 | bool is_subtree_root() const { | |
549 | return dir_auth != CDIR_AUTH_DEFAULT; | |
550 | } | |
551 | ||
552 | bool contains(CDir *x); // true if we are x or an ancestor of x | |
553 | ||
554 | ||
555 | // for giving to clients | |
556 | void get_dist_spec(std::set<mds_rank_t>& ls, mds_rank_t auth) { | |
557 | if (is_rep()) { | |
558 | list_replicas(ls); | |
559 | if (!ls.empty()) | |
560 | ls.insert(auth); | |
561 | } | |
562 | } | |
7c673cae | 563 | |
11fdf7f2 | 564 | static void encode_dirstat(bufferlist& bl, const session_info_t& info, const DirStat& ds); |
7c673cae FG |
565 | |
566 | void _encode_base(bufferlist& bl) { | |
11fdf7f2 TL |
567 | encode(first, bl); |
568 | encode(fnode, bl); | |
569 | encode(dir_rep, bl); | |
570 | encode(dir_rep_by, bl); | |
7c673cae | 571 | } |
11fdf7f2 TL |
572 | void _decode_base(bufferlist::const_iterator& p) { |
573 | decode(first, p); | |
574 | decode(fnode, p); | |
575 | decode(dir_rep, p); | |
576 | decode(dir_rep_by, p); | |
7c673cae FG |
577 | } |
578 | void encode_replica(mds_rank_t who, bufferlist& bl) { | |
579 | __u32 nonce = add_replica(who); | |
11fdf7f2 | 580 | encode(nonce, bl); |
7c673cae FG |
581 | _encode_base(bl); |
582 | } | |
11fdf7f2 | 583 | void decode_replica(bufferlist::const_iterator& p) { |
7c673cae | 584 | __u32 nonce; |
11fdf7f2 | 585 | decode(nonce, p); |
7c673cae FG |
586 | replica_nonce = nonce; |
587 | _decode_base(p); | |
588 | } | |
589 | ||
590 | ||
591 | ||
592 | // -- state -- | |
593 | bool is_complete() { return state & STATE_COMPLETE; } | |
594 | bool is_exporting() { return state & STATE_EXPORTING; } | |
595 | bool is_importing() { return state & STATE_IMPORTING; } | |
596 | bool is_dirty_dft() { return state & STATE_DIRTYDFT; } | |
597 | ||
598 | int get_dir_rep() const { return dir_rep; } | |
599 | bool is_rep() const { | |
600 | if (dir_rep == REP_NONE) return false; | |
601 | return true; | |
602 | } | |
603 | ||
604 | // -- fetch -- | |
605 | object_t get_ondisk_object() { | |
606 | return file_object_t(ino(), frag); | |
607 | } | |
11fdf7f2 TL |
608 | void fetch(MDSContext *c, bool ignore_authpinnability=false); |
609 | void fetch(MDSContext *c, std::string_view want_dn, bool ignore_authpinnability=false); | |
610 | void fetch(MDSContext *c, const std::set<dentry_key_t>& keys); | |
7c673cae | 611 | protected: |
94b18763 | 612 | mempool::mds_co::compact_set<mempool::mds_co::string> wanted_items; |
7c673cae | 613 | |
11fdf7f2 | 614 | void _omap_fetch(MDSContext *fin, const std::set<dentry_key_t>& keys); |
7c673cae FG |
615 | void _omap_fetch_more( |
616 | bufferlist& hdrbl, std::map<std::string, bufferlist>& omap, | |
11fdf7f2 | 617 | MDSContext *fin); |
7c673cae | 618 | CDentry *_load_dentry( |
11fdf7f2 TL |
619 | std::string_view key, |
620 | std::string_view dname, | |
7c673cae FG |
621 | snapid_t last, |
622 | bufferlist &bl, | |
623 | int pos, | |
624 | const std::set<snapid_t> *snaps, | |
28e407b8 | 625 | bool *force_dirty); |
7c673cae FG |
626 | |
627 | /** | |
628 | * Mark this fragment as BADFRAG (common part of go_bad and go_bad_dentry) | |
629 | */ | |
630 | void _go_bad(); | |
631 | ||
632 | /** | |
633 | * Go bad due to a damaged dentry (register with damagetable and go BADFRAG) | |
634 | */ | |
11fdf7f2 | 635 | void go_bad_dentry(snapid_t last, std::string_view dname); |
7c673cae FG |
636 | |
637 | /** | |
638 | * Go bad due to a damaged header (register with damagetable and go BADFRAG) | |
639 | */ | |
640 | void go_bad(bool complete); | |
641 | ||
642 | void _omap_fetched(bufferlist& hdrbl, std::map<std::string, bufferlist>& omap, | |
643 | bool complete, int r); | |
644 | ||
645 | // -- commit -- | |
11fdf7f2 | 646 | mempool::mds_co::compact_map<version_t, MDSContext::vec_alloc<mempool::mds_co::pool_allocator> > waiting_for_commit; |
7c673cae FG |
647 | void _commit(version_t want, int op_prio); |
648 | void _omap_commit(int op_prio); | |
649 | void _encode_dentry(CDentry *dn, bufferlist& bl, const std::set<snapid_t> *snaps); | |
650 | void _committed(int r, version_t v); | |
651 | public: | |
652 | #if 0 // unused? | |
653 | void wait_for_commit(Context *c, version_t v=0); | |
654 | #endif | |
655 | void commit_to(version_t want); | |
11fdf7f2 | 656 | void commit(version_t want, MDSContext *c, |
7c673cae FG |
657 | bool ignore_authpinnability=false, int op_prio=-1); |
658 | ||
659 | // -- dirtyness -- | |
660 | version_t get_committing_version() const { return committing_version; } | |
661 | version_t get_committed_version() const { return committed_version; } | |
662 | void set_committed_version(version_t v) { committed_version = v; } | |
663 | ||
664 | void mark_complete(); | |
665 | ||
666 | ||
667 | // -- reference counting -- | |
668 | void first_get() override; | |
669 | void last_put() override; | |
670 | ||
7c673cae FG |
671 | // -- waiters -- |
672 | protected: | |
11fdf7f2 | 673 | mempool::mds_co::compact_map< string_snap_t, MDSContext::vec_alloc<mempool::mds_co::pool_allocator> > waiting_on_dentry; // FIXME string_snap_t not in mempool |
7c673cae FG |
674 | |
675 | public: | |
11fdf7f2 | 676 | bool is_waiting_for_dentry(std::string_view dname, snapid_t snap) { |
7c673cae FG |
677 | return waiting_on_dentry.count(string_snap_t(dname, snap)); |
678 | } | |
11fdf7f2 TL |
679 | void add_dentry_waiter(std::string_view dentry, snapid_t snap, MDSContext *c); |
680 | void take_dentry_waiting(std::string_view dentry, snapid_t first, snapid_t last, MDSContext::vec& ls); | |
681 | void take_sub_waiting(MDSContext::vec& ls); // dentry or ino | |
7c673cae | 682 | |
11fdf7f2 TL |
683 | void add_waiter(uint64_t mask, MDSContext *c) override; |
684 | void take_waiting(uint64_t mask, MDSContext::vec& ls) override; // may include dentry waiters | |
7c673cae FG |
685 | void finish_waiting(uint64_t mask, int result = 0); // ditto |
686 | ||
687 | ||
688 | // -- import/export -- | |
689 | void encode_export(bufferlist& bl); | |
11fdf7f2 | 690 | void finish_export(); |
7c673cae FG |
691 | void abort_export() { |
692 | put(PIN_TEMPEXPORTING); | |
693 | } | |
11fdf7f2 TL |
694 | void decode_import(bufferlist::const_iterator& blp, LogSegment *ls); |
695 | void abort_import(); | |
7c673cae FG |
696 | |
697 | // -- auth pins -- | |
91327a77 | 698 | bool can_auth_pin(int *err_ret=nullptr) const override; |
7c673cae | 699 | int get_auth_pins() const { return auth_pins; } |
7c673cae FG |
700 | int get_dir_auth_pins() const { return dir_auth_pins; } |
701 | void auth_pin(void *who) override; | |
702 | void auth_unpin(void *who) override; | |
703 | ||
11fdf7f2 | 704 | void adjust_nested_auth_pins(int dirinc, void *by); |
7c673cae FG |
705 | void verify_fragstat(); |
706 | ||
707 | // -- freezing -- | |
11fdf7f2 TL |
708 | struct freeze_tree_state_t { |
709 | CDir *dir; // freezing/frozen tree root | |
710 | int auth_pins = 0; | |
711 | bool frozen = false; | |
712 | freeze_tree_state_t(CDir *d) : dir(d) {} | |
713 | }; | |
714 | // all dirfrags within freezing/frozen tree reference the 'state' | |
715 | std::shared_ptr<freeze_tree_state_t> freeze_tree_state; | |
716 | ||
717 | void _walk_tree(std::function<bool(CDir*)> cb); | |
718 | ||
7c673cae FG |
719 | bool freeze_tree(); |
720 | void _freeze_tree(); | |
721 | void unfreeze_tree(); | |
11fdf7f2 | 722 | void adjust_freeze_after_rename(CDir *dir); |
7c673cae FG |
723 | |
724 | bool freeze_dir(); | |
725 | void _freeze_dir(); | |
726 | void unfreeze_dir(); | |
727 | ||
728 | void maybe_finish_freeze(); | |
729 | ||
11fdf7f2 TL |
730 | pair<bool,bool> is_freezing_or_frozen_tree() const { |
731 | if (freeze_tree_state) { | |
732 | if (freeze_tree_state->frozen) | |
733 | return make_pair(false, true); | |
734 | return make_pair(true, false); | |
735 | } | |
736 | return make_pair(false, false); | |
737 | } | |
91327a77 AA |
738 | |
739 | bool is_freezing() const override { return is_freezing_dir() || is_freezing_tree(); } | |
740 | bool is_freezing_tree() const { | |
741 | if (!num_freezing_trees) | |
742 | return false; | |
743 | return is_freezing_or_frozen_tree().first; | |
744 | } | |
7c673cae FG |
745 | bool is_freezing_tree_root() const { return state & STATE_FREEZINGTREE; } |
746 | bool is_freezing_dir() const { return state & STATE_FREEZINGDIR; } | |
747 | ||
748 | bool is_frozen() const override { return is_frozen_dir() || is_frozen_tree(); } | |
91327a77 AA |
749 | bool is_frozen_tree() const { |
750 | if (!num_frozen_trees) | |
751 | return false; | |
752 | return is_freezing_or_frozen_tree().second; | |
753 | } | |
7c673cae FG |
754 | bool is_frozen_tree_root() const { return state & STATE_FROZENTREE; } |
755 | bool is_frozen_dir() const { return state & STATE_FROZENDIR; } | |
11fdf7f2 | 756 | |
7c673cae FG |
757 | bool is_freezeable(bool freezing=false) const { |
758 | // no nested auth pins. | |
11fdf7f2 TL |
759 | if (auth_pins - (freezing ? 1 : 0) > 0 || |
760 | (freeze_tree_state && freeze_tree_state->auth_pins != auth_pins)) | |
7c673cae FG |
761 | return false; |
762 | ||
763 | // inode must not be frozen. | |
764 | if (!is_subtree_root() && inode->is_frozen()) | |
765 | return false; | |
766 | ||
767 | return true; | |
768 | } | |
11fdf7f2 | 769 | |
7c673cae | 770 | bool is_freezeable_dir(bool freezing=false) const { |
11fdf7f2 | 771 | if ((auth_pins - freezing) > 0 || dir_auth_pins > 0) |
7c673cae FG |
772 | return false; |
773 | ||
774 | // if not subtree root, inode must not be frozen (tree--frozen_dir is okay). | |
775 | if (!is_subtree_root() && inode->is_frozen() && !inode->is_frozen_dir()) | |
776 | return false; | |
777 | ||
778 | return true; | |
779 | } | |
780 | ||
7c673cae FG |
781 | ostream& print_db_line_prefix(ostream& out) override; |
782 | void print(ostream& out) override; | |
11fdf7f2 TL |
783 | void dump(Formatter *f, int flags = DUMP_DEFAULT) const; |
784 | void dump_load(Formatter *f); | |
7c673cae FG |
785 | }; |
786 | ||
787 | #endif |