1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #ifndef CEPH_CINODE_H
16 #define CEPH_CINODE_H
17
18 #include <list>
19 #include <map>
20 #include <set>
21 #include <string_view>
22
23 #include "common/config.h"
24 #include "common/RefCountedObj.h"
25 #include "include/compat.h"
26 #include "include/counter.h"
27 #include "include/elist.h"
28 #include "include/types.h"
29 #include "include/lru.h"
30 #include "include/compact_set.h"
31
32 #include "MDSCacheObject.h"
33 #include "MDSContext.h"
34 #include "flock.h"
35
36 #include "BatchOp.h"
37 #include "CDentry.h"
38 #include "SimpleLock.h"
39 #include "ScatterLock.h"
40 #include "LocalLockC.h"
41 #include "Capability.h"
42 #include "SnapRealm.h"
43 #include "Mutation.h"
44
45 #include "messages/MClientCaps.h"
46
47 #define dout_context g_ceph_context
48
49 class Context;
50 class CDir;
51 class CInode;
52 class MDCache;
53 class LogSegment;
54 struct SnapRealm;
55 class Session;
56 struct ObjectOperation;
57 class EMetaBlob;
58
59 struct cinode_lock_info_t {
60 int lock;
61 int wr_caps;
62 };
63
64 struct CInodeCommitOperation {
65 public:
66 CInodeCommitOperation(int prio, int64_t po)
67 : pool(po), priority(prio) {
68 }
69 CInodeCommitOperation(int prio, int64_t po, file_layout_t l, uint64_t f, std::string_view s)
70 : pool(po), priority(prio), _layout(l), _features(f), _symlink(s) {
71 update_layout_symlink = true;
72 }
73
74 void update(ObjectOperation &op, inode_backtrace_t &bt);
75 int64_t get_pool() { return pool; }
76
77 private:
78 int64_t pool; ///< pool id
79 int priority;
80 bool update_layout_symlink = false;
81 file_layout_t _layout;
82 uint64_t _features;
83 std::string_view _symlink;
84 };
85
86 struct CInodeCommitOperations {
87 std::vector<CInodeCommitOperation> ops_vec;
88 inode_backtrace_t bt;
89 version_t version;
90 CInode *in;
91 };
92
93 /**
94 * Base class for CInode, containing the backing store data and
95 * serialization methods. This exists so that we can read and
96 * handle CInodes from the backing store without hitting all
97 * the business logic in CInode proper.
98 */
99 class InodeStoreBase {
100 public:
101 using mempool_inode = inode_t<mempool::mds_co::pool_allocator>;
102 using inode_ptr = std::shared_ptr<mempool_inode>;
103 using inode_const_ptr = std::shared_ptr<const mempool_inode>;
104
105 template <typename ...Args>
106 static inode_ptr allocate_inode(Args && ...args) {
107 static mempool::mds_co::pool_allocator<mempool_inode> allocator;
108 return std::allocate_shared<mempool_inode>(allocator, std::forward<Args>(args)...);
109 }
110
111 using mempool_xattr_map = xattr_map<mempool::mds_co::pool_allocator>; // FIXME bufferptr not in mempool
112 using xattr_map_ptr = std::shared_ptr<mempool_xattr_map>;
113 using xattr_map_const_ptr = std::shared_ptr<const mempool_xattr_map>;
114
115 template <typename ...Args>
116 static xattr_map_ptr allocate_xattr_map(Args && ...args) {
117 static mempool::mds_co::pool_allocator<mempool_xattr_map> allocator;
118 return std::allocate_shared<mempool_xattr_map>(allocator, std::forward<Args>(args)...);
119 }
120
121 using mempool_old_inode = old_inode_t<mempool::mds_co::pool_allocator>;
122 using mempool_old_inode_map = mempool::mds_co::map<snapid_t, mempool_old_inode>;
123 using old_inode_map_ptr = std::shared_ptr<mempool_old_inode_map>;
124 using old_inode_map_const_ptr = std::shared_ptr<const mempool_old_inode_map>;
125
126 template <typename ...Args>
127 static old_inode_map_ptr allocate_old_inode_map(Args && ...args) {
128 static mempool::mds_co::pool_allocator<mempool_old_inode_map> allocator;
129 return std::allocate_shared<mempool_old_inode_map>(allocator, std::forward<Args>(args)...);
130 }
131
132 void reset_inode(inode_const_ptr&& ptr) {
133 inode = std::move(ptr);
134 }
135
136 void reset_xattrs(xattr_map_const_ptr&& ptr) {
137 xattrs = std::move(ptr);
138 }
139
140 void reset_old_inodes(old_inode_map_const_ptr&& ptr) {
141 old_inodes = std::move(ptr);
142 }
143
144 void encode_xattrs(bufferlist &bl) const;
145 void decode_xattrs(bufferlist::const_iterator &p);
146 void encode_old_inodes(bufferlist &bl, uint64_t features) const;
147 void decode_old_inodes(bufferlist::const_iterator &p);
148
149 /* Helpers */
150 static object_t get_object_name(inodeno_t ino, frag_t fg, std::string_view suffix);
151
152 /* Full serialization for use in ".inode" root inode objects */
153 void encode(ceph::buffer::list &bl, uint64_t features, const ceph::buffer::list *snap_blob=NULL) const;
154 void decode(ceph::buffer::list::const_iterator &bl, ceph::buffer::list& snap_blob);
155
156 /* Serialization without ENCODE_START/FINISH blocks for use embedded in dentry */
157 void encode_bare(ceph::buffer::list &bl, uint64_t features, const ceph::buffer::list *snap_blob=NULL) const;
158 void decode_bare(ceph::buffer::list::const_iterator &bl, ceph::buffer::list &snap_blob, __u8 struct_v=5);
159
160 /* For test/debug output */
161 void dump(ceph::Formatter *f) const;
162
163 void decode_json(JSONObj *obj);
164 static void xattrs_cb(InodeStoreBase::mempool_xattr_map& c, JSONObj *obj);
165 static void old_indoes_cb(InodeStoreBase::mempool_old_inode_map& c, JSONObj *obj);
166
167 /* For use by offline tools */
168 __u32 hash_dentry_name(std::string_view dn);
169 frag_t pick_dirfrag(std::string_view dn);
170
171 mempool::mds_co::string symlink; // symlink dest, if symlink
172 fragtree_t dirfragtree; // dir frag tree, if any. always consistent with our dirfrag map.
173 snapid_t oldest_snap = CEPH_NOSNAP;
174 damage_flags_t damage_flags = 0;
175
176 protected:
177 static inode_const_ptr empty_inode;
178
179 // The following members are pointers to constant data; the constant data can
180 // be shared by CInode and log events. To update these members in CInode,
181 // read-copy-update should be used (an illustrative sketch follows the class).
182 inode_const_ptr inode = empty_inode;
183 xattr_map_const_ptr xattrs;
184 old_inode_map_const_ptr old_inodes; // key = last, value.first = first
185 };
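// Illustrative only: the read-copy-update flow described above, as it might
// look from code inside a derived class (e.g. CInode), assuming the caller
// already holds whatever locks normally serialize updates to this inode:
//
//   inode_ptr copy = allocate_inode(*inode);  // private copy of the shared data
//   copy->version++;                          // modify the copy, not the original
//   reset_inode(std::move(copy));             // publish; existing readers keep
//                                             // their old, consistent snapshot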
186
187 inline void decode_noshare(InodeStoreBase::mempool_xattr_map& xattrs,
188 ceph::buffer::list::const_iterator &p)
189 {
190 decode_noshare<mempool::mds_co::pool_allocator>(xattrs, p);
191 }
192
193 class InodeStore : public InodeStoreBase {
194 public:
195 mempool_inode* get_inode() {
196 if (inode == empty_inode)
197 reset_inode(allocate_inode());
198 return const_cast<mempool_inode*>(inode.get());
199 }
200 mempool_xattr_map* get_xattrs() { return const_cast<mempool_xattr_map*>(xattrs.get()); }
201
202 void encode(ceph::buffer::list &bl, uint64_t features) const {
203 InodeStoreBase::encode(bl, features, &snap_blob);
204 }
205 void decode(ceph::buffer::list::const_iterator &bl) {
206 InodeStoreBase::decode(bl, snap_blob);
207 }
208 void encode_bare(ceph::buffer::list &bl, uint64_t features) const {
209 InodeStoreBase::encode_bare(bl, features, &snap_blob);
210 }
211 void decode_bare(ceph::buffer::list::const_iterator &bl) {
212 InodeStoreBase::decode_bare(bl, snap_blob);
213 }
214
215 static void generate_test_instances(std::list<InodeStore*>& ls);
216
217 using InodeStoreBase::inode;
218 using InodeStoreBase::xattrs;
219 using InodeStoreBase::old_inodes;
220
221 // FIXME bufferlist not part of mempool
222 ceph::buffer::list snap_blob; // Encoded copy of SnapRealm, because we can't
223 // rehydrate it without full MDCache
224 };
225 WRITE_CLASS_ENCODER_FEATURES(InodeStore)
226
227 // just for ceph-dencoder
228 class InodeStoreBare : public InodeStore {
229 public:
230 void encode(ceph::buffer::list &bl, uint64_t features) const {
231 InodeStore::encode_bare(bl, features);
232 }
233 void decode(ceph::buffer::list::const_iterator &bl) {
234 InodeStore::decode_bare(bl);
235 }
236 static void generate_test_instances(std::list<InodeStoreBare*>& ls);
237 };
238 WRITE_CLASS_ENCODER_FEATURES(InodeStoreBare)
239
240 // cached inode wrapper
241 class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CInode> {
242 public:
243 MEMPOOL_CLASS_HELPERS();
244
245 using mempool_cap_map = mempool::mds_co::map<client_t, Capability>;
246 /**
247 * @defgroup Scrubbing and fsck
248 */
249
250 /**
251 * Report the results of validation against a particular inode.
252  * Each member holds a status record for one validated aspect:
253  * <member>.checked records whether validation was performed against the member,
254  * and <member>.passed records whether the member passed validation.
255  * performed_validation is set to true if the validation was actually
256  * run. It might not be run if, for instance, the inode is marked as dirty.
257  * passed_validation is set to true if everything that was checked
258  * passed its validation. (An interpretation sketch follows the struct below.)
259 */
260 struct validated_data {
261 template<typename T>struct member_status {
262 bool checked = false;
263 bool passed = false;
264 bool repaired = false;
265 int ondisk_read_retval = 0;
266 T ondisk_value;
267 T memory_value;
268 std::stringstream error_str;
269 };
270
271 struct raw_stats_t {
272 frag_info_t dirstat;
273 nest_info_t rstat;
274 };
275
276 validated_data() {}
277
278 void dump(ceph::Formatter *f) const;
279
280 bool all_damage_repaired() const;
281
282 bool performed_validation = false;
283 bool passed_validation = false;
284
285 member_status<inode_backtrace_t> backtrace;
286 member_status<mempool_inode> inode; // XXX should not be in mempool; wait for pmr
287 member_status<raw_stats_t> raw_stats;
288 };
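// Illustrative only: one way a caller might interpret a filled-in
// validated_data instance once validation has completed ('results' is a
// hypothetical local):
//
//   if (results.performed_validation && !results.passed_validation) {
//     if (results.backtrace.checked && !results.backtrace.passed)
//       derr << "backtrace mismatch: " << results.backtrace.error_str.str() << dendl;
//     if (results.raw_stats.checked && !results.raw_stats.passed &&
//         !results.raw_stats.repaired)
//       derr << "dirstat/rstat mismatch left unrepaired" << dendl;
//   }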
289
290 // friends
291 friend class Server;
292 friend class Locker;
293 friend class Migrator;
294 friend class MDCache;
295 friend class StrayManager;
296 friend class CDir;
297 friend std::ostream& operator<<(std::ostream&, const CInode&);
298
299 class scrub_info_t {
300 public:
301 scrub_info_t() {}
302
303 version_t last_scrub_version = 0;
304 utime_t last_scrub_stamp;
305
306 bool last_scrub_dirty = false; /// are our stamps dirty with respect to disk state?
307 bool scrub_in_progress = false; /// are we currently scrubbing?
308
309 fragset_t queued_frags;
310
311 ScrubHeaderRef header;
312 };
313
314 // -- pins --
315 static const int PIN_DIRFRAG = -1;
316 static const int PIN_CAPS = 2; // client caps
317 static const int PIN_IMPORTING = -4; // importing
318 static const int PIN_OPENINGDIR = 7;
319 static const int PIN_REMOTEPARENT = 8;
320 static const int PIN_BATCHOPENJOURNAL = 9;
321 static const int PIN_SCATTERED = 10;
322 static const int PIN_STICKYDIRS = 11;
323 //static const int PIN_PURGING = -12;
324 static const int PIN_FREEZING = 13;
325 static const int PIN_FROZEN = 14;
326 static const int PIN_IMPORTINGCAPS = -15;
327 static const int PIN_PASTSNAPPARENT = -16;
328 static const int PIN_OPENINGSNAPPARENTS = 17;
329 static const int PIN_TRUNCATING = 18;
330 static const int PIN_STRAY = 19; // we pin our stray inode while active
331 static const int PIN_NEEDSNAPFLUSH = 20;
332 static const int PIN_DIRTYRSTAT = 21;
333 static const int PIN_EXPORTINGCAPS = 22;
334 static const int PIN_DIRTYPARENT = 23;
335 static const int PIN_DIRWAITER = 24;
336
337 // -- dump flags --
338 static const int DUMP_INODE_STORE_BASE = (1 << 0);
339 static const int DUMP_MDS_CACHE_OBJECT = (1 << 1);
340 static const int DUMP_LOCKS = (1 << 2);
341 static const int DUMP_STATE = (1 << 3);
342 static const int DUMP_CAPS = (1 << 4);
343 static const int DUMP_PATH = (1 << 5);
344 static const int DUMP_DIRFRAGS = (1 << 6);
345 static const int DUMP_ALL = (-1);
346 static const int DUMP_DEFAULT = DUMP_ALL & (~DUMP_PATH) & (~DUMP_DIRFRAGS);
347
348 // -- state --
349 static const int STATE_EXPORTING = (1<<0); // on nonauth bystander.
350 static const int STATE_OPENINGDIR = (1<<1);
351 static const int STATE_FREEZING = (1<<2);
352 static const int STATE_FROZEN = (1<<3);
353 static const int STATE_AMBIGUOUSAUTH = (1<<4);
354 static const int STATE_EXPORTINGCAPS = (1<<5);
355 static const int STATE_NEEDSRECOVER = (1<<6);
356 static const int STATE_RECOVERING = (1<<7);
357 static const int STATE_PURGING = (1<<8);
358 static const int STATE_DIRTYPARENT = (1<<9);
359 static const int STATE_DIRTYRSTAT = (1<<10);
360 static const int STATE_STRAYPINNED = (1<<11);
361 static const int STATE_FROZENAUTHPIN = (1<<12);
362 static const int STATE_DIRTYPOOL = (1<<13);
363 static const int STATE_REPAIRSTATS = (1<<14);
364 static const int STATE_MISSINGOBJS = (1<<15);
365 static const int STATE_EVALSTALECAPS = (1<<16);
366 static const int STATE_QUEUEDEXPORTPIN = (1<<17);
367 static const int STATE_TRACKEDBYOFT = (1<<18); // tracked by open file table
368 static const int STATE_DELAYEDEXPORTPIN = (1<<19);
369 static const int STATE_DISTEPHEMERALPIN = (1<<20);
370 static const int STATE_RANDEPHEMERALPIN = (1<<21);
371 static const int STATE_CLIENTWRITEABLE = (1<<22);
372
373 // orphan inode needs notification of releasing reference
374 static const int STATE_ORPHAN = STATE_NOTIFYREF;
375
376 static const int MASK_STATE_EXPORTED =
377 (STATE_DIRTY|STATE_NEEDSRECOVER|STATE_DIRTYPARENT|STATE_DIRTYPOOL|
378 STATE_DISTEPHEMERALPIN|STATE_RANDEPHEMERALPIN);
379 static const int MASK_STATE_EXPORT_KEPT =
380 (STATE_FROZEN|STATE_AMBIGUOUSAUTH|STATE_EXPORTINGCAPS|
381 STATE_QUEUEDEXPORTPIN|STATE_TRACKEDBYOFT|STATE_DELAYEDEXPORTPIN|
382 STATE_DISTEPHEMERALPIN|STATE_RANDEPHEMERALPIN);
383
384   /* These are "permanent" state markers that are passed around between MDS
385    * ranks. Nothing protects/updates them the way a typical MDS lock would.
386    *
387    * Currently, we just use this for REPLICATED inodes. The reason we need to
388    * replicate the random epin state is that the directory inode is still
389    * under the authority of the parent subtree, so it's not exported normally
390    * and we can't pass the state around that way. The importer of the dirfrags
391    * still needs to know that the inode is randomly pinned, though; otherwise it
392    * doesn't know that the dirfrags are pinned.
393 */
394 static const int MASK_STATE_REPLICATED = STATE_RANDEPHEMERALPIN;
395
396 // -- waiters --
397 static const uint64_t WAIT_DIR = (1<<0);
398 static const uint64_t WAIT_FROZEN = (1<<1);
399 static const uint64_t WAIT_TRUNC = (1<<2);
400 static const uint64_t WAIT_FLOCK = (1<<3);
401
402 static const uint64_t WAIT_ANY_MASK = (uint64_t)(-1);
403
404 // misc
405 static const unsigned EXPORT_NONCE = 1; // nonce given to replicas created by export
406
407 // ---------------------------
408 CInode() = delete;
409 CInode(MDCache *c, bool auth=true, snapid_t f=2, snapid_t l=CEPH_NOSNAP);
410 ~CInode() override {
411 close_dirfrags();
412 close_snaprealm();
413 clear_file_locks();
414 ceph_assert(num_projected_srnodes == 0);
415 ceph_assert(num_caps_notable == 0);
416 ceph_assert(num_subtree_roots == 0);
417 ceph_assert(num_exporting_dirs == 0);
418 ceph_assert(batch_ops.empty());
419 }
420
421 std::map<int, std::unique_ptr<BatchOp>> batch_ops;
422
423 std::string_view pin_name(int p) const override;
424
425 std::ostream& print_db_line_prefix(std::ostream& out) const override;
426
427 const scrub_info_t *scrub_info() const {
428 if (!scrub_infop)
429 scrub_info_create();
430 return scrub_infop.get();
431 }
432
433 const ScrubHeaderRef& get_scrub_header() {
434 static const ScrubHeaderRef nullref;
435 return scrub_infop ? scrub_infop->header : nullref;
436 }
437
438 bool scrub_is_in_progress() const {
439 return (scrub_infop && scrub_infop->scrub_in_progress);
440 }
441 /**
442 * Start scrubbing on this inode. That could be very short if it's
443 * a file, or take a long time if we're recursively scrubbing a directory.
444 * @pre It is not currently scrubbing
445 * @post it has set up internal scrubbing state
446    * @param header The ScrubHeader describing the scrub operation this
447    * inode is taking part in
448 */
449 void scrub_initialize(ScrubHeaderRef& header);
450 /**
451 * Call this once the scrub has been completed, whether it's a full
452 * recursive scrub on a directory or simply the data on a file (or
453 * anything in between).
456 */
457 void scrub_finished();
458
459 void scrub_aborted();
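  // Illustrative only: the scrub lifecycle expressed by the calls above
  // ('in' and 'header' are hypothetical; the loop is schematic):
  //
  //   in->scrub_initialize(header);         // attach the header, mark in progress
  //   while (in->scrub_is_in_progress()) {
  //     // ... scrub dirfrags, consulting in->scrub_queued_frags() ...
  //   }
  //   in->scrub_finished();                 // or in->scrub_aborted() on failure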
460
461 fragset_t& scrub_queued_frags() {
462 ceph_assert(scrub_infop);
463 return scrub_infop->queued_frags;
464 }
465
466 bool is_multiversion() const {
467 return snaprealm || // other snaprealms will link to me
468 get_inode()->is_dir() || // links to me in other snaps
469 get_inode()->nlink > 1 || // there are remote links, possibly snapped, that will need to find me
470 is_any_old_inodes(); // once multiversion, always multiversion. until old_inodes gets cleaned out.
471 }
472 snapid_t get_oldest_snap();
473
474 bool is_dirty_rstat() {
475 return state_test(STATE_DIRTYRSTAT);
476 }
477 void mark_dirty_rstat();
478 void clear_dirty_rstat();
479
480 //bool hack_accessed = false;
481 //utime_t hack_load_stamp;
482
483 /**
484 * Projection methods, used to store inode changes until they have been journaled,
485 * at which point they are popped.
486 * Usage:
487    * Call project_inode as needed. If you are changing xattrs or the sr_t, pass
488    * xattr=true and/or snap=true, then modify the returned xattrs/snapnode member
489    * as needed. (One exception: project_past_snaprealm_parent lets you project
490    * the snapnode after calling project_inode, i.e. without passing snap=true.)
491    *
492    * Then journal the change. Once journaling is done, call
493    * pop_and_dirty_projected_inode; it takes care of the inode itself, the xattrs,
494    * and the snaprealm. (An illustrative usage sketch follows the declarations below.)
495 */
496
497 struct projected_inode {
498 static sr_t* const UNDEF_SRNODE;
499
500 inode_ptr const inode;
501 xattr_map_ptr const xattrs;
502 sr_t* const snapnode;
503
504 projected_inode() = delete;
505 explicit projected_inode(inode_ptr&& i, xattr_map_ptr&& x, sr_t *s=nullptr) :
506 inode(std::move(i)), xattrs(std::move(x)), snapnode(s) {}
507 };
508 projected_inode project_inode(const MutationRef& mut,
509 bool xattr = false, bool snap = false);
510
511 void pop_and_dirty_projected_inode(LogSegment *ls, const MutationRef& mut);
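  // Illustrative only: a sketch of the projection lifecycle described above.
  // 'in', 'mut' and 'ls' stand in for the usual request-handling variables
  // (all hypothetical here), and the caller is assumed to hold the relevant locks:
  //
  //   auto pi = in->project_inode(mut, /*xattr=*/false, /*snap=*/false);
  //   pi.inode->version = in->pre_dirty();  // stage changes on the projected copy
  //   pi.inode->mtime = ceph_clock_now();
  //   // ... journal the update (e.g. in an EMetaBlob) ...
  //   // once the journal entry is safe:
  //   in->pop_and_dirty_projected_inode(ls, mut);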
512
513 version_t get_projected_version() const {
514 if (projected_nodes.empty())
515 return get_inode()->version;
516 else
517 return projected_nodes.back().inode->version;
518 }
519 bool is_projected() const {
520 return !projected_nodes.empty();
521 }
522
523 const inode_const_ptr& get_projected_inode() const {
524 if (projected_nodes.empty())
525 return get_inode();
526 else
527 return projected_nodes.back().inode;
528 }
529 // inode should have already been projected in caller's context
530 mempool_inode* _get_projected_inode() {
531 ceph_assert(!projected_nodes.empty());
532 return const_cast<mempool_inode*>(projected_nodes.back().inode.get());
533 }
534 const inode_const_ptr& get_previous_projected_inode() const {
535 ceph_assert(!projected_nodes.empty());
536 auto it = projected_nodes.rbegin();
537 ++it;
538 if (it != projected_nodes.rend())
539 return it->inode;
540 else
541 return get_inode();
542 }
543
544 const xattr_map_const_ptr& get_projected_xattrs() {
545 if (projected_nodes.empty())
546 return xattrs;
547 else
548 return projected_nodes.back().xattrs;
549 }
550 const xattr_map_const_ptr& get_previous_projected_xattrs() {
551 ceph_assert(!projected_nodes.empty());
552 auto it = projected_nodes.rbegin();
553 ++it;
554 if (it != projected_nodes.rend())
555 return it->xattrs;
556 else
557 return xattrs;
558 }
559
560 sr_t *prepare_new_srnode(snapid_t snapid);
561 void project_snaprealm(sr_t *new_srnode);
562 sr_t *project_snaprealm(snapid_t snapid=0) {
563 sr_t* new_srnode = prepare_new_srnode(snapid);
564 project_snaprealm(new_srnode);
565 return new_srnode;
566 }
567 const sr_t *get_projected_srnode() const;
568
569 void mark_snaprealm_global(sr_t *new_srnode);
570 void clear_snaprealm_global(sr_t *new_srnode);
571 bool is_projected_snaprealm_global() const;
572
573 void record_snaprealm_past_parent(sr_t *new_snap, SnapRealm *newparent);
574 void record_snaprealm_parent_dentry(sr_t *new_snap, SnapRealm *newparent,
575 CDentry *dn, bool primary_dn);
576 void project_snaprealm_past_parent(SnapRealm *newparent);
577 void early_pop_projected_snaprealm();
578
579 const mempool_old_inode& cow_old_inode(snapid_t follows, bool cow_head);
580 void split_old_inode(snapid_t snap);
581 snapid_t pick_old_inode(snapid_t last) const;
582 void pre_cow_old_inode();
583 bool has_snap_data(snapid_t s);
584 void purge_stale_snap_data(const std::set<snapid_t>& snaps);
585
586 size_t get_num_dirfrags() const { return dirfrags.size(); }
587 CDir* get_dirfrag(frag_t fg) {
588 auto pi = dirfrags.find(fg);
589 if (pi != dirfrags.end()) {
590 //assert(g_conf()->debug_mds < 2 || dirfragtree.is_leaf(fg)); // performance hack FIXME
591 return pi->second;
592 }
593 return NULL;
594 }
595 std::pair<bool, std::vector<CDir*>> get_dirfrags_under(frag_t fg);
596 CDir* get_approx_dirfrag(frag_t fg);
597
598 template<typename Container>
599 void get_dirfrags(Container& ls) const {
600 // all dirfrags
601 if constexpr (std::is_same_v<Container, std::vector<CDir*>>)
602 ls.reserve(ls.size() + dirfrags.size());
603 for (const auto &p : dirfrags)
604 ls.push_back(p.second);
605 }
606
607 auto get_dirfrags() const {
608 std::vector<CDir*> result;
609 get_dirfrags(result);
610 return result;
611 }
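  // Illustrative only: the accessors above fill any container supporting
  // push_back, or build a vector directly ('in' is a hypothetical CInode*):
  //
  //   std::list<CDir*> ls;
  //   in->get_dirfrags(ls);                        // fill an arbitrary container
  //   std::vector<CDir*> v = in->get_dirfrags();   // or take a vector by value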
612
613 void get_nested_dirfrags(std::vector<CDir*>&) const;
614 std::vector<CDir*> get_nested_dirfrags() const {
615 std::vector<CDir*> v;
616 get_nested_dirfrags(v);
617 return v;
618 }
619 void get_subtree_dirfrags(std::vector<CDir*>&) const;
620 std::vector<CDir*> get_subtree_dirfrags() const {
621 std::vector<CDir*> v;
622 get_subtree_dirfrags(v);
623 return v;
624 }
625 int get_num_subtree_roots() const {
626 return num_subtree_roots;
627 }
628
629 CDir *get_or_open_dirfrag(MDCache *mdcache, frag_t fg);
630 CDir *add_dirfrag(CDir *dir);
631 void close_dirfrag(frag_t fg);
632 void close_dirfrags();
633 bool has_subtree_root_dirfrag(int auth=-1);
634 bool has_subtree_or_exporting_dirfrag();
635
636 void force_dirfrags();
637 void verify_dirfrags();
638
639 void get_stickydirs();
640 void put_stickydirs();
641
642 void add_need_snapflush(CInode *snapin, snapid_t snapid, client_t client);
643 void remove_need_snapflush(CInode *snapin, snapid_t snapid, client_t client);
644 std::pair<bool,bool> split_need_snapflush(CInode *cowin, CInode *in);
645
646 // -- accessors --
647
648 inodeno_t ino() const { return get_inode()->ino; }
649 vinodeno_t vino() const { return vinodeno_t(ino(), last); }
650 int d_type() const { return IFTODT(get_inode()->mode); }
651 bool is_root() const { return ino() == CEPH_INO_ROOT; }
652 bool is_stray() const { return MDS_INO_IS_STRAY(ino()); }
653 mds_rank_t get_stray_owner() const {
654 return (mds_rank_t)MDS_INO_STRAY_OWNER(ino());
655 }
656 bool is_mdsdir() const { return MDS_INO_IS_MDSDIR(ino()); }
657 bool is_base() const { return MDS_INO_IS_BASE(ino()); }
658 bool is_system() const { return ino() < MDS_INO_SYSTEM_BASE; }
659 bool is_lost_and_found() const { return ino() == CEPH_INO_LOST_AND_FOUND; }
660 bool is_normal() const { return !(is_base() || is_system() || is_stray()); }
661 bool is_file() const { return get_inode()->is_file(); }
662 bool is_symlink() const { return get_inode()->is_symlink(); }
663 bool is_dir() const { return get_inode()->is_dir(); }
664
665 bool is_head() const { return last == CEPH_NOSNAP; }
666
667 // note: this overloads MDSCacheObject
668 bool is_ambiguous_auth() const {
669 return state_test(STATE_AMBIGUOUSAUTH) ||
670 MDSCacheObject::is_ambiguous_auth();
671 }
672 void set_ambiguous_auth() {
673 state_set(STATE_AMBIGUOUSAUTH);
674 }
675 void clear_ambiguous_auth(MDSContext::vec& finished);
676 void clear_ambiguous_auth();
677
678 const inode_const_ptr& get_inode() const {
679 return inode;
680 }
681
682 // only used for updating newly allocated CInode
683 mempool_inode* _get_inode() {
684 if (inode == empty_inode)
685 reset_inode(allocate_inode());
686 return const_cast<mempool_inode*>(inode.get());
687 }
688
689 const xattr_map_const_ptr& get_xattrs() const { return xattrs; }
690
691 bool is_any_old_inodes() const { return old_inodes && !old_inodes->empty(); }
692 const old_inode_map_const_ptr& get_old_inodes() const { return old_inodes; }
693
694 CDentry* get_parent_dn() { return parent; }
695 const CDentry* get_parent_dn() const { return parent; }
696 CDentry* get_projected_parent_dn() { return !projected_parent.empty() ? projected_parent.back() : parent; }
697 const CDentry* get_projected_parent_dn() const { return !projected_parent.empty() ? projected_parent.back() : parent; }
698 const CDentry* get_oldest_parent_dn() const {
699 if (parent)
700 return parent;
701 return !projected_parent.empty() ? projected_parent.front(): NULL;
702 }
703 CDir *get_parent_dir();
704 const CDir *get_projected_parent_dir() const;
705 CDir *get_projected_parent_dir();
706 CInode *get_parent_inode();
707
708 bool is_lt(const MDSCacheObject *r) const override {
709 const CInode *o = static_cast<const CInode*>(r);
710 return ino() < o->ino() ||
711 (ino() == o->ino() && last < o->last);
712 }
713
714 // -- misc --
715 bool is_ancestor_of(const CInode *other) const;
716 bool is_projected_ancestor_of(const CInode *other) const;
717
718 void make_path_string(std::string& s, bool projected=false, const CDentry *use_parent=NULL) const;
719 void make_path(filepath& s, bool projected=false) const;
720 void name_stray_dentry(std::string& dname);
721
722 // -- dirtyness --
723 version_t get_version() const { return get_inode()->version; }
724
725 version_t pre_dirty();
726 void _mark_dirty(LogSegment *ls);
727 void mark_dirty(LogSegment *ls);
728 void mark_clean();
729
730 void store(MDSContext *fin);
731 void _stored(int r, version_t cv, Context *fin);
732 /**
733 * Flush a CInode to disk. This includes the backtrace, the parent
734 * directory's link, and the Inode object itself (if a base directory).
735 * @pre is_auth() on both the inode and its containing directory
736 * @pre can_auth_pin()
737 * @param fin The Context to call when the flush is completed.
738 */
739 void flush(MDSContext *fin);
740 void fetch(MDSContext *fin);
741 void _fetched(ceph::buffer::list& bl, ceph::buffer::list& bl2, Context *fin);
742
743 void _commit_ops(int r, C_GatherBuilder &gather_bld,
744 std::vector<CInodeCommitOperation> &ops_vec,
745 inode_backtrace_t &bt);
746 void build_backtrace(int64_t pool, inode_backtrace_t& bt);
747 void _store_backtrace(std::vector<CInodeCommitOperation> &ops_vec,
748 inode_backtrace_t &bt, int op_prio);
749 void store_backtrace(CInodeCommitOperations &op, int op_prio);
750 void store_backtrace(MDSContext *fin, int op_prio=-1);
751 void _stored_backtrace(int r, version_t v, Context *fin);
752 void fetch_backtrace(Context *fin, ceph::buffer::list *backtrace);
753
754 void mark_dirty_parent(LogSegment *ls, bool dirty_pool=false);
755 void clear_dirty_parent();
756 void verify_diri_backtrace(ceph::buffer::list &bl, int err);
757 bool is_dirty_parent() { return state_test(STATE_DIRTYPARENT); }
758 bool is_dirty_pool() { return state_test(STATE_DIRTYPOOL); }
759
760 void encode_snap_blob(ceph::buffer::list &bl);
761 void decode_snap_blob(const ceph::buffer::list &bl);
762 void encode_store(ceph::buffer::list& bl, uint64_t features);
763 void decode_store(ceph::buffer::list::const_iterator& bl);
764
765 void add_dir_waiter(frag_t fg, MDSContext *c);
766 void take_dir_waiting(frag_t fg, MDSContext::vec& ls);
767 bool is_waiting_for_dir(frag_t fg) {
768 return waiting_on_dir.count(fg);
769 }
770 void add_waiter(uint64_t tag, MDSContext *c) override;
771 void take_waiting(uint64_t tag, MDSContext::vec& ls) override;
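  // Illustrative only: the generic waiter pattern these methods implement
  // ('in', 'C_Foo' and 'mds' are hypothetical):
  //
  //   in->add_waiter(CInode::WAIT_FROZEN, new C_Foo(...));  // park a context
  //   // ... later, once the waited-for condition holds ...
  //   MDSContext::vec finished;
  //   in->take_waiting(CInode::WAIT_FROZEN, finished);
  //   mds->queue_waiters(finished);          // e.g. schedule the parked contexts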
772
773 // -- encode/decode helpers --
774 void _encode_base(ceph::buffer::list& bl, uint64_t features);
775 void _decode_base(ceph::buffer::list::const_iterator& p);
776 void _encode_locks_full(ceph::buffer::list& bl);
777 void _decode_locks_full(ceph::buffer::list::const_iterator& p);
778 void _encode_locks_state_for_replica(ceph::buffer::list& bl, bool need_recover);
779 void _encode_locks_state_for_rejoin(ceph::buffer::list& bl, int rep);
780 void _decode_locks_state_for_replica(ceph::buffer::list::const_iterator& p, bool is_new);
781 void _decode_locks_rejoin(ceph::buffer::list::const_iterator& p, MDSContext::vec& waiters,
782 std::list<SimpleLock*>& eval_locks, bool survivor);
783
784 // -- import/export --
785 void encode_export(ceph::buffer::list& bl);
786 void finish_export();
787 void abort_export() {
788 put(PIN_TEMPEXPORTING);
789 ceph_assert(state_test(STATE_EXPORTINGCAPS));
790 state_clear(STATE_EXPORTINGCAPS);
791 put(PIN_EXPORTINGCAPS);
792 }
793 void decode_import(ceph::buffer::list::const_iterator& p, LogSegment *ls);
794
795 // for giving to clients
796 int encode_inodestat(ceph::buffer::list& bl, Session *session, SnapRealm *realm,
797 snapid_t snapid=CEPH_NOSNAP, unsigned max_bytes=0,
798 int getattr_wants=0);
799 void encode_cap_message(const ceph::ref_t<MClientCaps> &m, Capability *cap);
800
801 SimpleLock* get_lock(int type) override;
802
803 void set_object_info(MDSCacheObjectInfo &info) override;
804
805 void encode_lock_state(int type, ceph::buffer::list& bl) override;
806 void decode_lock_state(int type, const ceph::buffer::list& bl) override;
807 void encode_lock_iauth(ceph::buffer::list& bl);
808 void decode_lock_iauth(ceph::buffer::list::const_iterator& p);
809 void encode_lock_ilink(ceph::buffer::list& bl);
810 void decode_lock_ilink(ceph::buffer::list::const_iterator& p);
811 void encode_lock_idft(ceph::buffer::list& bl);
812 void decode_lock_idft(ceph::buffer::list::const_iterator& p);
813 void encode_lock_ifile(ceph::buffer::list& bl);
814 void decode_lock_ifile(ceph::buffer::list::const_iterator& p);
815 void encode_lock_inest(ceph::buffer::list& bl);
816 void decode_lock_inest(ceph::buffer::list::const_iterator& p);
817 void encode_lock_ixattr(ceph::buffer::list& bl);
818 void decode_lock_ixattr(ceph::buffer::list::const_iterator& p);
819 void encode_lock_isnap(ceph::buffer::list& bl);
820 void decode_lock_isnap(ceph::buffer::list::const_iterator& p);
821 void encode_lock_iflock(ceph::buffer::list& bl);
822 void decode_lock_iflock(ceph::buffer::list::const_iterator& p);
823 void encode_lock_ipolicy(ceph::buffer::list& bl);
824 void decode_lock_ipolicy(ceph::buffer::list::const_iterator& p);
825
826 void _finish_frag_update(CDir *dir, MutationRef& mut);
827
828 void clear_dirty_scattered(int type) override;
829 bool is_dirty_scattered();
830 void clear_scatter_dirty(); // on rejoin ack
831
832 void start_scatter(ScatterLock *lock);
833 void finish_scatter_update(ScatterLock *lock, CDir *dir,
834 version_t inode_version, version_t dir_accounted_version);
835 void finish_scatter_gather_update(int type, MutationRef& mut);
836 void finish_scatter_gather_update_accounted(int type, EMetaBlob *metablob);
837
838 // -- snap --
839 void open_snaprealm(bool no_split=false);
840 void close_snaprealm(bool no_join=false);
841 SnapRealm *find_snaprealm() const;
842 void encode_snap(ceph::buffer::list& bl);
843 void decode_snap(ceph::buffer::list::const_iterator& p);
844
845 client_t get_loner() const { return loner_cap; }
846 client_t get_wanted_loner() const { return want_loner_cap; }
847
848 // this is the loner state our locks should aim for
849 client_t get_target_loner() const {
850 if (loner_cap == want_loner_cap)
851 return loner_cap;
852 else
853 return -1;
854 }
855
856 client_t calc_ideal_loner();
857 void set_loner_cap(client_t l);
858 bool choose_ideal_loner();
859 bool try_set_loner();
860 bool try_drop_loner();
861
862 // choose new lock state during recovery, based on issued caps
863 void choose_lock_state(SimpleLock *lock, int allissued);
864 void choose_lock_states(int dirty_caps);
865
866 int count_nonstale_caps();
867 bool multiple_nonstale_caps();
868
869 bool is_any_caps() { return !client_caps.empty(); }
870 bool is_any_nonstale_caps() { return count_nonstale_caps(); }
871
872 const mempool::mds_co::compact_map<int32_t,int32_t>& get_mds_caps_wanted() const { return mds_caps_wanted; }
873 void set_mds_caps_wanted(mempool::mds_co::compact_map<int32_t,int32_t>& m);
874 void set_mds_caps_wanted(mds_rank_t mds, int32_t wanted);
875
876 const mempool_cap_map& get_client_caps() const { return client_caps; }
877 Capability *get_client_cap(client_t client) {
878 auto client_caps_entry = client_caps.find(client);
879 if (client_caps_entry != client_caps.end())
880 return &client_caps_entry->second;
881 return 0;
882 }
883 int get_client_cap_pending(client_t client) const {
884 auto client_caps_entry = client_caps.find(client);
885 if (client_caps_entry != client_caps.end()) {
886 return client_caps_entry->second.pending();
887 } else {
888 return 0;
889 }
890 }
891
892 int get_num_caps_notable() const { return num_caps_notable; }
893 void adjust_num_caps_notable(int d);
894
895 Capability *add_client_cap(client_t client, Session *session,
896 SnapRealm *conrealm=nullptr, bool new_inode=false);
897 void remove_client_cap(client_t client);
898 void move_to_realm(SnapRealm *realm);
899
900 Capability *reconnect_cap(client_t client, const cap_reconnect_t& icr, Session *session);
901 void clear_client_caps_after_export();
902 void export_client_caps(std::map<client_t,Capability::Export>& cl);
903
904 // caps allowed
905 int get_caps_liked() const;
906 int get_caps_allowed_ever() const;
907 int get_caps_allowed_by_type(int type) const;
908 int get_caps_careful() const;
909 int get_xlocker_mask(client_t client) const;
910 int get_caps_allowed_for_client(Session *s, Capability *cap,
911 const mempool_inode *file_i) const;
912
913 // caps issued, wanted
914 int get_caps_issued(int *ploner = 0, int *pother = 0, int *pxlocker = 0,
915 int shift = 0, int mask = -1);
916 bool is_any_caps_wanted() const;
917 int get_caps_wanted(int *ploner = 0, int *pother = 0, int shift = 0, int mask = -1) const;
918 bool issued_caps_need_gather(SimpleLock *lock);
919
920 // client writeable
921 bool is_clientwriteable() const { return state & STATE_CLIENTWRITEABLE; }
922 void mark_clientwriteable();
923 void clear_clientwriteable();
924
925 // -- authority --
926 mds_authority_t authority() const override;
927
928 // -- auth pins --
929 bool can_auth_pin(int *err_ret=nullptr) const override;
930 void auth_pin(void *by) override;
931 void auth_unpin(void *by) override;
932
933 // -- freeze --
934 bool is_freezing_inode() const { return state_test(STATE_FREEZING); }
935 bool is_frozen_inode() const { return state_test(STATE_FROZEN); }
936 bool is_frozen_auth_pin() const { return state_test(STATE_FROZENAUTHPIN); }
937 bool is_frozen() const override;
938 bool is_frozen_dir() const;
939 bool is_freezing() const override;
940
941 /* Freeze the inode. auth_pin_allowance lets the caller account for any
942 * auth_pins it is itself holding/responsible for. */
943 bool freeze_inode(int auth_pin_allowance=0);
944 void unfreeze_inode(MDSContext::vec& finished);
945 void unfreeze_inode();
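  // Illustrative only: how auth_pin_allowance is typically meant to be used;
  // the caller below is assumed to already hold exactly one auth_pin on 'in'
  // ('in' is hypothetical):
  //
  //   in->auth_pin(this);           // caller's own pin
  //   if (!in->freeze_inode(1)) {   // "1" accounts for the pin we hold ourselves
  //     // not frozen yet: register a WAIT_FROZEN waiter and resume when it fires
  //   }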
946
947 void freeze_auth_pin();
948 void unfreeze_auth_pin();
949
950 // -- reference counting --
951 void bad_put(int by) override {
952 generic_dout(0) << " bad put " << *this << " by " << by << " " << pin_name(by) << " was " << ref
953 #ifdef MDS_REF_SET
954 << " (" << ref_map << ")"
955 #endif
956 << dendl;
957 #ifdef MDS_REF_SET
958 ceph_assert(ref_map[by] > 0);
959 #endif
960 ceph_assert(ref > 0);
961 }
962 void bad_get(int by) override {
963 generic_dout(0) << " bad get " << *this << " by " << by << " " << pin_name(by) << " was " << ref
964 #ifdef MDS_REF_SET
965 << " (" << ref_map << ")"
966 #endif
967 << dendl;
968 #ifdef MDS_REF_SET
969 ceph_assert(ref_map[by] >= 0);
970 #endif
971 }
972 void first_get() override;
973 void last_put() override;
974 void _put() override;
975
976 // -- hierarchy stuff --
977 void set_primary_parent(CDentry *p) {
978 ceph_assert(parent == 0 ||
979 g_conf().get_val<bool>("mds_hack_allow_loading_invalid_metadata"));
980 parent = p;
981 }
982 void remove_primary_parent(CDentry *dn) {
983 ceph_assert(dn == parent);
984 parent = 0;
985 }
986 void add_remote_parent(CDentry *p);
987 void remove_remote_parent(CDentry *p);
988 int num_remote_parents() {
989 return remote_parents.size();
990 }
991
992 void push_projected_parent(CDentry *dn) {
993 projected_parent.push_back(dn);
994 }
995 void pop_projected_parent() {
996 ceph_assert(projected_parent.size());
997 parent = projected_parent.front();
998 projected_parent.pop_front();
999 }
1000 bool is_parent_projected() const {
1001 return !projected_parent.empty();
1002 }
1003
1004 mds_rank_t get_export_pin(bool inherit=true) const;
1005 void check_pin_policy(mds_rank_t target);
1006 void set_export_pin(mds_rank_t rank);
1007 void queue_export_pin(mds_rank_t target);
1008 void maybe_export_pin(bool update=false);
1009
1010 void set_ephemeral_pin(bool dist, bool rand);
1011 void clear_ephemeral_pin(bool dist, bool rand);
1012
1013 void setxattr_ephemeral_dist(bool val=false);
1014 bool is_ephemeral_dist() const {
1015 return state_test(STATE_DISTEPHEMERALPIN);
1016 }
1017
1018 double get_ephemeral_rand() const;
1019 void maybe_ephemeral_rand(double threshold=-1.0);
1020 void setxattr_ephemeral_rand(double prob=0.0);
1021 bool is_ephemeral_rand() const {
1022 return state_test(STATE_RANDEPHEMERALPIN);
1023 }
1024
1025 bool has_ephemeral_policy() const {
1026 return get_inode()->export_ephemeral_random_pin > 0.0 ||
1027 get_inode()->export_ephemeral_distributed_pin;
1028 }
1029 bool is_ephemerally_pinned() const {
1030 return state_test(STATE_DISTEPHEMERALPIN) ||
1031 state_test(STATE_RANDEPHEMERALPIN);
1032 }
1033
1034 void print(std::ostream& out) const override;
1035 void dump(ceph::Formatter *f, int flags = DUMP_DEFAULT) const;
1036
1037 /**
1038 * Validate that the on-disk state of an inode matches what
1039 * we expect from our memory state. Currently this checks that:
1040 * 1) The backtrace associated with the file data exists and is correct
1041 * 2) For directories, the actual inode metadata matches our memory state,
1042 * 3) For directories, the rstats match
1043 *
1044 * @param results A freshly-created validated_data struct, with values set
1045 * as described in the struct documentation.
1048    * @param fin Context to call back on completion (or NULL)
1049 */
1050 void validate_disk_state(validated_data *results,
1051 MDSContext *fin);
1052 static void dump_validation_results(const validated_data& results,
1053 ceph::Formatter *f);
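  // Illustrative only: driving a validation pass and dumping its results.
  // 'results' must outlive the asynchronous operation; 'fin' is whatever
  // MDSContext should run on completion (both hypothetical here):
  //
  //   auto *results = new CInode::validated_data;
  //   in->validate_disk_state(results, fin);
  //   // later, inside fin's finish():
  //   //   CInode::dump_validation_results(*results, formatter);
  //   //   delete results;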
1054
1055 //bool hack_accessed = false;
1056 //utime_t hack_load_stamp;
1057
1058 MDCache *mdcache;
1059
1060 SnapRealm *snaprealm = nullptr;
1061 SnapRealm *containing_realm = nullptr;
1062 snapid_t first, last;
1063 mempool::mds_co::compact_set<snapid_t> dirty_old_rstats;
1064
1065 uint64_t last_journaled = 0; // log offset for the last time i was journaled
1066 //loff_t last_open_journaled; // log offset for the last journaled EOpen
1067 utime_t last_dirstat_prop;
1068
1069 // list item node for when we have unpropagated rstat data
1070 elist<CInode*>::item dirty_rstat_item;
1071
1072 mempool::mds_co::set<client_t> client_snap_caps;
1073 mempool::mds_co::compact_map<snapid_t, mempool::mds_co::set<client_t> > client_need_snapflush;
1074
1075 // LogSegment lists i (may) belong to
1076 elist<CInode*>::item item_dirty;
1077 elist<CInode*>::item item_caps;
1078 elist<CInode*>::item item_open_file;
1079 elist<CInode*>::item item_dirty_parent;
1080 elist<CInode*>::item item_dirty_dirfrag_dir;
1081 elist<CInode*>::item item_dirty_dirfrag_nest;
1082 elist<CInode*>::item item_dirty_dirfrag_dirfragtree;
1083
1084 // also update RecoveryQueue::RecoveryQueue() if you change this
1085 elist<CInode*>::item& item_recover_queue = item_dirty_dirfrag_dir;
1086 elist<CInode*>::item& item_recover_queue_front = item_dirty_dirfrag_nest;
1087
1088 inode_load_vec_t pop;
1089 elist<CInode*>::item item_pop_lru;
1090
1091 // -- locks --
1092 static LockType versionlock_type;
1093 static LockType authlock_type;
1094 static LockType linklock_type;
1095 static LockType dirfragtreelock_type;
1096 static LockType filelock_type;
1097 static LockType xattrlock_type;
1098 static LockType snaplock_type;
1099 static LockType nestlock_type;
1100 static LockType flocklock_type;
1101 static LockType policylock_type;
1102
1103 // FIXME not part of mempool
1104 LocalLockC versionlock;
1105 SimpleLock authlock;
1106 SimpleLock linklock;
1107 ScatterLock dirfragtreelock;
1108 ScatterLock filelock;
1109 SimpleLock xattrlock;
1110 SimpleLock snaplock;
1111 ScatterLock nestlock;
1112 SimpleLock flocklock;
1113 SimpleLock policylock;
1114
1115 // -- caps -- (new)
1116 // client caps
1117 client_t loner_cap = -1, want_loner_cap = -1;
1118
1119 protected:
1120 ceph_lock_state_t *get_fcntl_lock_state() {
1121 if (!fcntl_locks)
1122 fcntl_locks = new ceph_lock_state_t(g_ceph_context, CEPH_LOCK_FCNTL);
1123 return fcntl_locks;
1124 }
1125 void clear_fcntl_lock_state() {
1126 delete fcntl_locks;
1127 fcntl_locks = NULL;
1128 }
1129 ceph_lock_state_t *get_flock_lock_state() {
1130 if (!flock_locks)
1131 flock_locks = new ceph_lock_state_t(g_ceph_context, CEPH_LOCK_FLOCK);
1132 return flock_locks;
1133 }
1134 void clear_flock_lock_state() {
1135 delete flock_locks;
1136 flock_locks = NULL;
1137 }
1138 void clear_file_locks() {
1139 clear_fcntl_lock_state();
1140 clear_flock_lock_state();
1141 }
1142 void _encode_file_locks(ceph::buffer::list& bl) const {
1143 using ceph::encode;
1144 bool has_fcntl_locks = fcntl_locks && !fcntl_locks->empty();
1145 encode(has_fcntl_locks, bl);
1146 if (has_fcntl_locks)
1147 encode(*fcntl_locks, bl);
1148 bool has_flock_locks = flock_locks && !flock_locks->empty();
1149 encode(has_flock_locks, bl);
1150 if (has_flock_locks)
1151 encode(*flock_locks, bl);
1152 }
1153 void _decode_file_locks(ceph::buffer::list::const_iterator& p) {
1154 using ceph::decode;
1155 bool has_fcntl_locks;
1156 decode(has_fcntl_locks, p);
1157 if (has_fcntl_locks)
1158 decode(*get_fcntl_lock_state(), p);
1159 else
1160 clear_fcntl_lock_state();
1161 bool has_flock_locks;
1162 decode(has_flock_locks, p);
1163 if (has_flock_locks)
1164 decode(*get_flock_lock_state(), p);
1165 else
1166 clear_flock_lock_state();
1167 }
1168
1169 /**
1170 * Return the pool ID where we currently write backtraces for
1171 * this inode (in addition to inode.old_pools)
1172 *
1173 * @returns a pool ID >=0
1174 */
1175 int64_t get_backtrace_pool() const;
1176
1177 // parent dentries in cache
1178 CDentry *parent = nullptr; // primary link
1179 mempool::mds_co::compact_set<CDentry*> remote_parents; // if hard linked
1180
1181 mempool::mds_co::list<CDentry*> projected_parent; // for in-progress rename, (un)link, etc.
1182
1183 mds_authority_t inode_auth = CDIR_AUTH_DEFAULT;
1184
1185 // -- distributed state --
1186 // file capabilities
1187 mempool_cap_map client_caps; // client -> caps
1188 mempool::mds_co::compact_map<int32_t, int32_t> mds_caps_wanted; // [auth] mds -> caps wanted
1189 int replica_caps_wanted = 0; // [replica] what i've requested from auth
1190 int num_caps_notable = 0;
1191
1192 ceph_lock_state_t *fcntl_locks = nullptr;
1193 ceph_lock_state_t *flock_locks = nullptr;
1194
1195 // -- waiting --
1196 mempool::mds_co::compact_map<frag_t, MDSContext::vec > waiting_on_dir;
1197
1198
1199 // -- freezing inode --
1200 int auth_pin_freeze_allowance = 0;
1201 elist<CInode*>::item item_freezing_inode;
1202 void maybe_finish_freeze_inode();
1203 private:
1204
1205 friend class ValidationContinuation;
1206
1207 /**
1208 * Create a scrub_info_t struct for the scrub_infop pointer.
1209 */
1210 void scrub_info_create() const;
1211 /**
1212    * Delete the scrub_info_t struct if it does not hold any useful data
1213 */
1214 void scrub_maybe_delete_info();
1215
1216 void pop_projected_snaprealm(sr_t *next_snaprealm, bool early);
1217
1218 bool _validate_disk_state(class ValidationContinuation *c,
1219 int rval, int stage);
1220
1221 struct projected_const_node {
1222 inode_const_ptr inode;
1223 xattr_map_const_ptr xattrs;
1224 sr_t *snapnode;
1225
1226 projected_const_node() = delete;
1227 projected_const_node(projected_const_node&&) = default;
1228 explicit projected_const_node(const inode_const_ptr& i, const xattr_map_const_ptr& x, sr_t *s) :
1229 inode(i), xattrs(x), snapnode(s) {}
1230 };
1231
1232 mempool::mds_co::list<projected_const_node> projected_nodes; // projected values (only defined while dirty)
1233 size_t num_projected_srnodes = 0;
1234
1235 // -- cache infrastructure --
1236 mempool::mds_co::compact_map<frag_t,CDir*> dirfrags; // cached dir fragments under this Inode
1237
1238 //for the purpose of quickly determining whether there's a subtree root or exporting dir
1239 int num_subtree_roots = 0;
1240 int num_exporting_dirs = 0;
1241
1242 int stickydir_ref = 0;
1243 std::unique_ptr<scrub_info_t> scrub_infop;
1244 /** @} Scrubbing and fsck */
1245 };
1246
1247 std::ostream& operator<<(std::ostream& out, const CInode& in);
1248
1249 extern cinode_lock_info_t cinode_lock_info[];
1250 extern int num_cinode_locks;
1251 #undef dout_context
1252 #endif