]> git.proxmox.com Git - ceph.git/blame - ceph/src/mds/CInode.h
import ceph 14.2.5
[ceph.git] / ceph / src / mds / CInode.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15
16
17#ifndef CEPH_CINODE_H
18#define CEPH_CINODE_H
19
94b18763
FG
20#include <list>
21#include <map>
22#include <set>
11fdf7f2 23#include <string_view>
94b18763 24
7c673cae
FG
25#include "common/config.h"
26#include "include/counter.h"
27#include "include/elist.h"
28#include "include/types.h"
29#include "include/lru.h"
30#include "include/compact_set.h"
31
32#include "MDSCacheObject.h"
11fdf7f2 33#include "MDSContext.h"
7c673cae
FG
34#include "flock.h"
35
36#include "CDentry.h"
37#include "SimpleLock.h"
38#include "ScatterLock.h"
39#include "LocalLock.h"
40#include "Capability.h"
41#include "SnapRealm.h"
42#include "Mutation.h"
43
11fdf7f2
TL
44#include "messages/MClientCaps.h"
45
7c673cae
FG
46#define dout_context g_ceph_context
47
48class Context;
49class CDentry;
50class CDir;
7c673cae
FG
51class CInode;
52class MDCache;
53class LogSegment;
54struct SnapRealm;
55class Session;
7c673cae
FG
56struct ObjectOperation;
57class EMetaBlob;
58
59
// Stream-insertion overload used to print a CInode (declaration only).
60ostream& operator<<(ostream& out, const CInode& in);
61
/**
 * One entry of the cinode_lock_info[] table.
 *
 * Both members carry default initialisers so that a default-constructed
 * entry is all-zero instead of indeterminate.  The type stays an aggregate,
 * so brace initialisation such as `{ lock, wr_caps }` keeps working.
 */
struct cinode_lock_info_t {
  int lock = 0;     // lock identifier (presumably a CEPH_LOCK_* value -- confirm at the table definition)
  int wr_caps = 0;  // capability bits paired with `lock` (presumably write caps, per the name)
};
66
// Table of cinode_lock_info_t entries and its length; both are only declared here.
67extern cinode_lock_info_t cinode_lock_info[];
68extern int num_cinode_locks; // presumably the number of entries in cinode_lock_info -- confirm at the definition
69
70
71/**
72 * Base class for CInode, containing the backing store data and
73 * serialization methods. This exists so that we can read and
74 * handle CInodes from the backing store without hitting all
75 * the business logic in CInode proper.
76 */
77class InodeStoreBase {
78public:
94b18763
FG
79 typedef inode_t<mempool::mds_co::pool_allocator> mempool_inode;
80 typedef old_inode_t<mempool::mds_co::pool_allocator> mempool_old_inode;
81 typedef mempool::mds_co::compact_map<snapid_t, mempool_old_inode> mempool_old_inode_map;
82 typedef xattr_map<mempool::mds_co::pool_allocator> mempool_xattr_map; // FIXME bufferptr not in mempool
83
84 mempool_inode inode; // the inode itself
85 mempool::mds_co::string symlink; // symlink dest, if symlink
86 mempool_xattr_map xattrs;
7c673cae 87 fragtree_t dirfragtree; // dir frag tree, if any. always consistent with our dirfrag map.
94b18763
FG
88 mempool_old_inode_map old_inodes; // key = last, value.first = first
89 snapid_t oldest_snap = CEPH_NOSNAP;
90 damage_flags_t damage_flags = 0;
7c673cae 91
94b18763 92 InodeStoreBase() {}
7c673cae
FG
93
94 /* Helpers */
95 bool is_file() const { return inode.is_file(); }
96 bool is_symlink() const { return inode.is_symlink(); }
97 bool is_dir() const { return inode.is_dir(); }
11fdf7f2 98 static object_t get_object_name(inodeno_t ino, frag_t fg, std::string_view suffix);
7c673cae
FG
99
100 /* Full serialization for use in ".inode" root inode objects */
101 void encode(bufferlist &bl, uint64_t features, const bufferlist *snap_blob=NULL) const;
11fdf7f2 102 void decode(bufferlist::const_iterator &bl, bufferlist& snap_blob);
7c673cae
FG
103
104 /* Serialization without ENCODE_START/FINISH blocks for use embedded in dentry */
105 void encode_bare(bufferlist &bl, uint64_t features, const bufferlist *snap_blob=NULL) const;
11fdf7f2 106 void decode_bare(bufferlist::const_iterator &bl, bufferlist &snap_blob, __u8 struct_v=5);
7c673cae
FG
107
108 /* For test/debug output */
109 void dump(Formatter *f) const;
110
111 /* For use by offline tools */
11fdf7f2
TL
112 __u32 hash_dentry_name(std::string_view dn);
113 frag_t pick_dirfrag(std::string_view dn);
7c673cae
FG
114};
115
116class InodeStore : public InodeStoreBase {
117public:
94b18763 118 // FIXME bufferlist not part of mempool
7c673cae
FG
119 bufferlist snap_blob; // Encoded copy of SnapRealm, because we can't
120 // rehydrate it without full MDCache
121 void encode(bufferlist &bl, uint64_t features) const {
122 InodeStoreBase::encode(bl, features, &snap_blob);
123 }
11fdf7f2 124 void decode(bufferlist::const_iterator &bl) {
7c673cae
FG
125 InodeStoreBase::decode(bl, snap_blob);
126 }
127 void encode_bare(bufferlist &bl, uint64_t features) const {
128 InodeStoreBase::encode_bare(bl, features, &snap_blob);
129 }
11fdf7f2 130 void decode_bare(bufferlist::const_iterator &bl) {
7c673cae
FG
131 InodeStoreBase::decode_bare(bl, snap_blob);
132 }
133
134 static void generate_test_instances(std::list<InodeStore*>& ls);
135};
136WRITE_CLASS_ENCODER_FEATURES(InodeStore)
137
11fdf7f2
TL
138// just for ceph-dencoder
139class InodeStoreBare : public InodeStore {
140public:
141 void encode(bufferlist &bl, uint64_t features) const {
142 InodeStore::encode_bare(bl, features);
143 }
144 void decode(bufferlist::const_iterator &bl) {
145 InodeStore::decode_bare(bl);
146 }
147 static void generate_test_instances(std::list<InodeStoreBare*>& ls);
148};
149WRITE_CLASS_ENCODER_FEATURES(InodeStoreBare)
150
7c673cae
FG
151// cached inode wrapper
152class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CInode> {
153 public:
181888fb 154 MEMPOOL_CLASS_HELPERS();
7c673cae
FG
// -- pins --
// Pin identifiers specific to CInode; pin_name() maps each to a string.
// constexpr static members are implicitly inline in C++17, so odr-using
// one (e.g. binding it to a const reference) needs no out-of-class definition.
static constexpr int PIN_DIRFRAG =         -1;
static constexpr int PIN_CAPS =             2;  // client caps
static constexpr int PIN_IMPORTING =       -4;  // importing
static constexpr int PIN_OPENINGDIR =       7;
static constexpr int PIN_REMOTEPARENT =     8;
static constexpr int PIN_BATCHOPENJOURNAL = 9;
static constexpr int PIN_SCATTERED =        10;
static constexpr int PIN_STICKYDIRS =       11;
//static constexpr int PIN_PURGING =       -12;
static constexpr int PIN_FREEZING =         13;
static constexpr int PIN_FROZEN =           14;
static constexpr int PIN_IMPORTINGCAPS =   -15;
static constexpr int PIN_PASTSNAPPARENT =  -16;
static constexpr int PIN_OPENINGSNAPPARENTS = 17;
static constexpr int PIN_TRUNCATING =       18;
static constexpr int PIN_STRAY =            19;  // we pin our stray inode while active
static constexpr int PIN_NEEDSNAPFLUSH =    20;
static constexpr int PIN_DIRTYRSTAT =       21;
static constexpr int PIN_EXPORTINGCAPS =    22;
static constexpr int PIN_DIRTYPARENT =      23;
static constexpr int PIN_DIRWAITER =        24;
static constexpr int PIN_SCRUBQUEUE =       25;
178
11fdf7f2 179 std::string_view pin_name(int p) const override {
7c673cae
FG
180 switch (p) {
181 case PIN_DIRFRAG: return "dirfrag";
182 case PIN_CAPS: return "caps";
183 case PIN_IMPORTING: return "importing";
184 case PIN_OPENINGDIR: return "openingdir";
185 case PIN_REMOTEPARENT: return "remoteparent";
186 case PIN_BATCHOPENJOURNAL: return "batchopenjournal";
187 case PIN_SCATTERED: return "scattered";
188 case PIN_STICKYDIRS: return "stickydirs";
189 //case PIN_PURGING: return "purging";
190 case PIN_FREEZING: return "freezing";
191 case PIN_FROZEN: return "frozen";
192 case PIN_IMPORTINGCAPS: return "importingcaps";
193 case PIN_EXPORTINGCAPS: return "exportingcaps";
194 case PIN_PASTSNAPPARENT: return "pastsnapparent";
195 case PIN_OPENINGSNAPPARENTS: return "openingsnapparents";
196 case PIN_TRUNCATING: return "truncating";
197 case PIN_STRAY: return "stray";
198 case PIN_NEEDSNAPFLUSH: return "needsnapflush";
199 case PIN_DIRTYRSTAT: return "dirtyrstat";
200 case PIN_DIRTYPARENT: return "dirtyparent";
201 case PIN_DIRWAITER: return "dirwaiter";
202 case PIN_SCRUBQUEUE: return "scrubqueue";
203 default: return generic_pin_name(p);
204 }
205 }
206
11fdf7f2
TL
// -- dump flags --
// Bit flags selecting sections of a dump; constexpr so they are usable in
// constant expressions and need no out-of-class definition when odr-used.
static constexpr int DUMP_INODE_STORE_BASE = (1 << 0);
static constexpr int DUMP_MDS_CACHE_OBJECT = (1 << 1);
static constexpr int DUMP_LOCKS            = (1 << 2);
static constexpr int DUMP_STATE            = (1 << 3);
static constexpr int DUMP_CAPS             = (1 << 4);
static constexpr int DUMP_PATH             = (1 << 5);
static constexpr int DUMP_DIRFRAGS         = (1 << 6);
static constexpr int DUMP_ALL              = (-1);
// everything except the path and dirfrag sections
static constexpr int DUMP_DEFAULT          = DUMP_ALL & (~DUMP_PATH) & (~DUMP_DIRFRAGS);
217
7c673cae 218 // -- state --
11fdf7f2
TL
219 static const int STATE_EXPORTING = (1<<0); // on nonauth bystander.
220 static const int STATE_OPENINGDIR = (1<<1);
221 static const int STATE_FREEZING = (1<<2);
222 static const int STATE_FROZEN = (1<<3);
223 static const int STATE_AMBIGUOUSAUTH = (1<<4);
224 static const int STATE_EXPORTINGCAPS = (1<<5);
225 static const int STATE_NEEDSRECOVER = (1<<6);
226 static const int STATE_RECOVERING = (1<<7);
227 static const int STATE_PURGING = (1<<8);
228 static const int STATE_DIRTYPARENT = (1<<9);
229 static const int STATE_DIRTYRSTAT = (1<<10);
230 static const int STATE_STRAYPINNED = (1<<11);
231 static const int STATE_FROZENAUTHPIN = (1<<12);
232 static const int STATE_DIRTYPOOL = (1<<13);
233 static const int STATE_REPAIRSTATS = (1<<14);
234 static const int STATE_MISSINGOBJS = (1<<15);
235 static const int STATE_EVALSTALECAPS = (1<<16);
236 static const int STATE_QUEUEDEXPORTPIN = (1<<17);
237 static const int STATE_TRACKEDBYOFT = (1<<18); // tracked by open file table
eafe8130 238 static const int STATE_DELAYEDEXPORTPIN = (1<<19);
7c673cae
FG
239 // orphan inode needs notification of releasing reference
240 static const int STATE_ORPHAN = STATE_NOTIFYREF;
241
242 static const int MASK_STATE_EXPORTED =
243 (STATE_DIRTY|STATE_NEEDSRECOVER|STATE_DIRTYPARENT|STATE_DIRTYPOOL);
244 static const int MASK_STATE_EXPORT_KEPT =
11fdf7f2 245 (STATE_FROZEN|STATE_AMBIGUOUSAUTH|STATE_EXPORTINGCAPS|
eafe8130 246 STATE_QUEUEDEXPORTPIN|STATE_TRACKEDBYOFT|STATE_DELAYEDEXPORTPIN);
7c673cae
FG
247
// -- waiters --
// Wait-tag bits; constexpr so they are usable in constant expressions and
// need no out-of-class definition when odr-used.
static constexpr uint64_t WAIT_DIR    = (1<<0);
static constexpr uint64_t WAIT_FROZEN = (1<<1);
static constexpr uint64_t WAIT_TRUNC  = (1<<2);
static constexpr uint64_t WAIT_FLOCK  = (1<<3);

// all 64 bits set
static constexpr uint64_t WAIT_ANY_MASK = (uint64_t)(-1);

// misc
static constexpr unsigned EXPORT_NONCE = 1; // nonce given to replicas created by export
258
// Override of a base-class hook; only declared in this header.
259 ostream& print_db_line_prefix(ostream& out) override;
260
261 public:
// MDCache pointer (the non-default constructor takes an MDCache*).
262 MDCache *mdcache;
263
94b18763
FG
// snaprealm: when non-null its srnode is what get_projected_srnode() falls
// back to, and is_multiversion() returns true.
264 SnapRealm *snaprealm = nullptr;
265 SnapRealm *containing_realm = nullptr;
// Snapid range of this inode; is_head() tests last == CEPH_NOSNAP and
// vino() pairs the ino with `last`.
7c673cae 266 snapid_t first, last;
94b18763 267 mempool::mds_co::compact_set<snapid_t> dirty_old_rstats;
7c673cae
FG
268
269 class scrub_stamp_info_t {
270 public:
271 /// version we started our latest scrub (whether in-progress or finished)
94b18763 272 version_t scrub_start_version = 0;
7c673cae
FG
273 /// time we started our latest scrub (whether in-progress or finished)
274 utime_t scrub_start_stamp;
275 /// version we started our most recent finished scrub
94b18763 276 version_t last_scrub_version = 0;
7c673cae
FG
277 /// time we started our most recent finished scrub
278 utime_t last_scrub_stamp;
94b18763 279 scrub_stamp_info_t() {}
7c673cae 280 void reset() {
b32b8144
FG
281 scrub_start_version = last_scrub_version = 0;
282 scrub_start_stamp = last_scrub_stamp = utime_t();
7c673cae
FG
283 }
284 };
285
286 class scrub_info_t : public scrub_stamp_info_t {
287 public:
94b18763 288 CDentry *scrub_parent = nullptr;
11fdf7f2 289 MDSContext *on_finish = nullptr;
7c673cae 290
94b18763
FG
291 bool last_scrub_dirty = false; /// are our stamps dirty with respect to disk state?
292 bool scrub_in_progress = false; /// are we currently scrubbing?
293 bool children_scrubbed = false;
7c673cae
FG
294
295 /// my own (temporary) stamps and versions for each dirfrag we have
94b18763 296 std::map<frag_t, scrub_stamp_info_t> dirfrag_stamps; // XXX not part of mempool
7c673cae 297
b32b8144 298 ScrubHeaderRef header;
7c673cae 299
94b18763 300 scrub_info_t() {}
7c673cae
FG
301 };
302
303 const scrub_info_t *scrub_info() const{
304 if (!scrub_infop)
305 scrub_info_create();
306 return scrub_infop;
307 }
308
b32b8144
FG
309 ScrubHeaderRef get_scrub_header() {
310 if (scrub_infop == nullptr) {
311 return nullptr;
312 } else {
313 return scrub_infop->header;
314 }
315 }
316
7c673cae
FG
317 bool scrub_is_in_progress() const {
318 return (scrub_infop && scrub_infop->scrub_in_progress);
319 }
320 /**
321 * Start scrubbing on this inode. That could be very short if it's
322 * a file, or take a long time if we're recursively scrubbing a directory.
323 * @pre It is not currently scrubbing
324 * @post it has set up internal scrubbing state
325 * NOTE(review): this comment used to document a `scrub_version` parameter (usually the parent
326 * directory's get_projected_version()); the signature takes scrub_parent, header and f instead.
327 */
328 void scrub_initialize(CDentry *scrub_parent,
b32b8144 329 ScrubHeaderRef& header,
11fdf7f2 330 MDSContext *f);
7c673cae
FG
331 /**
332 * Get the next dirfrag to scrub. Gives you a frag_t in output param which
333 * you must convert to a CDir (and possibly load off disk).
334 * @param out_dirfrag A pointer to frag_t, will be filled in with the next dirfrag to
335 * scrub if there is one.
336 * @returns 0 on success, you should scrub the passed-out frag_t right now;
337 * ENOENT: There are no remaining dirfrags to scrub
338 * <0 There was some other error (It will return -ENOTDIR if not a directory)
339 */
340 int scrub_dirfrag_next(frag_t* out_dirfrag);
341 /**
342 * Get the currently scrubbing dirfrags. When returned, the
343 * passed-in container will be filled in with all frag_ts which have
344 * been returned from scrub_dirfrag_next but not sent back
345 * via scrub_dirfrag_finished.
346 */
11fdf7f2 347 void scrub_dirfrags_scrubbing(frag_vec_t *out_dirfrags);
7c673cae
FG
348 /**
349 * Report to the CInode that a dirfrag it owns has been scrubbed. Call
350 * this for every frag_t returned from scrub_dirfrag_next().
351 * @param dirfrag The frag_t that was scrubbed
352 */
353 void scrub_dirfrag_finished(frag_t dirfrag);
354 /**
355 * Call this once the scrub has been completed, whether it's a full
356 * recursive scrub on a directory or simply the data on a file (or
357 * anything in between).
358 * @param c An out param which is filled in with an MDSContext* that must
359 * be complete()ed.
360 */
11fdf7f2
TL
361 void scrub_finished(MDSContext **c);
362
// NOTE(review): undocumented; presumably the abort counterpart of
// scrub_finished(), with `c` used the same way -- confirm against the definition.
363 void scrub_aborted(MDSContext **c);
364
7c673cae
FG
365 /**
366 * Report to the CInode that alldirfrags it owns have been scrubbed.
367 */
368 void scrub_children_finished() {
369 scrub_infop->children_scrubbed = true;
370 }
11fdf7f2
TL
371 void scrub_set_finisher(MDSContext *c) {
372 ceph_assert(!scrub_infop->on_finish);
7c673cae
FG
373 scrub_infop->on_finish = c;
374 }
375
376private:
377 /**
378 * Create a scrub_info_t struct for the scrub_infop pointer.
 * Declared const; the const accessor scrub_info() calls it when
 * scrub_infop is null.
379 */
380 void scrub_info_create() const;
381 /**
382 * Delete the scrub_info_t struct if it no longer holds any useful data
383 */
384 void scrub_maybe_delete_info();
385public:
386
387 bool is_multiversion() const {
388 return snaprealm || // other snaprealms will link to me
389 inode.is_dir() || // links to me in other snaps
390 inode.nlink > 1 || // there are remote links, possibly snapped, that will need to find me
391 !old_inodes.empty(); // once multiversion, always multiversion. until old_inodes gets cleaned out.
392 }
// Declared only; the definition is not visible in this part of the file.
393 snapid_t get_oldest_snap();
394
94b18763 395 uint64_t last_journaled = 0; // log offset for the last time i was journaled
7c673cae
FG
396 //loff_t last_open_journaled; // log offset for the last journaled EOpen
// NOTE(review): presumably the time dirstat was last propagated -- confirm at the use sites.
397 utime_t last_dirstat_prop;
398
399
400 // list item node for when we have unpropagated rstat data
401 elist<CInode*>::item dirty_rstat_item;
402
403 bool is_dirty_rstat() {
404 return state_test(STATE_DIRTYRSTAT);
405 }
// Presumably set / clear the condition reported by is_dirty_rstat()
// (STATE_DIRTYRSTAT); only declared here -- confirm against the definitions.
406 void mark_dirty_rstat();
407 void clear_dirty_rstat();
408
94b18763 409 //bool hack_accessed = false;
7c673cae
FG
410 //utime_t hack_load_stamp;
411
412 /**
413 * Projection methods, used to store inode changes until they have been journaled,
414 * at which point they are popped.
415 * Usage:
94b18763
FG
416 * project_inode as needed. If you're changing xattrs or sr_t, pass true for
417 * the matching argument, then change the xattrs/snapnode member as needed.
418 * (Dirty exception: project_snaprealm_past_parent allows you to project the
419 * snapnode after doing project_inode, i.e. you don't need to pass
420 * snap=true.)
7c673cae
FG
421 *
422 * Then, journal. Once journaling is done, pop_and_dirty_projected_inode.
423 * This function will take care of the inode itself, the xattrs, and the snaprealm.
424 */
425
94b18763
FG
426 class projected_inode {
427 public:
11fdf7f2
TL
428 static sr_t* const UNDEF_SRNODE;
429
94b18763
FG
430 mempool_inode inode;
431 std::unique_ptr<mempool_xattr_map> xattrs;
11fdf7f2 432 sr_t *snapnode = UNDEF_SRNODE;
94b18763
FG
433
434 projected_inode() = delete;
11fdf7f2 435 explicit projected_inode(const mempool_inode &in) : inode(in) {}
7c673cae 436 };
94b18763
FG
437
438private:
439 mempool::mds_co::list<projected_inode> projected_nodes; // projected values (only defined while dirty)
// Counters consulted by get_projected_xattrs() / get_projected_srnode() to
// skip the list walk when zero; the destructor asserts both are 0.
440 size_t num_projected_xattrs = 0;
441 size_t num_projected_srnodes = 0;
442
94b18763
FG
443public:
// Declared only.  Per the usage comment above: pass xattr/snap = true when
// the xattrs / snapnode are going to be changed as well.
444 CInode::projected_inode &project_inode(bool xattr = false, bool snap = false);
7c673cae
FG
445 void pop_and_dirty_projected_inode(LogSegment *ls);
446
94b18763 447 projected_inode *get_projected_node() {
7c673cae
FG
448 if (projected_nodes.empty())
449 return NULL;
450 else
94b18763 451 return &projected_nodes.back();
7c673cae
FG
452 }
453
454 version_t get_projected_version() const {
455 if (projected_nodes.empty())
456 return inode.version;
457 else
94b18763 458 return projected_nodes.back().inode.version;
7c673cae
FG
459 }
460 bool is_projected() const {
461 return !projected_nodes.empty();
462 }
463
94b18763 464 const mempool_inode *get_projected_inode() const {
7c673cae
FG
465 if (projected_nodes.empty())
466 return &inode;
467 else
94b18763 468 return &projected_nodes.back().inode;
7c673cae 469 }
94b18763 470 mempool_inode *get_projected_inode() {
7c673cae
FG
471 if (projected_nodes.empty())
472 return &inode;
473 else
94b18763 474 return &projected_nodes.back().inode;
7c673cae 475 }
94b18763 476 mempool_inode *get_previous_projected_inode() {
11fdf7f2 477 ceph_assert(!projected_nodes.empty());
94b18763
FG
478 auto it = projected_nodes.rbegin();
479 ++it;
480 if (it != projected_nodes.rend())
481 return &it->inode;
7c673cae
FG
482 else
483 return &inode;
484 }
485
94b18763 486 mempool_xattr_map *get_projected_xattrs() {
7c673cae 487 if (num_projected_xattrs > 0) {
94b18763
FG
488 for (auto it = projected_nodes.rbegin(); it != projected_nodes.rend(); ++it)
489 if (it->xattrs)
490 return it->xattrs.get();
7c673cae
FG
491 }
492 return &xattrs;
493 }
94b18763
FG
494 mempool_xattr_map *get_previous_projected_xattrs() {
495 if (num_projected_xattrs > 0) {
496 for (auto it = ++projected_nodes.rbegin(); it != projected_nodes.rend(); ++it)
497 if (it->xattrs)
498 return it->xattrs.get();
499 }
7c673cae
FG
500 return &xattrs;
501 }
502
11fdf7f2
TL
// Declared only.  The inline project_snaprealm(snapid_t) overload calls
// these two in sequence and returns the node from prepare_new_srnode().
503 sr_t *prepare_new_srnode(snapid_t snapid);
504 void project_snaprealm(sr_t *new_srnode);
505 sr_t *project_snaprealm(snapid_t snapid=0) {
506 sr_t* new_srnode = prepare_new_srnode(snapid);
507 project_snaprealm(new_srnode);
508 return new_srnode;
7c673cae 509 }
11fdf7f2 510 const sr_t *get_projected_srnode() const {
7c673cae 511 if (num_projected_srnodes > 0) {
94b18763 512 for (auto it = projected_nodes.rbegin(); it != projected_nodes.rend(); ++it)
11fdf7f2
TL
513 if (it->snapnode != projected_inode::UNDEF_SRNODE)
514 return it->snapnode;
7c673cae
FG
515 }
516 if (snaprealm)
517 return &snaprealm->srnode;
518 else
519 return NULL;
520 }
11fdf7f2
TL
521
// Snaprealm projection helpers; all are only declared in this header.
522 void mark_snaprealm_global(sr_t *new_srnode);
523 void clear_snaprealm_global(sr_t *new_srnode);
524 bool is_projected_snaprealm_global() const;
525
526 void record_snaprealm_past_parent(sr_t *new_snap, SnapRealm *newparent);
527 void record_snaprealm_parent_dentry(sr_t *new_snap, SnapRealm *newparent,
528 CDentry *dn, bool primary_dn);
529 void project_snaprealm_past_parent(SnapRealm *newparent);
530 void early_pop_projected_snaprealm();
7c673cae
FG
531
532private:
11fdf7f2 533 void pop_projected_snaprealm(sr_t *next_snaprealm, bool early);
7c673cae
FG
534
535public:
// Helpers working on old_inodes / snapshot data (declarations only).
94b18763 536 mempool_old_inode& cow_old_inode(snapid_t follows, bool cow_head);
7c673cae 537 void split_old_inode(snapid_t snap);
94b18763 538 mempool_old_inode *pick_old_inode(snapid_t last);
7c673cae 539 void pre_cow_old_inode();
11fdf7f2 540 bool has_snap_data(snapid_t s);
7c673cae
FG
541 void purge_stale_snap_data(const std::set<snapid_t>& snaps);
542
543 // -- cache infrastructure --
544private:
94b18763 545 mempool::mds_co::compact_map<frag_t,CDir*> dirfrags; // cached dir fragments under this Inode
1adf2230
AA
546
547 // for the purpose of quickly determining whether there's a subtree root or exporting dir
// (both counters are asserted to be 0 in the destructor)
548 int num_subtree_roots = 0;
549 int num_exporting_dirs = 0;
550
94b18763
FG
551 int stickydir_ref = 0;
// Created on demand by scrub_info(); null means no scrub info exists.
552 scrub_info_t *scrub_infop = nullptr;
7c673cae
FG
553
554public:
555 bool has_dirfrags() { return !dirfrags.empty(); }
556 CDir* get_dirfrag(frag_t fg) {
11fdf7f2
TL
557 auto pi = dirfrags.find(fg);
558 if (pi != dirfrags.end()) {
559 //assert(g_conf()->debug_mds < 2 || dirfragtree.is_leaf(fg)); // performance hack FIXME
560 return pi->second;
561 }
562 return NULL;
7c673cae
FG
563 }
// Declared only; see the definitions for the exact lookup semantics.
564 bool get_dirfrags_under(frag_t fg, std::list<CDir*>& ls);
565 CDir* get_approx_dirfrag(frag_t fg);
91327a77
AA
566
567 template<typename Container>
568 void get_dirfrags(Container& ls) const {
569 // all dirfrags
11fdf7f2
TL
570 if constexpr (std::is_same_v<Container, std::vector<CDir*>>)
571 ls.reserve(ls.size() + dirfrags.size());
91327a77
AA
572 for (const auto &p : dirfrags)
573 ls.push_back(p.second);
574 }
575 template<typename Container>
576 void get_nested_dirfrags(Container& ls) const {
577 // dirfrags in same subtree
11fdf7f2
TL
578 if constexpr (std::is_same_v<Container, std::vector<CDir*>>)
579 ls.reserve(ls.size() + dirfrags.size() - num_subtree_roots);
91327a77
AA
580 for (const auto &p : dirfrags) {
581 typename Container::value_type dir = p.second;
582 if (!dir->is_subtree_root())
583 ls.push_back(dir);
584 }
585 }
586 template<typename Container>
587 void get_subtree_dirfrags(Container& ls) {
588 // dirfrags that are roots of new subtrees
11fdf7f2
TL
589 if constexpr (std::is_same_v<Container, std::vector<CDir*>>)
590 ls.reserve(ls.size() + num_subtree_roots);
91327a77
AA
591 for (const auto &p : dirfrags) {
592 typename Container::value_type dir = p.second;
593 if (dir->is_subtree_root())
594 ls.push_back(dir);
595 }
596 }
597
7c673cae
FG
// Dirfrag management (declarations only).  close_dirfrags() is called from
// the destructor.
598 CDir *get_or_open_dirfrag(MDCache *mdcache, frag_t fg);
599 CDir *add_dirfrag(CDir *dir);
600 void close_dirfrag(frag_t fg);
601 void close_dirfrags();
602 bool has_subtree_root_dirfrag(int auth=-1);
603 bool has_subtree_or_exporting_dirfrag();
604
605 void force_dirfrags();
606 void verify_dirfrags();
607
// NOTE(review): presumably paired get/put operating on stickydir_ref -- confirm.
608 void get_stickydirs();
609 void put_stickydirs();
610
611 protected:
612 // parent dentries in cache
94b18763
FG
613 CDentry *parent = nullptr; // primary link
614 mempool::mds_co::compact_set<CDentry*> remote_parents; // if hard linked
7c673cae 615
// back() is what get_projected_parent_dn() returns while this is non-empty.
94b18763 616 mempool::mds_co::list<CDentry*> projected_parent; // for in-progress rename, (un)link, etc.
7c673cae 617
94b18763 618 mds_authority_t inode_auth = CDIR_AUTH_DEFAULT;
7c673cae
FG
619
620 // -- distributed state --
621protected:
622 // file capabilities
11fdf7f2
TL
623 using mempool_cap_map = mempool::mds_co::map<client_t, Capability>;
624 mempool_cap_map client_caps; // client -> caps
94b18763 625 mempool::mds_co::compact_map<int32_t, int32_t> mds_caps_wanted; // [auth] mds -> caps wanted
11fdf7f2
TL
626 int replica_caps_wanted = 0; // [replica] what i've requested from auth
// asserted to be 0 in the destructor
627 int num_caps_wanted = 0;
7c673cae
FG
628
629public:
eafe8130 630 mempool::mds_co::set<client_t> client_snap_caps;
// snapid -> set of clients (presumably those that still need to flush snap data -- confirm)
94b18763 631 mempool::mds_co::compact_map<snapid_t, mempool::mds_co::set<client_t> > client_need_snapflush;
7c673cae
FG
632
// Declared only.
633 void add_need_snapflush(CInode *snapin, snapid_t snapid, client_t client);
634 void remove_need_snapflush(CInode *snapin, snapid_t snapid, client_t client);
494da23a 635 pair<bool,bool> split_need_snapflush(CInode *cowin, CInode *in);
7c673cae
FG
636
637protected:
638
94b18763
FG
// Lock state objects, allocated on first use by get_fcntl_lock_state() /
// get_flock_lock_state() and deleted by the matching clear_*_lock_state().
639 ceph_lock_state_t *fcntl_locks = nullptr;
640 ceph_lock_state_t *flock_locks = nullptr;
7c673cae
FG
641
642 ceph_lock_state_t *get_fcntl_lock_state() {
643 if (!fcntl_locks)
644 fcntl_locks = new ceph_lock_state_t(g_ceph_context, CEPH_LOCK_FCNTL);
645 return fcntl_locks;
646 }
647 void clear_fcntl_lock_state() {
648 delete fcntl_locks;
649 fcntl_locks = NULL;
650 }
651 ceph_lock_state_t *get_flock_lock_state() {
652 if (!flock_locks)
653 flock_locks = new ceph_lock_state_t(g_ceph_context, CEPH_LOCK_FLOCK);
654 return flock_locks;
655 }
656 void clear_flock_lock_state() {
657 delete flock_locks;
658 flock_locks = NULL;
659 }
660 void clear_file_locks() {
661 clear_fcntl_lock_state();
662 clear_flock_lock_state();
663 }
664 void _encode_file_locks(bufferlist& bl) const {
11fdf7f2 665 using ceph::encode;
7c673cae 666 bool has_fcntl_locks = fcntl_locks && !fcntl_locks->empty();
11fdf7f2 667 encode(has_fcntl_locks, bl);
7c673cae 668 if (has_fcntl_locks)
11fdf7f2 669 encode(*fcntl_locks, bl);
7c673cae 670 bool has_flock_locks = flock_locks && !flock_locks->empty();
11fdf7f2 671 encode(has_flock_locks, bl);
7c673cae 672 if (has_flock_locks)
11fdf7f2 673 encode(*flock_locks, bl);
7c673cae 674 }
11fdf7f2
TL
675 void _decode_file_locks(bufferlist::const_iterator& p) {
676 using ceph::decode;
7c673cae 677 bool has_fcntl_locks;
11fdf7f2 678 decode(has_fcntl_locks, p);
7c673cae 679 if (has_fcntl_locks)
11fdf7f2 680 decode(*get_fcntl_lock_state(), p);
7c673cae
FG
681 else
682 clear_fcntl_lock_state();
683 bool has_flock_locks;
11fdf7f2 684 decode(has_flock_locks, p);
7c673cae 685 if (has_flock_locks)
11fdf7f2 686 decode(*get_flock_lock_state(), p);
7c673cae
FG
687 else
688 clear_flock_lock_state();
689 }
690
691 // LogSegment lists i (may) belong to
692public:
// Intrusive list hooks (elist items), one per list this inode may be on.
693 elist<CInode*>::item item_dirty;
694 elist<CInode*>::item item_caps;
695 elist<CInode*>::item item_open_file;
696 elist<CInode*>::item item_dirty_parent;
697 elist<CInode*>::item item_dirty_dirfrag_dir;
698 elist<CInode*>::item item_dirty_dirfrag_nest;
699 elist<CInode*>::item item_dirty_dirfrag_dirfragtree;
700 elist<CInode*>::item item_scrub;
701
b32b8144
FG
702 // also update RecoveryQueue::RecoveryQueue() if you change this
// These two are references: they alias the dirfrag_dir / dirfrag_nest items
// above rather than adding new list hooks.
703 elist<CInode*>::item& item_recover_queue = item_dirty_dirfrag_dir;
704 elist<CInode*>::item& item_recover_queue_front = item_dirty_dirfrag_nest;
705
7c673cae 706public:
94b18763 707 int auth_pin_freeze_allowance = 0;
7c673cae
FG
708
709 inode_load_vec_t pop;
28e407b8 710 elist<CInode*>::item item_pop_lru;
7c673cae
FG
711
712 // friends
713 friend class Server;
714 friend class Locker;
715 friend class Migrator;
716 friend class MDCache;
717 friend class StrayManager;
718 friend class CDir;
719 friend class CInodeExport;
7c673cae
FG
720
721 // ---------------------------
// No default construction; an MDCache pointer is always required.
94b18763 722 CInode() = delete;
// NOTE(review): f / l presumably initialise the `first` / `last` snapids -- confirm in the definition.
11fdf7f2 723 CInode(MDCache *c, bool auth=true, snapid_t f=2, snapid_t l=CEPH_NOSNAP);
7c673cae
FG
724 ~CInode() override {
725 close_dirfrags();
726 close_snaprealm();
727 clear_file_locks();
11fdf7f2
TL
728 ceph_assert(num_projected_xattrs == 0);
729 ceph_assert(num_projected_srnodes == 0);
730 ceph_assert(num_caps_wanted == 0);
731 ceph_assert(num_subtree_roots == 0);
732 ceph_assert(num_exporting_dirs == 0);
7c673cae
FG
733 }
734
735
736 // -- accessors --
737 bool is_root() const { return inode.ino == MDS_INO_ROOT; }
738 bool is_stray() const { return MDS_INO_IS_STRAY(inode.ino); }
739 mds_rank_t get_stray_owner() const {
740 return (mds_rank_t)MDS_INO_STRAY_OWNER(inode.ino);
741 }
742 bool is_mdsdir() const { return MDS_INO_IS_MDSDIR(inode.ino); }
11fdf7f2 743 bool is_base() const { return MDS_INO_IS_BASE(inode.ino); }
7c673cae
FG
744 bool is_system() const { return inode.ino < MDS_INO_SYSTEM_BASE; }
745 bool is_normal() const { return !(is_base() || is_system() || is_stray()); }
746
747 bool is_head() const { return last == CEPH_NOSNAP; }
748
749 // note: this overloads MDSCacheObject
750 bool is_ambiguous_auth() const {
751 return state_test(STATE_AMBIGUOUSAUTH) ||
752 MDSCacheObject::is_ambiguous_auth();
753 }
754 void set_ambiguous_auth() {
755 state_set(STATE_AMBIGUOUSAUTH);
756 }
// Declared only.  NOTE(review): `finished` presumably collects contexts
// released by the clear -- confirm in the definition.
11fdf7f2 757 void clear_ambiguous_auth(MDSContext::vec& finished);
7c673cae
FG
758 void clear_ambiguous_auth();
759
760 inodeno_t ino() const { return inode.ino; }
761 vinodeno_t vino() const { return vinodeno_t(inode.ino, last); }
762 int d_type() const { return IFTODT(inode.mode); }
763
94b18763 764 mempool_inode& get_inode() { return inode; }
f64942e4 765 const mempool_inode& get_inode() const { return inode; }
7c673cae
FG
766 CDentry* get_parent_dn() { return parent; }
767 const CDentry* get_parent_dn() const { return parent; }
7c673cae 768 CDentry* get_projected_parent_dn() { return !projected_parent.empty() ? projected_parent.back() : parent; }
11fdf7f2
TL
769 const CDentry* get_projected_parent_dn() const { return !projected_parent.empty() ? projected_parent.back() : parent; }
770 const CDentry* get_oldest_parent_dn() const {
771 if (parent)
772 return parent;
773 return !projected_parent.empty() ? projected_parent.front(): NULL;
774 }
7c673cae
FG
// Parent directory / inode lookups (declarations only).
775 CDir *get_parent_dir();
776 const CDir *get_projected_parent_dir() const;
777 CDir *get_projected_parent_dir();
778 CInode *get_parent_inode();
779
780 bool is_lt(const MDSCacheObject *r) const override {
781 const CInode *o = static_cast<const CInode*>(r);
782 return ino() < o->ino() ||
783 (ino() == o->ino() && last < o->last);
784 }
785
786 // -- misc --
11fdf7f2
TL
// Ancestry tests and path construction (declarations only).
787 bool is_ancestor_of(const CInode *other) const;
788 bool is_projected_ancestor_of(const CInode *other) const;
7c673cae
FG
789
790 void make_path_string(std::string& s, bool projected=false, const CDentry *use_parent=NULL) const;
791 void make_path(filepath& s, bool projected=false) const;
792 void name_stray_dentry(std::string& dname);
793
794 // -- dirtiness --
// Version of the inode itself (not of any projection).
795 version_t get_version() const { return inode.version; }
796
797 version_t pre_dirty();
798 void _mark_dirty(LogSegment *ls);
799 void mark_dirty(version_t projected_dirv, LogSegment *ls);
800 void mark_clean();
801
// Store / fetch entry points (declarations only).  The underscore-prefixed
// members are presumably the completion callbacks of the call declared just
// before each of them -- confirm in the definitions.
11fdf7f2 802 void store(MDSContext *fin);
7c673cae
FG
803 void _stored(int r, version_t cv, Context *fin);
804 /**
805 * Flush a CInode to disk. This includes the backtrace, the parent
806 * directory's link, and the Inode object itself (if a base directory).
807 * @pre is_auth() on both the inode and its containing directory
808 * @pre can_auth_pin()
809 * @param fin The Context to call when the flush is completed.
810 */
11fdf7f2
TL
811 void flush(MDSContext *fin);
812 void fetch(MDSContext *fin);
7c673cae
FG
813 void _fetched(bufferlist& bl, bufferlist& bl2, Context *fin);
814
815
816 void build_backtrace(int64_t pool, inode_backtrace_t& bt);
11fdf7f2 817 void store_backtrace(MDSContext *fin, int op_prio=-1);
7c673cae
FG
818 void _stored_backtrace(int r, version_t v, Context *fin);
819 void fetch_backtrace(Context *fin, bufferlist *backtrace);
820protected:
821 /**
822 * Return the pool ID where we currently write backtraces for
823 * this inode (in addition to inode.old_pools)
824 *
825 * @returns a pool ID >=0
826 */
827 int64_t get_backtrace_pool() const;
828public:
// Presumably these set / clear the STATE_DIRTYPARENT and STATE_DIRTYPOOL bits
// reported by is_dirty_parent() / is_dirty_pool() -- confirm in the definitions.
28e407b8 829 void mark_dirty_parent(LogSegment *ls, bool dirty_pool=false);
7c673cae
FG
830 void clear_dirty_parent();
831 void verify_diri_backtrace(bufferlist &bl, int err);
832 bool is_dirty_parent() { return state_test(STATE_DIRTYPARENT); }
833 bool is_dirty_pool() { return state_test(STATE_DIRTYPOOL); }
834
// Snap blob and backing-store (de)serialisation (declarations only).
835 void encode_snap_blob(bufferlist &bl);
11fdf7f2 836 void decode_snap_blob(const bufferlist &bl);
7c673cae 837 void encode_store(bufferlist& bl, uint64_t features);
11fdf7f2 838 void decode_store(bufferlist::const_iterator& bl);
7c673cae 839
b32b8144 840 void encode_replica(mds_rank_t rep, bufferlist& bl, uint64_t features, bool need_recover) {
11fdf7f2 841 ceph_assert(is_auth());
7c673cae
FG
842
843 __u32 nonce = add_replica(rep);
11fdf7f2
TL
844 using ceph::encode;
845 encode(nonce, bl);
7c673cae
FG
846
847 _encode_base(bl, features);
b32b8144 848 _encode_locks_state_for_replica(bl, need_recover);
7c673cae 849 }
11fdf7f2
TL
850 void decode_replica(bufferlist::const_iterator& p, bool is_new) {
851 using ceph::decode;
7c673cae 852 __u32 nonce;
11fdf7f2 853 decode(nonce, p);
7c673cae
FG
854 replica_nonce = nonce;
855
856 _decode_base(p);
857 _decode_locks_state(p, is_new);
858 }
859
860 // -- waiting --
861protected:
// Waiter lists keyed by directory fragment (frag_t -> MDSContext::vec).
11fdf7f2 862 mempool::mds_co::compact_map<frag_t, MDSContext::vec > waiting_on_dir;
7c673cae 863public:
11fdf7f2
TL
// NOTE(review): presumably add_dir_waiter() appends `c` to the list for `fg`
// and take_dir_waiting() moves that list into `ls` -- confirm in the
// implementation.
864 void add_dir_waiter(frag_t fg, MDSContext *c);
865 void take_dir_waiting(frag_t fg, MDSContext::vec& ls);
7c673cae
FG
866 bool is_waiting_for_dir(frag_t fg) {
867 return waiting_on_dir.count(fg);
868 }
11fdf7f2
TL
// Overrides of the inherited waiter interface, keyed by a 64-bit tag.
// NOTE(review): how `tag` selects waiters is not visible in this header --
// see the implementation.
869 void add_waiter(uint64_t tag, MDSContext *c) override;
870 void take_waiting(uint64_t tag, MDSContext::vec& ls) override;
7c673cae
FG
871
872 // -- encode/decode helpers --
// Building blocks for the public codecs: encode_replica() calls
// _encode_base() and _encode_locks_state_for_replica(); decode_replica()
// calls _decode_base() and _decode_locks_state().
873 void _encode_base(bufferlist& bl, uint64_t features);
11fdf7f2 874 void _decode_base(bufferlist::const_iterator& p);
7c673cae 875 void _encode_locks_full(bufferlist& bl);
11fdf7f2 876 void _decode_locks_full(bufferlist::const_iterator& p);
b32b8144 877 void _encode_locks_state_for_replica(bufferlist& bl, bool need_recover);
7c673cae 878 void _encode_locks_state_for_rejoin(bufferlist& bl, int rep);
11fdf7f2
TL
879 void _decode_locks_state(bufferlist::const_iterator& p, bool is_new);
// NOTE(review): `waiters` and `eval_locks` are taken by non-const reference,
// so presumably they are output lists filled during decoding -- confirm.
880 void _decode_locks_rejoin(bufferlist::const_iterator& p, MDSContext::vec& waiters,
b32b8144 881 std::list<SimpleLock*>& eval_locks, bool survivor);
7c673cae
FG
882
883 // -- import/export --
// Export side. Declarations only; decode_import() is the import side.
// NOTE(review): presumably finish_export() is the success counterpart of
// abort_export() -- confirm in the implementation.
884 void encode_export(bufferlist& bl);
11fdf7f2 885 void finish_export();
7c673cae
FG
// Abort path for an export: drops the PIN_TEMPEXPORTING reference, then
// requires STATE_EXPORTINGCAPS to be set, clears it and drops the
// PIN_EXPORTINGCAPS reference. The statement order is kept as-is.
886 void abort_export() {
887 put(PIN_TEMPEXPORTING);
11fdf7f2 888 ceph_assert(state_test(STATE_EXPORTINGCAPS));
7c673cae
FG
889 state_clear(STATE_EXPORTINGCAPS);
890 put(PIN_EXPORTINGCAPS);
891 }
// Import side: decodes from `p`; also takes a LogSegment.
11fdf7f2 892 void decode_import(bufferlist::const_iterator& p, LogSegment *ls);
7c673cae
FG
893
894
895 // for giving to clients
// encode_inodestat() returns an int; snapid defaults to CEPH_NOSNAP,
// max_bytes and getattr_wants default to 0.
// NOTE(review): the meaning of the return value and of max_bytes == 0 is
// not visible in this header -- confirm in the implementation.
896 int encode_inodestat(bufferlist& bl, Session *session, SnapRealm *realm,
897 snapid_t snapid=CEPH_NOSNAP, unsigned max_bytes=0,
898 int getattr_wants=0);
11fdf7f2 899 void encode_cap_message(const MClientCaps::ref &m, Capability *cap);
7c673cae
FG
900
901
902 // -- locks --
903public:
// One static LockType per lock member declared further down.
904 static LockType versionlock_type;
905 static LockType authlock_type;
906 static LockType linklock_type;
907 static LockType dirfragtreelock_type;
908 static LockType filelock_type;
909 static LockType xattrlock_type;
910 static LockType snaplock_type;
911 static LockType nestlock_type;
912 static LockType flocklock_type;
913 static LockType policylock_type;
914
94b18763 915 // FIXME not part of mempool
7c673cae
FG
// Per-inode lock objects. get_lock() maps the CEPH_LOCK_I* ids to every
// member below except versionlock, which has no case there.
916 LocalLock versionlock;
917 SimpleLock authlock;
918 SimpleLock linklock;
919 ScatterLock dirfragtreelock;
920 ScatterLock filelock;
921 SimpleLock xattrlock;
922 SimpleLock snaplock;
923 ScatterLock nestlock;
924 SimpleLock flocklock;
925 SimpleLock policylock;
926
927 SimpleLock* get_lock(int type) override {
928 switch (type) {
929 case CEPH_LOCK_IFILE: return &filelock;
930 case CEPH_LOCK_IAUTH: return &authlock;
931 case CEPH_LOCK_ILINK: return &linklock;
932 case CEPH_LOCK_IDFT: return &dirfragtreelock;
933 case CEPH_LOCK_IXATTR: return &xattrlock;
934 case CEPH_LOCK_ISNAP: return &snaplock;
935 case CEPH_LOCK_INEST: return &nestlock;
936 case CEPH_LOCK_IFLOCK: return &flocklock;
937 case CEPH_LOCK_IPOLICY: return &policylock;
938 }
939 return 0;
940 }
941
// Overrides of inherited virtuals; `type` is an int lock type.
// NOTE(review): presumably the same CEPH_LOCK_I* ids that get_lock()
// accepts -- confirm.
942 void set_object_info(MDSCacheObjectInfo &info) override;
943 void encode_lock_state(int type, bufferlist& bl) override;
11fdf7f2 944 void decode_lock_state(int type, const bufferlist& bl) override;
7c673cae
FG
945
946 void _finish_frag_update(CDir *dir, MutationRef& mut);
947
// Scatter-lock bookkeeping. Declarations only.
948 void clear_dirty_scattered(int type) override;
949 bool is_dirty_scattered();
950 void clear_scatter_dirty(); // on rejoin ack
951
952 void start_scatter(ScatterLock *lock);
953 void finish_scatter_update(ScatterLock *lock, CDir *dir,
954 version_t inode_version, version_t dir_accounted_version);
955 void finish_scatter_gather_update(int type);
956 void finish_scatter_gather_update_accounted(int type, MutationRef& mut, EMetaBlob *metablob);
957
958 // -- snap --
// Snap realm management and snap codec. Declarations only.
// NOTE(review): the effect of no_split / no_join is not visible in this
// header; both default to false -- see the implementation.
959 void open_snaprealm(bool no_split=false);
960 void close_snaprealm(bool no_join=false);
961 SnapRealm *find_snaprealm() const;
962 void encode_snap(bufferlist& bl);
11fdf7f2 963 void decode_snap(bufferlist::const_iterator& p);
7c673cae
FG
964
965 // -- caps -- (new)
966 // client caps
// Current and wanted loner client; both start at -1, the same value
// get_target_loner() returns when the two differ.
// NOTE(review): -1 presumably means "no loner" -- confirm.
94b18763 967 client_t loner_cap = -1, want_loner_cap = -1;
7c673cae
FG
968
969 client_t get_loner() const { return loner_cap; }
970 client_t get_wanted_loner() const { return want_loner_cap; }
971
972 // this is the loner state our locks should aim for
973 client_t get_target_loner() const {
974 if (loner_cap == want_loner_cap)
975 return loner_cap;
976 else
977 return -1;
978 }
979
// Loner selection. Declarations only.
// NOTE(review): what the bool results of choose_ideal_loner(),
// try_set_loner() and try_drop_loner() signify is not visible in this
// header -- see the implementation.
980 client_t calc_ideal_loner();
7c673cae 981 void set_loner_cap(client_t l);
b32b8144
FG
982 bool choose_ideal_loner();
983 bool try_set_loner();
7c673cae
FG
984 bool try_drop_loner();
985
986 // choose new lock state during recovery, based on issued caps
987 void choose_lock_state(SimpleLock *lock, int allissued);
988 void choose_lock_states(int dirty_caps);
989
990 int count_nonstale_caps() {
991 int n = 0;
94b18763 992 for (const auto &p : client_caps) {
11fdf7f2 993 if (!p.second.is_stale())
7c673cae 994 n++;
94b18763 995 }
7c673cae
FG
996 return n;
997 }
998 bool multiple_nonstale_caps() {
999 int n = 0;
94b18763 1000 for (const auto &p : client_caps) {
11fdf7f2 1001 if (!p.second.is_stale()) {
7c673cae
FG
1002 if (n)
1003 return true;
1004 n++;
1005 }
94b18763 1006 }
7c673cae
FG
1007 return false;
1008 }
1009
1010 bool is_any_caps() { return !client_caps.empty(); }
1011 bool is_any_nonstale_caps() { return count_nonstale_caps(); }
1012
94b18763 1013 const mempool::mds_co::compact_map<int32_t,int32_t>& get_mds_caps_wanted() const { return mds_caps_wanted; }
11fdf7f2
TL
// Two overloads: one takes a whole map, the other a single rank and
// wanted value.
// NOTE(review): the map overload takes a non-const reference; whether it
// copies or consumes `m` is not visible here -- confirm.
1014 void set_mds_caps_wanted(mempool::mds_co::compact_map<int32_t,int32_t>& m);
1015 void set_mds_caps_wanted(mds_rank_t mds, int32_t wanted);
7c673cae 1016
11fdf7f2 1017 const mempool_cap_map& get_client_caps() const { return client_caps; }
7c673cae
FG
1018 Capability *get_client_cap(client_t client) {
1019 auto client_caps_entry = client_caps.find(client);
1020 if (client_caps_entry != client_caps.end())
11fdf7f2 1021 return &client_caps_entry->second;
7c673cae
FG
1022 return 0;
1023 }
1024 int get_client_cap_pending(client_t client) const {
1025 auto client_caps_entry = client_caps.find(client);
1026 if (client_caps_entry != client_caps.end()) {
11fdf7f2 1027 return client_caps_entry->second.pending();
7c673cae
FG
1028 } else {
1029 return 0;
1030 }
1031 }
1032
11fdf7f2
TL
1033 int get_num_caps_wanted() const { return num_caps_wanted; }
// NOTE(review): presumably adds the delta `d` to the counter read by
// get_num_caps_wanted() -- confirm in the implementation.
1034 void adjust_num_caps_wanted(int d);
1035
7c673cae
FG
// Client capability lifecycle. Declarations only.
// add_client_cap() and reconnect_cap() return a Capability pointer;
// conrealm defaults to 0 (null).
1036 Capability *add_client_cap(client_t client, Session *session, SnapRealm *conrealm=0);
1037 void remove_client_cap(client_t client);
1038 void move_to_realm(SnapRealm *realm);
1039
1040 Capability *reconnect_cap(client_t client, const cap_reconnect_t& icr, Session *session);
1041 void clear_client_caps_after_export();
1042 void export_client_caps(std::map<client_t,Capability::Export>& cl);
1043
1044 // caps allowed
1045 int get_caps_liked() const;
1046 int get_caps_allowed_ever() const;
1047 int get_caps_allowed_by_type(int type) const;
1048 int get_caps_careful() const;
1049 int get_xlocker_mask(client_t client) const;
11fdf7f2 1050 int get_caps_allowed_for_client(Session *s, Capability *cap, mempool_inode *file_i) const;
7c673cae
FG
1051
1052 // caps issued, wanted
// The pointer parameters default to 0 (null), shift to 0 and mask to -1.
// NOTE(review): presumably ploner/pother/pxlocker are optional outputs
// that may be left null -- confirm in the implementation.
1053 int get_caps_issued(int *ploner = 0, int *pother = 0, int *pxlocker = 0,
1054 int shift = 0, int mask = -1);
1055 bool is_any_caps_wanted() const;
1056 int get_caps_wanted(int *ploner = 0, int *pother = 0, int shift = 0, int mask = -1) const;
1057 bool issued_caps_need_gather(SimpleLock *lock);
7c673cae
FG
1058
1059 // -- authority --
1060 mds_authority_t authority() const override;
1061
1062 // -- auth pins --
// can_auth_pin() takes an optional int pointer, null by default.
// NOTE(review): presumably an error code is stored through err_ret when
// pinning is refused -- confirm in the implementation.
91327a77 1063 bool can_auth_pin(int *err_ret=nullptr) const override;
7c673cae
FG
// `by` is an opaque pointer supplied by the caller of auth_pin()/auth_unpin().
1064 void auth_pin(void *by) override;
1065 void auth_unpin(void *by) override;
1066
1067 // -- freeze --
1068 bool is_freezing_inode() const { return state_test(STATE_FREEZING); }
1069 bool is_frozen_inode() const { return state_test(STATE_FROZEN); }
1070 bool is_frozen_auth_pin() const { return state_test(STATE_FROZENAUTHPIN); }
// is_frozen() and is_freezing() override inherited virtuals;
// is_frozen_dir() is specific to this class. Bodies are not in this header.
1071 bool is_frozen() const override;
1072 bool is_frozen_dir() const;
1073 bool is_freezing() const override;
1074
1075 /* Freeze the inode. auth_pin_allowance lets the caller account for any
1076 * auth_pins it is itself holding/responsible for. */
1077 bool freeze_inode(int auth_pin_allowance=0);
// Two overloads: one takes a caller-supplied context vector, one takes
// nothing.
// NOTE(review): presumably the first hands waiters back in `finished`
// while the second handles them itself -- confirm.
11fdf7f2 1078 void unfreeze_inode(MDSContext::vec& finished);
7c673cae
FG
1079 void unfreeze_inode();
1080
1081 void freeze_auth_pin();
1082 void unfreeze_auth_pin();
1083
1084 // -- reference counting --
// Diagnostic override for a bad put: logs this inode, the pin id `by`,
// its pin_name() and the current ref count at level 0 (plus ref_map when
// MDS_REF_SET is defined), then asserts ref > 0 (and ref_map[by] > 0 under
// MDS_REF_SET).
// NOTE(review): presumably called by the base class when put(by) detects
// an inconsistent reference count -- confirm.
1085 void bad_put(int by) override {
1086 generic_dout(0) << " bad put " << *this << " by " << by << " " << pin_name(by) << " was " << ref
1087#ifdef MDS_REF_SET
1088 << " (" << ref_map << ")"
1089#endif
1090 << dendl;
1091#ifdef MDS_REF_SET
11fdf7f2 1092 ceph_assert(ref_map[by] > 0);
7c673cae 1093#endif
11fdf7f2 1094 ceph_assert(ref > 0);
7c673cae
FG
1095 }
// Diagnostic override for a bad get: logs this inode, the pin id `by`,
// its pin_name() and the current ref count at level 0 (plus ref_map when
// MDS_REF_SET is defined). The only assertion, ref_map[by] >= 0, is
// compiled in under MDS_REF_SET.
// NOTE(review): presumably called by the base class when get(by) detects
// an inconsistent reference count -- confirm.
1096 void bad_get(int by) override {
1097 generic_dout(0) << " bad get " << *this << " by " << by << " " << pin_name(by) << " was " << ref
1098#ifdef MDS_REF_SET
1099 << " (" << ref_map << ")"
1100#endif
1101 << dendl;
1102#ifdef MDS_REF_SET
11fdf7f2 1103 ceph_assert(ref_map[by] >= 0);
7c673cae
FG
1104#endif
1105 }
// Overrides of inherited reference-counting hooks; bodies are not in this
// header.
// NOTE(review): presumably first_get()/last_put() fire when the ref count
// leaves/returns to zero -- confirm in the base class.
1106 void first_get() override;
1107 void last_put() override;
1108 void _put() override;
1109
1110
1111 // -- hierarchy stuff --
1112public:
1113 void set_primary_parent(CDentry *p) {
11fdf7f2
TL
1114 ceph_assert(parent == 0 ||
1115 g_conf().get_val<bool>("mds_hack_allow_loading_invalid_metadata"));
7c673cae
FG
1116 parent = p;
1117 }
1118 void remove_primary_parent(CDentry *dn) {
11fdf7f2 1119 ceph_assert(dn == parent);
7c673cae
FG
1120 parent = 0;
1121 }
// Out-of-line add/remove for remote parent dentries.
// NOTE(review): presumably these maintain remote_parents, the container
// whose size num_remote_parents() reports -- confirm in the implementation.
1122 void add_remote_parent(CDentry *p);
1123 void remove_remote_parent(CDentry *p);
1124 int num_remote_parents() {
1125 return remote_parents.size();
1126 }
1127
1128 void push_projected_parent(CDentry *dn) {
1129 projected_parent.push_back(dn);
1130 }
1131 void pop_projected_parent() {
11fdf7f2 1132 ceph_assert(projected_parent.size());
7c673cae
FG
1133 parent = projected_parent.front();
1134 projected_parent.pop_front();
1135 }
1136
7c673cae 1137public:
// Export pinning. Declarations only.
// NOTE(review): presumably get_export_pin(inherit=true) also consults
// ancestors when this inode has no pin of its own -- confirm.
31f18b77 1138 void maybe_export_pin(bool update=false);
7c673cae
FG
1139 void set_export_pin(mds_rank_t rank);
1140 mds_rank_t get_export_pin(bool inherit=true) const;
1141 bool is_exportable(mds_rank_t dest) const;
1142
// print() overrides an inherited virtual; dump() writes to a Formatter and
// takes a flags value that defaults to DUMP_DEFAULT.
1143 void print(ostream& out) override;
11fdf7f2 1144 void dump(Formatter *f, int flags = DUMP_DEFAULT) const;
7c673cae
FG
1145
1146 /**
1147 * @defgroup Scrubbing and fsck
1148 * @{
1149 */
1150
1151 /**
1152 * Report the results of validation against a particular inode.
1153 * Each member is a member_status<T>.
1154 * <member>.checked represents if validation was performed against the member.
1155 * <member>.passed represents if the member passed validation.
 * NOTE(review): this comment previously described a pair of bools
 * (.first/.second); the mapping to checked/passed is inferred from the
 * field names -- confirm.
1156 * performed_validation is set to true if the validation was actually
1157 * run. It might not be run if, for instance, the inode is marked as dirty.
1158 * passed_validation is set to true if everything that was checked
1159 * passed its validation.
1160 */
1161 struct validated_data {
// Per-item validation record: status flags, the retval of the on-disk
// read, the on-disk and in-memory values, and an error text stream.
1162 template<typename T>struct member_status {
b32b8144
FG
1163 bool checked = false;
1164 bool passed = false;
1165 bool repaired = false;
1166 int ondisk_read_retval = 0;
7c673cae
FG
1167 T ondisk_value;
1168 T memory_value;
1169 std::stringstream error_str;
7c673cae
FG
1170 };
1171
94b18763
FG
1172 bool performed_validation = false;
1173 bool passed_validation = false;
7c673cae
FG
1174
// Value type for the raw_stats member: a dirstat / rstat pair.
1175 struct raw_stats_t {
1176 frag_info_t dirstat;
1177 nest_info_t rstat;
1178 };
1179
1180 member_status<inode_backtrace_t> backtrace;
94b18763 1181 member_status<mempool_inode> inode; // XXX should not be in mempool; wait for pmr
7c673cae
FG
1182 member_status<raw_stats_t> raw_stats;
1183
94b18763 1184 validated_data() {}
7c673cae
FG
1185
1186 void dump(Formatter *f) const;
b32b8144
FG
1187
// NOTE(review): presumably true when every failed member has `repaired`
// set -- confirm in the implementation.
1188 bool all_damage_repaired() const;
7c673cae
FG
1189 };
1190
1191 /**
1192 * Validate that the on-disk state of an inode matches what
1193 * we expect from our memory state. Currently this checks that:
1194 * 1) The backtrace associated with the file data exists and is correct
1195 * 2) For directories, the actual inode metadata matches our memory state,
1196 * 3) For directories, the rstats match
1197 *
1198 * @param results A freshly-created validated_data struct, with values set
1199 * as described in the struct documentation.
1200 * NOTE(review): an `mdr` request parameter used to be documented here, but
1201 * the signature below only takes `results` and `fin`.
1202 * @param fin Context to call back on completion (or NULL)
1203 */
1204 void validate_disk_state(validated_data *results,
11fdf7f2 1205 MDSContext *fin);
7c673cae
FG
// Static helper: takes a validated_data by const reference and a Formatter.
1206 static void dump_validation_results(const validated_data& results,
1207 Formatter *f);
1208private:
// Private helper taking a ValidationContinuation, a result code and a
// stage number. ValidationContinuation is a friend, so it can reach this
// and other private members.
// NOTE(review): presumably invoked by ValidationContinuation once per
// validation stage -- confirm in the implementation.
1209 bool _validate_disk_state(class ValidationContinuation *c,
1210 int rval, int stage);
1211 friend class ValidationContinuation;
1212 /** @} Scrubbing and fsck */
1213};
1214
// Stream-insertion overload for CInode::scrub_stamp_info_t (declaration only).
1215ostream& operator<<(ostream& out, const CInode::scrub_stamp_info_t& si);
1216
1217#undef dout_context
1218#endif