]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | ||
16 | ||
17 | #ifndef CEPH_CINODE_H | |
18 | #define CEPH_CINODE_H | |
19 | ||
94b18763 FG |
20 | #include <list> |
21 | #include <map> | |
22 | #include <set> | |
23 | #include <boost/utility/string_view.hpp> | |
24 | ||
7c673cae FG |
25 | #include "common/config.h" |
26 | #include "include/counter.h" | |
27 | #include "include/elist.h" | |
28 | #include "include/types.h" | |
29 | #include "include/lru.h" | |
30 | #include "include/compact_set.h" | |
31 | ||
32 | #include "MDSCacheObject.h" | |
33 | #include "flock.h" | |
34 | ||
35 | #include "CDentry.h" | |
36 | #include "SimpleLock.h" | |
37 | #include "ScatterLock.h" | |
38 | #include "LocalLock.h" | |
39 | #include "Capability.h" | |
40 | #include "SnapRealm.h" | |
41 | #include "Mutation.h" | |
42 | ||
7c673cae FG |
43 | #define dout_context g_ceph_context |
44 | ||
45 | class Context; | |
46 | class CDentry; | |
47 | class CDir; | |
48 | class Message; | |
49 | class CInode; | |
50 | class MDCache; | |
51 | class LogSegment; | |
52 | struct SnapRealm; | |
53 | class Session; | |
54 | class MClientCaps; | |
55 | struct ObjectOperation; | |
56 | class EMetaBlob; | |
57 | ||
58 | ||
59 | ostream& operator<<(ostream& out, const CInode& in); | |
60 | ||
// Element type of the cinode_lock_info[] table declared just below:
// a lock identifier together with a capability mask.
struct cinode_lock_info_t {
  int lock;     // lock identifier
  int wr_caps;  // capability bits paired with `lock` (presumably write caps -- confirm against the table definition)
};
65 | ||
66 | extern cinode_lock_info_t cinode_lock_info[]; | |
67 | extern int num_cinode_locks; | |
68 | ||
69 | ||
70 | /** | |
71 | * Base class for CInode, containing the backing store data and | |
72 | * serialization methods. This exists so that we can read and | |
73 | * handle CInodes from the backing store without hitting all | |
74 | * the business logic in CInode proper. | |
75 | */ | |
76 | class InodeStoreBase { | |
77 | public: | |
94b18763 FG |
78 | typedef inode_t<mempool::mds_co::pool_allocator> mempool_inode; |
79 | typedef old_inode_t<mempool::mds_co::pool_allocator> mempool_old_inode; | |
80 | typedef mempool::mds_co::compact_map<snapid_t, mempool_old_inode> mempool_old_inode_map; | |
81 | typedef xattr_map<mempool::mds_co::pool_allocator> mempool_xattr_map; // FIXME bufferptr not in mempool | |
82 | ||
83 | mempool_inode inode; // the inode itself | |
84 | mempool::mds_co::string symlink; // symlink dest, if symlink | |
85 | mempool_xattr_map xattrs; | |
7c673cae | 86 | fragtree_t dirfragtree; // dir frag tree, if any. always consistent with our dirfrag map. |
94b18763 FG |
87 | mempool_old_inode_map old_inodes; // key = last, value.first = first |
88 | snapid_t oldest_snap = CEPH_NOSNAP; | |
89 | damage_flags_t damage_flags = 0; | |
7c673cae | 90 | |
94b18763 | 91 | InodeStoreBase() {} |
7c673cae FG |
92 | |
93 | /* Helpers */ | |
94 | bool is_file() const { return inode.is_file(); } | |
95 | bool is_symlink() const { return inode.is_symlink(); } | |
96 | bool is_dir() const { return inode.is_dir(); } | |
97 | static object_t get_object_name(inodeno_t ino, frag_t fg, const char *suffix); | |
98 | ||
99 | /* Full serialization for use in ".inode" root inode objects */ | |
100 | void encode(bufferlist &bl, uint64_t features, const bufferlist *snap_blob=NULL) const; | |
101 | void decode(bufferlist::iterator &bl, bufferlist& snap_blob); | |
102 | ||
103 | /* Serialization without ENCODE_START/FINISH blocks for use embedded in dentry */ | |
104 | void encode_bare(bufferlist &bl, uint64_t features, const bufferlist *snap_blob=NULL) const; | |
105 | void decode_bare(bufferlist::iterator &bl, bufferlist &snap_blob, __u8 struct_v=5); | |
106 | ||
107 | /* For test/debug output */ | |
108 | void dump(Formatter *f) const; | |
109 | ||
110 | /* For use by offline tools */ | |
94b18763 FG |
111 | __u32 hash_dentry_name(boost::string_view dn); |
112 | frag_t pick_dirfrag(boost::string_view dn); | |
7c673cae FG |
113 | }; |
114 | ||
115 | class InodeStore : public InodeStoreBase { | |
116 | public: | |
94b18763 | 117 | // FIXME bufferlist not part of mempool |
7c673cae FG |
118 | bufferlist snap_blob; // Encoded copy of SnapRealm, because we can't |
119 | // rehydrate it without full MDCache | |
120 | void encode(bufferlist &bl, uint64_t features) const { | |
121 | InodeStoreBase::encode(bl, features, &snap_blob); | |
122 | } | |
123 | void decode(bufferlist::iterator &bl) { | |
124 | InodeStoreBase::decode(bl, snap_blob); | |
125 | } | |
126 | void encode_bare(bufferlist &bl, uint64_t features) const { | |
127 | InodeStoreBase::encode_bare(bl, features, &snap_blob); | |
128 | } | |
129 | void decode_bare(bufferlist::iterator &bl) { | |
130 | InodeStoreBase::decode_bare(bl, snap_blob); | |
131 | } | |
132 | ||
133 | static void generate_test_instances(std::list<InodeStore*>& ls); | |
134 | }; | |
135 | WRITE_CLASS_ENCODER_FEATURES(InodeStore) | |
136 | ||
137 | // cached inode wrapper | |
138 | class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CInode> { | |
139 | public: | |
181888fb | 140 | MEMPOOL_CLASS_HELPERS(); |
7c673cae FG |
141 | // -- pins -- |
142 | static const int PIN_DIRFRAG = -1; | |
143 | static const int PIN_CAPS = 2; // client caps | |
144 | static const int PIN_IMPORTING = -4; // importing | |
145 | static const int PIN_OPENINGDIR = 7; | |
146 | static const int PIN_REMOTEPARENT = 8; | |
147 | static const int PIN_BATCHOPENJOURNAL = 9; | |
148 | static const int PIN_SCATTERED = 10; | |
149 | static const int PIN_STICKYDIRS = 11; | |
150 | //static const int PIN_PURGING = -12; | |
151 | static const int PIN_FREEZING = 13; | |
152 | static const int PIN_FROZEN = 14; | |
153 | static const int PIN_IMPORTINGCAPS = -15; | |
154 | static const int PIN_PASTSNAPPARENT = -16; | |
155 | static const int PIN_OPENINGSNAPPARENTS = 17; | |
156 | static const int PIN_TRUNCATING = 18; | |
157 | static const int PIN_STRAY = 19; // we pin our stray inode while active | |
158 | static const int PIN_NEEDSNAPFLUSH = 20; | |
159 | static const int PIN_DIRTYRSTAT = 21; | |
160 | static const int PIN_EXPORTINGCAPS = 22; | |
161 | static const int PIN_DIRTYPARENT = 23; | |
162 | static const int PIN_DIRWAITER = 24; | |
163 | static const int PIN_SCRUBQUEUE = 25; | |
164 | ||
165 | const char *pin_name(int p) const override { | |
166 | switch (p) { | |
167 | case PIN_DIRFRAG: return "dirfrag"; | |
168 | case PIN_CAPS: return "caps"; | |
169 | case PIN_IMPORTING: return "importing"; | |
170 | case PIN_OPENINGDIR: return "openingdir"; | |
171 | case PIN_REMOTEPARENT: return "remoteparent"; | |
172 | case PIN_BATCHOPENJOURNAL: return "batchopenjournal"; | |
173 | case PIN_SCATTERED: return "scattered"; | |
174 | case PIN_STICKYDIRS: return "stickydirs"; | |
175 | //case PIN_PURGING: return "purging"; | |
176 | case PIN_FREEZING: return "freezing"; | |
177 | case PIN_FROZEN: return "frozen"; | |
178 | case PIN_IMPORTINGCAPS: return "importingcaps"; | |
179 | case PIN_EXPORTINGCAPS: return "exportingcaps"; | |
180 | case PIN_PASTSNAPPARENT: return "pastsnapparent"; | |
181 | case PIN_OPENINGSNAPPARENTS: return "openingsnapparents"; | |
182 | case PIN_TRUNCATING: return "truncating"; | |
183 | case PIN_STRAY: return "stray"; | |
184 | case PIN_NEEDSNAPFLUSH: return "needsnapflush"; | |
185 | case PIN_DIRTYRSTAT: return "dirtyrstat"; | |
186 | case PIN_DIRTYPARENT: return "dirtyparent"; | |
187 | case PIN_DIRWAITER: return "dirwaiter"; | |
188 | case PIN_SCRUBQUEUE: return "scrubqueue"; | |
189 | default: return generic_pin_name(p); | |
190 | } | |
191 | } | |
192 | ||
193 | // -- state -- | |
194 | static const int STATE_EXPORTING = (1<<2); // on nonauth bystander. | |
195 | static const int STATE_OPENINGDIR = (1<<5); | |
196 | static const int STATE_FREEZING = (1<<7); | |
197 | static const int STATE_FROZEN = (1<<8); | |
198 | static const int STATE_AMBIGUOUSAUTH = (1<<9); | |
199 | static const int STATE_EXPORTINGCAPS = (1<<10); | |
200 | static const int STATE_NEEDSRECOVER = (1<<11); | |
201 | static const int STATE_RECOVERING = (1<<12); | |
202 | static const int STATE_PURGING = (1<<13); | |
203 | static const int STATE_DIRTYPARENT = (1<<14); | |
204 | static const int STATE_DIRTYRSTAT = (1<<15); | |
205 | static const int STATE_STRAYPINNED = (1<<16); | |
206 | static const int STATE_FROZENAUTHPIN = (1<<17); | |
207 | static const int STATE_DIRTYPOOL = (1<<18); | |
208 | static const int STATE_REPAIRSTATS = (1<<19); | |
209 | static const int STATE_MISSINGOBJS = (1<<20); | |
210 | static const int STATE_EVALSTALECAPS = (1<<21); | |
31f18b77 | 211 | static const int STATE_QUEUEDEXPORTPIN = (1<<22); |
7c673cae FG |
212 | // orphan inode needs notification of releasing reference |
213 | static const int STATE_ORPHAN = STATE_NOTIFYREF; | |
214 | ||
215 | static const int MASK_STATE_EXPORTED = | |
216 | (STATE_DIRTY|STATE_NEEDSRECOVER|STATE_DIRTYPARENT|STATE_DIRTYPOOL); | |
217 | static const int MASK_STATE_EXPORT_KEPT = | |
3efd9988 | 218 | (STATE_FROZEN|STATE_AMBIGUOUSAUTH|STATE_EXPORTINGCAPS|STATE_QUEUEDEXPORTPIN); |
7c673cae FG |
219 | |
220 | // -- waiters -- | |
221 | static const uint64_t WAIT_DIR = (1<<0); | |
222 | static const uint64_t WAIT_FROZEN = (1<<1); | |
223 | static const uint64_t WAIT_TRUNC = (1<<2); | |
224 | static const uint64_t WAIT_FLOCK = (1<<3); | |
225 | ||
226 | static const uint64_t WAIT_ANY_MASK = (uint64_t)(-1); | |
227 | ||
228 | // misc | |
229 | static const unsigned EXPORT_NONCE = 1; // nonce given to replicas created by export | |
230 | ||
231 | ostream& print_db_line_prefix(ostream& out) override; | |
232 | ||
233 | public: | |
234 | MDCache *mdcache; | |
235 | ||
94b18763 FG |
236 | SnapRealm *snaprealm = nullptr; |
237 | SnapRealm *containing_realm = nullptr; | |
7c673cae | 238 | snapid_t first, last; |
94b18763 | 239 | mempool::mds_co::compact_set<snapid_t> dirty_old_rstats; |
7c673cae FG |
240 | |
241 | class scrub_stamp_info_t { | |
242 | public: | |
243 | /// version we started our latest scrub (whether in-progress or finished) | |
94b18763 | 244 | version_t scrub_start_version = 0; |
7c673cae FG |
245 | /// time we started our latest scrub (whether in-progress or finished) |
246 | utime_t scrub_start_stamp; | |
247 | /// version we started our most recent finished scrub | |
94b18763 | 248 | version_t last_scrub_version = 0; |
7c673cae FG |
249 | /// time we started our most recent finished scrub |
250 | utime_t last_scrub_stamp; | |
94b18763 | 251 | scrub_stamp_info_t() {} |
7c673cae | 252 | void reset() { |
b32b8144 FG |
253 | scrub_start_version = last_scrub_version = 0; |
254 | scrub_start_stamp = last_scrub_stamp = utime_t(); | |
7c673cae FG |
255 | } |
256 | }; | |
257 | ||
258 | class scrub_info_t : public scrub_stamp_info_t { | |
259 | public: | |
94b18763 FG |
260 | CDentry *scrub_parent = nullptr; |
261 | MDSInternalContextBase *on_finish = nullptr; | |
7c673cae | 262 | |
94b18763 FG |
263 | bool last_scrub_dirty = false; /// are our stamps dirty with respect to disk state? |
264 | bool scrub_in_progress = false; /// are we currently scrubbing? | |
265 | bool children_scrubbed = false; | |
7c673cae FG |
266 | |
267 | /// my own (temporary) stamps and versions for each dirfrag we have | |
94b18763 | 268 | std::map<frag_t, scrub_stamp_info_t> dirfrag_stamps; // XXX not part of mempool |
7c673cae | 269 | |
b32b8144 | 270 | ScrubHeaderRef header; |
7c673cae | 271 | |
94b18763 | 272 | scrub_info_t() {} |
7c673cae FG |
273 | }; |
274 | ||
275 | const scrub_info_t *scrub_info() const{ | |
276 | if (!scrub_infop) | |
277 | scrub_info_create(); | |
278 | return scrub_infop; | |
279 | } | |
280 | ||
b32b8144 FG |
281 | ScrubHeaderRef get_scrub_header() { |
282 | if (scrub_infop == nullptr) { | |
283 | return nullptr; | |
284 | } else { | |
285 | return scrub_infop->header; | |
286 | } | |
287 | } | |
288 | ||
7c673cae FG |
289 | bool scrub_is_in_progress() const { |
290 | return (scrub_infop && scrub_infop->scrub_in_progress); | |
291 | } | |
292 | /** | |
293 | * Start scrubbing on this inode. That could be very short if it's | |
294 | * a file, or take a long time if we're recursively scrubbing a directory. | |
295 | * @pre It is not currently scrubbing | |
296 | * @post it has set up internal scrubbing state | |
297 | * @param scrub_version What version are we scrubbing at (usually, parent | |
298 | * directory's get_projected_version()) | |
299 | */ | |
300 | void scrub_initialize(CDentry *scrub_parent, | |
b32b8144 | 301 | ScrubHeaderRef& header, |
7c673cae FG |
302 | MDSInternalContextBase *f); |
303 | /** | |
304 | * Get the next dirfrag to scrub. Gives you a frag_t in output param which | |
305 | * you must convert to a CDir (and possibly load off disk). | |
306 | * @param out_dirfrag A pointer to frag_t, will be filled in with the next dirfrag to | |
307 | * scrub if there is one. | |
308 | * @returns 0 on success, you should scrub the passed-out frag_t right now; | |
309 | * ENOENT: There are no remaining dirfrags to scrub | |
310 | * <0 There was some other error (It will return -ENOTDIR if not a directory) | |
311 | */ | |
312 | int scrub_dirfrag_next(frag_t* out_dirfrag); | |
313 | /** | |
314 | * Get the currently scrubbing dirfrags. When returned, the | |
315 | * passed-in list will be filled in with all frag_ts which have | |
316 | * been returned from scrub_dirfrag_next but not sent back | |
317 | * via scrub_dirfrag_finished. | |
318 | */ | |
319 | void scrub_dirfrags_scrubbing(list<frag_t> *out_dirfrags); | |
320 | /** | |
321 | * Report to the CInode that a dirfrag it owns has been scrubbed. Call | |
322 | * this for every frag_t returned from scrub_dirfrag_next(). | |
323 | * @param dirfrag The frag_t that was scrubbed | |
324 | */ | |
325 | void scrub_dirfrag_finished(frag_t dirfrag); | |
326 | /** | |
327 | * Call this once the scrub has been completed, whether it's a full | |
328 | * recursive scrub on a directory or simply the data on a file (or | |
329 | * anything in between). | |
330 | * @param c An out param which is filled in with a Context* that must | |
331 | * be complete()ed. | |
332 | */ | |
333 | void scrub_finished(MDSInternalContextBase **c); | |
334 | /** | |
335 | * Report to the CInode that all dirfrags it owns have been scrubbed. | |
336 | */ | |
337 | void scrub_children_finished() { | |
338 | scrub_infop->children_scrubbed = true; | |
339 | } | |
340 | void scrub_set_finisher(MDSInternalContextBase *c) { | |
341 | assert(!scrub_infop->on_finish); | |
342 | scrub_infop->on_finish = c; | |
343 | } | |
344 | ||
345 | private: | |
346 | /** | |
347 | * Create a scrub_info_t struct for the scrub_infop pointer. | |
348 | */ | |
349 | void scrub_info_create() const; | |
350 | /** | |
351 | * Delete the scrub_info_t struct if it does not hold any useful data. | |
352 | */ | |
353 | void scrub_maybe_delete_info(); | |
354 | public: | |
355 | ||
356 | bool is_multiversion() const { | |
357 | return snaprealm || // other snaprealms will link to me | |
358 | inode.is_dir() || // links to me in other snaps | |
359 | inode.nlink > 1 || // there are remote links, possibly snapped, that will need to find me | |
360 | !old_inodes.empty(); // once multiversion, always multiversion. until old_inodes gets cleaned out. | |
361 | } | |
362 | snapid_t get_oldest_snap(); | |
363 | ||
94b18763 | 364 | uint64_t last_journaled = 0; // log offset for the last time i was journaled |
7c673cae FG |
365 | //loff_t last_open_journaled; // log offset for the last journaled EOpen |
366 | utime_t last_dirstat_prop; | |
367 | ||
368 | ||
369 | // list item node for when we have unpropagated rstat data | |
370 | elist<CInode*>::item dirty_rstat_item; | |
371 | ||
372 | bool is_dirty_rstat() { | |
373 | return state_test(STATE_DIRTYRSTAT); | |
374 | } | |
375 | void mark_dirty_rstat(); | |
376 | void clear_dirty_rstat(); | |
377 | ||
94b18763 | 378 | //bool hack_accessed = false; |
7c673cae FG |
379 | //utime_t hack_load_stamp; |
380 | ||
381 | /** | |
382 | * Projection methods, used to store inode changes until they have been journaled, | |
383 | * at which point they are popped. | |
384 | * Usage: | |
94b18763 FG |
385 | * project_inode as needed. If you're changing xattrs or sr_t, then pass true |
386 | * as needed, then change the xattrs/snapnode member as needed. (Dirty | |
387 | * exception: project_past_snaprealm_parent allows you to project the | |
388 | * snapnode after doing project_inode, i.e. you don't need to pass | |
389 | * snap=true.) | |
7c673cae FG |
390 | * |
391 | * Then, journal. Once journaling is done, pop_and_dirty_projected_inode. | |
392 | * This function will take care of the inode itself, the xattrs, and the snaprealm. | |
393 | */ | |
394 | ||
94b18763 FG |
395 | class projected_inode { |
396 | public: | |
397 | mempool_inode inode; | |
398 | std::unique_ptr<mempool_xattr_map> xattrs; | |
399 | std::unique_ptr<sr_t> snapnode; | |
400 | ||
401 | projected_inode() = delete; | |
402 | projected_inode(const mempool_inode &in) : inode(in) {} | |
7c673cae | 403 | }; |
94b18763 FG |
404 | |
405 | private: | |
406 | mempool::mds_co::list<projected_inode> projected_nodes; // projected values (only defined while dirty) | |
407 | size_t num_projected_xattrs = 0; | |
408 | size_t num_projected_srnodes = 0; | |
409 | ||
410 | sr_t &project_snaprealm(projected_inode &pi); | |
411 | public: | |
412 | CInode::projected_inode &project_inode(bool xattr = false, bool snap = false); | |
7c673cae FG |
413 | void pop_and_dirty_projected_inode(LogSegment *ls); |
414 | ||
94b18763 | 415 | projected_inode *get_projected_node() { |
7c673cae FG |
416 | if (projected_nodes.empty()) |
417 | return NULL; | |
418 | else | |
94b18763 | 419 | return &projected_nodes.back(); |
7c673cae FG |
420 | } |
421 | ||
422 | version_t get_projected_version() const { | |
423 | if (projected_nodes.empty()) | |
424 | return inode.version; | |
425 | else | |
94b18763 | 426 | return projected_nodes.back().inode.version; |
7c673cae FG |
427 | } |
428 | bool is_projected() const { | |
429 | return !projected_nodes.empty(); | |
430 | } | |
431 | ||
94b18763 | 432 | const mempool_inode *get_projected_inode() const { |
7c673cae FG |
433 | if (projected_nodes.empty()) |
434 | return &inode; | |
435 | else | |
94b18763 | 436 | return &projected_nodes.back().inode; |
7c673cae | 437 | } |
94b18763 | 438 | mempool_inode *get_projected_inode() { |
7c673cae FG |
439 | if (projected_nodes.empty()) |
440 | return &inode; | |
441 | else | |
94b18763 | 442 | return &projected_nodes.back().inode; |
7c673cae | 443 | } |
94b18763 | 444 | mempool_inode *get_previous_projected_inode() { |
7c673cae | 445 | assert(!projected_nodes.empty()); |
94b18763 FG |
446 | auto it = projected_nodes.rbegin(); |
447 | ++it; | |
448 | if (it != projected_nodes.rend()) | |
449 | return &it->inode; | |
7c673cae FG |
450 | else |
451 | return &inode; | |
452 | } | |
453 | ||
94b18763 | 454 | mempool_xattr_map *get_projected_xattrs() { |
7c673cae | 455 | if (num_projected_xattrs > 0) { |
94b18763 FG |
456 | for (auto it = projected_nodes.rbegin(); it != projected_nodes.rend(); ++it) |
457 | if (it->xattrs) | |
458 | return it->xattrs.get(); | |
7c673cae FG |
459 | } |
460 | return &xattrs; | |
461 | } | |
94b18763 FG |
462 | mempool_xattr_map *get_previous_projected_xattrs() { |
463 | if (num_projected_xattrs > 0) { | |
464 | for (auto it = ++projected_nodes.rbegin(); it != projected_nodes.rend(); ++it) | |
465 | if (it->xattrs) | |
466 | return it->xattrs.get(); | |
467 | } | |
7c673cae FG |
468 | return &xattrs; |
469 | } | |
470 | ||
7c673cae FG |
471 | const sr_t *get_projected_srnode() const { |
472 | if (num_projected_srnodes > 0) { | |
94b18763 FG |
473 | for (auto it = projected_nodes.rbegin(); it != projected_nodes.rend(); ++it) |
474 | if (it->snapnode) | |
475 | return it->snapnode.get(); | |
7c673cae FG |
476 | } |
477 | if (snaprealm) | |
478 | return &snaprealm->srnode; | |
479 | else | |
480 | return NULL; | |
481 | } | |
482 | sr_t *get_projected_srnode() { | |
483 | if (num_projected_srnodes > 0) { | |
94b18763 FG |
484 | for (auto it = projected_nodes.rbegin(); it != projected_nodes.rend(); ++it) |
485 | if (it->snapnode) | |
486 | return it->snapnode.get(); | |
7c673cae FG |
487 | } |
488 | if (snaprealm) | |
489 | return &snaprealm->srnode; | |
490 | else | |
491 | return NULL; | |
492 | } | |
493 | void project_past_snaprealm_parent(SnapRealm *newparent); | |
494 | ||
495 | private: | |
496 | void pop_projected_snaprealm(sr_t *next_snaprealm); | |
497 | ||
498 | public: | |
94b18763 | 499 | mempool_old_inode& cow_old_inode(snapid_t follows, bool cow_head); |
7c673cae | 500 | void split_old_inode(snapid_t snap); |
94b18763 | 501 | mempool_old_inode *pick_old_inode(snapid_t last); |
7c673cae FG |
502 | void pre_cow_old_inode(); |
503 | void purge_stale_snap_data(const std::set<snapid_t>& snaps); | |
504 | ||
505 | // -- cache infrastructure -- | |
506 | private: | |
94b18763 FG |
507 | mempool::mds_co::compact_map<frag_t,CDir*> dirfrags; // cached dir fragments under this Inode |
508 | int stickydir_ref = 0; | |
509 | scrub_info_t *scrub_infop = nullptr; | |
7c673cae FG |
510 | |
511 | public: | |
512 | bool has_dirfrags() { return !dirfrags.empty(); } | |
513 | CDir* get_dirfrag(frag_t fg) { | |
514 | if (dirfrags.count(fg)) { | |
515 | //assert(g_conf->debug_mds < 2 || dirfragtree.is_leaf(fg)); // performance hack FIXME | |
516 | return dirfrags[fg]; | |
517 | } else | |
518 | return NULL; | |
519 | } | |
520 | bool get_dirfrags_under(frag_t fg, std::list<CDir*>& ls); | |
521 | CDir* get_approx_dirfrag(frag_t fg); | |
522 | void get_dirfrags(std::list<CDir*>& ls); | |
523 | void get_nested_dirfrags(std::list<CDir*>& ls); | |
524 | void get_subtree_dirfrags(std::list<CDir*>& ls); | |
525 | CDir *get_or_open_dirfrag(MDCache *mdcache, frag_t fg); | |
526 | CDir *add_dirfrag(CDir *dir); | |
527 | void close_dirfrag(frag_t fg); | |
528 | void close_dirfrags(); | |
529 | bool has_subtree_root_dirfrag(int auth=-1); | |
530 | bool has_subtree_or_exporting_dirfrag(); | |
531 | ||
532 | void force_dirfrags(); | |
533 | void verify_dirfrags(); | |
534 | ||
535 | void get_stickydirs(); | |
536 | void put_stickydirs(); | |
537 | ||
538 | protected: | |
539 | // parent dentries in cache | |
94b18763 FG |
540 | CDentry *parent = nullptr; // primary link |
541 | mempool::mds_co::compact_set<CDentry*> remote_parents; // if hard linked | |
7c673cae | 542 | |
94b18763 | 543 | mempool::mds_co::list<CDentry*> projected_parent; // for in-progress rename, (un)link, etc. |
7c673cae | 544 | |
94b18763 | 545 | mds_authority_t inode_auth = CDIR_AUTH_DEFAULT; |
7c673cae FG |
546 | |
547 | // -- distributed state -- | |
548 | protected: | |
549 | // file capabilities | |
94b18763 FG |
550 | using cap_map = mempool::mds_co::map<client_t, Capability*>; |
551 | cap_map client_caps; // client -> caps | |
552 | mempool::mds_co::compact_map<int32_t, int32_t> mds_caps_wanted; // [auth] mds -> caps wanted | |
553 | int replica_caps_wanted = 0; // [replica] what i've requested from auth | |
7c673cae FG |
554 | |
555 | public: | |
94b18763 FG |
556 | mempool::mds_co::compact_map<int, mempool::mds_co::set<client_t> > client_snap_caps; // [auth] [snap] dirty metadata we still need from the head |
557 | mempool::mds_co::compact_map<snapid_t, mempool::mds_co::set<client_t> > client_need_snapflush; | |
7c673cae FG |
558 | |
559 | void add_need_snapflush(CInode *snapin, snapid_t snapid, client_t client); | |
560 | void remove_need_snapflush(CInode *snapin, snapid_t snapid, client_t client); | |
561 | bool split_need_snapflush(CInode *cowin, CInode *in); | |
562 | ||
563 | protected: | |
564 | ||
94b18763 FG |
565 | ceph_lock_state_t *fcntl_locks = nullptr; |
566 | ceph_lock_state_t *flock_locks = nullptr; | |
7c673cae FG |
567 | |
568 | ceph_lock_state_t *get_fcntl_lock_state() { | |
569 | if (!fcntl_locks) | |
570 | fcntl_locks = new ceph_lock_state_t(g_ceph_context, CEPH_LOCK_FCNTL); | |
571 | return fcntl_locks; | |
572 | } | |
573 | void clear_fcntl_lock_state() { | |
574 | delete fcntl_locks; | |
575 | fcntl_locks = NULL; | |
576 | } | |
577 | ceph_lock_state_t *get_flock_lock_state() { | |
578 | if (!flock_locks) | |
579 | flock_locks = new ceph_lock_state_t(g_ceph_context, CEPH_LOCK_FLOCK); | |
580 | return flock_locks; | |
581 | } | |
582 | void clear_flock_lock_state() { | |
583 | delete flock_locks; | |
584 | flock_locks = NULL; | |
585 | } | |
586 | void clear_file_locks() { | |
587 | clear_fcntl_lock_state(); | |
588 | clear_flock_lock_state(); | |
589 | } | |
590 | void _encode_file_locks(bufferlist& bl) const { | |
591 | bool has_fcntl_locks = fcntl_locks && !fcntl_locks->empty(); | |
592 | ::encode(has_fcntl_locks, bl); | |
593 | if (has_fcntl_locks) | |
594 | ::encode(*fcntl_locks, bl); | |
595 | bool has_flock_locks = flock_locks && !flock_locks->empty(); | |
596 | ::encode(has_flock_locks, bl); | |
597 | if (has_flock_locks) | |
598 | ::encode(*flock_locks, bl); | |
599 | } | |
600 | void _decode_file_locks(bufferlist::iterator& p) { | |
601 | bool has_fcntl_locks; | |
602 | ::decode(has_fcntl_locks, p); | |
603 | if (has_fcntl_locks) | |
604 | ::decode(*get_fcntl_lock_state(), p); | |
605 | else | |
606 | clear_fcntl_lock_state(); | |
607 | bool has_flock_locks; | |
608 | ::decode(has_flock_locks, p); | |
609 | if (has_flock_locks) | |
610 | ::decode(*get_flock_lock_state(), p); | |
611 | else | |
612 | clear_flock_lock_state(); | |
613 | } | |
614 | ||
615 | // LogSegment lists i (may) belong to | |
616 | public: | |
617 | elist<CInode*>::item item_dirty; | |
618 | elist<CInode*>::item item_caps; | |
619 | elist<CInode*>::item item_open_file; | |
620 | elist<CInode*>::item item_dirty_parent; | |
621 | elist<CInode*>::item item_dirty_dirfrag_dir; | |
622 | elist<CInode*>::item item_dirty_dirfrag_nest; | |
623 | elist<CInode*>::item item_dirty_dirfrag_dirfragtree; | |
624 | elist<CInode*>::item item_scrub; | |
625 | ||
b32b8144 FG |
626 | // also update RecoveryQueue::RecoveryQueue() if you change this |
627 | elist<CInode*>::item& item_recover_queue = item_dirty_dirfrag_dir; | |
628 | elist<CInode*>::item& item_recover_queue_front = item_dirty_dirfrag_nest; | |
629 | ||
7c673cae | 630 | public: |
94b18763 | 631 | int auth_pin_freeze_allowance = 0; |
7c673cae FG |
632 | |
633 | inode_load_vec_t pop; | |
28e407b8 | 634 | elist<CInode*>::item item_pop_lru; |
7c673cae FG |
635 | |
636 | // friends | |
637 | friend class Server; | |
638 | friend class Locker; | |
639 | friend class Migrator; | |
640 | friend class MDCache; | |
641 | friend class StrayManager; | |
642 | friend class CDir; | |
643 | friend class CInodeExport; | |
7c673cae FG |
644 | |
645 | // --------------------------- | |
94b18763 | 646 | CInode() = delete; |
7c673cae FG |
647 | CInode(MDCache *c, bool auth=true, snapid_t f=2, snapid_t l=CEPH_NOSNAP) : |
648 | mdcache(c), | |
7c673cae | 649 | first(f), last(l), |
94b18763 FG |
650 | item_dirty(this), |
651 | item_caps(this), | |
652 | item_open_file(this), | |
653 | item_dirty_parent(this), | |
7c673cae FG |
654 | item_dirty_dirfrag_dir(this), |
655 | item_dirty_dirfrag_nest(this), | |
656 | item_dirty_dirfrag_dirfragtree(this), | |
7c673cae FG |
657 | pop(ceph_clock_now()), |
658 | versionlock(this, &versionlock_type), | |
659 | authlock(this, &authlock_type), | |
660 | linklock(this, &linklock_type), | |
661 | dirfragtreelock(this, &dirfragtreelock_type), | |
662 | filelock(this, &filelock_type), | |
663 | xattrlock(this, &xattrlock_type), | |
664 | snaplock(this, &snaplock_type), | |
665 | nestlock(this, &nestlock_type), | |
666 | flocklock(this, &flocklock_type), | |
94b18763 | 667 | policylock(this, &policylock_type) |
7c673cae | 668 | { |
7c673cae FG |
669 | if (auth) state_set(STATE_AUTH); |
670 | } | |
671 | ~CInode() override { | |
672 | close_dirfrags(); | |
673 | close_snaprealm(); | |
674 | clear_file_locks(); | |
675 | assert(num_projected_xattrs == 0); | |
676 | assert(num_projected_srnodes == 0); | |
677 | } | |
678 | ||
679 | ||
680 | // -- accessors -- | |
681 | bool is_root() const { return inode.ino == MDS_INO_ROOT; } | |
682 | bool is_stray() const { return MDS_INO_IS_STRAY(inode.ino); } | |
683 | mds_rank_t get_stray_owner() const { | |
684 | return (mds_rank_t)MDS_INO_STRAY_OWNER(inode.ino); | |
685 | } | |
686 | bool is_mdsdir() const { return MDS_INO_IS_MDSDIR(inode.ino); } | |
687 | bool is_base() const { return is_root() || is_mdsdir(); } | |
688 | bool is_system() const { return inode.ino < MDS_INO_SYSTEM_BASE; } | |
689 | bool is_normal() const { return !(is_base() || is_system() || is_stray()); } | |
690 | ||
691 | bool is_head() const { return last == CEPH_NOSNAP; } | |
692 | ||
693 | // note: this overloads MDSCacheObject | |
694 | bool is_ambiguous_auth() const { | |
695 | return state_test(STATE_AMBIGUOUSAUTH) || | |
696 | MDSCacheObject::is_ambiguous_auth(); | |
697 | } | |
698 | void set_ambiguous_auth() { | |
699 | state_set(STATE_AMBIGUOUSAUTH); | |
700 | } | |
701 | void clear_ambiguous_auth(std::list<MDSInternalContextBase*>& finished); | |
702 | void clear_ambiguous_auth(); | |
703 | ||
704 | inodeno_t ino() const { return inode.ino; } | |
705 | vinodeno_t vino() const { return vinodeno_t(inode.ino, last); } | |
706 | int d_type() const { return IFTODT(inode.mode); } | |
707 | ||
94b18763 | 708 | mempool_inode& get_inode() { return inode; } |
7c673cae FG |
709 | CDentry* get_parent_dn() { return parent; } |
710 | const CDentry* get_parent_dn() const { return parent; } | |
711 | const CDentry* get_projected_parent_dn() const { return !projected_parent.empty() ? projected_parent.back() : parent; } | |
712 | CDentry* get_projected_parent_dn() { return !projected_parent.empty() ? projected_parent.back() : parent; } | |
713 | CDir *get_parent_dir(); | |
714 | const CDir *get_projected_parent_dir() const; | |
715 | CDir *get_projected_parent_dir(); | |
716 | CInode *get_parent_inode(); | |
717 | ||
718 | bool is_lt(const MDSCacheObject *r) const override { | |
719 | const CInode *o = static_cast<const CInode*>(r); | |
720 | return ino() < o->ino() || | |
721 | (ino() == o->ino() && last < o->last); | |
722 | } | |
723 | ||
724 | // -- misc -- | |
725 | bool is_projected_ancestor_of(CInode *other); | |
726 | ||
727 | void make_path_string(std::string& s, bool projected=false, const CDentry *use_parent=NULL) const; | |
728 | void make_path(filepath& s, bool projected=false) const; | |
729 | void name_stray_dentry(std::string& dname); | |
730 | ||
731 | // -- dirtyness -- | |
732 | version_t get_version() const { return inode.version; } | |
733 | ||
734 | version_t pre_dirty(); | |
735 | void _mark_dirty(LogSegment *ls); | |
736 | void mark_dirty(version_t projected_dirv, LogSegment *ls); | |
737 | void mark_clean(); | |
738 | ||
739 | void store(MDSInternalContextBase *fin); | |
740 | void _stored(int r, version_t cv, Context *fin); | |
741 | /** | |
742 | * Flush a CInode to disk. This includes the backtrace, the parent | |
743 | * directory's link, and the Inode object itself (if a base directory). | |
744 | * @pre is_auth() on both the inode and its containing directory | |
745 | * @pre can_auth_pin() | |
746 | * @param fin The Context to call when the flush is completed. | |
747 | */ | |
748 | void flush(MDSInternalContextBase *fin); | |
749 | void fetch(MDSInternalContextBase *fin); | |
750 | void _fetched(bufferlist& bl, bufferlist& bl2, Context *fin); | |
751 | ||
752 | ||
753 | void build_backtrace(int64_t pool, inode_backtrace_t& bt); | |
754 | void store_backtrace(MDSInternalContextBase *fin, int op_prio=-1); | |
755 | void _stored_backtrace(int r, version_t v, Context *fin); | |
756 | void fetch_backtrace(Context *fin, bufferlist *backtrace); | |
757 | protected: | |
758 | /** | |
759 | * Return the pool ID where we currently write backtraces for | |
760 | * this inode (in addition to inode.old_pools) | |
761 | * | |
762 | * @returns a pool ID >=0 | |
763 | */ | |
764 | int64_t get_backtrace_pool() const; | |
765 | public: | |
28e407b8 | 766 | void mark_dirty_parent(LogSegment *ls, bool dirty_pool=false); |
7c673cae FG |
767 | void clear_dirty_parent(); |
768 | void verify_diri_backtrace(bufferlist &bl, int err); | |
769 | bool is_dirty_parent() { return state_test(STATE_DIRTYPARENT); } | |
770 | bool is_dirty_pool() { return state_test(STATE_DIRTYPOOL); } | |
771 | ||
772 | void encode_snap_blob(bufferlist &bl); | |
773 | void decode_snap_blob(bufferlist &bl); | |
774 | void encode_store(bufferlist& bl, uint64_t features); | |
775 | void decode_store(bufferlist::iterator& bl); | |
776 | ||
b32b8144 | 777 | void encode_replica(mds_rank_t rep, bufferlist& bl, uint64_t features, bool need_recover) { |
7c673cae FG |
778 | assert(is_auth()); |
779 | ||
780 | // relax locks? | |
781 | if (!is_replicated()) | |
782 | replicate_relax_locks(); | |
783 | ||
784 | __u32 nonce = add_replica(rep); | |
785 | ::encode(nonce, bl); | |
786 | ||
787 | _encode_base(bl, features); | |
b32b8144 | 788 | _encode_locks_state_for_replica(bl, need_recover); |
7c673cae FG |
789 | } |
790 | void decode_replica(bufferlist::iterator& p, bool is_new) { | |
791 | __u32 nonce; | |
792 | ::decode(nonce, p); | |
793 | replica_nonce = nonce; | |
794 | ||
795 | _decode_base(p); | |
796 | _decode_locks_state(p, is_new); | |
797 | } | |
798 | ||
799 | // -- waiting -- | |
800 | protected: | |
94b18763 | 801 | mempool::mds_co::compact_map<frag_t, std::list<MDSInternalContextBase*> > waiting_on_dir; |
7c673cae FG |
802 | public: |
803 | void add_dir_waiter(frag_t fg, MDSInternalContextBase *c); | |
804 | void take_dir_waiting(frag_t fg, std::list<MDSInternalContextBase*>& ls); | |
805 | bool is_waiting_for_dir(frag_t fg) { | |
806 | return waiting_on_dir.count(fg); | |
807 | } | |
808 | void add_waiter(uint64_t tag, MDSInternalContextBase *c) override; | |
809 | void take_waiting(uint64_t tag, std::list<MDSInternalContextBase*>& ls) override; | |
810 | ||
811 | // -- encode/decode helpers -- | |
812 | void _encode_base(bufferlist& bl, uint64_t features); | |
813 | void _decode_base(bufferlist::iterator& p); | |
814 | void _encode_locks_full(bufferlist& bl); | |
815 | void _decode_locks_full(bufferlist::iterator& p); | |
b32b8144 | 816 | void _encode_locks_state_for_replica(bufferlist& bl, bool need_recover); |
7c673cae FG |
817 | void _encode_locks_state_for_rejoin(bufferlist& bl, int rep); |
818 | void _decode_locks_state(bufferlist::iterator& p, bool is_new); | |
819 | void _decode_locks_rejoin(bufferlist::iterator& p, std::list<MDSInternalContextBase*>& waiters, | |
b32b8144 | 820 | std::list<SimpleLock*>& eval_locks, bool survivor); |
7c673cae FG |
821 | |
822 | // -- import/export -- | |
823 | void encode_export(bufferlist& bl); | |
824 | void finish_export(utime_t now); | |
825 | void abort_export() { | |
826 | put(PIN_TEMPEXPORTING); | |
827 | assert(state_test(STATE_EXPORTINGCAPS)); | |
828 | state_clear(STATE_EXPORTINGCAPS); | |
829 | put(PIN_EXPORTINGCAPS); | |
830 | } | |
831 | void decode_import(bufferlist::iterator& p, LogSegment *ls); | |
832 | ||
833 | ||
834 | // for giving to clients | |
835 | int encode_inodestat(bufferlist& bl, Session *session, SnapRealm *realm, | |
836 | snapid_t snapid=CEPH_NOSNAP, unsigned max_bytes=0, | |
837 | int getattr_wants=0); | |
838 | void encode_cap_message(MClientCaps *m, Capability *cap); | |
839 | ||
840 | ||
841 | // -- locks -- | |
842 | public: | |
843 | static LockType versionlock_type; | |
844 | static LockType authlock_type; | |
845 | static LockType linklock_type; | |
846 | static LockType dirfragtreelock_type; | |
847 | static LockType filelock_type; | |
848 | static LockType xattrlock_type; | |
849 | static LockType snaplock_type; | |
850 | static LockType nestlock_type; | |
851 | static LockType flocklock_type; | |
852 | static LockType policylock_type; | |
853 | ||
94b18763 | 854 | // FIXME not part of mempool |
7c673cae FG |
855 | LocalLock versionlock; |
856 | SimpleLock authlock; | |
857 | SimpleLock linklock; | |
858 | ScatterLock dirfragtreelock; | |
859 | ScatterLock filelock; | |
860 | SimpleLock xattrlock; | |
861 | SimpleLock snaplock; | |
862 | ScatterLock nestlock; | |
863 | SimpleLock flocklock; | |
864 | SimpleLock policylock; | |
865 | ||
866 | SimpleLock* get_lock(int type) override { | |
867 | switch (type) { | |
868 | case CEPH_LOCK_IFILE: return &filelock; | |
869 | case CEPH_LOCK_IAUTH: return &authlock; | |
870 | case CEPH_LOCK_ILINK: return &linklock; | |
871 | case CEPH_LOCK_IDFT: return &dirfragtreelock; | |
872 | case CEPH_LOCK_IXATTR: return &xattrlock; | |
873 | case CEPH_LOCK_ISNAP: return &snaplock; | |
874 | case CEPH_LOCK_INEST: return &nestlock; | |
875 | case CEPH_LOCK_IFLOCK: return &flocklock; | |
876 | case CEPH_LOCK_IPOLICY: return &policylock; | |
877 | } | |
878 | return 0; | |
879 | } | |
880 | ||
881 | void set_object_info(MDSCacheObjectInfo &info) override; | |
882 | void encode_lock_state(int type, bufferlist& bl) override; | |
883 | void decode_lock_state(int type, bufferlist& bl) override; | |
884 | ||
885 | void _finish_frag_update(CDir *dir, MutationRef& mut); | |
886 | ||
887 | void clear_dirty_scattered(int type) override; | |
888 | bool is_dirty_scattered(); | |
889 | void clear_scatter_dirty(); // on rejoin ack | |
890 | ||
891 | void start_scatter(ScatterLock *lock); | |
892 | void finish_scatter_update(ScatterLock *lock, CDir *dir, | |
893 | version_t inode_version, version_t dir_accounted_version); | |
894 | void finish_scatter_gather_update(int type); | |
895 | void finish_scatter_gather_update_accounted(int type, MutationRef& mut, EMetaBlob *metablob); | |
896 | ||
897 | // -- snap -- | |
898 | void open_snaprealm(bool no_split=false); | |
899 | void close_snaprealm(bool no_join=false); | |
900 | SnapRealm *find_snaprealm() const; | |
901 | void encode_snap(bufferlist& bl); | |
902 | void decode_snap(bufferlist::iterator& p); | |
903 | ||
904 | // -- caps -- (new) | |
905 | // client caps | |
94b18763 | 906 | client_t loner_cap = -1, want_loner_cap = -1; |
7c673cae FG |
907 | |
908 | client_t get_loner() const { return loner_cap; } | |
909 | client_t get_wanted_loner() const { return want_loner_cap; } | |
910 | ||
911 | // this is the loner state our locks should aim for | |
912 | client_t get_target_loner() const { | |
913 | if (loner_cap == want_loner_cap) | |
914 | return loner_cap; | |
915 | else | |
916 | return -1; | |
917 | } | |
918 | ||
919 | client_t calc_ideal_loner(); | |
7c673cae | 920 | void set_loner_cap(client_t l); |
b32b8144 FG |
921 | bool choose_ideal_loner(); |
922 | bool try_set_loner(); | |
7c673cae FG |
923 | bool try_drop_loner(); |
924 | ||
925 | // choose new lock state during recovery, based on issued caps | |
926 | void choose_lock_state(SimpleLock *lock, int allissued); | |
927 | void choose_lock_states(int dirty_caps); | |
928 | ||
929 | int count_nonstale_caps() { | |
930 | int n = 0; | |
94b18763 FG |
931 | for (const auto &p : client_caps) { |
932 | if (!p.second->is_stale()) | |
7c673cae | 933 | n++; |
94b18763 | 934 | } |
7c673cae FG |
935 | return n; |
936 | } | |
937 | bool multiple_nonstale_caps() { | |
938 | int n = 0; | |
94b18763 FG |
939 | for (const auto &p : client_caps) { |
940 | if (!p.second->is_stale()) { | |
7c673cae FG |
941 | if (n) |
942 | return true; | |
943 | n++; | |
944 | } | |
94b18763 | 945 | } |
7c673cae FG |
946 | return false; |
947 | } | |
948 | ||
949 | bool is_any_caps() { return !client_caps.empty(); } | |
950 | bool is_any_nonstale_caps() { return count_nonstale_caps(); } | |
951 | ||
94b18763 FG |
952 | const mempool::mds_co::compact_map<int32_t,int32_t>& get_mds_caps_wanted() const { return mds_caps_wanted; } |
953 | mempool::mds_co::compact_map<int32_t,int32_t>& get_mds_caps_wanted() { return mds_caps_wanted; } | |
7c673cae | 954 | |
94b18763 | 955 | const cap_map& get_client_caps() const { return client_caps; } |
7c673cae FG |
956 | Capability *get_client_cap(client_t client) { |
957 | auto client_caps_entry = client_caps.find(client); | |
958 | if (client_caps_entry != client_caps.end()) | |
959 | return client_caps_entry->second; | |
960 | return 0; | |
961 | } | |
962 | int get_client_cap_pending(client_t client) const { | |
963 | auto client_caps_entry = client_caps.find(client); | |
964 | if (client_caps_entry != client_caps.end()) { | |
965 | return client_caps_entry->second->pending(); | |
966 | } else { | |
967 | return 0; | |
968 | } | |
969 | } | |
970 | ||
971 | Capability *add_client_cap(client_t client, Session *session, SnapRealm *conrealm=0); | |
972 | void remove_client_cap(client_t client); | |
973 | void move_to_realm(SnapRealm *realm); | |
974 | ||
975 | Capability *reconnect_cap(client_t client, const cap_reconnect_t& icr, Session *session); | |
976 | void clear_client_caps_after_export(); | |
977 | void export_client_caps(std::map<client_t,Capability::Export>& cl); | |
978 | ||
979 | // caps allowed | |
980 | int get_caps_liked() const; | |
981 | int get_caps_allowed_ever() const; | |
982 | int get_caps_allowed_by_type(int type) const; | |
983 | int get_caps_careful() const; | |
984 | int get_xlocker_mask(client_t client) const; | |
94b18763 | 985 | int get_caps_allowed_for_client(Session *s, mempool_inode *file_i) const; |
7c673cae FG |
986 | |
987 | // caps issued, wanted | |
988 | int get_caps_issued(int *ploner = 0, int *pother = 0, int *pxlocker = 0, | |
989 | int shift = 0, int mask = -1); | |
990 | bool is_any_caps_wanted() const; | |
991 | int get_caps_wanted(int *ploner = 0, int *pother = 0, int shift = 0, int mask = -1) const; | |
992 | bool issued_caps_need_gather(SimpleLock *lock); | |
993 | void replicate_relax_locks(); | |
994 | ||
995 | // -- authority -- | |
996 | mds_authority_t authority() const override; | |
997 | ||
998 | // -- auth pins -- | |
999 | void adjust_nested_auth_pins(int a, void *by); | |
1000 | bool can_auth_pin() const override; | |
1001 | void auth_pin(void *by) override; | |
1002 | void auth_unpin(void *by) override; | |
1003 | ||
1004 | // -- freeze -- | |
1005 | bool is_freezing_inode() const { return state_test(STATE_FREEZING); } | |
1006 | bool is_frozen_inode() const { return state_test(STATE_FROZEN); } | |
1007 | bool is_frozen_auth_pin() const { return state_test(STATE_FROZENAUTHPIN); } | |
1008 | bool is_frozen() const override; | |
1009 | bool is_frozen_dir() const; | |
1010 | bool is_freezing() const override; | |
1011 | ||
1012 | /* Freeze the inode. auth_pin_allowance lets the caller account for any | |
1013 | * auth_pins it is itself holding/responsible for. */ | |
1014 | bool freeze_inode(int auth_pin_allowance=0); | |
1015 | void unfreeze_inode(std::list<MDSInternalContextBase*>& finished); | |
1016 | void unfreeze_inode(); | |
1017 | ||
1018 | void freeze_auth_pin(); | |
1019 | void unfreeze_auth_pin(); | |
1020 | ||
1021 | // -- reference counting -- | |
1022 | void bad_put(int by) override { | |
1023 | generic_dout(0) << " bad put " << *this << " by " << by << " " << pin_name(by) << " was " << ref | |
1024 | #ifdef MDS_REF_SET | |
1025 | << " (" << ref_map << ")" | |
1026 | #endif | |
1027 | << dendl; | |
1028 | #ifdef MDS_REF_SET | |
1029 | assert(ref_map[by] > 0); | |
1030 | #endif | |
1031 | assert(ref > 0); | |
1032 | } | |
1033 | void bad_get(int by) override { | |
1034 | generic_dout(0) << " bad get " << *this << " by " << by << " " << pin_name(by) << " was " << ref | |
1035 | #ifdef MDS_REF_SET | |
1036 | << " (" << ref_map << ")" | |
1037 | #endif | |
1038 | << dendl; | |
1039 | #ifdef MDS_REF_SET | |
1040 | assert(ref_map[by] >= 0); | |
1041 | #endif | |
1042 | } | |
1043 | void first_get() override; | |
1044 | void last_put() override; | |
1045 | void _put() override; | |
1046 | ||
1047 | ||
1048 | // -- hierarchy stuff -- | |
1049 | public: | |
1050 | void set_primary_parent(CDentry *p) { | |
94b18763 FG |
1051 | assert(parent == 0 || |
1052 | g_conf->get_val<bool>("mds_hack_allow_loading_invalid_metadata")); | |
7c673cae FG |
1053 | parent = p; |
1054 | } | |
1055 | void remove_primary_parent(CDentry *dn) { | |
1056 | assert(dn == parent); | |
1057 | parent = 0; | |
1058 | } | |
1059 | void add_remote_parent(CDentry *p); | |
1060 | void remove_remote_parent(CDentry *p); | |
1061 | int num_remote_parents() { | |
1062 | return remote_parents.size(); | |
1063 | } | |
1064 | ||
1065 | void push_projected_parent(CDentry *dn) { | |
1066 | projected_parent.push_back(dn); | |
1067 | } | |
1068 | void pop_projected_parent() { | |
1069 | assert(projected_parent.size()); | |
1070 | parent = projected_parent.front(); | |
1071 | projected_parent.pop_front(); | |
1072 | } | |
1073 | ||
7c673cae | 1074 | public: |
31f18b77 | 1075 | void maybe_export_pin(bool update=false); |
7c673cae FG |
1076 | void set_export_pin(mds_rank_t rank); |
1077 | mds_rank_t get_export_pin(bool inherit=true) const; | |
1078 | bool is_exportable(mds_rank_t dest) const; | |
1079 | ||
1080 | void print(ostream& out) override; | |
1081 | void dump(Formatter *f) const; | |
1082 | ||
1083 | /** | |
1084 | * @defgroup Scrubbing and fsck | |
1085 | * @{ | |
1086 | */ | |
1087 | ||
1088 | /** | |
1089 | * Report the results of validation against a particular inode. | |
1090 | * Each member is a pair of bools. | |
1091 | * <member>.first represents if validation was performed against the member. | |
1092 | * <member>.second represents if the member passed validation. | |
1093 | * performed_validation is set to true if the validation was actually | |
1094 | * run. It might not be run if, for instance, the inode is marked as dirty. | |
1095 | * passed_validation is set to true if everything that was checked | |
1096 | * passed its validation. | |
1097 | */ | |
1098 | struct validated_data { | |
1099 | template<typename T>struct member_status { | |
b32b8144 FG |
1100 | bool checked = false; |
1101 | bool passed = false; | |
1102 | bool repaired = false; | |
1103 | int ondisk_read_retval = 0; | |
7c673cae FG |
1104 | T ondisk_value; |
1105 | T memory_value; | |
1106 | std::stringstream error_str; | |
7c673cae FG |
1107 | }; |
1108 | ||
94b18763 FG |
1109 | bool performed_validation = false; |
1110 | bool passed_validation = false; | |
7c673cae FG |
1111 | |
1112 | struct raw_stats_t { | |
1113 | frag_info_t dirstat; | |
1114 | nest_info_t rstat; | |
1115 | }; | |
1116 | ||
1117 | member_status<inode_backtrace_t> backtrace; | |
94b18763 | 1118 | member_status<mempool_inode> inode; // XXX should not be in mempool; wait for pmr |
7c673cae FG |
1119 | member_status<raw_stats_t> raw_stats; |
1120 | ||
94b18763 | 1121 | validated_data() {} |
7c673cae FG |
1122 | |
1123 | void dump(Formatter *f) const; | |
b32b8144 FG |
1124 | |
1125 | bool all_damage_repaired() const; | |
7c673cae FG |
1126 | }; |
1127 | ||
1128 | /** | |
1129 | * Validate that the on-disk state of an inode matches what | |
1130 | * we expect from our memory state. Currently this checks that: | |
1131 | * 1) The backtrace associated with the file data exists and is correct | |
1132 | * 2) For directories, the actual inode metadata matches our memory state, | |
1133 | * 3) For directories, the rstats match | |
1134 | * | |
1135 | * @param results A freshly-created validated_data struct, with values set | |
1136 | * as described in the struct documentation. | |
1137 | * @param mdr The request to be responded to upon the completion of the | |
1138 | * validation (or NULL) | |
1139 | * @param fin Context to call back on completion (or NULL) | |
1140 | */ | |
1141 | void validate_disk_state(validated_data *results, | |
1142 | MDSInternalContext *fin); | |
1143 | static void dump_validation_results(const validated_data& results, | |
1144 | Formatter *f); | |
1145 | private: | |
1146 | bool _validate_disk_state(class ValidationContinuation *c, | |
1147 | int rval, int stage); | |
1148 | friend class ValidationContinuation; | |
1149 | /** @} Scrubbing and fsck */ | |
1150 | }; | |
1151 | ||
1152 | ostream& operator<<(ostream& out, const CInode::scrub_stamp_info_t& si); | |
1153 | ||
1154 | #undef dout_context | |
1155 | #endif |