]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | ||
16 | ||
17 | #ifndef CEPH_CINODE_H | |
18 | #define CEPH_CINODE_H | |
19 | ||
20 | #include "common/config.h" | |
21 | #include "include/counter.h" | |
22 | #include "include/elist.h" | |
23 | #include "include/types.h" | |
24 | #include "include/lru.h" | |
25 | #include "include/compact_set.h" | |
26 | ||
27 | #include "MDSCacheObject.h" | |
28 | #include "flock.h" | |
29 | ||
30 | #include "CDentry.h" | |
31 | #include "SimpleLock.h" | |
32 | #include "ScatterLock.h" | |
33 | #include "LocalLock.h" | |
34 | #include "Capability.h" | |
35 | #include "SnapRealm.h" | |
36 | #include "Mutation.h" | |
37 | ||
38 | #include <list> | |
39 | #include <set> | |
40 | #include <map> | |
41 | ||
42 | #define dout_context g_ceph_context | |
43 | ||
44 | class Context; | |
45 | class CDentry; | |
46 | class CDir; | |
47 | class Message; | |
48 | class CInode; | |
49 | class MDCache; | |
50 | class LogSegment; | |
51 | struct SnapRealm; | |
52 | class Session; | |
53 | class MClientCaps; | |
54 | struct ObjectOperation; | |
55 | class EMetaBlob; | |
56 | ||
57 | ||
58 | ostream& operator<<(ostream& out, const CInode& in); | |
59 | ||
/**
 * One entry of the cinode_lock_info[] table: a lock identifier paired with
 * a capability bitmask.
 *
 * NOTE(review): the field meanings below are inferred from the names only;
 * confirm against the table's definition.
 */
struct cinode_lock_info_t {
  int lock;      // lock identifier
  int wr_caps;   // presumably the write caps tied to that lock -- TODO confirm
};
64 | ||
65 | extern cinode_lock_info_t cinode_lock_info[]; | |
66 | extern int num_cinode_locks; | |
67 | ||
68 | ||
69 | /** | |
70 | * Base class for CInode, containing the backing store data and | |
71 | * serialization methods. This exists so that we can read and | |
72 | * handle CInodes from the backing store without hitting all | |
73 | * the business logic in CInode proper. | |
74 | */ | |
75 | class InodeStoreBase { | |
76 | public: | |
77 | inode_t inode; // the inode itself | |
78 | std::string symlink; // symlink dest, if symlink | |
79 | std::map<std::string, bufferptr> xattrs; | |
80 | fragtree_t dirfragtree; // dir frag tree, if any. always consistent with our dirfrag map. | |
81 | compact_map<snapid_t, old_inode_t> old_inodes; // key = last, value.first = first | |
82 | snapid_t oldest_snap; | |
83 | damage_flags_t damage_flags; | |
84 | ||
85 | InodeStoreBase() : oldest_snap(CEPH_NOSNAP), damage_flags(0) { } | |
86 | ||
87 | /* Helpers */ | |
88 | bool is_file() const { return inode.is_file(); } | |
89 | bool is_symlink() const { return inode.is_symlink(); } | |
90 | bool is_dir() const { return inode.is_dir(); } | |
91 | static object_t get_object_name(inodeno_t ino, frag_t fg, const char *suffix); | |
92 | ||
93 | /* Full serialization for use in ".inode" root inode objects */ | |
94 | void encode(bufferlist &bl, uint64_t features, const bufferlist *snap_blob=NULL) const; | |
95 | void decode(bufferlist::iterator &bl, bufferlist& snap_blob); | |
96 | ||
97 | /* Serialization without ENCODE_START/FINISH blocks for use embedded in dentry */ | |
98 | void encode_bare(bufferlist &bl, uint64_t features, const bufferlist *snap_blob=NULL) const; | |
99 | void decode_bare(bufferlist::iterator &bl, bufferlist &snap_blob, __u8 struct_v=5); | |
100 | ||
101 | /* For test/debug output */ | |
102 | void dump(Formatter *f) const; | |
103 | ||
104 | /* For use by offline tools */ | |
105 | __u32 hash_dentry_name(const std::string &dn); | |
106 | frag_t pick_dirfrag(const std::string &dn); | |
107 | }; | |
108 | ||
109 | class InodeStore : public InodeStoreBase { | |
110 | public: | |
111 | bufferlist snap_blob; // Encoded copy of SnapRealm, because we can't | |
112 | // rehydrate it without full MDCache | |
113 | void encode(bufferlist &bl, uint64_t features) const { | |
114 | InodeStoreBase::encode(bl, features, &snap_blob); | |
115 | } | |
116 | void decode(bufferlist::iterator &bl) { | |
117 | InodeStoreBase::decode(bl, snap_blob); | |
118 | } | |
119 | void encode_bare(bufferlist &bl, uint64_t features) const { | |
120 | InodeStoreBase::encode_bare(bl, features, &snap_blob); | |
121 | } | |
122 | void decode_bare(bufferlist::iterator &bl) { | |
123 | InodeStoreBase::decode_bare(bl, snap_blob); | |
124 | } | |
125 | ||
126 | static void generate_test_instances(std::list<InodeStore*>& ls); | |
127 | }; | |
128 | WRITE_CLASS_ENCODER_FEATURES(InodeStore) | |
129 | ||
130 | // cached inode wrapper | |
131 | class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CInode> { | |
132 | public: | |
181888fb | 133 | MEMPOOL_CLASS_HELPERS(); |
7c673cae FG |
134 | // -- pins -- |
135 | static const int PIN_DIRFRAG = -1; | |
136 | static const int PIN_CAPS = 2; // client caps | |
137 | static const int PIN_IMPORTING = -4; // importing | |
138 | static const int PIN_OPENINGDIR = 7; | |
139 | static const int PIN_REMOTEPARENT = 8; | |
140 | static const int PIN_BATCHOPENJOURNAL = 9; | |
141 | static const int PIN_SCATTERED = 10; | |
142 | static const int PIN_STICKYDIRS = 11; | |
143 | //static const int PIN_PURGING = -12; | |
144 | static const int PIN_FREEZING = 13; | |
145 | static const int PIN_FROZEN = 14; | |
146 | static const int PIN_IMPORTINGCAPS = -15; | |
147 | static const int PIN_PASTSNAPPARENT = -16; | |
148 | static const int PIN_OPENINGSNAPPARENTS = 17; | |
149 | static const int PIN_TRUNCATING = 18; | |
150 | static const int PIN_STRAY = 19; // we pin our stray inode while active | |
151 | static const int PIN_NEEDSNAPFLUSH = 20; | |
152 | static const int PIN_DIRTYRSTAT = 21; | |
153 | static const int PIN_EXPORTINGCAPS = 22; | |
154 | static const int PIN_DIRTYPARENT = 23; | |
155 | static const int PIN_DIRWAITER = 24; | |
156 | static const int PIN_SCRUBQUEUE = 25; | |
157 | ||
158 | const char *pin_name(int p) const override { | |
159 | switch (p) { | |
160 | case PIN_DIRFRAG: return "dirfrag"; | |
161 | case PIN_CAPS: return "caps"; | |
162 | case PIN_IMPORTING: return "importing"; | |
163 | case PIN_OPENINGDIR: return "openingdir"; | |
164 | case PIN_REMOTEPARENT: return "remoteparent"; | |
165 | case PIN_BATCHOPENJOURNAL: return "batchopenjournal"; | |
166 | case PIN_SCATTERED: return "scattered"; | |
167 | case PIN_STICKYDIRS: return "stickydirs"; | |
168 | //case PIN_PURGING: return "purging"; | |
169 | case PIN_FREEZING: return "freezing"; | |
170 | case PIN_FROZEN: return "frozen"; | |
171 | case PIN_IMPORTINGCAPS: return "importingcaps"; | |
172 | case PIN_EXPORTINGCAPS: return "exportingcaps"; | |
173 | case PIN_PASTSNAPPARENT: return "pastsnapparent"; | |
174 | case PIN_OPENINGSNAPPARENTS: return "openingsnapparents"; | |
175 | case PIN_TRUNCATING: return "truncating"; | |
176 | case PIN_STRAY: return "stray"; | |
177 | case PIN_NEEDSNAPFLUSH: return "needsnapflush"; | |
178 | case PIN_DIRTYRSTAT: return "dirtyrstat"; | |
179 | case PIN_DIRTYPARENT: return "dirtyparent"; | |
180 | case PIN_DIRWAITER: return "dirwaiter"; | |
181 | case PIN_SCRUBQUEUE: return "scrubqueue"; | |
182 | default: return generic_pin_name(p); | |
183 | } | |
184 | } | |
185 | ||
186 | // -- state -- | |
187 | static const int STATE_EXPORTING = (1<<2); // on nonauth bystander. | |
188 | static const int STATE_OPENINGDIR = (1<<5); | |
189 | static const int STATE_FREEZING = (1<<7); | |
190 | static const int STATE_FROZEN = (1<<8); | |
191 | static const int STATE_AMBIGUOUSAUTH = (1<<9); | |
192 | static const int STATE_EXPORTINGCAPS = (1<<10); | |
193 | static const int STATE_NEEDSRECOVER = (1<<11); | |
194 | static const int STATE_RECOVERING = (1<<12); | |
195 | static const int STATE_PURGING = (1<<13); | |
196 | static const int STATE_DIRTYPARENT = (1<<14); | |
197 | static const int STATE_DIRTYRSTAT = (1<<15); | |
198 | static const int STATE_STRAYPINNED = (1<<16); | |
199 | static const int STATE_FROZENAUTHPIN = (1<<17); | |
200 | static const int STATE_DIRTYPOOL = (1<<18); | |
201 | static const int STATE_REPAIRSTATS = (1<<19); | |
202 | static const int STATE_MISSINGOBJS = (1<<20); | |
203 | static const int STATE_EVALSTALECAPS = (1<<21); | |
31f18b77 | 204 | static const int STATE_QUEUEDEXPORTPIN = (1<<22); |
7c673cae FG |
205 | // orphan inode needs notification of releasing reference |
206 | static const int STATE_ORPHAN = STATE_NOTIFYREF; | |
207 | ||
208 | static const int MASK_STATE_EXPORTED = | |
209 | (STATE_DIRTY|STATE_NEEDSRECOVER|STATE_DIRTYPARENT|STATE_DIRTYPOOL); | |
210 | static const int MASK_STATE_EXPORT_KEPT = | |
211 | (STATE_FROZEN|STATE_AMBIGUOUSAUTH|STATE_EXPORTINGCAPS); | |
212 | ||
213 | // -- waiters -- | |
214 | static const uint64_t WAIT_DIR = (1<<0); | |
215 | static const uint64_t WAIT_FROZEN = (1<<1); | |
216 | static const uint64_t WAIT_TRUNC = (1<<2); | |
217 | static const uint64_t WAIT_FLOCK = (1<<3); | |
218 | ||
219 | static const uint64_t WAIT_ANY_MASK = (uint64_t)(-1); | |
220 | ||
221 | // misc | |
222 | static const unsigned EXPORT_NONCE = 1; // nonce given to replicas created by export | |
223 | ||
224 | ostream& print_db_line_prefix(ostream& out) override; | |
225 | ||
226 | public: | |
227 | MDCache *mdcache; | |
228 | ||
229 | SnapRealm *snaprealm; | |
230 | SnapRealm *containing_realm; | |
231 | snapid_t first, last; | |
232 | compact_set<snapid_t> dirty_old_rstats; | |
233 | ||
234 | class scrub_stamp_info_t { | |
235 | public: | |
236 | /// version we started our latest scrub (whether in-progress or finished) | |
237 | version_t scrub_start_version; | |
238 | /// time we started our latest scrub (whether in-progress or finished) | |
239 | utime_t scrub_start_stamp; | |
240 | /// version we started our most recent finished scrub | |
241 | version_t last_scrub_version; | |
242 | /// time we started our most recent finished scrub | |
243 | utime_t last_scrub_stamp; | |
244 | scrub_stamp_info_t() : scrub_start_version(0), last_scrub_version(0) {} | |
245 | void reset() { | |
246 | scrub_start_version = 0; | |
247 | scrub_start_stamp = utime_t(); | |
248 | } | |
249 | }; | |
250 | ||
251 | class scrub_info_t : public scrub_stamp_info_t { | |
252 | public: | |
253 | CDentry *scrub_parent; | |
254 | MDSInternalContextBase *on_finish; | |
255 | ||
256 | bool last_scrub_dirty; /// are our stamps dirty with respect to disk state? | |
257 | bool scrub_in_progress; /// are we currently scrubbing? | |
258 | bool children_scrubbed; | |
259 | ||
260 | /// my own (temporary) stamps and versions for each dirfrag we have | |
261 | std::map<frag_t, scrub_stamp_info_t> dirfrag_stamps; | |
262 | ||
263 | ScrubHeaderRefConst header; | |
264 | ||
265 | scrub_info_t() : scrub_stamp_info_t(), | |
266 | scrub_parent(NULL), on_finish(NULL), | |
267 | last_scrub_dirty(false), scrub_in_progress(false), | |
268 | children_scrubbed(false) {} | |
269 | }; | |
270 | ||
271 | const scrub_info_t *scrub_info() const{ | |
272 | if (!scrub_infop) | |
273 | scrub_info_create(); | |
274 | return scrub_infop; | |
275 | } | |
276 | ||
277 | bool scrub_is_in_progress() const { | |
278 | return (scrub_infop && scrub_infop->scrub_in_progress); | |
279 | } | |
280 | /** | |
281 | * Start scrubbing on this inode. That could be very short if it's | |
282 | * a file, or take a long time if we're recursively scrubbing a directory. | |
283 | * @pre It is not currently scrubbing | |
284 | * @post it has set up internal scrubbing state | |
285 | * @param scrub_version What version are we scrubbing at (usually, parent | |
286 | * directory's get_projected_version()) | |
287 | */ | |
288 | void scrub_initialize(CDentry *scrub_parent, | |
289 | const ScrubHeaderRefConst& header, | |
290 | MDSInternalContextBase *f); | |
291 | /** | |
292 | * Get the next dirfrag to scrub. Gives you a frag_t in output param which | |
293 | * you must convert to a CDir (and possibly load off disk). | |
294 | * @param out_dirfrag A pointer to frag_t, will be filled in with the next dirfrag to | |
295 | * scrub if there is one. | |
296 | * @returns 0 on success, you should scrub the passed-out frag_t right now; | |
297 | * ENOENT: There are no remaining dirfrags to scrub | |
298 | * <0 There was some other error (It will return -ENOTDIR if not a directory) | |
299 | */ | |
300 | int scrub_dirfrag_next(frag_t* out_dirfrag); | |
301 | /** | |
302 | * Get the currently scrubbing dirfrags. When returned, the | |
303 | * passed-in list will be filled in with all frag_ts which have | |
304 | * been returned from scrub_dirfrag_next but not sent back | |
305 | * via scrub_dirfrag_finished. | |
306 | */ | |
307 | void scrub_dirfrags_scrubbing(list<frag_t> *out_dirfrags); | |
308 | /** | |
309 | * Report to the CInode that a dirfrag it owns has been scrubbed. Call | |
310 | * this for every frag_t returned from scrub_dirfrag_next(). | |
311 | * @param dirfrag The frag_t that was scrubbed | |
312 | */ | |
313 | void scrub_dirfrag_finished(frag_t dirfrag); | |
314 | /** | |
315 | * Call this once the scrub has been completed, whether it's a full | |
316 | * recursive scrub on a directory or simply the data on a file (or | |
317 | * anything in between). | |
318 | * @param c An out param which is filled in with a Context* that must | |
319 | * be complete()ed. | |
320 | */ | |
321 | void scrub_finished(MDSInternalContextBase **c); | |
322 | /** | |
323 | * Report to the CInode that all dirfrags it owns have been scrubbed. | |
324 | */ | |
325 | void scrub_children_finished() { | |
326 | scrub_infop->children_scrubbed = true; | |
327 | } | |
328 | void scrub_set_finisher(MDSInternalContextBase *c) { | |
329 | assert(!scrub_infop->on_finish); | |
330 | scrub_infop->on_finish = c; | |
331 | } | |
332 | ||
333 | private: | |
334 | /** | |
335 | * Create a scrub_info_t struct for the scrub_infop pointer. | |
336 | */ | |
337 | void scrub_info_create() const; | |
338 | /** | |
339 | * Delete the scrub_info_t struct if it does not hold any useful data | |
340 | */ | |
341 | void scrub_maybe_delete_info(); | |
342 | public: | |
343 | ||
344 | bool is_multiversion() const { | |
345 | return snaprealm || // other snaprealms will link to me | |
346 | inode.is_dir() || // links to me in other snaps | |
347 | inode.nlink > 1 || // there are remote links, possibly snapped, that will need to find me | |
348 | !old_inodes.empty(); // once multiversion, always multiversion. until old_inodes gets cleaned out. | |
349 | } | |
350 | snapid_t get_oldest_snap(); | |
351 | ||
352 | uint64_t last_journaled; // log offset for the last time i was journaled | |
353 | //loff_t last_open_journaled; // log offset for the last journaled EOpen | |
354 | utime_t last_dirstat_prop; | |
355 | ||
356 | ||
357 | // list item node for when we have unpropagated rstat data | |
358 | elist<CInode*>::item dirty_rstat_item; | |
359 | ||
360 | bool is_dirty_rstat() { | |
361 | return state_test(STATE_DIRTYRSTAT); | |
362 | } | |
363 | void mark_dirty_rstat(); | |
364 | void clear_dirty_rstat(); | |
365 | ||
366 | //bool hack_accessed; | |
367 | //utime_t hack_load_stamp; | |
368 | ||
369 | /** | |
370 | * Projection methods, used to store inode changes until they have been journaled, | |
371 | * at which point they are popped. | |
372 | * Usage: | |
373 | * project_inode as needed. If you're also projecting xattrs, pass | |
374 | * in an xattr map (by pointer), then edit the map. | |
375 | * If you're also projecting the snaprealm, call project_snaprealm after | |
376 | * calling project_inode, and modify the snaprealm as necessary. | |
377 | * | |
378 | * Then, journal. Once journaling is done, pop_and_dirty_projected_inode. | |
379 | * This function will take care of the inode itself, the xattrs, and the snaprealm. | |
380 | */ | |
381 | ||
382 | struct projected_inode_t { | |
383 | inode_t *inode; | |
384 | std::map<std::string,bufferptr> *xattrs; | |
385 | sr_t *snapnode; | |
386 | ||
387 | projected_inode_t() | |
388 | : inode(NULL), xattrs(NULL), snapnode(NULL) {} | |
389 | projected_inode_t(inode_t *in, sr_t *sn) | |
390 | : inode(in), xattrs(NULL), snapnode(sn) {} | |
391 | projected_inode_t(inode_t *in, std::map<std::string, bufferptr> *xp = NULL, sr_t *sn = NULL) | |
392 | : inode(in), xattrs(xp), snapnode(sn) {} | |
393 | }; | |
394 | std::list<projected_inode_t*> projected_nodes; // projected values (only defined while dirty) | |
395 | int num_projected_xattrs; | |
396 | int num_projected_srnodes; | |
397 | ||
398 | inode_t *project_inode(std::map<std::string,bufferptr> *px=0); | |
399 | void pop_and_dirty_projected_inode(LogSegment *ls); | |
400 | ||
401 | projected_inode_t *get_projected_node() { | |
402 | if (projected_nodes.empty()) | |
403 | return NULL; | |
404 | else | |
405 | return projected_nodes.back(); | |
406 | } | |
407 | ||
408 | version_t get_projected_version() const { | |
409 | if (projected_nodes.empty()) | |
410 | return inode.version; | |
411 | else | |
412 | return projected_nodes.back()->inode->version; | |
413 | } | |
414 | bool is_projected() const { | |
415 | return !projected_nodes.empty(); | |
416 | } | |
417 | ||
418 | const inode_t *get_projected_inode() const { | |
419 | if (projected_nodes.empty()) | |
420 | return &inode; | |
421 | else | |
422 | return projected_nodes.back()->inode; | |
423 | } | |
424 | inode_t *get_projected_inode() { | |
425 | if (projected_nodes.empty()) | |
426 | return &inode; | |
427 | else | |
428 | return projected_nodes.back()->inode; | |
429 | } | |
430 | inode_t *get_previous_projected_inode() { | |
431 | assert(!projected_nodes.empty()); | |
432 | std::list<projected_inode_t*>::reverse_iterator p = projected_nodes.rbegin(); | |
433 | ++p; | |
434 | if (p != projected_nodes.rend()) | |
435 | return (*p)->inode; | |
436 | else | |
437 | return &inode; | |
438 | } | |
439 | ||
440 | std::map<std::string,bufferptr> *get_projected_xattrs() { | |
441 | if (num_projected_xattrs > 0) { | |
442 | for (std::list<projected_inode_t*>::reverse_iterator p = projected_nodes.rbegin(); | |
443 | p != projected_nodes.rend(); | |
444 | ++p) | |
445 | if ((*p)->xattrs) | |
446 | return (*p)->xattrs; | |
447 | } | |
448 | return &xattrs; | |
449 | } | |
450 | std::map<std::string,bufferptr> *get_previous_projected_xattrs() { | |
451 | std::list<projected_inode_t*>::reverse_iterator p = projected_nodes.rbegin(); | |
452 | for (++p; // skip the most recent projected value | |
453 | p != projected_nodes.rend(); | |
454 | ++p) | |
455 | if ((*p)->xattrs) | |
456 | return (*p)->xattrs; | |
457 | return &xattrs; | |
458 | } | |
459 | ||
460 | sr_t *project_snaprealm(snapid_t snapid=0); | |
461 | const sr_t *get_projected_srnode() const { | |
462 | if (num_projected_srnodes > 0) { | |
463 | for (std::list<projected_inode_t*>::const_reverse_iterator p = projected_nodes.rbegin(); | |
464 | p != projected_nodes.rend(); | |
465 | ++p) | |
466 | if ((*p)->snapnode) | |
467 | return (*p)->snapnode; | |
468 | } | |
469 | if (snaprealm) | |
470 | return &snaprealm->srnode; | |
471 | else | |
472 | return NULL; | |
473 | } | |
474 | sr_t *get_projected_srnode() { | |
475 | if (num_projected_srnodes > 0) { | |
476 | for (std::list<projected_inode_t*>::reverse_iterator p = projected_nodes.rbegin(); | |
477 | p != projected_nodes.rend(); | |
478 | ++p) | |
479 | if ((*p)->snapnode) | |
480 | return (*p)->snapnode; | |
481 | } | |
482 | if (snaprealm) | |
483 | return &snaprealm->srnode; | |
484 | else | |
485 | return NULL; | |
486 | } | |
487 | void project_past_snaprealm_parent(SnapRealm *newparent); | |
488 | ||
489 | private: | |
490 | void pop_projected_snaprealm(sr_t *next_snaprealm); | |
491 | ||
492 | public: | |
493 | old_inode_t& cow_old_inode(snapid_t follows, bool cow_head); | |
494 | void split_old_inode(snapid_t snap); | |
495 | old_inode_t *pick_old_inode(snapid_t last); | |
496 | void pre_cow_old_inode(); | |
497 | void purge_stale_snap_data(const std::set<snapid_t>& snaps); | |
498 | ||
499 | // -- cache infrastructure -- | |
500 | private: | |
501 | compact_map<frag_t,CDir*> dirfrags; // cached dir fragments under this Inode | |
502 | int stickydir_ref; | |
503 | scrub_info_t *scrub_infop; | |
504 | ||
505 | public: | |
506 | bool has_dirfrags() { return !dirfrags.empty(); } | |
507 | CDir* get_dirfrag(frag_t fg) { | |
508 | if (dirfrags.count(fg)) { | |
509 | //assert(g_conf->debug_mds < 2 || dirfragtree.is_leaf(fg)); // performance hack FIXME | |
510 | return dirfrags[fg]; | |
511 | } else | |
512 | return NULL; | |
513 | } | |
514 | bool get_dirfrags_under(frag_t fg, std::list<CDir*>& ls); | |
515 | CDir* get_approx_dirfrag(frag_t fg); | |
516 | void get_dirfrags(std::list<CDir*>& ls); | |
517 | void get_nested_dirfrags(std::list<CDir*>& ls); | |
518 | void get_subtree_dirfrags(std::list<CDir*>& ls); | |
519 | CDir *get_or_open_dirfrag(MDCache *mdcache, frag_t fg); | |
520 | CDir *add_dirfrag(CDir *dir); | |
521 | void close_dirfrag(frag_t fg); | |
522 | void close_dirfrags(); | |
523 | bool has_subtree_root_dirfrag(int auth=-1); | |
524 | bool has_subtree_or_exporting_dirfrag(); | |
525 | ||
526 | void force_dirfrags(); | |
527 | void verify_dirfrags(); | |
528 | ||
529 | void get_stickydirs(); | |
530 | void put_stickydirs(); | |
531 | ||
532 | protected: | |
533 | // parent dentries in cache | |
534 | CDentry *parent; // primary link | |
535 | compact_set<CDentry*> remote_parents; // if hard linked | |
536 | ||
537 | std::list<CDentry*> projected_parent; // for in-progress rename, (un)link, etc. | |
538 | ||
539 | mds_authority_t inode_auth; | |
540 | ||
541 | // -- distributed state -- | |
542 | protected: | |
543 | // file capabilities | |
544 | std::map<client_t, Capability*> client_caps; // client -> caps | |
545 | compact_map<int32_t, int32_t> mds_caps_wanted; // [auth] mds -> caps wanted | |
546 | int replica_caps_wanted; // [replica] what i've requested from auth | |
547 | ||
548 | public: | |
549 | compact_map<int, std::set<client_t> > client_snap_caps; // [auth] [snap] dirty metadata we still need from the head | |
550 | compact_map<snapid_t, std::set<client_t> > client_need_snapflush; | |
551 | ||
552 | void add_need_snapflush(CInode *snapin, snapid_t snapid, client_t client); | |
553 | void remove_need_snapflush(CInode *snapin, snapid_t snapid, client_t client); | |
554 | bool split_need_snapflush(CInode *cowin, CInode *in); | |
555 | ||
556 | protected: | |
557 | ||
558 | ceph_lock_state_t *fcntl_locks; | |
559 | ceph_lock_state_t *flock_locks; | |
560 | ||
561 | ceph_lock_state_t *get_fcntl_lock_state() { | |
562 | if (!fcntl_locks) | |
563 | fcntl_locks = new ceph_lock_state_t(g_ceph_context, CEPH_LOCK_FCNTL); | |
564 | return fcntl_locks; | |
565 | } | |
566 | void clear_fcntl_lock_state() { | |
567 | delete fcntl_locks; | |
568 | fcntl_locks = NULL; | |
569 | } | |
570 | ceph_lock_state_t *get_flock_lock_state() { | |
571 | if (!flock_locks) | |
572 | flock_locks = new ceph_lock_state_t(g_ceph_context, CEPH_LOCK_FLOCK); | |
573 | return flock_locks; | |
574 | } | |
575 | void clear_flock_lock_state() { | |
576 | delete flock_locks; | |
577 | flock_locks = NULL; | |
578 | } | |
579 | void clear_file_locks() { | |
580 | clear_fcntl_lock_state(); | |
581 | clear_flock_lock_state(); | |
582 | } | |
583 | void _encode_file_locks(bufferlist& bl) const { | |
584 | bool has_fcntl_locks = fcntl_locks && !fcntl_locks->empty(); | |
585 | ::encode(has_fcntl_locks, bl); | |
586 | if (has_fcntl_locks) | |
587 | ::encode(*fcntl_locks, bl); | |
588 | bool has_flock_locks = flock_locks && !flock_locks->empty(); | |
589 | ::encode(has_flock_locks, bl); | |
590 | if (has_flock_locks) | |
591 | ::encode(*flock_locks, bl); | |
592 | } | |
593 | void _decode_file_locks(bufferlist::iterator& p) { | |
594 | bool has_fcntl_locks; | |
595 | ::decode(has_fcntl_locks, p); | |
596 | if (has_fcntl_locks) | |
597 | ::decode(*get_fcntl_lock_state(), p); | |
598 | else | |
599 | clear_fcntl_lock_state(); | |
600 | bool has_flock_locks; | |
601 | ::decode(has_flock_locks, p); | |
602 | if (has_flock_locks) | |
603 | ::decode(*get_flock_lock_state(), p); | |
604 | else | |
605 | clear_flock_lock_state(); | |
606 | } | |
607 | ||
608 | // LogSegment lists i (may) belong to | |
609 | public: | |
610 | elist<CInode*>::item item_dirty; | |
611 | elist<CInode*>::item item_caps; | |
612 | elist<CInode*>::item item_open_file; | |
613 | elist<CInode*>::item item_dirty_parent; | |
614 | elist<CInode*>::item item_dirty_dirfrag_dir; | |
615 | elist<CInode*>::item item_dirty_dirfrag_nest; | |
616 | elist<CInode*>::item item_dirty_dirfrag_dirfragtree; | |
617 | elist<CInode*>::item item_scrub; | |
618 | ||
619 | public: | |
620 | int auth_pin_freeze_allowance; | |
621 | ||
622 | inode_load_vec_t pop; | |
623 | ||
624 | // friends | |
625 | friend class Server; | |
626 | friend class Locker; | |
627 | friend class Migrator; | |
628 | friend class MDCache; | |
629 | friend class StrayManager; | |
630 | friend class CDir; | |
631 | friend class CInodeExport; | |
7c673cae FG |
632 | |
633 | // --------------------------- | |
634 | CInode(MDCache *c, bool auth=true, snapid_t f=2, snapid_t l=CEPH_NOSNAP) : | |
635 | mdcache(c), | |
636 | snaprealm(0), containing_realm(0), | |
637 | first(f), last(l), | |
638 | last_journaled(0), //last_open_journaled(0), | |
639 | //hack_accessed(true), | |
640 | num_projected_xattrs(0), | |
641 | num_projected_srnodes(0), | |
642 | stickydir_ref(0), | |
643 | scrub_infop(NULL), | |
644 | parent(0), | |
645 | inode_auth(CDIR_AUTH_DEFAULT), | |
646 | replica_caps_wanted(0), | |
647 | fcntl_locks(0), flock_locks(0), | |
648 | item_dirty(this), item_caps(this), item_open_file(this), item_dirty_parent(this), | |
649 | item_dirty_dirfrag_dir(this), | |
650 | item_dirty_dirfrag_nest(this), | |
651 | item_dirty_dirfrag_dirfragtree(this), | |
652 | auth_pin_freeze_allowance(0), | |
653 | pop(ceph_clock_now()), | |
654 | versionlock(this, &versionlock_type), | |
655 | authlock(this, &authlock_type), | |
656 | linklock(this, &linklock_type), | |
657 | dirfragtreelock(this, &dirfragtreelock_type), | |
658 | filelock(this, &filelock_type), | |
659 | xattrlock(this, &xattrlock_type), | |
660 | snaplock(this, &snaplock_type), | |
661 | nestlock(this, &nestlock_type), | |
662 | flocklock(this, &flocklock_type), | |
663 | policylock(this, &policylock_type), | |
664 | loner_cap(-1), want_loner_cap(-1) | |
665 | { | |
666 | state = 0; | |
667 | if (auth) state_set(STATE_AUTH); | |
668 | } | |
669 | ~CInode() override { | |
670 | close_dirfrags(); | |
671 | close_snaprealm(); | |
672 | clear_file_locks(); | |
673 | assert(num_projected_xattrs == 0); | |
674 | assert(num_projected_srnodes == 0); | |
675 | } | |
676 | ||
677 | ||
678 | // -- accessors -- | |
679 | bool is_root() const { return inode.ino == MDS_INO_ROOT; } | |
680 | bool is_stray() const { return MDS_INO_IS_STRAY(inode.ino); } | |
681 | mds_rank_t get_stray_owner() const { | |
682 | return (mds_rank_t)MDS_INO_STRAY_OWNER(inode.ino); | |
683 | } | |
684 | bool is_mdsdir() const { return MDS_INO_IS_MDSDIR(inode.ino); } | |
685 | bool is_base() const { return is_root() || is_mdsdir(); } | |
686 | bool is_system() const { return inode.ino < MDS_INO_SYSTEM_BASE; } | |
687 | bool is_normal() const { return !(is_base() || is_system() || is_stray()); } | |
688 | ||
689 | bool is_head() const { return last == CEPH_NOSNAP; } | |
690 | ||
691 | // note: this overloads MDSCacheObject | |
692 | bool is_ambiguous_auth() const { | |
693 | return state_test(STATE_AMBIGUOUSAUTH) || | |
694 | MDSCacheObject::is_ambiguous_auth(); | |
695 | } | |
696 | void set_ambiguous_auth() { | |
697 | state_set(STATE_AMBIGUOUSAUTH); | |
698 | } | |
699 | void clear_ambiguous_auth(std::list<MDSInternalContextBase*>& finished); | |
700 | void clear_ambiguous_auth(); | |
701 | ||
702 | inodeno_t ino() const { return inode.ino; } | |
703 | vinodeno_t vino() const { return vinodeno_t(inode.ino, last); } | |
704 | int d_type() const { return IFTODT(inode.mode); } | |
705 | ||
706 | inode_t& get_inode() { return inode; } | |
707 | CDentry* get_parent_dn() { return parent; } | |
708 | const CDentry* get_parent_dn() const { return parent; } | |
709 | const CDentry* get_projected_parent_dn() const { return !projected_parent.empty() ? projected_parent.back() : parent; } | |
710 | CDentry* get_projected_parent_dn() { return !projected_parent.empty() ? projected_parent.back() : parent; } | |
711 | CDir *get_parent_dir(); | |
712 | const CDir *get_projected_parent_dir() const; | |
713 | CDir *get_projected_parent_dir(); | |
714 | CInode *get_parent_inode(); | |
715 | ||
716 | bool is_lt(const MDSCacheObject *r) const override { | |
717 | const CInode *o = static_cast<const CInode*>(r); | |
718 | return ino() < o->ino() || | |
719 | (ino() == o->ino() && last < o->last); | |
720 | } | |
721 | ||
722 | // -- misc -- | |
723 | bool is_projected_ancestor_of(CInode *other); | |
724 | ||
725 | void make_path_string(std::string& s, bool projected=false, const CDentry *use_parent=NULL) const; | |
726 | void make_path(filepath& s, bool projected=false) const; | |
727 | void name_stray_dentry(std::string& dname); | |
728 | ||
729 | // -- dirtyness -- | |
730 | version_t get_version() const { return inode.version; } | |
731 | ||
732 | version_t pre_dirty(); | |
733 | void _mark_dirty(LogSegment *ls); | |
734 | void mark_dirty(version_t projected_dirv, LogSegment *ls); | |
735 | void mark_clean(); | |
736 | ||
737 | void store(MDSInternalContextBase *fin); | |
738 | void _stored(int r, version_t cv, Context *fin); | |
739 | /** | |
740 | * Flush a CInode to disk. This includes the backtrace, the parent | |
741 | * directory's link, and the Inode object itself (if a base directory). | |
742 | * @pre is_auth() on both the inode and its containing directory | |
743 | * @pre can_auth_pin() | |
744 | * @param fin The Context to call when the flush is completed. | |
745 | */ | |
746 | void flush(MDSInternalContextBase *fin); | |
747 | void fetch(MDSInternalContextBase *fin); | |
748 | void _fetched(bufferlist& bl, bufferlist& bl2, Context *fin); | |
749 | ||
750 | ||
751 | void build_backtrace(int64_t pool, inode_backtrace_t& bt); | |
752 | void store_backtrace(MDSInternalContextBase *fin, int op_prio=-1); | |
753 | void _stored_backtrace(int r, version_t v, Context *fin); | |
754 | void fetch_backtrace(Context *fin, bufferlist *backtrace); | |
755 | protected: | |
756 | /** | |
757 | * Return the pool ID where we currently write backtraces for | |
758 | * this inode (in addition to inode.old_pools) | |
759 | * | |
760 | * @returns a pool ID >=0 | |
761 | */ | |
762 | int64_t get_backtrace_pool() const; | |
763 | public: | |
764 | void _mark_dirty_parent(LogSegment *ls, bool dirty_pool=false); | |
765 | void clear_dirty_parent(); | |
766 | void verify_diri_backtrace(bufferlist &bl, int err); | |
767 | bool is_dirty_parent() { return state_test(STATE_DIRTYPARENT); } | |
768 | bool is_dirty_pool() { return state_test(STATE_DIRTYPOOL); } | |
769 | ||
770 | void encode_snap_blob(bufferlist &bl); | |
771 | void decode_snap_blob(bufferlist &bl); | |
772 | void encode_store(bufferlist& bl, uint64_t features); | |
773 | void decode_store(bufferlist::iterator& bl); | |
774 | ||
775 | void encode_replica(mds_rank_t rep, bufferlist& bl, uint64_t features) { | |
776 | assert(is_auth()); | |
777 | ||
778 | // relax locks? | |
779 | if (!is_replicated()) | |
780 | replicate_relax_locks(); | |
781 | ||
782 | __u32 nonce = add_replica(rep); | |
783 | ::encode(nonce, bl); | |
784 | ||
785 | _encode_base(bl, features); | |
786 | _encode_locks_state_for_replica(bl); | |
787 | } | |
788 | void decode_replica(bufferlist::iterator& p, bool is_new) { | |
789 | __u32 nonce; | |
790 | ::decode(nonce, p); | |
791 | replica_nonce = nonce; | |
792 | ||
793 | _decode_base(p); | |
794 | _decode_locks_state(p, is_new); | |
795 | } | |
796 | ||
797 | // -- waiting -- | |
798 | protected: | |
// Lists of waiting contexts, keyed by directory fragment.
799 | compact_map<frag_t, std::list<MDSInternalContextBase*> > waiting_on_dir; | |
800 | public: | |
801 | void add_dir_waiter(frag_t fg, MDSInternalContextBase *c); | |
802 | void take_dir_waiting(frag_t fg, std::list<MDSInternalContextBase*>& ls); | |
// True when waiting_on_dir has an entry for fg (count() converted to bool).
803 | bool is_waiting_for_dir(frag_t fg) { | |
804 | return waiting_on_dir.count(fg); | |
805 | } | |
// Overrides of the base-class waiter interface; both are keyed by a 64-bit tag.
806 | void add_waiter(uint64_t tag, MDSInternalContextBase *c) override; | |
807 | void take_waiting(uint64_t tag, std::list<MDSInternalContextBase*>& ls) override; | |
808 | ||
809 | // -- encode/decode helpers -- | |
// _encode_base/_decode_base and _encode_locks_state_for_replica/
// _decode_locks_state are the helpers used by encode_replica()/decode_replica().
810 | void _encode_base(bufferlist& bl, uint64_t features); | |
811 | void _decode_base(bufferlist::iterator& p); | |
812 | void _encode_locks_full(bufferlist& bl); | |
813 | void _decode_locks_full(bufferlist::iterator& p); | |
814 | void _encode_locks_state_for_replica(bufferlist& bl); | |
// NOTE(review): rep is a plain int here, not mds_rank_t -- presumably the
// replica's rank; confirm against the definition.
815 | void _encode_locks_state_for_rejoin(bufferlist& bl, int rep); | |
816 | void _decode_locks_state(bufferlist::iterator& p, bool is_new); | |
// Takes two caller-owned lists by reference (waiters and eval_locks).
817 | void _decode_locks_rejoin(bufferlist::iterator& p, std::list<MDSInternalContextBase*>& waiters, | |
818 | std::list<SimpleLock*>& eval_locks); | |
819 | ||
820 | // -- import/export -- | |
821 | void encode_export(bufferlist& bl); | |
822 | void finish_export(utime_t now); | |
// abort_export(): drops the PIN_TEMPEXPORTING pin, asserts that
// STATE_EXPORTINGCAPS is set, clears that state bit, then drops the
// PIN_EXPORTINGCAPS pin.
823 | void abort_export() { | |
824 | put(PIN_TEMPEXPORTING); | |
825 | assert(state_test(STATE_EXPORTINGCAPS)); | |
826 | state_clear(STATE_EXPORTINGCAPS); | |
827 | put(PIN_EXPORTINGCAPS); | |
828 | } | |
829 | void decode_import(bufferlist::iterator& p, LogSegment *ls); | |
830 | ||
831 | ||
832 | // for giving to clients | |
// encode_inodestat() returns an int; defaults: snapid=CEPH_NOSNAP,
// max_bytes=0, getattr_wants=0.
// NOTE(review): the meaning of the return value is not visible here -- confirm
// against the definition.
833 | int encode_inodestat(bufferlist& bl, Session *session, SnapRealm *realm, | |
834 | snapid_t snapid=CEPH_NOSNAP, unsigned max_bytes=0, | |
835 | int getattr_wants=0); | |
836 | void encode_cap_message(MClientCaps *m, Capability *cap); | |
837 | ||
838 | ||
839 | // -- locks -- | |
840 | public: | |
// One static LockType descriptor per lock kind.
841 | static LockType versionlock_type; | |
842 | static LockType authlock_type; | |
843 | static LockType linklock_type; | |
844 | static LockType dirfragtreelock_type; | |
845 | static LockType filelock_type; | |
846 | static LockType xattrlock_type; | |
847 | static LockType snaplock_type; | |
848 | static LockType nestlock_type; | |
849 | static LockType flocklock_type; | |
850 | static LockType policylock_type; | |
851 | ||
// Per-inode lock instances. versionlock is a LocalLock; dirfragtreelock,
// filelock and nestlock are ScatterLocks; the others are SimpleLocks.
852 | LocalLock versionlock; | |
853 | SimpleLock authlock; | |
854 | SimpleLock linklock; | |
855 | ScatterLock dirfragtreelock; | |
856 | ScatterLock filelock; | |
857 | SimpleLock xattrlock; | |
858 | SimpleLock snaplock; | |
859 | ScatterLock nestlock; | |
860 | SimpleLock flocklock; | |
861 | SimpleLock policylock; | |
862 | ||
863 | SimpleLock* get_lock(int type) override { | |
864 | switch (type) { | |
865 | case CEPH_LOCK_IFILE: return &filelock; | |
866 | case CEPH_LOCK_IAUTH: return &authlock; | |
867 | case CEPH_LOCK_ILINK: return &linklock; | |
868 | case CEPH_LOCK_IDFT: return &dirfragtreelock; | |
869 | case CEPH_LOCK_IXATTR: return &xattrlock; | |
870 | case CEPH_LOCK_ISNAP: return &snaplock; | |
871 | case CEPH_LOCK_INEST: return &nestlock; | |
872 | case CEPH_LOCK_IFLOCK: return &flocklock; | |
873 | case CEPH_LOCK_IPOLICY: return &policylock; | |
874 | } | |
875 | return 0; | |
876 | } | |
877 | ||
// Overrides of the base-class object-info and per-lock state codec hooks.
878 | void set_object_info(MDSCacheObjectInfo &info) override; | |
879 | void encode_lock_state(int type, bufferlist& bl) override; | |
880 | void decode_lock_state(int type, bufferlist& bl) override; | |
881 | ||
882 | void _finish_frag_update(CDir *dir, MutationRef& mut); | |
883 | ||
// Scatter-dirty bookkeeping. The int "type" parameters are lock type codes
// -- NOTE(review): presumably the CEPH_LOCK_I* values accepted by get_lock();
// confirm against the definitions.
884 | void clear_dirty_scattered(int type) override; | |
885 | bool is_dirty_scattered(); | |
886 | void clear_scatter_dirty(); // on rejoin ack | |
887 | ||
// Scatter/gather steps, declared here without bodies.
888 | void start_scatter(ScatterLock *lock); | |
889 | void finish_scatter_update(ScatterLock *lock, CDir *dir, | |
890 | version_t inode_version, version_t dir_accounted_version); | |
891 | void finish_scatter_gather_update(int type); | |
892 | void finish_scatter_gather_update_accounted(int type, MutationRef& mut, EMetaBlob *metablob); | |
893 | ||
894 | // -- snap -- | |
// open_snaprealm(): no_split defaults to false.
// close_snaprealm(): no_join defaults to false.
895 | void open_snaprealm(bool no_split=false); | |
896 | void close_snaprealm(bool no_join=false); | |
// Const lookup returning a SnapRealm pointer.
897 | SnapRealm *find_snaprealm() const; | |
898 | void encode_snap(bufferlist& bl); | |
899 | void decode_snap(bufferlist::iterator& p); | |
900 | ||
901 | // -- caps -- (new) | |
902 | // client caps | |
// loner_cap is what get_loner() returns; want_loner_cap is what
// get_wanted_loner() returns.
903 | client_t loner_cap, want_loner_cap; | |
904 | ||
905 | client_t get_loner() const { return loner_cap; } | |
906 | client_t get_wanted_loner() const { return want_loner_cap; } | |
907 | ||
908 | // this is the loner state our locks should aim for | |
909 | client_t get_target_loner() const { | |
910 | if (loner_cap == want_loner_cap) | |
911 | return loner_cap; | |
912 | else | |
913 | return -1; | |
914 | } | |
915 | ||
// Loner selection, declared here without bodies. The try_* variants return a
// bool. NOTE(review): presumably "succeeded" -- confirm against the definitions.
916 | client_t calc_ideal_loner(); | |
917 | client_t choose_ideal_loner(); | |
918 | bool try_set_loner(); | |
919 | void set_loner_cap(client_t l); | |
920 | bool try_drop_loner(); | |
921 | ||
922 | // choose new lock state during recovery, based on issued caps | |
923 | void choose_lock_state(SimpleLock *lock, int allissued); | |
924 | void choose_lock_states(int dirty_caps); | |
925 | ||
926 | int count_nonstale_caps() { | |
927 | int n = 0; | |
928 | for (std::map<client_t,Capability*>::iterator it = client_caps.begin(); | |
929 | it != client_caps.end(); | |
930 | ++it) | |
931 | if (!it->second->is_stale()) | |
932 | n++; | |
933 | return n; | |
934 | } | |
935 | bool multiple_nonstale_caps() { | |
936 | int n = 0; | |
937 | for (std::map<client_t,Capability*>::iterator it = client_caps.begin(); | |
938 | it != client_caps.end(); | |
939 | ++it) | |
940 | if (!it->second->is_stale()) { | |
941 | if (n) | |
942 | return true; | |
943 | n++; | |
944 | } | |
945 | return false; | |
946 | } | |
947 | ||
948 | bool is_any_caps() { return !client_caps.empty(); } | |
949 | bool is_any_nonstale_caps() { return count_nonstale_caps(); } | |
950 | ||
// Const and mutable accessors for mds_caps_wanted (an int32_t -> int32_t map).
951 | const compact_map<int32_t,int32_t>& get_mds_caps_wanted() const { return mds_caps_wanted; } | |
952 | compact_map<int32_t,int32_t>& get_mds_caps_wanted() { return mds_caps_wanted; } | |
953 | ||
// Read-only view of the client -> Capability map.
954 | const std::map<client_t,Capability*>& get_client_caps() const { return client_caps; } | |
955 | Capability *get_client_cap(client_t client) { | |
956 | auto client_caps_entry = client_caps.find(client); | |
957 | if (client_caps_entry != client_caps.end()) | |
958 | return client_caps_entry->second; | |
959 | return 0; | |
960 | } | |
961 | int get_client_cap_pending(client_t client) const { | |
962 | auto client_caps_entry = client_caps.find(client); | |
963 | if (client_caps_entry != client_caps.end()) { | |
964 | return client_caps_entry->second->pending(); | |
965 | } else { | |
966 | return 0; | |
967 | } | |
968 | } | |
969 | ||
// Capability lifecycle, declared here without bodies.
// add_client_cap(): conrealm defaults to 0 (null).
970 | Capability *add_client_cap(client_t client, Session *session, SnapRealm *conrealm=0); | |
971 | void remove_client_cap(client_t client); | |
972 | void move_to_realm(SnapRealm *realm); | |
973 | ||
974 | Capability *reconnect_cap(client_t client, const cap_reconnect_t& icr, Session *session); | |
975 | void clear_client_caps_after_export(); | |
// Fills the caller's map with one Capability::Export per client.
// NOTE(review): inferred from the signature only -- confirm.
976 | void export_client_caps(std::map<client_t,Capability::Export>& cl); | |
977 | ||
978 | // caps allowed | |
// All of these are const queries returning an int.
// NOTE(review): presumably a cap bitmask -- confirm against the definitions.
979 | int get_caps_liked() const; | |
980 | int get_caps_allowed_ever() const; | |
981 | int get_caps_allowed_by_type(int type) const; | |
982 | int get_caps_careful() const; | |
983 | int get_xlocker_mask(client_t client) const; | |
984 | int get_caps_allowed_for_client(Session *s, inode_t *file_i) const; | |
985 | ||
986 | // caps issued, wanted | |
// The optional out-pointers default to 0 (null); shift defaults to 0 and mask
// to -1. Note get_caps_issued() is non-const while get_caps_wanted() is const.
987 | int get_caps_issued(int *ploner = 0, int *pother = 0, int *pxlocker = 0, | |
988 | int shift = 0, int mask = -1); | |
989 | bool is_any_caps_wanted() const; | |
990 | int get_caps_wanted(int *ploner = 0, int *pother = 0, int shift = 0, int mask = -1) const; | |
991 | bool issued_caps_need_gather(SimpleLock *lock); | |
// Called by encode_replica() before the first replica is added.
992 | void replicate_relax_locks(); | |
993 | ||
994 | // -- authority -- | |
995 | mds_authority_t authority() const override; | |
996 | ||
997 | // -- auth pins -- | |
// "by" is an opaque pointer naming the pinner.
// NOTE(review): presumably used only for accounting/debugging -- confirm.
998 | void adjust_nested_auth_pins(int a, void *by); | |
999 | bool can_auth_pin() const override; | |
1000 | void auth_pin(void *by) override; | |
1001 | void auth_unpin(void *by) override; | |
1002 | ||
1003 | // -- freeze -- | |
// The three inline predicates test the STATE_FREEZING, STATE_FROZEN and
// STATE_FROZENAUTHPIN state bits respectively.
1004 | bool is_freezing_inode() const { return state_test(STATE_FREEZING); } | |
1005 | bool is_frozen_inode() const { return state_test(STATE_FROZEN); } | |
1006 | bool is_frozen_auth_pin() const { return state_test(STATE_FROZENAUTHPIN); } | |
// is_frozen()/is_freezing() override the base class; is_frozen_dir() is
// specific to this class.
1007 | bool is_frozen() const override; | |
1008 | bool is_frozen_dir() const; | |
1009 | bool is_freezing() const override; | |
1010 | ||
1011 | /* Freeze the inode. auth_pin_allowance lets the caller account for any | |
1012 | * auth_pins it is itself holding/responsible for. */ | |
1013 | bool freeze_inode(int auth_pin_allowance=0); | |
// Two overloads: one takes a caller-owned list of contexts by reference, the
// other takes no arguments.
1014 | void unfreeze_inode(std::list<MDSInternalContextBase*>& finished); | |
1015 | void unfreeze_inode(); | |
1016 | ||
1017 | void freeze_auth_pin(); | |
1018 | void unfreeze_auth_pin(); | |
1019 | ||
1020 | // -- reference counting -- | |
// bad_put(): logs via generic_dout(0) this inode, the pin id "by", its
// pin_name() and the current ref (plus ref_map when MDS_REF_SET is defined),
// then asserts ref > 0 (and ref_map[by] > 0 under MDS_REF_SET).
1021 | void bad_put(int by) override { | |
1022 | generic_dout(0) << " bad put " << *this << " by " << by << " " << pin_name(by) << " was " << ref | |
1023 | #ifdef MDS_REF_SET | |
1024 | << " (" << ref_map << ")" | |
1025 | #endif | |
1026 | << dendl; | |
1027 | #ifdef MDS_REF_SET | |
1028 | assert(ref_map[by] > 0); | |
1029 | #endif | |
1030 | assert(ref > 0); | |
1031 | } | |
// bad_get(): logs the same fields as bad_put() with the text " bad get ".
// The only assertion (ref_map[by] >= 0) is compiled in under MDS_REF_SET;
// without it this function only logs.
1032 | void bad_get(int by) override { | |
1033 | generic_dout(0) << " bad get " << *this << " by " << by << " " << pin_name(by) << " was " << ref | |
1034 | #ifdef MDS_REF_SET | |
1035 | << " (" << ref_map << ")" | |
1036 | #endif | |
1037 | << dendl; | |
1038 | #ifdef MDS_REF_SET | |
1039 | assert(ref_map[by] >= 0); | |
1040 | #endif | |
1041 | } | |
// Reference-count hooks overridden from the base class.
// NOTE(review): presumably invoked on the first get / last put of the
// refcount -- confirm against the base class.
1042 | void first_get() override; | |
1043 | void last_put() override; | |
1044 | void _put() override; | |
1045 | ||
1046 | ||
1047 | // -- hierarchy stuff -- | |
1048 | public: | |
1049 | void set_primary_parent(CDentry *p) { | |
1050 | assert(parent == 0); | |
1051 | parent = p; | |
1052 | } | |
1053 | void remove_primary_parent(CDentry *dn) { | |
1054 | assert(dn == parent); | |
1055 | parent = 0; | |
1056 | } | |
1057 | void add_remote_parent(CDentry *p); | |
1058 | void remove_remote_parent(CDentry *p); | |
// Size of remote_parents, converted to int.
1059 | int num_remote_parents() { | |
1060 | return remote_parents.size(); | |
1061 | } | |
1062 | ||
// Append dn to the back of projected_parent; pop_projected_parent() consumes
// entries from the front.
1063 | void push_projected_parent(CDentry *dn) { | |
1064 | projected_parent.push_back(dn); | |
1065 | } | |
1066 | void pop_projected_parent() { | |
1067 | assert(projected_parent.size()); | |
1068 | parent = projected_parent.front(); | |
1069 | projected_parent.pop_front(); | |
1070 | } | |
1071 | ||
7c673cae | 1072 | public: |
// Export-pin interface, declared here without bodies.
// maybe_export_pin(): update defaults to false.
31f18b77 | 1073 | void maybe_export_pin(bool update=false); |
7c673cae FG |
1074 | void set_export_pin(mds_rank_t rank); | |
// get_export_pin(): inherit defaults to true.
// NOTE(review): presumably "fall back to an ancestor's pin" -- confirm.
1075 | mds_rank_t get_export_pin(bool inherit=true) const; | |
1076 | bool is_exportable(mds_rank_t dest) const; | |
1077 | ||
// print() overrides the base class; dump() is const and writes to a Formatter.
1078 | void print(ostream& out) override; | |
1079 | void dump(Formatter *f) const; | |
1080 | ||
1081 | /** | |
1082 | * @defgroup Scrubbing and fsck | |
1083 | * @{ | |
1084 | */ | |
1085 | ||
1086 | /** | |
1087 | * Report the results of validation against a particular inode. | |
1088 | * Each member is a member_status record. | |
1089 | * <member>.checked represents if validation was performed against the member. | |
1090 | * <member>.passed represents if the member passed validation. | |
1091 | * performed_validation is set to true if the validation was actually | |
1092 | * run. It might not be run if, for instance, the inode is marked as dirty. | |
1093 | * passed_validation is set to true if everything that was checked | |
1094 | * passed its validation. | |
1095 | */ | |
1096 | struct validated_data { | |
1097 | template<typename T>struct member_status { | |
1098 | bool checked; | |
1099 | bool passed; | |
1100 | int ondisk_read_retval; | |
1101 | T ondisk_value; | |
1102 | T memory_value; | |
1103 | std::stringstream error_str; | |
1104 | member_status() : checked(false), passed(false), | |
1105 | ondisk_read_retval(0) {} | |
1106 | }; | |
1107 | ||
1108 | bool performed_validation; | |
1109 | bool passed_validation; | |
1110 | ||
1111 | struct raw_stats_t { | |
1112 | frag_info_t dirstat; | |
1113 | nest_info_t rstat; | |
1114 | }; | |
1115 | ||
1116 | member_status<inode_backtrace_t> backtrace; | |
1117 | member_status<inode_t> inode; | |
1118 | member_status<raw_stats_t> raw_stats; | |
1119 | ||
1120 | validated_data() : performed_validation(false), | |
1121 | passed_validation(false) {} | |
1122 | ||
1123 | void dump(Formatter *f) const; | |
1124 | }; | |
1125 | ||
1126 | /** | |
1127 | * Validate that the on-disk state of an inode matches what | |
1128 | * we expect from our memory state. Currently this checks that: | |
1129 | * 1) The backtrace associated with the file data exists and is correct | |
1130 | * 2) For directories, the actual inode metadata matches our memory state, | |
1131 | * 3) For directories, the rstats match | |
1132 | * | |
1133 | * @param results A freshly-created validated_data struct, with values set | |
1134 | * as described in the struct documentation. | |
1135 | * @param mdr The request to be responded to upon the completion of the | |
1136 | * validation (or NULL) | |
1137 | * @param fin Context to call back on completion (or NULL) | |
1138 | */ | |
1139 | void validate_disk_state(validated_data *results, | |
1140 | MDSInternalContext *fin); | |
// Static: needs no CInode instance; takes the results by const reference and
// a Formatter.
1141 | static void dump_validation_results(const validated_data& results, | |
1142 | Formatter *f); | |
1143 | private: | |
// Private helper taking a ValidationContinuation, a return value and a stage
// number; ValidationContinuation is a friend of this class.
// NOTE(review): presumably one step of the validation state machine -- confirm.
1144 | bool _validate_disk_state(class ValidationContinuation *c, | |
1145 | int rval, int stage); | |
1146 | friend class ValidationContinuation; | |
1147 | /** @} Scrubbing and fsck */ | |
1148 | }; | |
1149 | ||
// Stream-insertion operator for CInode::scrub_stamp_info_t (declaration only).
1150 | ostream& operator<<(ostream& out, const CInode::scrub_stamp_info_t& si); | |
1151 | ||
1152 | #undef dout_context | |
1153 | #endif |