]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | ||
16 | ||
17 | #ifndef CEPH_CINODE_H | |
18 | #define CEPH_CINODE_H | |
19 | ||
20 | #include "common/config.h" | |
21 | #include "include/counter.h" | |
22 | #include "include/elist.h" | |
23 | #include "include/types.h" | |
24 | #include "include/lru.h" | |
25 | #include "include/compact_set.h" | |
26 | ||
27 | #include "MDSCacheObject.h" | |
28 | #include "flock.h" | |
29 | ||
30 | #include "CDentry.h" | |
31 | #include "SimpleLock.h" | |
32 | #include "ScatterLock.h" | |
33 | #include "LocalLock.h" | |
34 | #include "Capability.h" | |
35 | #include "SnapRealm.h" | |
36 | #include "Mutation.h" | |
37 | ||
38 | #include <list> | |
39 | #include <set> | |
40 | #include <map> | |
41 | ||
42 | #define dout_context g_ceph_context | |
43 | ||
44 | class Context; | |
45 | class CDentry; | |
46 | class CDir; | |
47 | class Message; | |
48 | class CInode; | |
49 | class MDCache; | |
50 | class LogSegment; | |
51 | struct SnapRealm; | |
52 | class Session; | |
53 | class MClientCaps; | |
54 | struct ObjectOperation; | |
55 | class EMetaBlob; | |
56 | ||
57 | ||
58 | ostream& operator<<(ostream& out, const CInode& in); | |
59 | ||
// Element type of the cinode_lock_info[] table declared below.
// NOTE(review): presumably pairs a lock type id with the caps tied to
// writing under it -- confirm against the table's definition.
struct cinode_lock_info_t
{
  int lock;
  int wr_caps;
};
64 | ||
65 | extern cinode_lock_info_t cinode_lock_info[]; | |
66 | extern int num_cinode_locks; | |
67 | ||
68 | ||
69 | /** | |
70 | * Base class for CInode, containing the backing store data and | |
71 | * serialization methods. This exists so that we can read and | |
72 | * handle CInodes from the backing store without hitting all | |
73 | * the business logic in CInode proper. | |
74 | */ | |
75 | class InodeStoreBase { | |
76 | public: | |
77 | inode_t inode; // the inode itself | |
78 | std::string symlink; // symlink dest, if symlink | |
79 | std::map<std::string, bufferptr> xattrs; | |
80 | fragtree_t dirfragtree; // dir frag tree, if any. always consistent with our dirfrag map. | |
81 | compact_map<snapid_t, old_inode_t> old_inodes; // key = last, value.first = first | |
82 | snapid_t oldest_snap; | |
83 | damage_flags_t damage_flags; | |
84 | ||
85 | InodeStoreBase() : oldest_snap(CEPH_NOSNAP), damage_flags(0) { } | |
86 | ||
87 | /* Helpers */ | |
88 | bool is_file() const { return inode.is_file(); } | |
89 | bool is_symlink() const { return inode.is_symlink(); } | |
90 | bool is_dir() const { return inode.is_dir(); } | |
91 | static object_t get_object_name(inodeno_t ino, frag_t fg, const char *suffix); | |
92 | ||
93 | /* Full serialization for use in ".inode" root inode objects */ | |
94 | void encode(bufferlist &bl, uint64_t features, const bufferlist *snap_blob=NULL) const; | |
95 | void decode(bufferlist::iterator &bl, bufferlist& snap_blob); | |
96 | ||
97 | /* Serialization without ENCODE_START/FINISH blocks for use embedded in dentry */ | |
98 | void encode_bare(bufferlist &bl, uint64_t features, const bufferlist *snap_blob=NULL) const; | |
99 | void decode_bare(bufferlist::iterator &bl, bufferlist &snap_blob, __u8 struct_v=5); | |
100 | ||
101 | /* For test/debug output */ | |
102 | void dump(Formatter *f) const; | |
103 | ||
104 | /* For use by offline tools */ | |
105 | __u32 hash_dentry_name(const std::string &dn); | |
106 | frag_t pick_dirfrag(const std::string &dn); | |
107 | }; | |
108 | ||
109 | class InodeStore : public InodeStoreBase { | |
110 | public: | |
111 | bufferlist snap_blob; // Encoded copy of SnapRealm, because we can't | |
112 | // rehydrate it without full MDCache | |
113 | void encode(bufferlist &bl, uint64_t features) const { | |
114 | InodeStoreBase::encode(bl, features, &snap_blob); | |
115 | } | |
116 | void decode(bufferlist::iterator &bl) { | |
117 | InodeStoreBase::decode(bl, snap_blob); | |
118 | } | |
119 | void encode_bare(bufferlist &bl, uint64_t features) const { | |
120 | InodeStoreBase::encode_bare(bl, features, &snap_blob); | |
121 | } | |
122 | void decode_bare(bufferlist::iterator &bl) { | |
123 | InodeStoreBase::decode_bare(bl, snap_blob); | |
124 | } | |
125 | ||
126 | static void generate_test_instances(std::list<InodeStore*>& ls); | |
127 | }; | |
128 | WRITE_CLASS_ENCODER_FEATURES(InodeStore) | |
129 | ||
130 | // cached inode wrapper | |
131 | class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CInode> { | |
132 | public: | |
133 | // -- pins -- | |
// Pin identifiers specific to CInode; pin_name() maps each to a label.
static const int PIN_DIRFRAG            = -1;
static const int PIN_CAPS               = 2;    // client caps
static const int PIN_IMPORTING          = -4;   // importing
static const int PIN_OPENINGDIR         = 7;
static const int PIN_REMOTEPARENT       = 8;
static const int PIN_BATCHOPENJOURNAL   = 9;
static const int PIN_SCATTERED          = 10;
static const int PIN_STICKYDIRS         = 11;
//static const int PIN_PURGING          = -12;
static const int PIN_FREEZING           = 13;
static const int PIN_FROZEN             = 14;
static const int PIN_IMPORTINGCAPS      = -15;
static const int PIN_PASTSNAPPARENT     = -16;
static const int PIN_OPENINGSNAPPARENTS = 17;
static const int PIN_TRUNCATING         = 18;
static const int PIN_STRAY              = 19;   // we pin our stray inode while active
static const int PIN_NEEDSNAPFLUSH      = 20;
static const int PIN_DIRTYRSTAT         = 21;
static const int PIN_EXPORTINGCAPS      = 22;
static const int PIN_DIRTYPARENT        = 23;
static const int PIN_DIRWAITER          = 24;
static const int PIN_SCRUBQUEUE         = 25;
156 | ||
157 | const char *pin_name(int p) const override { | |
158 | switch (p) { | |
159 | case PIN_DIRFRAG: return "dirfrag"; | |
160 | case PIN_CAPS: return "caps"; | |
161 | case PIN_IMPORTING: return "importing"; | |
162 | case PIN_OPENINGDIR: return "openingdir"; | |
163 | case PIN_REMOTEPARENT: return "remoteparent"; | |
164 | case PIN_BATCHOPENJOURNAL: return "batchopenjournal"; | |
165 | case PIN_SCATTERED: return "scattered"; | |
166 | case PIN_STICKYDIRS: return "stickydirs"; | |
167 | //case PIN_PURGING: return "purging"; | |
168 | case PIN_FREEZING: return "freezing"; | |
169 | case PIN_FROZEN: return "frozen"; | |
170 | case PIN_IMPORTINGCAPS: return "importingcaps"; | |
171 | case PIN_EXPORTINGCAPS: return "exportingcaps"; | |
172 | case PIN_PASTSNAPPARENT: return "pastsnapparent"; | |
173 | case PIN_OPENINGSNAPPARENTS: return "openingsnapparents"; | |
174 | case PIN_TRUNCATING: return "truncating"; | |
175 | case PIN_STRAY: return "stray"; | |
176 | case PIN_NEEDSNAPFLUSH: return "needsnapflush"; | |
177 | case PIN_DIRTYRSTAT: return "dirtyrstat"; | |
178 | case PIN_DIRTYPARENT: return "dirtyparent"; | |
179 | case PIN_DIRWAITER: return "dirwaiter"; | |
180 | case PIN_SCRUBQUEUE: return "scrubqueue"; | |
181 | default: return generic_pin_name(p); | |
182 | } | |
183 | } | |
184 | ||
185 | // -- state -- | |
186 | static const int STATE_EXPORTING = (1<<2); // on nonauth bystander. | |
187 | static const int STATE_OPENINGDIR = (1<<5); | |
188 | static const int STATE_FREEZING = (1<<7); | |
189 | static const int STATE_FROZEN = (1<<8); | |
190 | static const int STATE_AMBIGUOUSAUTH = (1<<9); | |
191 | static const int STATE_EXPORTINGCAPS = (1<<10); | |
192 | static const int STATE_NEEDSRECOVER = (1<<11); | |
193 | static const int STATE_RECOVERING = (1<<12); | |
194 | static const int STATE_PURGING = (1<<13); | |
195 | static const int STATE_DIRTYPARENT = (1<<14); | |
196 | static const int STATE_DIRTYRSTAT = (1<<15); | |
197 | static const int STATE_STRAYPINNED = (1<<16); | |
198 | static const int STATE_FROZENAUTHPIN = (1<<17); | |
199 | static const int STATE_DIRTYPOOL = (1<<18); | |
200 | static const int STATE_REPAIRSTATS = (1<<19); | |
201 | static const int STATE_MISSINGOBJS = (1<<20); | |
202 | static const int STATE_EVALSTALECAPS = (1<<21); | |
31f18b77 | 203 | static const int STATE_QUEUEDEXPORTPIN = (1<<22); |
7c673cae FG |
204 | // orphan inode needs notification of releasing reference |
205 | static const int STATE_ORPHAN = STATE_NOTIFYREF; | |
206 | ||
207 | static const int MASK_STATE_EXPORTED = | |
208 | (STATE_DIRTY|STATE_NEEDSRECOVER|STATE_DIRTYPARENT|STATE_DIRTYPOOL); | |
209 | static const int MASK_STATE_EXPORT_KEPT = | |
210 | (STATE_FROZEN|STATE_AMBIGUOUSAUTH|STATE_EXPORTINGCAPS); | |
211 | ||
212 | // -- waiters -- | |
// Waiter tag bits for this class, plus a mask with every bit set.
static const uint64_t WAIT_DIR      = 1 << 0;
static const uint64_t WAIT_FROZEN   = 1 << 1;
static const uint64_t WAIT_TRUNC    = 1 << 2;
static const uint64_t WAIT_FLOCK    = 1 << 3;

static const uint64_t WAIT_ANY_MASK = static_cast<uint64_t>(-1);
219 | ||
220 | // misc | |
221 | static const unsigned EXPORT_NONCE = 1; // nonce given to replicas created by export | |
222 | ||
223 | ostream& print_db_line_prefix(ostream& out) override; | |
224 | ||
225 | public: | |
226 | MDCache *mdcache; | |
227 | ||
228 | SnapRealm *snaprealm; | |
229 | SnapRealm *containing_realm; | |
230 | snapid_t first, last; | |
231 | compact_set<snapid_t> dirty_old_rstats; | |
232 | ||
233 | class scrub_stamp_info_t { | |
234 | public: | |
235 | /// version we started our latest scrub (whether in-progress or finished) | |
236 | version_t scrub_start_version; | |
237 | /// time we started our latest scrub (whether in-progress or finished) | |
238 | utime_t scrub_start_stamp; | |
239 | /// version we started our most recent finished scrub | |
240 | version_t last_scrub_version; | |
241 | /// time we started our most recent finished scrub | |
242 | utime_t last_scrub_stamp; | |
243 | scrub_stamp_info_t() : scrub_start_version(0), last_scrub_version(0) {} | |
244 | void reset() { | |
245 | scrub_start_version = 0; | |
246 | scrub_start_stamp = utime_t(); | |
247 | } | |
248 | }; | |
249 | ||
250 | class scrub_info_t : public scrub_stamp_info_t { | |
251 | public: | |
252 | CDentry *scrub_parent; | |
253 | MDSInternalContextBase *on_finish; | |
254 | ||
255 | bool last_scrub_dirty; /// are our stamps dirty with respect to disk state? | |
256 | bool scrub_in_progress; /// are we currently scrubbing? | |
257 | bool children_scrubbed; | |
258 | ||
259 | /// my own (temporary) stamps and versions for each dirfrag we have | |
260 | std::map<frag_t, scrub_stamp_info_t> dirfrag_stamps; | |
261 | ||
262 | ScrubHeaderRefConst header; | |
263 | ||
264 | scrub_info_t() : scrub_stamp_info_t(), | |
265 | scrub_parent(NULL), on_finish(NULL), | |
266 | last_scrub_dirty(false), scrub_in_progress(false), | |
267 | children_scrubbed(false) {} | |
268 | }; | |
269 | ||
270 | const scrub_info_t *scrub_info() const{ | |
271 | if (!scrub_infop) | |
272 | scrub_info_create(); | |
273 | return scrub_infop; | |
274 | } | |
275 | ||
276 | bool scrub_is_in_progress() const { | |
277 | return (scrub_infop && scrub_infop->scrub_in_progress); | |
278 | } | |
279 | /** | |
280 | * Start scrubbing on this inode. That could be very short if it's | |
281 | * a file, or take a long time if we're recursively scrubbing a directory. | |
282 | * @pre It is not currently scrubbing | |
283 | * @post it has set up internal scrubbing state | |
284 | * @param scrub_version What version are we scrubbing at (usually, parent | |
285 | * directory's get_projected_version()) | |
286 | */ | |
287 | void scrub_initialize(CDentry *scrub_parent, | |
288 | const ScrubHeaderRefConst& header, | |
289 | MDSInternalContextBase *f); | |
290 | /** | |
291 | * Get the next dirfrag to scrub. Gives you a frag_t in output param which | |
292 | * you must convert to a CDir (and possibly load off disk). | |
293 | * @param out_dirfrag A pointer to frag_t, will be filled in with the next dirfrag to | |
294 | * scrub if there is one. | |
295 | * @returns 0 on success, you should scrub the passed-out frag_t right now; | |
296 | * ENOENT: There are no remaining dirfrags to scrub | |
297 | * <0 There was some other error (It will return -ENOTDIR if not a directory) | |
298 | */ | |
299 | int scrub_dirfrag_next(frag_t* out_dirfrag); | |
300 | /** | |
301 | * Get the currently scrubbing dirfrags. When returned, the | |
302 | * passed-in list will be filled in with all frag_ts which have | |
303 | * been returned from scrub_dirfrag_next but not sent back | |
304 | * via scrub_dirfrag_finished. | |
305 | */ | |
306 | void scrub_dirfrags_scrubbing(list<frag_t> *out_dirfrags); | |
307 | /** | |
308 | * Report to the CInode that a dirfrag it owns has been scrubbed. Call | |
309 | * this for every frag_t returned from scrub_dirfrag_next(). | |
310 | * @param dirfrag The frag_t that was scrubbed | |
311 | */ | |
312 | void scrub_dirfrag_finished(frag_t dirfrag); | |
313 | /** | |
314 | * Call this once the scrub has been completed, whether it's a full | |
315 | * recursive scrub on a directory or simply the data on a file (or | |
316 | * anything in between). | |
317 | * @param c An out param which is filled in with a Context* that must | |
318 | * be complete()ed. | |
319 | */ | |
320 | void scrub_finished(MDSInternalContextBase **c); | |
321 | /** | |
322 | * Report to the CInode that all dirfrags it owns have been scrubbed. | |
323 | */ | |
324 | void scrub_children_finished() { | |
325 | scrub_infop->children_scrubbed = true; | |
326 | } | |
327 | void scrub_set_finisher(MDSInternalContextBase *c) { | |
328 | assert(!scrub_infop->on_finish); | |
329 | scrub_infop->on_finish = c; | |
330 | } | |
331 | ||
332 | private: | |
333 | /** | |
334 | * Create a scrub_info_t struct for the scrub_infop pointer. | |
335 | */ | |
336 | void scrub_info_create() const; | |
337 | /** | |
338 | * Delete the scrub_info_t struct if it's not got any useful data | |
339 | */ | |
340 | void scrub_maybe_delete_info(); | |
341 | public: | |
342 | ||
343 | bool is_multiversion() const { | |
344 | return snaprealm || // other snaprealms will link to me | |
345 | inode.is_dir() || // links to me in other snaps | |
346 | inode.nlink > 1 || // there are remote links, possibly snapped, that will need to find me | |
347 | !old_inodes.empty(); // once multiversion, always multiversion. until old_inodes gets cleaned out. | |
348 | } | |
349 | snapid_t get_oldest_snap(); | |
350 | ||
351 | uint64_t last_journaled; // log offset for the last time i was journaled | |
352 | //loff_t last_open_journaled; // log offset for the last journaled EOpen | |
353 | utime_t last_dirstat_prop; | |
354 | ||
355 | ||
356 | // list item node for when we have unpropagated rstat data | |
357 | elist<CInode*>::item dirty_rstat_item; | |
358 | ||
359 | bool is_dirty_rstat() { | |
360 | return state_test(STATE_DIRTYRSTAT); | |
361 | } | |
362 | void mark_dirty_rstat(); | |
363 | void clear_dirty_rstat(); | |
364 | ||
365 | //bool hack_accessed; | |
366 | //utime_t hack_load_stamp; | |
367 | ||
368 | /** | |
369 | * Projection methods, used to store inode changes until they have been journaled, | |
370 | * at which point they are popped. | |
371 | * Usage: | |
372 | * project_inode as needed. If you're also projecting xattrs, pass | |
373 | * in an xattr map (by pointer), then edit the map. | |
374 | * If you're also projecting the snaprealm, call project_snaprealm after | |
375 | * calling project_inode, and modify the snaprealm as necessary. | |
376 | * | |
377 | * Then, journal. Once journaling is done, pop_and_dirty_projected_inode. | |
378 | * This function will take care of the inode itself, the xattrs, and the snaprealm. | |
379 | */ | |
380 | ||
381 | struct projected_inode_t { | |
382 | inode_t *inode; | |
383 | std::map<std::string,bufferptr> *xattrs; | |
384 | sr_t *snapnode; | |
385 | ||
386 | projected_inode_t() | |
387 | : inode(NULL), xattrs(NULL), snapnode(NULL) {} | |
388 | projected_inode_t(inode_t *in, sr_t *sn) | |
389 | : inode(in), xattrs(NULL), snapnode(sn) {} | |
390 | projected_inode_t(inode_t *in, std::map<std::string, bufferptr> *xp = NULL, sr_t *sn = NULL) | |
391 | : inode(in), xattrs(xp), snapnode(sn) {} | |
392 | }; | |
393 | std::list<projected_inode_t*> projected_nodes; // projected values (only defined while dirty) | |
394 | int num_projected_xattrs; | |
395 | int num_projected_srnodes; | |
396 | ||
397 | inode_t *project_inode(std::map<std::string,bufferptr> *px=0); | |
398 | void pop_and_dirty_projected_inode(LogSegment *ls); | |
399 | ||
400 | projected_inode_t *get_projected_node() { | |
401 | if (projected_nodes.empty()) | |
402 | return NULL; | |
403 | else | |
404 | return projected_nodes.back(); | |
405 | } | |
406 | ||
407 | version_t get_projected_version() const { | |
408 | if (projected_nodes.empty()) | |
409 | return inode.version; | |
410 | else | |
411 | return projected_nodes.back()->inode->version; | |
412 | } | |
413 | bool is_projected() const { | |
414 | return !projected_nodes.empty(); | |
415 | } | |
416 | ||
417 | const inode_t *get_projected_inode() const { | |
418 | if (projected_nodes.empty()) | |
419 | return &inode; | |
420 | else | |
421 | return projected_nodes.back()->inode; | |
422 | } | |
423 | inode_t *get_projected_inode() { | |
424 | if (projected_nodes.empty()) | |
425 | return &inode; | |
426 | else | |
427 | return projected_nodes.back()->inode; | |
428 | } | |
429 | inode_t *get_previous_projected_inode() { | |
430 | assert(!projected_nodes.empty()); | |
431 | std::list<projected_inode_t*>::reverse_iterator p = projected_nodes.rbegin(); | |
432 | ++p; | |
433 | if (p != projected_nodes.rend()) | |
434 | return (*p)->inode; | |
435 | else | |
436 | return &inode; | |
437 | } | |
438 | ||
439 | std::map<std::string,bufferptr> *get_projected_xattrs() { | |
440 | if (num_projected_xattrs > 0) { | |
441 | for (std::list<projected_inode_t*>::reverse_iterator p = projected_nodes.rbegin(); | |
442 | p != projected_nodes.rend(); | |
443 | ++p) | |
444 | if ((*p)->xattrs) | |
445 | return (*p)->xattrs; | |
446 | } | |
447 | return &xattrs; | |
448 | } | |
449 | std::map<std::string,bufferptr> *get_previous_projected_xattrs() { | |
450 | std::list<projected_inode_t*>::reverse_iterator p = projected_nodes.rbegin(); | |
451 | for (++p; // skip the most recent projected value | |
452 | p != projected_nodes.rend(); | |
453 | ++p) | |
454 | if ((*p)->xattrs) | |
455 | return (*p)->xattrs; | |
456 | return &xattrs; | |
457 | } | |
458 | ||
459 | sr_t *project_snaprealm(snapid_t snapid=0); | |
460 | const sr_t *get_projected_srnode() const { | |
461 | if (num_projected_srnodes > 0) { | |
462 | for (std::list<projected_inode_t*>::const_reverse_iterator p = projected_nodes.rbegin(); | |
463 | p != projected_nodes.rend(); | |
464 | ++p) | |
465 | if ((*p)->snapnode) | |
466 | return (*p)->snapnode; | |
467 | } | |
468 | if (snaprealm) | |
469 | return &snaprealm->srnode; | |
470 | else | |
471 | return NULL; | |
472 | } | |
473 | sr_t *get_projected_srnode() { | |
474 | if (num_projected_srnodes > 0) { | |
475 | for (std::list<projected_inode_t*>::reverse_iterator p = projected_nodes.rbegin(); | |
476 | p != projected_nodes.rend(); | |
477 | ++p) | |
478 | if ((*p)->snapnode) | |
479 | return (*p)->snapnode; | |
480 | } | |
481 | if (snaprealm) | |
482 | return &snaprealm->srnode; | |
483 | else | |
484 | return NULL; | |
485 | } | |
486 | void project_past_snaprealm_parent(SnapRealm *newparent); | |
487 | ||
488 | private: | |
489 | void pop_projected_snaprealm(sr_t *next_snaprealm); | |
490 | ||
491 | public: | |
492 | old_inode_t& cow_old_inode(snapid_t follows, bool cow_head); | |
493 | void split_old_inode(snapid_t snap); | |
494 | old_inode_t *pick_old_inode(snapid_t last); | |
495 | void pre_cow_old_inode(); | |
496 | void purge_stale_snap_data(const std::set<snapid_t>& snaps); | |
497 | ||
498 | // -- cache infrastructure -- | |
499 | private: | |
500 | compact_map<frag_t,CDir*> dirfrags; // cached dir fragments under this Inode | |
501 | int stickydir_ref; | |
502 | scrub_info_t *scrub_infop; | |
503 | ||
504 | public: | |
505 | bool has_dirfrags() { return !dirfrags.empty(); } | |
506 | CDir* get_dirfrag(frag_t fg) { | |
507 | if (dirfrags.count(fg)) { | |
508 | //assert(g_conf->debug_mds < 2 || dirfragtree.is_leaf(fg)); // performance hack FIXME | |
509 | return dirfrags[fg]; | |
510 | } else | |
511 | return NULL; | |
512 | } | |
513 | bool get_dirfrags_under(frag_t fg, std::list<CDir*>& ls); | |
514 | CDir* get_approx_dirfrag(frag_t fg); | |
515 | void get_dirfrags(std::list<CDir*>& ls); | |
516 | void get_nested_dirfrags(std::list<CDir*>& ls); | |
517 | void get_subtree_dirfrags(std::list<CDir*>& ls); | |
518 | CDir *get_or_open_dirfrag(MDCache *mdcache, frag_t fg); | |
519 | CDir *add_dirfrag(CDir *dir); | |
520 | void close_dirfrag(frag_t fg); | |
521 | void close_dirfrags(); | |
522 | bool has_subtree_root_dirfrag(int auth=-1); | |
523 | bool has_subtree_or_exporting_dirfrag(); | |
524 | ||
525 | void force_dirfrags(); | |
526 | void verify_dirfrags(); | |
527 | ||
528 | void get_stickydirs(); | |
529 | void put_stickydirs(); | |
530 | ||
531 | protected: | |
532 | // parent dentries in cache | |
533 | CDentry *parent; // primary link | |
534 | compact_set<CDentry*> remote_parents; // if hard linked | |
535 | ||
536 | std::list<CDentry*> projected_parent; // for in-progress rename, (un)link, etc. | |
537 | ||
538 | mds_authority_t inode_auth; | |
539 | ||
540 | // -- distributed state -- | |
541 | protected: | |
542 | // file capabilities | |
543 | std::map<client_t, Capability*> client_caps; // client -> caps | |
544 | compact_map<int32_t, int32_t> mds_caps_wanted; // [auth] mds -> caps wanted | |
545 | int replica_caps_wanted; // [replica] what i've requested from auth | |
546 | ||
547 | public: | |
548 | compact_map<int, std::set<client_t> > client_snap_caps; // [auth] [snap] dirty metadata we still need from the head | |
549 | compact_map<snapid_t, std::set<client_t> > client_need_snapflush; | |
550 | ||
551 | void add_need_snapflush(CInode *snapin, snapid_t snapid, client_t client); | |
552 | void remove_need_snapflush(CInode *snapin, snapid_t snapid, client_t client); | |
553 | bool split_need_snapflush(CInode *cowin, CInode *in); | |
554 | ||
555 | protected: | |
556 | ||
557 | ceph_lock_state_t *fcntl_locks; | |
558 | ceph_lock_state_t *flock_locks; | |
559 | ||
560 | ceph_lock_state_t *get_fcntl_lock_state() { | |
561 | if (!fcntl_locks) | |
562 | fcntl_locks = new ceph_lock_state_t(g_ceph_context, CEPH_LOCK_FCNTL); | |
563 | return fcntl_locks; | |
564 | } | |
565 | void clear_fcntl_lock_state() { | |
566 | delete fcntl_locks; | |
567 | fcntl_locks = NULL; | |
568 | } | |
569 | ceph_lock_state_t *get_flock_lock_state() { | |
570 | if (!flock_locks) | |
571 | flock_locks = new ceph_lock_state_t(g_ceph_context, CEPH_LOCK_FLOCK); | |
572 | return flock_locks; | |
573 | } | |
574 | void clear_flock_lock_state() { | |
575 | delete flock_locks; | |
576 | flock_locks = NULL; | |
577 | } | |
578 | void clear_file_locks() { | |
579 | clear_fcntl_lock_state(); | |
580 | clear_flock_lock_state(); | |
581 | } | |
582 | void _encode_file_locks(bufferlist& bl) const { | |
583 | bool has_fcntl_locks = fcntl_locks && !fcntl_locks->empty(); | |
584 | ::encode(has_fcntl_locks, bl); | |
585 | if (has_fcntl_locks) | |
586 | ::encode(*fcntl_locks, bl); | |
587 | bool has_flock_locks = flock_locks && !flock_locks->empty(); | |
588 | ::encode(has_flock_locks, bl); | |
589 | if (has_flock_locks) | |
590 | ::encode(*flock_locks, bl); | |
591 | } | |
592 | void _decode_file_locks(bufferlist::iterator& p) { | |
593 | bool has_fcntl_locks; | |
594 | ::decode(has_fcntl_locks, p); | |
595 | if (has_fcntl_locks) | |
596 | ::decode(*get_fcntl_lock_state(), p); | |
597 | else | |
598 | clear_fcntl_lock_state(); | |
599 | bool has_flock_locks; | |
600 | ::decode(has_flock_locks, p); | |
601 | if (has_flock_locks) | |
602 | ::decode(*get_flock_lock_state(), p); | |
603 | else | |
604 | clear_flock_lock_state(); | |
605 | } | |
606 | ||
607 | // LogSegment lists i (may) belong to | |
608 | public: | |
609 | elist<CInode*>::item item_dirty; | |
610 | elist<CInode*>::item item_caps; | |
611 | elist<CInode*>::item item_open_file; | |
612 | elist<CInode*>::item item_dirty_parent; | |
613 | elist<CInode*>::item item_dirty_dirfrag_dir; | |
614 | elist<CInode*>::item item_dirty_dirfrag_nest; | |
615 | elist<CInode*>::item item_dirty_dirfrag_dirfragtree; | |
616 | elist<CInode*>::item item_scrub; | |
617 | ||
618 | public: | |
619 | int auth_pin_freeze_allowance; | |
620 | ||
621 | inode_load_vec_t pop; | |
622 | ||
623 | // friends | |
624 | friend class Server; | |
625 | friend class Locker; | |
626 | friend class Migrator; | |
627 | friend class MDCache; | |
628 | friend class StrayManager; | |
629 | friend class CDir; | |
630 | friend class CInodeExport; | |
7c673cae FG |
631 | |
632 | // --------------------------- | |
633 | CInode(MDCache *c, bool auth=true, snapid_t f=2, snapid_t l=CEPH_NOSNAP) : | |
634 | mdcache(c), | |
635 | snaprealm(0), containing_realm(0), | |
636 | first(f), last(l), | |
637 | last_journaled(0), //last_open_journaled(0), | |
638 | //hack_accessed(true), | |
639 | num_projected_xattrs(0), | |
640 | num_projected_srnodes(0), | |
641 | stickydir_ref(0), | |
642 | scrub_infop(NULL), | |
643 | parent(0), | |
644 | inode_auth(CDIR_AUTH_DEFAULT), | |
645 | replica_caps_wanted(0), | |
646 | fcntl_locks(0), flock_locks(0), | |
647 | item_dirty(this), item_caps(this), item_open_file(this), item_dirty_parent(this), | |
648 | item_dirty_dirfrag_dir(this), | |
649 | item_dirty_dirfrag_nest(this), | |
650 | item_dirty_dirfrag_dirfragtree(this), | |
651 | auth_pin_freeze_allowance(0), | |
652 | pop(ceph_clock_now()), | |
653 | versionlock(this, &versionlock_type), | |
654 | authlock(this, &authlock_type), | |
655 | linklock(this, &linklock_type), | |
656 | dirfragtreelock(this, &dirfragtreelock_type), | |
657 | filelock(this, &filelock_type), | |
658 | xattrlock(this, &xattrlock_type), | |
659 | snaplock(this, &snaplock_type), | |
660 | nestlock(this, &nestlock_type), | |
661 | flocklock(this, &flocklock_type), | |
662 | policylock(this, &policylock_type), | |
663 | loner_cap(-1), want_loner_cap(-1) | |
664 | { | |
665 | state = 0; | |
666 | if (auth) state_set(STATE_AUTH); | |
667 | } | |
668 | ~CInode() override { | |
669 | close_dirfrags(); | |
670 | close_snaprealm(); | |
671 | clear_file_locks(); | |
672 | assert(num_projected_xattrs == 0); | |
673 | assert(num_projected_srnodes == 0); | |
674 | } | |
675 | ||
676 | ||
677 | // -- accessors -- | |
678 | bool is_root() const { return inode.ino == MDS_INO_ROOT; } | |
679 | bool is_stray() const { return MDS_INO_IS_STRAY(inode.ino); } | |
680 | mds_rank_t get_stray_owner() const { | |
681 | return (mds_rank_t)MDS_INO_STRAY_OWNER(inode.ino); | |
682 | } | |
683 | bool is_mdsdir() const { return MDS_INO_IS_MDSDIR(inode.ino); } | |
684 | bool is_base() const { return is_root() || is_mdsdir(); } | |
685 | bool is_system() const { return inode.ino < MDS_INO_SYSTEM_BASE; } | |
686 | bool is_normal() const { return !(is_base() || is_system() || is_stray()); } | |
687 | ||
688 | bool is_head() const { return last == CEPH_NOSNAP; } | |
689 | ||
690 | // note: this overloads MDSCacheObject | |
691 | bool is_ambiguous_auth() const { | |
692 | return state_test(STATE_AMBIGUOUSAUTH) || | |
693 | MDSCacheObject::is_ambiguous_auth(); | |
694 | } | |
695 | void set_ambiguous_auth() { | |
696 | state_set(STATE_AMBIGUOUSAUTH); | |
697 | } | |
698 | void clear_ambiguous_auth(std::list<MDSInternalContextBase*>& finished); | |
699 | void clear_ambiguous_auth(); | |
700 | ||
701 | inodeno_t ino() const { return inode.ino; } | |
702 | vinodeno_t vino() const { return vinodeno_t(inode.ino, last); } | |
703 | int d_type() const { return IFTODT(inode.mode); } | |
704 | ||
705 | inode_t& get_inode() { return inode; } | |
706 | CDentry* get_parent_dn() { return parent; } | |
707 | const CDentry* get_parent_dn() const { return parent; } | |
708 | const CDentry* get_projected_parent_dn() const { return !projected_parent.empty() ? projected_parent.back() : parent; } | |
709 | CDentry* get_projected_parent_dn() { return !projected_parent.empty() ? projected_parent.back() : parent; } | |
710 | CDir *get_parent_dir(); | |
711 | const CDir *get_projected_parent_dir() const; | |
712 | CDir *get_projected_parent_dir(); | |
713 | CInode *get_parent_inode(); | |
714 | ||
715 | bool is_lt(const MDSCacheObject *r) const override { | |
716 | const CInode *o = static_cast<const CInode*>(r); | |
717 | return ino() < o->ino() || | |
718 | (ino() == o->ino() && last < o->last); | |
719 | } | |
720 | ||
721 | // -- misc -- | |
722 | bool is_projected_ancestor_of(CInode *other); | |
723 | ||
724 | void make_path_string(std::string& s, bool projected=false, const CDentry *use_parent=NULL) const; | |
725 | void make_path(filepath& s, bool projected=false) const; | |
726 | void name_stray_dentry(std::string& dname); | |
727 | ||
728 | // -- dirtyness -- | |
729 | version_t get_version() const { return inode.version; } | |
730 | ||
731 | version_t pre_dirty(); | |
732 | void _mark_dirty(LogSegment *ls); | |
733 | void mark_dirty(version_t projected_dirv, LogSegment *ls); | |
734 | void mark_clean(); | |
735 | ||
736 | void store(MDSInternalContextBase *fin); | |
737 | void _stored(int r, version_t cv, Context *fin); | |
738 | /** | |
739 | * Flush a CInode to disk. This includes the backtrace, the parent | |
740 | * directory's link, and the Inode object itself (if a base directory). | |
741 | * @pre is_auth() on both the inode and its containing directory | |
742 | * @pre can_auth_pin() | |
743 | * @param fin The Context to call when the flush is completed. | |
744 | */ | |
745 | void flush(MDSInternalContextBase *fin); | |
746 | void fetch(MDSInternalContextBase *fin); | |
747 | void _fetched(bufferlist& bl, bufferlist& bl2, Context *fin); | |
748 | ||
749 | ||
750 | void build_backtrace(int64_t pool, inode_backtrace_t& bt); | |
751 | void store_backtrace(MDSInternalContextBase *fin, int op_prio=-1); | |
752 | void _stored_backtrace(int r, version_t v, Context *fin); | |
753 | void fetch_backtrace(Context *fin, bufferlist *backtrace); | |
754 | protected: | |
755 | /** | |
756 | * Return the pool ID where we currently write backtraces for | |
757 | * this inode (in addition to inode.old_pools) | |
758 | * | |
759 | * @returns a pool ID >=0 | |
760 | */ | |
761 | int64_t get_backtrace_pool() const; | |
762 | public: | |
763 | void _mark_dirty_parent(LogSegment *ls, bool dirty_pool=false); | |
764 | void clear_dirty_parent(); | |
765 | void verify_diri_backtrace(bufferlist &bl, int err); | |
766 | bool is_dirty_parent() { return state_test(STATE_DIRTYPARENT); } | |
767 | bool is_dirty_pool() { return state_test(STATE_DIRTYPOOL); } | |
768 | ||
769 | void encode_snap_blob(bufferlist &bl); | |
770 | void decode_snap_blob(bufferlist &bl); | |
771 | void encode_store(bufferlist& bl, uint64_t features); | |
772 | void decode_store(bufferlist::iterator& bl); | |
773 | ||
774 | void encode_replica(mds_rank_t rep, bufferlist& bl, uint64_t features) { | |
775 | assert(is_auth()); | |
776 | ||
777 | // relax locks? | |
778 | if (!is_replicated()) | |
779 | replicate_relax_locks(); | |
780 | ||
781 | __u32 nonce = add_replica(rep); | |
782 | ::encode(nonce, bl); | |
783 | ||
784 | _encode_base(bl, features); | |
785 | _encode_locks_state_for_replica(bl); | |
786 | } | |
787 | void decode_replica(bufferlist::iterator& p, bool is_new) { | |
788 | __u32 nonce; | |
789 | ::decode(nonce, p); | |
790 | replica_nonce = nonce; | |
791 | ||
792 | _decode_base(p); | |
793 | _decode_locks_state(p, is_new); | |
794 | } | |
795 | ||
796 | // -- waiting -- | |
797 | protected: | |
// Contexts queued per dirfrag, keyed by frag_t.
798 | compact_map<frag_t, std::list<MDSInternalContextBase*> > waiting_on_dir; | |
799 | public: | |
// Queue a context for fragment `fg` / hand queued contexts for `fg` to `ls`.
// NOTE(review): whether take_dir_waiting() also erases the map entry is
// decided in CInode.cc - confirm there.
800 | void add_dir_waiter(frag_t fg, MDSInternalContextBase *c); | |
801 | void take_dir_waiting(frag_t fg, std::list<MDSInternalContextBase*>& ls); | |
802 | bool is_waiting_for_dir(frag_t fg) { | |
803 | return waiting_on_dir.count(fg); | |
804 | } | |
// Overrides of the base-class waiter interface, keyed by a 64-bit tag.
805 | void add_waiter(uint64_t tag, MDSInternalContextBase *c) override; | |
806 | void take_waiting(uint64_t tag, std::list<MDSInternalContextBase*>& ls) override; | |
807 | |
808 | // -- encode/decode helpers -- | |
// Base inode encoding; these are the helpers called by encode_replica() and
// decode_replica().
809 | void _encode_base(bufferlist& bl, uint64_t features); | |
810 | void _decode_base(bufferlist::iterator& p); | |
// Lock encodings in three flavours: full, state-for-replica, state-for-rejoin.
// NOTE(review): `rep` in the rejoin variant is an int, presumably an MDS
// rank - confirm.
811 | void _encode_locks_full(bufferlist& bl); | |
812 | void _decode_locks_full(bufferlist::iterator& p); | |
813 | void _encode_locks_state_for_replica(bufferlist& bl); | |
814 | void _encode_locks_state_for_rejoin(bufferlist& bl, int rep); | |
815 | void _decode_locks_state(bufferlist::iterator& p, bool is_new); | |
// Takes two caller-supplied lists by reference: `waiters` and `eval_locks`.
// NOTE(review): presumably both are appended to while decoding - confirm.
816 | void _decode_locks_rejoin(bufferlist::iterator& p, std::list<MDSInternalContextBase*>& waiters, | |
817 | std::list<SimpleLock*>& eval_locks); | |
818 | ||
819 | // -- import/export -- | |
// Export side of inode migration: encode_export() appends to `bl`,
// finish_export() takes a timestamp.
// NOTE(review): how `now` is used is not visible from this header.
820 | void encode_export(bufferlist& bl); | |
821 | void finish_export(utime_t now); | |
// Abort path for an export: drops the PIN_TEMPEXPORTING pin, asserts that
// STATE_EXPORTINGCAPS is set, clears that state bit, then drops
// PIN_EXPORTINGCAPS.
822 | void abort_export() { | |
823 | put(PIN_TEMPEXPORTING); | |
// The caps-export state must be set before it can be torn down.
824 | assert(state_test(STATE_EXPORTINGCAPS)); | |
825 | state_clear(STATE_EXPORTINGCAPS); | |
826 | put(PIN_EXPORTINGCAPS); | |
827 | } | |
// Import side of inode migration; takes the payload iterator and a LogSegment.
828 | void decode_import(bufferlist::iterator& p, LogSegment *ls); | |
829 | |
830 | |
831 | // for giving to clients | |
// Defaults: snapid=CEPH_NOSNAP, max_bytes=0, getattr_wants=0. Returns int.
// NOTE(review): presumably max_bytes==0 means "no limit" and the return value
// is an error/status code - confirm in CInode.cc.
832 | int encode_inodestat(bufferlist& bl, Session *session, SnapRealm *realm, | |
833 | snapid_t snapid=CEPH_NOSNAP, unsigned max_bytes=0, | |
834 | int getattr_wants=0); | |
// Takes an MClientCaps message and a Capability.
// NOTE(review): presumably fills `m` from this inode's state - confirm.
835 | void encode_cap_message(MClientCaps *m, Capability *cap); | |
836 | ||
837 | ||
838 | // -- locks -- | |
839 | public: | |
// One static LockType descriptor per lock member declared below.
840 | static LockType versionlock_type; | |
841 | static LockType authlock_type; | |
842 | static LockType linklock_type; | |
843 | static LockType dirfragtreelock_type; | |
844 | static LockType filelock_type; | |
845 | static LockType xattrlock_type; | |
846 | static LockType snaplock_type; | |
847 | static LockType nestlock_type; | |
848 | static LockType flocklock_type; | |
849 | static LockType policylock_type; | |
850 | |
// Per-inode lock instances. Three concrete lock classes are used: LocalLock
// (versionlock), ScatterLock (dirfragtreelock, filelock, nestlock) and
// SimpleLock (the rest). All except versionlock are reachable via get_lock().
851 | LocalLock versionlock; | |
852 | SimpleLock authlock; | |
853 | SimpleLock linklock; | |
854 | ScatterLock dirfragtreelock; | |
855 | ScatterLock filelock; | |
856 | SimpleLock xattrlock; | |
857 | SimpleLock snaplock; | |
858 | ScatterLock nestlock; | |
859 | SimpleLock flocklock; | |
860 | SimpleLock policylock; | |
861 | ||
862 | SimpleLock* get_lock(int type) override { | |
863 | switch (type) { | |
864 | case CEPH_LOCK_IFILE: return &filelock; | |
865 | case CEPH_LOCK_IAUTH: return &authlock; | |
866 | case CEPH_LOCK_ILINK: return &linklock; | |
867 | case CEPH_LOCK_IDFT: return &dirfragtreelock; | |
868 | case CEPH_LOCK_IXATTR: return &xattrlock; | |
869 | case CEPH_LOCK_ISNAP: return &snaplock; | |
870 | case CEPH_LOCK_INEST: return &nestlock; | |
871 | case CEPH_LOCK_IFLOCK: return &flocklock; | |
872 | case CEPH_LOCK_IPOLICY: return &policylock; | |
873 | } | |
874 | return 0; | |
875 | } | |
876 | ||
// Base-class overrides: object identification and per-lock state
// (de)serialization, selected by lock type.
877 | void set_object_info(MDSCacheObjectInfo &info) override; | |
878 | void encode_lock_state(int type, bufferlist& bl) override; | |
879 | void decode_lock_state(int type, bufferlist& bl) override; | |
880 | |
// NOTE(review): looks like the completion step of a dirfrag update carried in
// `mut` - confirm in CInode.cc.
881 | void _finish_frag_update(CDir *dir, MutationRef& mut); | |
882 | |
// Scatter-dirty bookkeeping. clear_dirty_scattered() overrides a base-class
// hook and is keyed by lock type.
883 | void clear_dirty_scattered(int type) override; | |
884 | bool is_dirty_scattered(); | |
885 | void clear_scatter_dirty(); // on rejoin ack | |
886 | |
// Scatter / scatter-gather steps. The `type` parameters are lock types; the
// version arguments are passed by value.
// NOTE(review): ordering and preconditions between these calls are defined by
// the callers, not by this header.
887 | void start_scatter(ScatterLock *lock); | |
888 | void finish_scatter_update(ScatterLock *lock, CDir *dir, | |
889 | version_t inode_version, version_t dir_accounted_version); | |
890 | void finish_scatter_gather_update(int type); | |
891 | void finish_scatter_gather_update_accounted(int type, MutationRef& mut, EMetaBlob *metablob); | |
892 | ||
893 | // -- snap -- | |
// Snap realm handling. The boolean flags default to false.
// NOTE(review): no_split / no_join presumably suppress splitting from or
// joining with the parent realm - confirm in CInode.cc.
894 | void open_snaprealm(bool no_split=false); | |
895 | void close_snaprealm(bool no_join=false); | |
// Const lookup that returns a SnapRealm pointer.
// NOTE(review): presumably the nearest realm covering this inode - confirm.
896 | SnapRealm *find_snaprealm() const; | |
// Snap state (de)serialization.
897 | void encode_snap(bufferlist& bl); | |
898 | void decode_snap(bufferlist::iterator& p); | |
899 | ||
900 | // -- caps -- (new) | |
901 | // client caps | |
902 | client_t loner_cap, want_loner_cap; | |
903 | ||
904 | client_t get_loner() const { return loner_cap; } | |
905 | client_t get_wanted_loner() const { return want_loner_cap; } | |
906 | ||
907 | // this is the loner state our locks should aim for | |
908 | client_t get_target_loner() const { | |
909 | if (loner_cap == want_loner_cap) | |
910 | return loner_cap; | |
911 | else | |
912 | return -1; | |
913 | } | |
914 | ||
// Loner selection and transitions.
// NOTE(review): by name, calc_* computes a candidate while choose_* also
// records it, and the bool results of try_* report success - confirm.
915 | client_t calc_ideal_loner(); | |
916 | client_t choose_ideal_loner(); | |
917 | bool try_set_loner(); | |
918 | void set_loner_cap(client_t l); | |
919 | bool try_drop_loner(); | |
920 | |
921 | // choose new lock state during recovery, based on issued caps | |
// `allissued` and `dirty_caps` are int values, presumably cap bitmasks.
922 | void choose_lock_state(SimpleLock *lock, int allissued); | |
923 | void choose_lock_states(int dirty_caps); | |
924 | ||
925 | int count_nonstale_caps() { | |
926 | int n = 0; | |
927 | for (std::map<client_t,Capability*>::iterator it = client_caps.begin(); | |
928 | it != client_caps.end(); | |
929 | ++it) | |
930 | if (!it->second->is_stale()) | |
931 | n++; | |
932 | return n; | |
933 | } | |
934 | bool multiple_nonstale_caps() { | |
935 | int n = 0; | |
936 | for (std::map<client_t,Capability*>::iterator it = client_caps.begin(); | |
937 | it != client_caps.end(); | |
938 | ++it) | |
939 | if (!it->second->is_stale()) { | |
940 | if (n) | |
941 | return true; | |
942 | n++; | |
943 | } | |
944 | return false; | |
945 | } | |
946 | ||
947 | bool is_any_caps() { return !client_caps.empty(); } | |
948 | bool is_any_nonstale_caps() { return count_nonstale_caps(); } | |
949 | ||
950 | const compact_map<int32_t,int32_t>& get_mds_caps_wanted() const { return mds_caps_wanted; } | |
951 | compact_map<int32_t,int32_t>& get_mds_caps_wanted() { return mds_caps_wanted; } | |
952 | ||
953 | const std::map<client_t,Capability*>& get_client_caps() const { return client_caps; } | |
954 | Capability *get_client_cap(client_t client) { | |
955 | auto client_caps_entry = client_caps.find(client); | |
956 | if (client_caps_entry != client_caps.end()) | |
957 | return client_caps_entry->second; | |
958 | return 0; | |
959 | } | |
960 | int get_client_cap_pending(client_t client) const { | |
961 | auto client_caps_entry = client_caps.find(client); | |
962 | if (client_caps_entry != client_caps.end()) { | |
963 | return client_caps_entry->second->pending(); | |
964 | } else { | |
965 | return 0; | |
966 | } | |
967 | } | |
968 | ||
// Client cap lifecycle. add_client_cap() returns the Capability for `client`;
// `conrealm` defaults to a null pointer.
// NOTE(review): presumably a null conrealm makes the implementation look the
// realm up itself - confirm in CInode.cc.
969 | Capability *add_client_cap(client_t client, Session *session, SnapRealm *conrealm=0); | |
970 | void remove_client_cap(client_t client); | |
971 | void move_to_realm(SnapRealm *realm); | |
972 | |
// Reconnect and export handling. export_client_caps() takes the destination
// map by reference.
973 | Capability *reconnect_cap(client_t client, const cap_reconnect_t& icr, Session *session); | |
974 | void clear_client_caps_after_export(); | |
975 | void export_client_caps(std::map<client_t,Capability::Export>& cl); | |
976 | |
977 | // caps allowed | |
// All of these are const queries returning int, presumably cap bitmasks.
978 | int get_caps_liked() const; | |
979 | int get_caps_allowed_ever() const; | |
980 | int get_caps_allowed_by_type(int type) const; | |
981 | int get_caps_careful() const; | |
982 | int get_xlocker_mask(client_t client) const; | |
983 | int get_caps_allowed_for_client(Session *s, inode_t *file_i) const; | |
984 | |
985 | // caps issued, wanted | |
// The p* arguments are optional out-pointers (default null); shift defaults
// to 0 and mask to -1.
// NOTE(review): presumably mask=-1 means "all bits" and the result is shifted
// by `shift` before masking - confirm the order in CInode.cc.
986 | int get_caps_issued(int *ploner = 0, int *pother = 0, int *pxlocker = 0, | |
987 | int shift = 0, int mask = -1); | |
988 | bool is_any_caps_wanted() const; | |
989 | int get_caps_wanted(int *ploner = 0, int *pother = 0, int shift = 0, int mask = -1) const; | |
990 | bool issued_caps_need_gather(SimpleLock *lock); | |
// Called by encode_replica() when the inode is not yet replicated.
991 | void replicate_relax_locks(); | |
992 | ||
993 | // -- authority -- | |
// Base-class override reporting this inode's authority.
994 | mds_authority_t authority() const override; | |
995 | |
996 | // -- auth pins -- | |
// `a` is a signed adjustment and `by` an opaque pointer.
// NOTE(review): `by` presumably identifies the pinner for debugging - confirm.
997 | void adjust_nested_auth_pins(int a, void *by); | |
// Base-class overrides for auth pinning.
998 | bool can_auth_pin() const override; | |
999 | void auth_pin(void *by) override; | |
1000 | void auth_unpin(void *by) override; | |
1001 | ||
1002 | // -- freeze -- | |
1003 | bool is_freezing_inode() const { return state_test(STATE_FREEZING); } | |
1004 | bool is_frozen_inode() const { return state_test(STATE_FROZEN); } | |
1005 | bool is_frozen_auth_pin() const { return state_test(STATE_FROZENAUTHPIN); } | |
// Base-class overrides plus a directory-specific query.
// NOTE(review): how these differ from the *_inode predicates above (e.g.
// whether they also consult the parent) is defined in CInode.cc.
1006 | bool is_frozen() const override; | |
1007 | bool is_frozen_dir() const; | |
1008 | bool is_freezing() const override; | |
1009 | |
1010 | /* Freeze the inode. auth_pin_allowance lets the caller account for any | |
1011 | * auth_pins it is itself holding/responsible for. */ | |
// NOTE(review): the bool result presumably distinguishes "frozen now" from
// "freezing, wait" - confirm.
1012 | bool freeze_inode(int auth_pin_allowance=0); | |
// Two overloads: one takes a caller-supplied list of contexts, one takes
// nothing.
1013 | void unfreeze_inode(std::list<MDSInternalContextBase*>& finished); | |
1014 | void unfreeze_inode(); | |
1015 | |
1016 | void freeze_auth_pin(); | |
1017 | void unfreeze_auth_pin(); | |
1018 | ||
1019 | // -- reference counting -- | |
// Base-class override reporting a bad put(): logs this inode, the pin id
// `by`, its pin_name() and the current ref count at debug level 0, then
// asserts.
// When MDS_REF_SET is defined the per-pin ref_map is logged and checked too.
1020 | void bad_put(int by) override { | |
1021 | generic_dout(0) << " bad put " << *this << " by " << by << " " << pin_name(by) << " was " << ref | |
1022 | #ifdef MDS_REF_SET | |
1023 | << " (" << ref_map << ")" | |
1024 | #endif | |
1025 | << dendl; | |
1026 | #ifdef MDS_REF_SET | |
// The pin being dropped must have a positive per-pin count.
1027 | assert(ref_map[by] > 0); | |
1028 | #endif | |
// The overall ref count must be positive.
1029 | assert(ref > 0); | |
1030 | } | |
// Base-class override reporting a bad get(): logs this inode, the pin id
// `by`, its pin_name() and the current ref count at debug level 0.
// Unlike bad_put() it asserts only when MDS_REF_SET is defined (the per-pin
// count must be non-negative); there is no unconditional assert.
1031 | void bad_get(int by) override { | |
1032 | generic_dout(0) << " bad get " << *this << " by " << by << " " << pin_name(by) << " was " << ref | |
1033 | #ifdef MDS_REF_SET | |
1034 | << " (" << ref_map << ")" | |
1035 | #endif | |
1036 | << dendl; | |
1037 | #ifdef MDS_REF_SET | |
1038 | assert(ref_map[by] >= 0); | |
1039 | #endif | |
1040 | } | |
// Base-class reference-counting hooks.
// NOTE(review): by name these run on the first get, the last put and every
// put respectively - confirm against MDSCacheObject.
1041 | void first_get() override; | |
1042 | void last_put() override; | |
1043 | void _put() override; | |
1044 | ||
1045 | ||
1046 | // -- hierarchy stuff -- | |
1047 | public: | |
1048 | void set_primary_parent(CDentry *p) { | |
1049 | assert(parent == 0); | |
1050 | parent = p; | |
1051 | } | |
1052 | void remove_primary_parent(CDentry *dn) { | |
1053 | assert(dn == parent); | |
1054 | parent = 0; | |
1055 | } | |
// Register / unregister a remote parent dentry.
// NOTE(review): presumably these maintain the remote_parents set whose size
// num_remote_parents() reports - confirm in CInode.cc.
1056 | void add_remote_parent(CDentry *p); | |
1057 | void remove_remote_parent(CDentry *p); | |
1058 | int num_remote_parents() { | |
1059 | return remote_parents.size(); | |
1060 | } | |
1061 | ||
1062 | void push_projected_parent(CDentry *dn) { | |
1063 | projected_parent.push_back(dn); | |
1064 | } | |
1065 | void pop_projected_parent() { | |
1066 | assert(projected_parent.size()); | |
1067 | parent = projected_parent.front(); | |
1068 | projected_parent.pop_front(); | |
1069 | } | |
1070 | ||
7c673cae | 1071 | public: |
// Export pinning. `update` defaults to false and `inherit` to true.
// NOTE(review): presumably inherit=true makes get_export_pin() consult
// ancestors when this inode has no pin of its own - confirm in CInode.cc.
31f18b77 | 1072 | void maybe_export_pin(bool update=false); |
7c673cae FG |
1073 | void set_export_pin(mds_rank_t rank); | |
1074 | mds_rank_t get_export_pin(bool inherit=true) const; | |
1075 | bool is_exportable(mds_rank_t dest) const; | |
1076 | |
// print() overrides the base-class printer; dump() writes to a Formatter.
1077 | void print(ostream& out) override; | |
1078 | void dump(Formatter *f) const; | |
1079 | ||
1080 | /** | |
1081 | * @defgroup Scrubbing and fsck | |
1082 | * @{ | |
1083 | */ | |
1084 | ||
1085 | /** | |
1086 | * Report the results of validation against a particular inode. | |
1087 | * Each member is a member_status<T> record. | |
1088 | * <member>.checked represents if validation was performed against the member. | |
1089 | * <member>.passed represents if the member passed validation. | |
1090 | * performed_validation is set to true if the validation was actually | |
1091 | * run. It might not be run if, for instance, the inode is marked as dirty. | |
1092 | * passed_validation is set to true if everything that was checked | |
1093 | * passed its validation. | |
1094 | */ | |
1095 | struct validated_data { | |
1096 | template<typename T>struct member_status { | |
1097 | bool checked; | |
1098 | bool passed; | |
1099 | int ondisk_read_retval; | |
1100 | T ondisk_value; | |
1101 | T memory_value; | |
1102 | std::stringstream error_str; | |
1103 | member_status() : checked(false), passed(false), | |
1104 | ondisk_read_retval(0) {} | |
1105 | }; | |
1106 | ||
1107 | bool performed_validation; | |
1108 | bool passed_validation; | |
1109 | ||
1110 | struct raw_stats_t { | |
1111 | frag_info_t dirstat; | |
1112 | nest_info_t rstat; | |
1113 | }; | |
1114 | ||
1115 | member_status<inode_backtrace_t> backtrace; | |
1116 | member_status<inode_t> inode; | |
1117 | member_status<raw_stats_t> raw_stats; | |
1118 | ||
1119 | validated_data() : performed_validation(false), | |
1120 | passed_validation(false) {} | |
1121 | ||
1122 | void dump(Formatter *f) const; | |
1123 | }; | |
1124 | ||
1125 | /** | |
1126 | * Validate that the on-disk state of an inode matches what | |
1127 | * we expect from our memory state. Currently this checks that: | |
1128 | * 1) The backtrace associated with the file data exists and is correct | |
1129 | * 2) For directories, the actual inode metadata matches our memory state, | |
1130 | * 3) For directories, the rstats match | |
1131 | * | |
1132 | * @param results A freshly-created validated_data struct, with values set | |
1133 | * as described in the struct documentation. | |
1134 | * @param mdr (NOTE(review): not in the current signature) The request to be | |
1135 | * responded to upon the completion of the validation (or NULL) | |
1136 | * @param fin Context to call back on completion (or NULL) | |
1137 | */ | |
1138 | void validate_disk_state(validated_data *results, | |
1139 | MDSInternalContext *fin); | |
// Static helper that takes a validated_data and a Formatter.
// NOTE(review): presumably writes `results` to `f` - confirm in CInode.cc.
1140 | static void dump_validation_results(const validated_data& results, | |
1141 | Formatter *f); | |
1142 | private: | |
// Internal step function used with ValidationContinuation (a friend class
// declared just below).
// NOTE(review): rval looks like the previous stage's result and the bool
// return presumably signals completion - confirm.
1143 | bool _validate_disk_state(class ValidationContinuation *c, | |
1144 | int rval, int stage); | |
1145 | friend class ValidationContinuation; | |
1146 | /** @} Scrubbing and fsck */ | |
1147 | }; | |
1148 | ||
// Stream-insertion overload for CInode::scrub_stamp_info_t; declared here,
// defined elsewhere.
1149 | ostream& operator<<(ostream& out, const CInode::scrub_stamp_info_t& si); | |
1150 | ||
1151 | #undef dout_context | |
1152 | #endif |