]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2014 Red Hat | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include <iostream> | |
16 | ||
17 | #include "ScrubStack.h" | |
18 | #include "common/Finisher.h" | |
19 | #include "mds/MDSRank.h" | |
20 | #include "mds/MDCache.h" | |
21 | #include "mds/MDSContinuation.h" | |
22 | ||
23 | #define dout_context g_ceph_context | |
24 | #define dout_subsys ceph_subsys_mds | |
25 | #undef dout_prefix | |
26 | #define dout_prefix _prefix(_dout, scrubstack->mdcache->mds) | |
27 | static ostream& _prefix(std::ostream *_dout, MDSRank *mds) { | |
28 | return *_dout << "mds." << mds->get_nodeid() << ".scrubstack "; | |
29 | } | |
30 | ||
31 | void ScrubStack::push_inode(CInode *in) | |
32 | { | |
33 | dout(20) << "pushing " << *in << " on top of ScrubStack" << dendl; | |
34 | if (!in->item_scrub.is_on_list()) { | |
35 | in->get(CInode::PIN_SCRUBQUEUE); | |
36 | stack_size++; | |
37 | } | |
38 | inode_stack.push_front(&in->item_scrub); | |
39 | } | |
40 | ||
41 | void ScrubStack::push_inode_bottom(CInode *in) | |
42 | { | |
43 | dout(20) << "pushing " << *in << " on bottom of ScrubStack" << dendl; | |
44 | if (!in->item_scrub.is_on_list()) { | |
45 | in->get(CInode::PIN_SCRUBQUEUE); | |
46 | stack_size++; | |
47 | } | |
48 | inode_stack.push_back(&in->item_scrub); | |
49 | } | |
50 | ||
51 | void ScrubStack::pop_inode(CInode *in) | |
52 | { | |
53 | dout(20) << "popping " << *in | |
54 | << " off of ScrubStack" << dendl; | |
55 | assert(in->item_scrub.is_on_list()); | |
56 | in->put(CInode::PIN_SCRUBQUEUE); | |
57 | in->item_scrub.remove_myself(); | |
58 | stack_size--; | |
59 | } | |
60 | ||
61 | void ScrubStack::_enqueue_inode(CInode *in, CDentry *parent, | |
62 | const ScrubHeaderRefConst& header, | |
63 | MDSInternalContextBase *on_finish, bool top) | |
64 | { | |
65 | dout(10) << __func__ << " with {" << *in << "}" | |
66 | << ", on_finish=" << on_finish << ", top=" << top << dendl; | |
67 | assert(mdcache->mds->mds_lock.is_locked_by_me()); | |
68 | in->scrub_initialize(parent, header, on_finish); | |
69 | if (top) | |
70 | push_inode(in); | |
71 | else | |
72 | push_inode_bottom(in); | |
73 | } | |
74 | ||
75 | void ScrubStack::enqueue_inode(CInode *in, const ScrubHeaderRefConst& header, | |
76 | MDSInternalContextBase *on_finish, bool top) | |
77 | { | |
78 | _enqueue_inode(in, NULL, header, on_finish, top); | |
79 | kick_off_scrubs(); | |
80 | } | |
81 | ||
/**
 * Drive scrubbing forward: walk the stack from the top, starting work
 * on entries until the configured op limit
 * (mds_max_scrub_ops_in_progress) is reached, the stack is exhausted,
 * or the current entry cannot make progress.
 */
void ScrubStack::kick_off_scrubs()
{
  dout(20) << __func__ << " entering with " << scrubs_in_progress << " in "
      "progress and " << stack_size << " in the stack" << dendl;
  bool can_continue = true;
  elist<CInode*>::iterator i = inode_stack.begin();
  while (g_conf->mds_max_scrub_ops_in_progress > scrubs_in_progress &&
      can_continue && !i.end()) {
    CInode *curi = *i;
    ++i; // we have our reference, push iterator forward

    dout(20) << __func__ << " examining " << *curi << dendl;

    if (!curi->is_dir()) {
      // it's a regular file, symlink, or hard link
      pop_inode(curi); // we only touch it this once, so remove from stack

      if (!curi->scrub_info()->on_finish) {
        // no caller-supplied completion: count this as an in-flight op
        // and have the generic kicker re-enter this loop when the
        // validation finishes
        scrubs_in_progress++;
        curi->scrub_set_finisher(&scrub_kick);
      }
      scrub_file_inode(curi);
      can_continue = true;
    } else {
      bool completed; // it's done, so pop it off the stack
      bool terminal; // not done, but we can start ops on other directories
      bool progress; // it added new dentries to the top of the stack
      scrub_dir_inode(curi, &progress, &terminal, &completed);
      if (completed) {
        dout(20) << __func__ << " dir completed" << dendl;
        pop_inode(curi);
      } else if (progress) {
        dout(20) << __func__ << " dir progressed" << dendl;
        // we added new stuff to top of stack, so reset ourselves there
        i = inode_stack.begin();
      } else {
        dout(20) << __func__ << " dir no-op" << dendl;
      }

      // a directory that neither completed, went terminal, nor added
      // children is blocked (e.g. waiting on a fetch) — stop the walk
      can_continue = progress || terminal || completed;
    }
  }
}
125 | ||
/**
 * Scrub one directory inode: iterate over the dirfrags already being
 * scrubbed, then (up to the op limit) start scrubbing further frags.
 *
 * @param in the directory inode to work on
 * @param added_children [out] true if new dentries were pushed onto
 *        the top of the scrub stack
 * @param terminal [out] true if every frag examined is terminal — no
 *        more ops can usefully be started on this inode right now
 * @param done [out] true if all frags have finished scrubbing
 */
void ScrubStack::scrub_dir_inode(CInode *in,
                                 bool *added_children,
                                 bool *terminal,
                                 bool *done)
{
  dout(10) << __func__ << *in << dendl;

  *added_children = false;
  bool all_frags_terminal = true;
  bool all_frags_done = true;

  const ScrubHeaderRefConst& header = in->scrub_info()->header;

  if (header->get_recursive()) {
    list<frag_t> scrubbing_frags;
    list<CDir*> scrubbing_cdirs;
    in->scrub_dirfrags_scrubbing(&scrubbing_frags);
    dout(20) << __func__ << " iterating over " << scrubbing_frags.size()
             << " scrubbing frags" << dendl;
    for (list<frag_t>::iterator i = scrubbing_frags.begin();
        i != scrubbing_frags.end();
        ++i) {
      // turn frags into CDir *
      CDir *dir = in->get_dirfrag(*i);
      if (dir) {
	scrubbing_cdirs.push_back(dir);
	dout(25) << __func__ << " got CDir " << *dir << " presently scrubbing" << dendl;
      } else {
        // frag is gone from the cache; mark it finished rather than stall
	in->scrub_dirfrag_finished(*i);
	dout(25) << __func__ << " missing dirfrag " << *i << " skip scrubbing" << dendl;
      }
    }

    dout(20) << __func__ << " consuming from " << scrubbing_cdirs.size()
	     << " scrubbing cdirs" << dendl;

    list<CDir*>::iterator i = scrubbing_cdirs.begin();
    while (g_conf->mds_max_scrub_ops_in_progress > scrubs_in_progress) {
      // select next CDir: first drain the already-scrubbing list, then
      // ask get_next_cdir() for fresh frags
      CDir *cur_dir = NULL;
      if (i != scrubbing_cdirs.end()) {
	cur_dir = *i;
	++i;
	dout(20) << __func__ << " got cur_dir = " << *cur_dir << dendl;
      } else {
	bool ready = get_next_cdir(in, &cur_dir);
	dout(20) << __func__ << " get_next_cdir ready=" << ready << dendl;

	if (ready && cur_dir) {
	  scrubbing_cdirs.push_back(cur_dir);
	} else if (!ready) {
	  // We are waiting for load of a frag
	  all_frags_done = false;
	  all_frags_terminal = false;
	  break;
	} else {
	  // Finished with all frags
	  break;
	}
      }
      // scrub that CDir
      bool frag_added_children = false;
      bool frag_terminal = true;
      bool frag_done = false;
      scrub_dirfrag(cur_dir, header,
		    &frag_added_children, &frag_terminal, &frag_done);
      if (frag_done) {
	cur_dir->inode->scrub_dirfrag_finished(cur_dir->frag);
      }
      // aggregate per-frag results into the whole-inode answer
      *added_children |= frag_added_children;
      all_frags_terminal = all_frags_terminal && frag_terminal;
      all_frags_done = all_frags_done && frag_done;
    }

    dout(20) << "finished looping; all_frags_terminal=" << all_frags_terminal
	     << ", all_frags_done=" << all_frags_done << dendl;
  } else {
    // non-recursive scrub: frags are not descended into, so the
    // "all frags" defaults (terminal/done == true) stand
    dout(20) << "!scrub_recursive" << dendl;
  }

  if (all_frags_done) {
    assert (!*added_children); // can't do this if children are still pending

    // OK, so now I can... fire off a validate on the dir inode, and
    // when it completes, come through here again, noticing that we've
    // set a flag to indicate the validate happened, and
    scrub_dir_inode_final(in);
  }

  *terminal = all_frags_terminal;
  *done = all_frags_done;
  dout(10) << __func__ << " is exiting " << *terminal << " " << *done << dendl;
  return;
}
220 | ||
221 | bool ScrubStack::get_next_cdir(CInode *in, CDir **new_dir) | |
222 | { | |
223 | dout(20) << __func__ << " on " << *in << dendl; | |
224 | frag_t next_frag; | |
225 | int r = in->scrub_dirfrag_next(&next_frag); | |
226 | assert (r >= 0); | |
227 | ||
228 | if (r == 0) { | |
229 | // we got a frag to scrub, otherwise it would be ENOENT | |
230 | dout(25) << "looking up new frag " << next_frag << dendl; | |
231 | CDir *next_dir = in->get_or_open_dirfrag(mdcache, next_frag); | |
232 | if (!next_dir->is_complete()) { | |
233 | scrubs_in_progress++; | |
234 | next_dir->fetch(&scrub_kick); | |
235 | dout(25) << "fetching frag from RADOS" << dendl; | |
236 | return false; | |
237 | } | |
238 | *new_dir = next_dir; | |
239 | dout(25) << "returning dir " << *new_dir << dendl; | |
240 | return true; | |
241 | } | |
242 | assert(r == ENOENT); | |
243 | // there are no dirfrags left | |
244 | *new_dir = NULL; | |
245 | return true; | |
246 | } | |
247 | ||
/**
 * Completion fired when CInode::validate_disk_state() finishes.
 * Carries the validation result back into the owning ScrubStack via
 * _validate_inode_done().
 */
class C_InodeValidated : public MDSInternalContext
{
public:
  ScrubStack *stack;             // owning stack to notify on completion
  CInode::validated_data result; // filled in by validate_disk_state()
  CInode *target;                // inode that was validated

  C_InodeValidated(MDSRank *mds, ScrubStack *stack_, CInode *target_)
    : MDSInternalContext(mds), stack(stack_), target(target_)
  {}

  void finish(int r) override
  {
    stack->_validate_inode_done(target, r, result);
  }
};
264 | ||
265 | ||
266 | void ScrubStack::scrub_dir_inode_final(CInode *in) | |
267 | { | |
268 | dout(20) << __func__ << *in << dendl; | |
269 | ||
270 | // Two passes through this function. First one triggers inode validation, | |
271 | // second one sets finally_done | |
272 | // FIXME: kind of overloading scrub_in_progress here, using it while | |
273 | // dentry is still on stack to indicate that we have finished | |
274 | // doing our validate_disk_state on the inode | |
275 | // FIXME: the magic-constructing scrub_info() is going to leave | |
276 | // an unneeded scrub_infop lying around here | |
277 | if (!in->scrub_info()->children_scrubbed) { | |
278 | if (!in->scrub_info()->on_finish) { | |
279 | scrubs_in_progress++; | |
280 | in->scrub_set_finisher(&scrub_kick); | |
281 | } | |
282 | ||
283 | in->scrub_children_finished(); | |
284 | C_InodeValidated *fin = new C_InodeValidated(mdcache->mds, this, in); | |
285 | in->validate_disk_state(&fin->result, fin); | |
286 | } | |
287 | ||
288 | return; | |
289 | } | |
290 | ||
/**
 * Scrub one dirfrag: initialize it if needed, then enqueue its
 * dentries' inodes onto the stack until the frag is drained, a fetch
 * is pending, or scrub_dentry_next says to wait.
 *
 * @param dir the dirfrag to scrub (must not be NULL)
 * @param header shared scrub parameters
 * @param added_children [out] true if any dentry inodes were enqueued
 * @param is_terminal [out] true if the frag has no more dentries to
 *        hand out (everything in flight or finished)
 * @param done [out] true if the frag is fully scrubbed
 */
void ScrubStack::scrub_dirfrag(CDir *dir,
			       const ScrubHeaderRefConst& header,
			       bool *added_children, bool *is_terminal,
			       bool *done)
{
  assert(dir != NULL);

  dout(20) << __func__ << " on " << *dir << dendl;
  *added_children = false;
  *is_terminal = false;
  *done = false;


  if (!dir->scrub_info()->directory_scrubbing) {
    // Get the frag complete before calling
    // scrub initialize, so that it can populate its lists
    // of dentries.
    if (!dir->is_complete()) {
      scrubs_in_progress++;
      dir->fetch(&scrub_kick);
      return;
    }

    dir->scrub_initialize(header);
  }

  int r = 0;
  while(r == 0) {
    CDentry *dn = NULL;
    // optimistically count an op; undo unless the call parked us (EAGAIN),
    // in which case scrub_kick will decrement when it re-drives the loop
    scrubs_in_progress++;
    r = dir->scrub_dentry_next(&scrub_kick, &dn);
    if (r != EAGAIN) {
      scrubs_in_progress--;
    }

    if (r == EAGAIN) {
      // Drop out, CDir fetcher will call back our kicker context
      dout(20) << __func__ << " waiting for fetch on " << *dir << dendl;
      return;
    }

    if (r == ENOENT) {
      // Nothing left to scrub, are we done?
      std::list<CDentry*> scrubbing;
      dir->scrub_dentries_scrubbing(&scrubbing);
      if (scrubbing.empty()) {
        dout(20) << __func__ << " dirfrag done: " << *dir << dendl;
        // FIXME: greg: What's the diff meant to be between done and terminal
	dir->scrub_finished();
        *done = true;
        *is_terminal = true;
      } else {
        dout(20) << __func__ << " " << scrubbing.size() << " dentries still "
                    "scrubbing in " << *dir << dendl;
      }
      return;
    }

    // scrub_dentry_next defined to only give EAGAIN, ENOENT, 0 -- we should
    // never get random IO errors here.
    assert(r == 0);

    _enqueue_inode(dn->get_projected_inode(), dn, header, NULL, true);

    *added_children = true;
  }
}
358 | ||
359 | void ScrubStack::scrub_file_inode(CInode *in) | |
360 | { | |
361 | C_InodeValidated *fin = new C_InodeValidated(mdcache->mds, this, in); | |
362 | // At this stage the DN is already past scrub_initialize, so | |
363 | // it's in the cache, it has PIN_SCRUBQUEUE and it is authpinned | |
364 | in->validate_disk_state(&fin->result, fin); | |
365 | } | |
366 | ||
/**
 * Handle completion of an inode's on-disk validation: record any
 * damage, log failures, finish the inode's scrub state and queue the
 * caller's completion (if any) on the finisher.
 *
 * @param in the inode that was validated
 * @param r return code from validate_disk_state
 * @param result detailed per-section validation outcome
 */
void ScrubStack::_validate_inode_done(CInode *in, int r,
				      const CInode::validated_data &result)
{
  LogChannelRef clog = mdcache->mds->clog;
  const ScrubHeaderRefConst header = in->scrub_info()->header;

  std::string path;
  if (!result.passed_validation) {
    // Build path string for use in messages
    in->make_path_string(path, true);
  }

  if (result.backtrace.checked && !result.backtrace.passed) {
    // Record backtrace fails as remote linkage damage, as
    // we may not be able to resolve hard links to this inode
    mdcache->mds->damage_table.notify_remote_damaged(in->inode.ino, path);
  } else if (result.inode.checked && !result.inode.passed) {
    // Record damaged inode structures as damaged dentries as
    // that is where they are stored
    auto parent = in->get_projected_parent_dn();
    if (parent) {
      auto dir = parent->get_dir();
      mdcache->mds->damage_table.notify_dentry(
          dir->inode->ino(), dir->frag, parent->last, parent->name, path);
    }
  }

  // Inform the cluster log if we found an error
  if (!result.passed_validation) {
    clog->warn() << "Scrub error on inode " << in->ino()
                 << " (" << path << ") see " << g_conf->name
                 << " log and `damage ls` output for details";

    // Put the verbose JSON output into the MDS log for later inspection
    JSONFormatter f;
    result.dump(&f);
    std::ostringstream out;
    f.flush(out);
    derr << __func__ << " scrub error on inode " << *in << ": " << out.str()
         << dendl;
  } else {
    dout(10) << __func__ << " scrub passed on inode " << *in << dendl;
  }

  MDSInternalContextBase *c = NULL;
  in->scrub_finished(&c);

  // for a non-recursive scrub of the origin inode, report the result
  // directly into the caller-supplied formatter
  if (!header->get_recursive() && in == header->get_origin()) {
    if (r >= 0) { // we got into the scrubbing dump it
      result.dump(&(header->get_formatter()));
    } else { // we failed the lookup or something; dump ourselves
      header->get_formatter().open_object_section("results");
      header->get_formatter().dump_int("return_code", r);
      header->get_formatter().close_section(); // results
    }
  }
  if (c) {
    // run the caller's completion from the finisher, not inline
    finisher->queue(new MDSIOContextWrapper(mdcache->mds, c), 0);
  }
}
427 | ||
// Context that re-enters kick_off_scrubs() when an async operation
// (frag fetch, inode validation) completes; see uses of scrub_kick.
ScrubStack::C_KickOffScrubs::C_KickOffScrubs(MDCache *mdcache, ScrubStack *s)
  : MDSInternalContext(mdcache->mds), stack(s) { }