]> git.proxmox.com Git - ceph.git/blob - ceph/src/mds/ScrubStack.cc
update sources to v12.1.3
[ceph.git] / ceph / src / mds / ScrubStack.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2014 Red Hat
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #include <iostream>
16
17 #include "ScrubStack.h"
18 #include "common/Finisher.h"
19 #include "mds/MDSRank.h"
20 #include "mds/MDCache.h"
21 #include "mds/MDSContinuation.h"
22
23 #define dout_context g_ceph_context
24 #define dout_subsys ceph_subsys_mds
25 #undef dout_prefix
26 #define dout_prefix _prefix(_dout, scrubstack->mdcache->mds)
27 static ostream& _prefix(std::ostream *_dout, MDSRank *mds) {
28 return *_dout << "mds." << mds->get_nodeid() << ".scrubstack ";
29 }
30
31 void ScrubStack::push_inode(CInode *in)
32 {
33 dout(20) << "pushing " << *in << " on top of ScrubStack" << dendl;
34 if (!in->item_scrub.is_on_list()) {
35 in->get(CInode::PIN_SCRUBQUEUE);
36 stack_size++;
37 }
38 inode_stack.push_front(&in->item_scrub);
39 }
40
41 void ScrubStack::push_inode_bottom(CInode *in)
42 {
43 dout(20) << "pushing " << *in << " on bottom of ScrubStack" << dendl;
44 if (!in->item_scrub.is_on_list()) {
45 in->get(CInode::PIN_SCRUBQUEUE);
46 stack_size++;
47 }
48 inode_stack.push_back(&in->item_scrub);
49 }
50
51 void ScrubStack::pop_inode(CInode *in)
52 {
53 dout(20) << "popping " << *in
54 << " off of ScrubStack" << dendl;
55 assert(in->item_scrub.is_on_list());
56 in->put(CInode::PIN_SCRUBQUEUE);
57 in->item_scrub.remove_myself();
58 stack_size--;
59 }
60
61 void ScrubStack::_enqueue_inode(CInode *in, CDentry *parent,
62 const ScrubHeaderRefConst& header,
63 MDSInternalContextBase *on_finish, bool top)
64 {
65 dout(10) << __func__ << " with {" << *in << "}"
66 << ", on_finish=" << on_finish << ", top=" << top << dendl;
67 assert(mdcache->mds->mds_lock.is_locked_by_me());
68 in->scrub_initialize(parent, header, on_finish);
69 if (top)
70 push_inode(in);
71 else
72 push_inode_bottom(in);
73 }
74
75 void ScrubStack::enqueue_inode(CInode *in, const ScrubHeaderRefConst& header,
76 MDSInternalContextBase *on_finish, bool top)
77 {
78 _enqueue_inode(in, NULL, header, on_finish, top);
79 kick_off_scrubs();
80 }
81
82 void ScrubStack::kick_off_scrubs()
83 {
84 dout(20) << __func__ << " entering with " << scrubs_in_progress << " in "
85 "progress and " << stack_size << " in the stack" << dendl;
86 bool can_continue = true;
87 elist<CInode*>::iterator i = inode_stack.begin();
88 while (g_conf->mds_max_scrub_ops_in_progress > scrubs_in_progress &&
89 can_continue && !i.end()) {
90 CInode *curi = *i;
91 ++i; // we have our reference, push iterator forward
92
93 dout(20) << __func__ << " examining " << *curi << dendl;
94
95 if (!curi->is_dir()) {
96 // it's a regular file, symlink, or hard link
97 pop_inode(curi); // we only touch it this once, so remove from stack
98
99 if (!curi->scrub_info()->on_finish) {
100 scrubs_in_progress++;
101 curi->scrub_set_finisher(&scrub_kick);
102 }
103 scrub_file_inode(curi);
104 can_continue = true;
105 } else {
106 bool completed; // it's done, so pop it off the stack
107 bool terminal; // not done, but we can start ops on other directories
108 bool progress; // it added new dentries to the top of the stack
109 scrub_dir_inode(curi, &progress, &terminal, &completed);
110 if (completed) {
111 dout(20) << __func__ << " dir completed" << dendl;
112 pop_inode(curi);
113 } else if (progress) {
114 dout(20) << __func__ << " dir progressed" << dendl;
115 // we added new stuff to top of stack, so reset ourselves there
116 i = inode_stack.begin();
117 } else {
118 dout(20) << __func__ << " dir no-op" << dendl;
119 }
120
121 can_continue = progress || terminal || completed;
122 }
123 }
124 }
125
126 void ScrubStack::scrub_dir_inode(CInode *in,
127 bool *added_children,
128 bool *terminal,
129 bool *done)
130 {
131 dout(10) << __func__ << *in << dendl;
132
133 *added_children = false;
134 bool all_frags_terminal = true;
135 bool all_frags_done = true;
136
137 const ScrubHeaderRefConst& header = in->scrub_info()->header;
138
139 if (header->get_recursive()) {
140 list<frag_t> scrubbing_frags;
141 list<CDir*> scrubbing_cdirs;
142 in->scrub_dirfrags_scrubbing(&scrubbing_frags);
143 dout(20) << __func__ << " iterating over " << scrubbing_frags.size()
144 << " scrubbing frags" << dendl;
145 for (list<frag_t>::iterator i = scrubbing_frags.begin();
146 i != scrubbing_frags.end();
147 ++i) {
148 // turn frags into CDir *
149 CDir *dir = in->get_dirfrag(*i);
150 if (dir) {
151 scrubbing_cdirs.push_back(dir);
152 dout(25) << __func__ << " got CDir " << *dir << " presently scrubbing" << dendl;
153 } else {
154 in->scrub_dirfrag_finished(*i);
155 dout(25) << __func__ << " missing dirfrag " << *i << " skip scrubbing" << dendl;
156 }
157 }
158
159 dout(20) << __func__ << " consuming from " << scrubbing_cdirs.size()
160 << " scrubbing cdirs" << dendl;
161
162 list<CDir*>::iterator i = scrubbing_cdirs.begin();
163 while (g_conf->mds_max_scrub_ops_in_progress > scrubs_in_progress) {
164 // select next CDir
165 CDir *cur_dir = NULL;
166 if (i != scrubbing_cdirs.end()) {
167 cur_dir = *i;
168 ++i;
169 dout(20) << __func__ << " got cur_dir = " << *cur_dir << dendl;
170 } else {
171 bool ready = get_next_cdir(in, &cur_dir);
172 dout(20) << __func__ << " get_next_cdir ready=" << ready << dendl;
173
174 if (ready && cur_dir) {
175 scrubbing_cdirs.push_back(cur_dir);
176 } else if (!ready) {
177 // We are waiting for load of a frag
178 all_frags_done = false;
179 all_frags_terminal = false;
180 break;
181 } else {
182 // Finished with all frags
183 break;
184 }
185 }
186 // scrub that CDir
187 bool frag_added_children = false;
188 bool frag_terminal = true;
189 bool frag_done = false;
190 scrub_dirfrag(cur_dir, header,
191 &frag_added_children, &frag_terminal, &frag_done);
192 if (frag_done) {
193 cur_dir->inode->scrub_dirfrag_finished(cur_dir->frag);
194 }
195 *added_children |= frag_added_children;
196 all_frags_terminal = all_frags_terminal && frag_terminal;
197 all_frags_done = all_frags_done && frag_done;
198 }
199
200 dout(20) << "finished looping; all_frags_terminal=" << all_frags_terminal
201 << ", all_frags_done=" << all_frags_done << dendl;
202 } else {
203 dout(20) << "!scrub_recursive" << dendl;
204 }
205
206 if (all_frags_done) {
207 assert (!*added_children); // can't do this if children are still pending
208
209 // OK, so now I can... fire off a validate on the dir inode, and
210 // when it completes, come through here again, noticing that we've
211 // set a flag to indicate the validate happened, and
212 scrub_dir_inode_final(in);
213 }
214
215 *terminal = all_frags_terminal;
216 *done = all_frags_done;
217 dout(10) << __func__ << " is exiting " << *terminal << " " << *done << dendl;
218 return;
219 }
220
221 bool ScrubStack::get_next_cdir(CInode *in, CDir **new_dir)
222 {
223 dout(20) << __func__ << " on " << *in << dendl;
224 frag_t next_frag;
225 int r = in->scrub_dirfrag_next(&next_frag);
226 assert (r >= 0);
227
228 if (r == 0) {
229 // we got a frag to scrub, otherwise it would be ENOENT
230 dout(25) << "looking up new frag " << next_frag << dendl;
231 CDir *next_dir = in->get_or_open_dirfrag(mdcache, next_frag);
232 if (!next_dir->is_complete()) {
233 scrubs_in_progress++;
234 next_dir->fetch(&scrub_kick);
235 dout(25) << "fetching frag from RADOS" << dendl;
236 return false;
237 }
238 *new_dir = next_dir;
239 dout(25) << "returning dir " << *new_dir << dendl;
240 return true;
241 }
242 assert(r == ENOENT);
243 // there are no dirfrags left
244 *new_dir = NULL;
245 return true;
246 }
247
248 class C_InodeValidated : public MDSInternalContext
249 {
250 public:
251 ScrubStack *stack;
252 CInode::validated_data result;
253 CInode *target;
254
255 C_InodeValidated(MDSRank *mds, ScrubStack *stack_, CInode *target_)
256 : MDSInternalContext(mds), stack(stack_), target(target_)
257 {}
258
259 void finish(int r) override
260 {
261 stack->_validate_inode_done(target, r, result);
262 }
263 };
264
265
266 void ScrubStack::scrub_dir_inode_final(CInode *in)
267 {
268 dout(20) << __func__ << *in << dendl;
269
270 // Two passes through this function. First one triggers inode validation,
271 // second one sets finally_done
272 // FIXME: kind of overloading scrub_in_progress here, using it while
273 // dentry is still on stack to indicate that we have finished
274 // doing our validate_disk_state on the inode
275 // FIXME: the magic-constructing scrub_info() is going to leave
276 // an unneeded scrub_infop lying around here
277 if (!in->scrub_info()->children_scrubbed) {
278 if (!in->scrub_info()->on_finish) {
279 scrubs_in_progress++;
280 in->scrub_set_finisher(&scrub_kick);
281 }
282
283 in->scrub_children_finished();
284 C_InodeValidated *fin = new C_InodeValidated(mdcache->mds, this, in);
285 in->validate_disk_state(&fin->result, fin);
286 }
287
288 return;
289 }
290
291 void ScrubStack::scrub_dirfrag(CDir *dir,
292 const ScrubHeaderRefConst& header,
293 bool *added_children, bool *is_terminal,
294 bool *done)
295 {
296 assert(dir != NULL);
297
298 dout(20) << __func__ << " on " << *dir << dendl;
299 *added_children = false;
300 *is_terminal = false;
301 *done = false;
302
303
304 if (!dir->scrub_info()->directory_scrubbing) {
305 // Get the frag complete before calling
306 // scrub initialize, so that it can populate its lists
307 // of dentries.
308 if (!dir->is_complete()) {
309 scrubs_in_progress++;
310 dir->fetch(&scrub_kick);
311 return;
312 }
313
314 dir->scrub_initialize(header);
315 }
316
317 int r = 0;
318 while(r == 0) {
319 CDentry *dn = NULL;
320 scrubs_in_progress++;
321 r = dir->scrub_dentry_next(&scrub_kick, &dn);
322 if (r != EAGAIN) {
323 scrubs_in_progress--;
324 }
325
326 if (r == EAGAIN) {
327 // Drop out, CDir fetcher will call back our kicker context
328 dout(20) << __func__ << " waiting for fetch on " << *dir << dendl;
329 return;
330 }
331
332 if (r == ENOENT) {
333 // Nothing left to scrub, are we done?
334 std::list<CDentry*> scrubbing;
335 dir->scrub_dentries_scrubbing(&scrubbing);
336 if (scrubbing.empty()) {
337 dout(20) << __func__ << " dirfrag done: " << *dir << dendl;
338 // FIXME: greg: What's the diff meant to be between done and terminal
339 dir->scrub_finished();
340 *done = true;
341 *is_terminal = true;
342 } else {
343 dout(20) << __func__ << " " << scrubbing.size() << " dentries still "
344 "scrubbing in " << *dir << dendl;
345 }
346 return;
347 }
348
349 // scrub_dentry_next defined to only give EAGAIN, ENOENT, 0 -- we should
350 // never get random IO errors here.
351 assert(r == 0);
352
353 _enqueue_inode(dn->get_projected_inode(), dn, header, NULL, true);
354
355 *added_children = true;
356 }
357 }
358
359 void ScrubStack::scrub_file_inode(CInode *in)
360 {
361 C_InodeValidated *fin = new C_InodeValidated(mdcache->mds, this, in);
362 // At this stage the DN is already past scrub_initialize, so
363 // it's in the cache, it has PIN_SCRUBQUEUE and it is authpinned
364 in->validate_disk_state(&fin->result, fin);
365 }
366
367 void ScrubStack::_validate_inode_done(CInode *in, int r,
368 const CInode::validated_data &result)
369 {
370 LogChannelRef clog = mdcache->mds->clog;
371 const ScrubHeaderRefConst header = in->scrub_info()->header;
372
373 std::string path;
374 if (!result.passed_validation) {
375 // Build path string for use in messages
376 in->make_path_string(path, true);
377 }
378
379 if (result.backtrace.checked && !result.backtrace.passed) {
380 // Record backtrace fails as remote linkage damage, as
381 // we may not be able to resolve hard links to this inode
382 mdcache->mds->damage_table.notify_remote_damaged(in->inode.ino, path);
383 } else if (result.inode.checked && !result.inode.passed) {
384 // Record damaged inode structures as damaged dentries as
385 // that is where they are stored
386 auto parent = in->get_projected_parent_dn();
387 if (parent) {
388 auto dir = parent->get_dir();
389 mdcache->mds->damage_table.notify_dentry(
390 dir->inode->ino(), dir->frag, parent->last, parent->name, path);
391 }
392 }
393
394 // Inform the cluster log if we found an error
395 if (!result.passed_validation) {
396 clog->warn() << "Scrub error on inode " << in->ino()
397 << " (" << path << ") see " << g_conf->name
398 << " log and `damage ls` output for details";
399
400 // Put the verbose JSON output into the MDS log for later inspection
401 JSONFormatter f;
402 result.dump(&f);
403 std::ostringstream out;
404 f.flush(out);
405 derr << __func__ << " scrub error on inode " << *in << ": " << out.str()
406 << dendl;
407 } else {
408 dout(10) << __func__ << " scrub passed on inode " << *in << dendl;
409 }
410
411 MDSInternalContextBase *c = NULL;
412 in->scrub_finished(&c);
413
414 if (!header->get_recursive() && in == header->get_origin()) {
415 if (r >= 0) { // we got into the scrubbing dump it
416 result.dump(&(header->get_formatter()));
417 } else { // we failed the lookup or something; dump ourselves
418 header->get_formatter().open_object_section("results");
419 header->get_formatter().dump_int("return_code", r);
420 header->get_formatter().close_section(); // results
421 }
422 }
423 if (c) {
424 finisher->queue(new MDSIOContextWrapper(mdcache->mds, c), 0);
425 }
426 }
427
428 ScrubStack::C_KickOffScrubs::C_KickOffScrubs(MDCache *mdcache, ScrubStack *s)
429 : MDSInternalContext(mdcache->mds), stack(s) { }