// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2014 Red Hat
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation.  See file COPYING.
 *
 */

#include <iostream>

#include "ScrubStack.h"
#include "common/Finisher.h"
#include "mds/MDSRank.h"
#include "mds/MDCache.h"
#include "mds/MDSContinuation.h"

#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_mds
#undef dout_prefix
#define dout_prefix _prefix(_dout, scrubstack->mdcache->mds)
static ostream& _prefix(std::ostream *_dout, MDSRank *mds) {
  return *_dout << "mds." << mds->get_nodeid() << ".scrubstack ";
}

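// Push an inode onto the top of the scrub stack, taking a
// PIN_SCRUBQUEUE reference the first time it is queued.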
void ScrubStack::push_inode(CInode *in)
{
  dout(20) << "pushing " << *in << " on top of ScrubStack" << dendl;
  if (!in->item_scrub.is_on_list()) {
    in->get(CInode::PIN_SCRUBQUEUE);
    stack_size++;
  }
  inode_stack.push_front(&in->item_scrub);
}

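// As push_inode(), but queue the inode at the bottom of the stack so it
// is scrubbed after everything already queued.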
void ScrubStack::push_inode_bottom(CInode *in)
{
  dout(20) << "pushing " << *in << " on bottom of ScrubStack" << dendl;
  if (!in->item_scrub.is_on_list()) {
    in->get(CInode::PIN_SCRUBQUEUE);
    stack_size++;
  }
  inode_stack.push_back(&in->item_scrub);
}

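// Remove an inode from the scrub stack and drop its PIN_SCRUBQUEUE
// reference; the inode must currently be on the stack.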
void ScrubStack::pop_inode(CInode *in)
{
  dout(20) << "popping " << *in
           << " off of ScrubStack" << dendl;
  assert(in->item_scrub.is_on_list());
  in->put(CInode::PIN_SCRUBQUEUE);
  in->item_scrub.remove_myself();
  stack_size--;
}

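// Initialize scrub state on the inode and place it on the stack (top or
// bottom) without starting any work; callers that want work started use
// enqueue_inode().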
void ScrubStack::_enqueue_inode(CInode *in, CDentry *parent,
                                ScrubHeaderRef& header,
                                MDSInternalContextBase *on_finish, bool top)
{
  dout(10) << __func__ << " with {" << *in << "}"
           << ", on_finish=" << on_finish << ", top=" << top << dendl;
  assert(mdcache->mds->mds_lock.is_locked_by_me());
  in->scrub_initialize(parent, header, on_finish);
  if (top)
    push_inode(in);
  else
    push_inode_bottom(in);
}

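// Public entry point: queue an inode for scrubbing and immediately try
// to start work on the stack.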
void ScrubStack::enqueue_inode(CInode *in, ScrubHeaderRef& header,
                               MDSInternalContextBase *on_finish, bool top)
{
  _enqueue_inode(in, NULL, header, on_finish, top);
  kick_off_scrubs();
}

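// Walk the stack and start scrub operations until we hit
// mds_max_scrub_ops_in_progress or run out of entries we can make
// progress on.  Non-directory inodes are popped and validated directly;
// directory inodes are advanced via scrub_dir_inode().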
void ScrubStack::kick_off_scrubs()
{
  dout(20) << __func__ << " entering with " << scrubs_in_progress << " in "
              "progress and " << stack_size << " in the stack" << dendl;
  bool can_continue = true;
  elist<CInode*>::iterator i = inode_stack.begin();
  while (g_conf->mds_max_scrub_ops_in_progress > scrubs_in_progress &&
         can_continue && !i.end()) {
    CInode *curi = *i;
    ++i; // we have our reference, push iterator forward

    dout(20) << __func__ << " examining " << *curi << dendl;

    if (!curi->is_dir()) {
      // it's a regular file, symlink, or hard link
      pop_inode(curi); // we only touch it this once, so remove from stack

      if (!curi->scrub_info()->on_finish) {
        scrubs_in_progress++;
        curi->scrub_set_finisher(&scrub_kick);
      }
      scrub_file_inode(curi);
      can_continue = true;
    } else {
      bool completed; // it's done, so pop it off the stack
      bool terminal; // not done, but we can start ops on other directories
      bool progress; // it added new dentries to the top of the stack
      scrub_dir_inode(curi, &progress, &terminal, &completed);
      if (completed) {
        dout(20) << __func__ << " dir completed" << dendl;
        pop_inode(curi);
      } else if (progress) {
        dout(20) << __func__ << " dir progressed" << dendl;
        // we added new stuff to top of stack, so reset ourselves there
        i = inode_stack.begin();
      } else {
        dout(20) << __func__ << " dir no-op" << dendl;
      }

      can_continue = progress || terminal || completed;
    }
  }
}

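// Advance scrubbing of a directory inode.  In recursive mode this walks
// the inode's dirfrags and pushes their dentries' inodes onto the stack;
// once every frag is done, the directory inode itself is validated via
// scrub_dir_inode_final().  The out-params report whether new children
// were pushed, whether we can move on to other work, and whether the
// directory is fully scrubbed.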
void ScrubStack::scrub_dir_inode(CInode *in,
                                 bool *added_children,
                                 bool *terminal,
                                 bool *done)
{
  dout(10) << __func__ << *in << dendl;

  *added_children = false;
  bool all_frags_terminal = true;
  bool all_frags_done = true;

  ScrubHeaderRef header = in->get_scrub_header();
  assert(header != nullptr);

  if (header->get_recursive()) {
    list<frag_t> scrubbing_frags;
    list<CDir*> scrubbing_cdirs;
    in->scrub_dirfrags_scrubbing(&scrubbing_frags);
    dout(20) << __func__ << " iterating over " << scrubbing_frags.size()
             << " scrubbing frags" << dendl;
    for (list<frag_t>::iterator i = scrubbing_frags.begin();
         i != scrubbing_frags.end();
         ++i) {
      // turn frags into CDir *
      CDir *dir = in->get_dirfrag(*i);
      if (dir) {
        scrubbing_cdirs.push_back(dir);
        dout(25) << __func__ << " got CDir " << *dir << " presently scrubbing" << dendl;
      } else {
        in->scrub_dirfrag_finished(*i);
        dout(25) << __func__ << " missing dirfrag " << *i << " skip scrubbing" << dendl;
      }
    }

    dout(20) << __func__ << " consuming from " << scrubbing_cdirs.size()
             << " scrubbing cdirs" << dendl;

    list<CDir*>::iterator i = scrubbing_cdirs.begin();
    while (g_conf->mds_max_scrub_ops_in_progress > scrubs_in_progress) {
      // select next CDir
      CDir *cur_dir = NULL;
      if (i != scrubbing_cdirs.end()) {
        cur_dir = *i;
        ++i;
        dout(20) << __func__ << " got cur_dir = " << *cur_dir << dendl;
      } else {
        bool ready = get_next_cdir(in, &cur_dir);
        dout(20) << __func__ << " get_next_cdir ready=" << ready << dendl;

        if (ready && cur_dir) {
          scrubbing_cdirs.push_back(cur_dir);
        } else if (!ready) {
          // We are waiting for load of a frag
          all_frags_done = false;
          all_frags_terminal = false;
          break;
        } else {
          // Finished with all frags
          break;
        }
      }
      // scrub that CDir
      bool frag_added_children = false;
      bool frag_terminal = true;
      bool frag_done = false;
      scrub_dirfrag(cur_dir, header,
                    &frag_added_children, &frag_terminal, &frag_done);
      if (frag_done) {
        cur_dir->inode->scrub_dirfrag_finished(cur_dir->frag);
      }
      *added_children |= frag_added_children;
      all_frags_terminal = all_frags_terminal && frag_terminal;
      all_frags_done = all_frags_done && frag_done;
    }

    dout(20) << "finished looping; all_frags_terminal=" << all_frags_terminal
             << ", all_frags_done=" << all_frags_done << dendl;
  } else {
    dout(20) << "!scrub_recursive" << dendl;
  }

  if (all_frags_done) {
    assert (!*added_children); // can't do this if children are still pending

    // OK, so now I can... fire off a validate on the dir inode, and
    // when it completes, come through here again, noticing that we've
    // set a flag to indicate the validate happened, and
    scrub_dir_inode_final(in);
  }

  *terminal = all_frags_terminal;
  *done = all_frags_done;
  dout(10) << __func__ << " is exiting " << *terminal << " " << *done << dendl;
  return;
}

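// Fetch the next dirfrag of 'in' that still needs scrubbing.  Returns
// false if a fetch from the backing store is in flight (the kicker
// context will re-drive us later); returns true with *new_dir == NULL
// when no dirfrags remain.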
bool ScrubStack::get_next_cdir(CInode *in, CDir **new_dir)
{
  dout(20) << __func__ << " on " << *in << dendl;
  frag_t next_frag;
  int r = in->scrub_dirfrag_next(&next_frag);
  assert (r >= 0);

  if (r == 0) {
    // we got a frag to scrub, otherwise it would be ENOENT
    dout(25) << "looking up new frag " << next_frag << dendl;
    CDir *next_dir = in->get_or_open_dirfrag(mdcache, next_frag);
    if (!next_dir->is_complete()) {
      scrubs_in_progress++;
      next_dir->fetch(&scrub_kick);
      dout(25) << "fetching frag from RADOS" << dendl;
      return false;
    }
    *new_dir = next_dir;
    dout(25) << "returning dir " << *new_dir << dendl;
    return true;
  }
  assert(r == ENOENT);
  // there are no dirfrags left
  *new_dir = NULL;
  return true;
}

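// Completion context for CInode::validate_disk_state(); hands the
// validation result back to the ScrubStack.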
class C_InodeValidated : public MDSInternalContext
{
  public:
    ScrubStack *stack;
    CInode::validated_data result;
    CInode *target;

    C_InodeValidated(MDSRank *mds, ScrubStack *stack_, CInode *target_)
      : MDSInternalContext(mds), stack(stack_), target(target_)
    {}

    void finish(int r) override
    {
      stack->_validate_inode_done(target, r, result);
    }
};


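// Called once a directory inode's dirfrags are done: mark the children
// finished and kick off on-disk validation of the directory inode itself.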
void ScrubStack::scrub_dir_inode_final(CInode *in)
{
  dout(20) << __func__ << *in << dendl;

  // Two passes through this function.  First one triggers inode validation,
  // second one sets finally_done
  // FIXME: kind of overloading scrub_in_progress here, using it while
  //  dentry is still on stack to indicate that we have finished
  //  doing our validate_disk_state on the inode
  // FIXME: the magic-constructing scrub_info() is going to leave
  //  an unneeded scrub_infop lying around here
  if (!in->scrub_info()->children_scrubbed) {
    if (!in->scrub_info()->on_finish) {
      scrubs_in_progress++;
      in->scrub_set_finisher(&scrub_kick);
    }

    in->scrub_children_finished();
    C_InodeValidated *fin = new C_InodeValidated(mdcache->mds, this, in);
    in->validate_disk_state(&fin->result, fin);
  }

  return;
}

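// Advance scrubbing of a single dirfrag: make sure it is complete in
// memory, then pull dentries off it via scrub_dentry_next() and push
// their inodes onto the stack.  *done and *is_terminal are set once the
// frag has no dentries left to hand out or wait for.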
void ScrubStack::scrub_dirfrag(CDir *dir,
                               ScrubHeaderRef& header,
                               bool *added_children, bool *is_terminal,
                               bool *done)
{
  assert(dir != NULL);

  dout(20) << __func__ << " on " << *dir << dendl;
  *added_children = false;
  *is_terminal = false;
  *done = false;

  if (!dir->scrub_info()->directory_scrubbing) {
    // Get the frag complete before calling
    // scrub initialize, so that it can populate its lists
    // of dentries.
    if (!dir->is_complete()) {
      scrubs_in_progress++;
      dir->fetch(&scrub_kick);
      return;
    }

    dir->scrub_initialize(header);
  }

  int r = 0;
  while (r == 0) {
    CDentry *dn = NULL;
    scrubs_in_progress++;
    r = dir->scrub_dentry_next(&scrub_kick, &dn);
    if (r != EAGAIN) {
      scrubs_in_progress--;
    }

    if (r == EAGAIN) {
      // Drop out, CDir fetcher will call back our kicker context
      dout(20) << __func__ << " waiting for fetch on " << *dir << dendl;
      return;
    }

    if (r == ENOENT) {
      // Nothing left to scrub, are we done?
      std::list<CDentry*> scrubbing;
      dir->scrub_dentries_scrubbing(&scrubbing);
      if (scrubbing.empty()) {
        dout(20) << __func__ << " dirfrag done: " << *dir << dendl;
        // FIXME: greg: What's the diff meant to be between done and terminal
        dir->scrub_finished();
        *done = true;
        *is_terminal = true;
      } else {
        dout(20) << __func__ << " " << scrubbing.size() << " dentries still "
                    "scrubbing in " << *dir << dendl;
      }
      return;
    }

    // scrub_dentry_next defined to only give EAGAIN, ENOENT, 0 -- we should
    // never get random IO errors here.
    assert(r == 0);

    _enqueue_inode(dn->get_projected_inode(), dn, header, NULL, true);

    *added_children = true;
  }
}

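// Validate the on-disk state of a non-directory inode.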
void ScrubStack::scrub_file_inode(CInode *in)
{
  C_InodeValidated *fin = new C_InodeValidated(mdcache->mds, this, in);
  // At this stage the DN is already past scrub_initialize, so
  // it's in the cache, it has PIN_SCRUBQUEUE and it is authpinned
  in->validate_disk_state(&fin->result, fin);
}

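// Callback from C_InodeValidated: record any damage found, log the
// outcome, and complete the scrub on this inode.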
void ScrubStack::_validate_inode_done(CInode *in, int r,
                                      const CInode::validated_data &result)
{
  LogChannelRef clog = mdcache->mds->clog;
  const ScrubHeaderRefConst header = in->scrub_info()->header;

  std::string path;
  if (!result.passed_validation) {
    // Build path string for use in messages
    in->make_path_string(path, true);
  }

  if (result.backtrace.checked && !result.backtrace.passed
      && !result.backtrace.repaired)
  {
    // Record backtrace fails as remote linkage damage, as
    // we may not be able to resolve hard links to this inode
    mdcache->mds->damage_table.notify_remote_damaged(in->inode.ino, path);
  } else if (result.inode.checked && !result.inode.passed) {
    // Record damaged inode structures as damaged dentries as
    // that is where they are stored
    auto parent = in->get_projected_parent_dn();
    if (parent) {
      auto dir = parent->get_dir();
      mdcache->mds->damage_table.notify_dentry(
        dir->inode->ino(), dir->frag, parent->last, parent->name, path);
    }
  }

  // Inform the cluster log if we found an error
  if (!result.passed_validation) {
    if (result.all_damage_repaired()) {
      clog->info() << "Scrub repaired inode " << in->ino()
                   << " (" << path << ")";
    } else {
      clog->warn() << "Scrub error on inode " << in->ino()
                   << " (" << path << ") see " << g_conf->name
                   << " log and `damage ls` output for details";
    }

    // Put the verbose JSON output into the MDS log for later inspection
    JSONFormatter f;
    result.dump(&f);
    std::ostringstream out;
    f.flush(out);
    derr << __func__ << " scrub error on inode " << *in << ": " << out.str()
         << dendl;
  } else {
    dout(10) << __func__ << " scrub passed on inode " << *in << dendl;
  }

  MDSInternalContextBase *c = NULL;
  in->scrub_finished(&c);

  if (!header->get_recursive() && in == header->get_origin()) {
    if (r >= 0) { // we got into the scrubbing; dump it
      result.dump(&(header->get_formatter()));
    } else { // we failed the lookup or something; dump ourselves
      header->get_formatter().open_object_section("results");
      header->get_formatter().dump_int("return_code", r);
      header->get_formatter().close_section(); // results
    }
  }
  if (c) {
    finisher->queue(new MDSIOContextWrapper(mdcache->mds, c), 0);
  }
}

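// Kicker context (scrub_kick) handed to asynchronous operations above so
// that the stack is re-driven when they complete.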
ScrubStack::C_KickOffScrubs::C_KickOffScrubs(MDCache *mdcache, ScrubStack *s)
  : MDSInternalContext(mdcache->mds), stack(s) { }