1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
19 #include "osdc/Filer.h"
21 #include "RecoveryQueue.h"
23 #define dout_context g_ceph_context
24 #define dout_subsys ceph_subsys_mds
26 #define dout_prefix *_dout << "mds." << mds->get_nodeid() << " RecoveryQueue::" << __func__ << " "
// Context whose finish() relays a result code, plus the stored size and mtime
// for one inode, back to RecoveryQueue::_recovered().
// RecoveryQueue::_start() creates one per inode and passes the addresses of
// its size and mtime fields to filer.probe().
// NOTE(review): member declarations and closing braces are not visible in
// this view of the file.
28 class C_MDC_Recover
: public MDSIOContextBase
{
// Forward r together with the size/mtime fields to the owning queue.
32 void finish(int r
) override
{
33 rq
->_recovered(in
, r
, size
, mtime
);
// NOTE(review): the body of get_mds() is not visible in this view.
36 MDSRank
*get_mds() override
{
// Remember the owning queue and the target inode; size starts at 0.
44 C_MDC_Recover(RecoveryQueue
*rq_
, CInode
*i
) : rq(rq_
), in(i
), size(0) {
// Construct a queue bound to the given MDS rank.
// The logger starts out NULL (enqueue() asserts that it has been set before
// use), and the Filer is built from the rank's objecter and finisher.
50 RecoveryQueue::RecoveryQueue(MDSRank
*mds_
)
51 : mds(mds_
), logger(NULL
), filer(mds_
->objecter
, mds_
->finisher
)
56 * Progress the queue. Call this after enqueuing something or on
57 * completion of something.
// Pull inodes off the recovery queues while fewer than
// g_conf->mds_max_file_recover inodes are in file_recovering, and publish the
// three container sizes to the logger.
// NOTE(review): several lines of this function (braces, and whatever is done
// with each dequeued inode) are not visible in this view; presumably each
// inode is handed to _start(). Confirm against the full file.
59 void RecoveryQueue::advance()
// Report how many inodes are queued, prioritized and currently recovering.
61 dout(10) << file_recover_queue
.size() << " queued, "
62 << file_recover_queue_front
.size() << " prioritized, "
63 << file_recovering
.size() << " recovering" << dendl
;
// Keep dequeuing until the in-flight limit is reached.
65 while (file_recovering
.size() < g_conf
->mds_max_file_recover
) {
// Prioritized inodes are served first.
66 if (!file_recover_queue_front
.empty()) {
67 CInode
*in
= *file_recover_queue_front
.begin();
68 file_recover_queue_front
.erase(file_recover_queue_front
.begin());
// The same inode is also dropped from the regular queue.
69 file_recover_queue
.erase(in
);
// Otherwise take the first entry of the regular queue.
71 } else if (!file_recover_queue
.empty()) {
72 CInode
*in
= *file_recover_queue
.begin();
73 file_recover_queue
.erase(file_recover_queue
.begin());
// Publish the current size of each container.
80 logger
->set(l_mdc_num_recovering_processing
, file_recovering
.size());
81 logger
->set(l_mdc_num_recovering_enqueued
, file_recover_queue
.size());
82 logger
->set(l_mdc_num_recovering_prioritized
, file_recover_queue_front
.size());
// Begin recovery for one inode: issue a Filer probe when the projected inode
// has client ranges and a non-zero max size, otherwise take the "skipping"
// path that clears the recovering state and unpins the inode.
85 void RecoveryQueue::_start(CInode
*in
)
// All checks below use the projected inode.
87 inode_t
*pi
= in
->get_projected_inode();
// Client ranges without a max size are reported to the cluster log as a
// warning.
90 if (pi
->client_ranges
.size() && !pi
->get_max_size()) {
91 mds
->clog
->warn() << "bad client_range " << pi
->client_ranges
92 << " on ino " << pi
->ino
;
// Client ranges with a non-zero max size: start a probe.
95 if (pi
->client_ranges
.size() && pi
->get_max_size()) {
96 dout(10) << "starting " << in
->inode
.size
<< " " << pi
->client_ranges
97 << " " << *in
<< dendl
;
// Track the inode as in-flight.
98 file_recovering
.insert(in
);
// The probe is given the addresses of fin->size and fin->mtime.
// NOTE(review): the remaining arguments of the probe call are not visible
// in this view; presumably fin is passed as the completion.
100 C_MDC_Recover
*fin
= new C_MDC_Recover(this, in
);
101 filer
.probe(in
->inode
.ino
, &in
->inode
.layout
, in
->last
,
102 pi
->get_max_size(), &fin
->size
, &fin
->mtime
, false,
// "skipping" path: clear STATE_RECOVERING, re-evaluate the IFILE lock and
// drop this queue's auth pin.
// NOTE(review): the keyword introducing this branch is not visible here;
// presumably it is the else of the check above.
105 dout(10) << "skipping " << in
->inode
.size
<< " " << *in
<< dendl
;
106 in
->state_clear(CInode::STATE_RECOVERING
);
107 mds
->locker
->eval(in
, CEPH_LOCK_IFILE
);
108 in
->auth_unpin(this);
// Add an inode that is already in file_recover_queue to the prioritized set
// (file_recover_queue_front), which advance() serves first.
// NOTE(review): the statements that end each branch are not visible in this
// view; presumably the first two branches return early.
112 void RecoveryQueue::prioritize(CInode
*in
)
// The inode is already in file_recovering: only log it.
114 if (file_recovering
.count(in
)) {
115 dout(10) << "already working on " << *in
<< dendl
;
// The inode is waiting in the regular queue: add it to the prioritized set
// and refresh the prioritized counter.
119 if (file_recover_queue
.count(in
)) {
120 dout(20) << *in
<< dendl
;
121 file_recover_queue_front
.insert(in
);
122 logger
->set(l_mdc_num_recovering_prioritized
, file_recover_queue_front
.size());
// Logged for an inode that is not queued.
126 dout(10) << "not queued " << *in
<< dendl
;
131 * Given an authoritative inode which is in the cache,
132 * enqueue it for recovery.
// Put an inode into file_recover_queue.
// Requires that the logger has been set and that the inode is auth; both are
// asserted below.
134 void RecoveryQueue::enqueue(CInode
*in
)
136 dout(15) << "RecoveryQueue::enqueue " << *in
<< dendl
;
137 assert(logger
); // Caller should have done set_logger before using me
138 assert(in
->is_auth());
// Swap the NEEDSRECOVER state for RECOVERING; the RECOVERING flag is set only
// when it was not already set.
140 in
->state_clear(CInode::STATE_NEEDSRECOVER
);
141 if (!in
->state_test(CInode::STATE_RECOVERING
)) {
142 in
->state_set(CInode::STATE_RECOVERING
);
// NOTE(review): where the if block above ends is not visible in this view.
144 logger
->inc(l_mdc_recovery_started
);
// Queue the inode and publish the new queue length.
146 file_recover_queue
.insert(in
);
147 logger
->set(l_mdc_num_recovering_enqueued
, file_recover_queue
.size());
152 * Call back on completion of Filer probe on an inode.
// Handle the outcome reported by C_MDC_Recover::finish() for one inode.
// in:    the inode the result belongs to
// r:     result code
// size:  size value stored in the completion context
// mtime: mtime value stored in the completion context
154 void RecoveryQueue::_recovered(CInode
*in
, int r
, uint64_t size
, utime_t mtime
)
156 dout(10) << "_recovered r=" << r
<< " size=" << size
<< " mtime=" << mtime
157 << " for " << *in
<< dendl
;
// Error path, logged at level 0.
// NOTE(review): the condition guarding this path is not visible in this view.
160 dout(0) << "recovery error! " << r
<< dendl
;
// -EBLACKLISTED is singled out; what is done for it is not visible here.
161 if (r
== -EBLACKLISTED
) {
165 // Something wrong on the OSD side trying to recover the size
166 // of this inode. In principle we could record this as a piece
167 // of per-inode damage, but it's actually more likely that
168 // this indicates something wrong with the MDS (like maybe
169 // it has the wrong auth caps?)
// Report the failure to the cluster log, with the inode number in hex.
170 mds
->clog
->error() << " OSD read error while recovering size for inode 0x"
171 << std::hex
<< in
->ino() << std::dec
;
// Bookkeeping: the inode is no longer in flight.
176 file_recovering
.erase(in
);
177 logger
->set(l_mdc_num_recovering_processing
, file_recovering
.size());
178 logger
->inc(l_mdc_recovery_completed
);
179 in
->state_clear(CInode::STATE_RECOVERING
);
// An inode with neither a parent dentry nor a projected parent dentry is
// unpinned and removed from the cache.
181 if (!in
->get_parent_dn() && !in
->get_projected_parent_dn()) {
182 dout(10) << " inode has no parents, killing it off" << dendl
;
183 in
->auth_unpin(this);
184 mds
->mdcache
->remove_inode(in
);
// Pass size and mtime to the locker's max-size check, re-evaluate the IFILE
// lock, and drop this queue's auth pin.
// NOTE(review): presumably this is the else branch of the parent check above;
// the branch keyword is not visible in this view.
187 mds
->locker
->check_inode_max_size(in
, true, 0, size
, mtime
);
188 mds
->locker
->eval(in
, CEPH_LOCK_IFILE
);
189 in
->auth_unpin(this);