1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
19 #include "osdc/Filer.h"
21 #include "RecoveryQueue.h"
23 #define dout_context g_ceph_context
24 #define dout_subsys ceph_subsys_mds
26 #define dout_prefix *_dout << "mds." << mds->get_nodeid() << " RecoveryQueue::" << __func__ << " "
// Completion context used by RecoveryQueue for a file-size probe on one inode.
// It remembers the owning RecoveryQueue (rq) and the inode (in); when the
// I/O finishes it hands the result code plus its size/mtime members to
// RecoveryQueue::_recovered().
// NOTE(review): the declarations of size, mtime, rq and in, and any access
// specifiers, are not visible in this listing.
30 class C_MDC_Recover
: public MDSIOContextBase
{
// Binds the context to a queue and an inode; the queue pointer must be non-null.
32 C_MDC_Recover(RecoveryQueue
*rq_
, CInode
*i
) :
33 MDSIOContextBase(false), rq(rq_
), in(i
) {
34 ceph_assert(rq
!= NULL
);
// Writes "file_recover(<ino>)" to the given stream.
36 void print(ostream
& out
) const override
{
37 out
<< "file_recover(" << in
->ino() << ")";
// Completion hook: forwards r together with this context's size and mtime
// (RecoveryQueue::_start passes their addresses to filer.probe) to the queue.
43 void finish(int r
) override
{
44 rq
->_recovered(in
, r
, size
, mtime
);
// NOTE(review): the body of get_mds() is not visible in this listing.
47 MDSRank
*get_mds() override
{
// Constructs the recovery queue for the given MDS rank: sets up the two
// inode lists, stores the rank pointer, and builds the Filer from the rank's
// objecter and finisher.
// NOTE(review): the list offsets name CInode::item_dirty_dirfrag_dir and
// CInode::item_dirty_dirfrag_nest, while the rest of this file links inodes
// through item_recover_queue / item_recover_queue_front. Presumably those
// names refer to the same members -- confirm against CInode.h.
55 RecoveryQueue::RecoveryQueue(MDSRank
*mds_
) :
56 file_recover_queue(member_offset(CInode
, item_dirty_dirfrag_dir
)),
57 file_recover_queue_front(member_offset(CInode
, item_dirty_dirfrag_nest
)),
58 mds(mds_
), filer(mds_
->objecter
, mds_
->finisher
)
62 * Progress the queue. Call this after enqueuing something or on
63 * completion of something.
// Dequeues inodes while fewer than g_conf()->mds_max_file_recover entries are
// in file_recovering, then refreshes the perf-counter gauges.
65 void RecoveryQueue::advance()
67 dout(10) << file_recover_queue_size
<< " queued, "
68 << file_recover_queue_front_size
<< " prioritized, "
69 << file_recovering
.size() << " recovering" << dendl
;
// The prioritized (front) list is consulted before the regular list.
// NOTE(review): what is done with each dequeued inode, and how the loop
// exits when both lists are empty, is not visible in this listing.
71 while (file_recovering
.size() < g_conf()->mds_max_file_recover
) {
72 if (!file_recover_queue_front
.empty()) {
73 CInode
*in
= file_recover_queue_front
.front();
74 in
->item_recover_queue_front
.remove_myself();
75 file_recover_queue_front_size
--;
77 } else if (!file_recover_queue
.empty()) {
78 CInode
*in
= file_recover_queue
.front();
79 in
->item_recover_queue
.remove_myself();
80 file_recover_queue_size
--;
// Publish the in-flight count, the total queued count (regular + front),
// and the prioritized count.
87 logger
->set(l_mdc_num_recovering_processing
, file_recovering
.size());
88 logger
->set(l_mdc_num_recovering_enqueued
, file_recover_queue_size
+ file_recover_queue_front_size
);
89 logger
->set(l_mdc_num_recovering_prioritized
, file_recover_queue_front_size
);
// Begins (or skips) recovery for a single inode based on its projected
// inode's client_ranges and get_max_size().
92 void RecoveryQueue::_start(CInode
*in
)
94 const auto& pi
= in
->get_projected_inode();
// Client ranges present but a zero max size: report it to the cluster log.
97 if (pi
->client_ranges
.size() && !pi
->get_max_size()) {
98 mds
->clog
->warn() << "bad client_range " << pi
->client_ranges
99 << " on ino " << pi
->ino
;
// p tells us whether a probe for this inode is already in flight.
102 auto p
= file_recovering
.find(in
);
103 if (pi
->client_ranges
.size() && pi
->get_max_size()) {
104 dout(10) << "starting " << pi
->size
<< " " << pi
->client_ranges
105 << " " << *in
<< dendl
;
// Not yet in flight: record it with a 'false' flag and issue a Filer probe
// whose results land in the completion context's size/mtime members.
106 if (p
== file_recovering
.end()) {
107 file_recovering
.insert(make_pair(in
, false));
109 C_MDC_Recover
*fin
= new C_MDC_Recover(this, in
);
110 auto layout
= pi
->layout
;
// NOTE(review): the trailing arguments of this probe() call are not
// visible in this listing.
111 filer
.probe(in
->ino(), &layout
, in
->last
,
112 pi
->get_max_size(), &fin
->size
, &fin
->mtime
, false,
// NOTE(review): the branch introducer for the message below is not visible;
// presumably this is the path where the inode is already in file_recovering.
116 dout(10) << "already working on " << *in
<< ", set need_restart flag" << dendl
;
// NOTE(review): the branch introducer here is not visible either; presumably
// this is the path where the client_ranges/max-size test above failed.
119 dout(10) << "skipping " << pi
->size
<< " " << *in
<< dendl
;
// Nothing in flight for this inode: clear the RECOVERING state, re-evaluate
// the file lock, and release the auth pin held under this queue.
120 if (p
== file_recovering
.end()) {
121 in
->state_clear(CInode::STATE_RECOVERING
);
122 mds
->locker
->eval(in
, CEPH_LOCK_IFILE
);
123 in
->auth_unpin(this);
// Moves an inode from the regular recovery list to the prioritized (front)
// list.
// NOTE(review): the control flow between the three sections below (e.g. any
// early returns) is not visible in this listing.
128 void RecoveryQueue::prioritize(CInode
*in
)
// A probe is already in flight for this inode.
130 if (file_recovering
.count(in
)) {
131 dout(10) << "already working on " << *in
<< dendl
;
// Not on the front list yet: it must be on the regular list, so move it
// across and keep both size counters and the prioritized gauge in step.
135 if (!in
->item_recover_queue_front
.is_on_list()) {
136 dout(20) << *in
<< dendl
;
138 ceph_assert(in
->item_recover_queue
.is_on_list());
139 in
->item_recover_queue
.remove_myself();
140 file_recover_queue_size
--;
142 file_recover_queue_front
.push_back(&in
->item_recover_queue_front
);
144 file_recover_queue_front_size
++;
145 logger
->set(l_mdc_num_recovering_prioritized
, file_recover_queue_front_size
);
149 dout(10) << "not queued " << *in
<< dendl
;
// Returns true when the inode is linked on either recovery list: the regular
// one (item_recover_queue) or the prioritized one (item_recover_queue_front).
152 static bool _is_in_any_recover_queue(CInode
*in
)
154 return in
->item_recover_queue
.is_on_list() ||
155 in
->item_recover_queue_front
.is_on_list();
159 * Given an authoritative inode which is in the cache,
160 * enqueue it for recovery.
// Queues an inode on the regular recovery list. Requires that a logger has
// been set and that the inode is auth (both asserted).
162 void RecoveryQueue::enqueue(CInode
*in
)
164 dout(15) << "RecoveryQueue::enqueue " << *in
<< dendl
;
165 ceph_assert(logger
); // Caller should have done set_logger before using me
166 ceph_assert(in
->is_auth());
// NEEDSRECOVER is cleared unconditionally; RECOVERING is set (and the
// started counter bumped) only on the first transition.
// NOTE(review): one statement inside this branch is not visible in this
// listing.
168 in
->state_clear(CInode::STATE_NEEDSRECOVER
);
169 if (!in
->state_test(CInode::STATE_RECOVERING
)) {
170 in
->state_set(CInode::STATE_RECOVERING
);
172 logger
->inc(l_mdc_recovery_started
);
// Link the inode only if it is on neither list, so it is never queued twice.
175 if (!_is_in_any_recover_queue(in
)) {
176 file_recover_queue
.push_back(&in
->item_recover_queue
);
177 file_recover_queue_size
++;
178 logger
->set(l_mdc_num_recovering_enqueued
, file_recover_queue_size
+ file_recover_queue_front_size
);
184 * Call back on completion of Filer probe on an inode.
// Handles the result of a probe: r is the result code, size and mtime are
// the values handed over by the completion context (C_MDC_Recover::finish).
186 void RecoveryQueue::_recovered(CInode
*in
, int r
, uint64_t size
, utime_t mtime
)
188 dout(10) << "_recovered r=" << r
<< " size=" << size
<< " mtime=" << mtime
189 << " for " << *in
<< dendl
;
// Error path.
// NOTE(review): the condition guarding this path and the actions taken in
// each sub-branch are not visible in this listing.
192 dout(0) << "recovery error! " << r
<< dendl
;
193 if (r
== -CEPHFS_EBLOCKLISTED
) {
197 // Something wrong on the OSD side trying to recover the size
198 // of this inode. In principle we could record this as a piece
199 // of per-inode damage, but it's actually more likely that
200 // this indicates something wrong with the MDS (like maybe
201 // it has the wrong auth caps?)
202 mds
->clog
->error() << " OSD read error while recovering size"
203 " for inode " << in
->ino();
// The inode must be tracked as in flight; take its flag (named 'restart'
// here) and drop the tracking entry.
208 auto p
= file_recovering
.find(in
);
209 ceph_assert(p
!= file_recovering
.end());
210 bool restart
= p
->second
;
211 file_recovering
.erase(p
);
213 logger
->set(l_mdc_num_recovering_processing
, file_recovering
.size());
214 logger
->inc(l_mdc_recovery_completed
);
215 in
->state_clear(CInode::STATE_RECOVERING
);
// NOTE(review): the branch introducer for the section below is not visible;
// presumably it tests 'restart'. It unlinks the inode from both lists, keeps
// the size counters in step and refreshes the gauges.
218 if (in
->item_recover_queue
.is_on_list()) {
219 in
->item_recover_queue
.remove_myself();
220 file_recover_queue_size
--;
222 if (in
->item_recover_queue_front
.is_on_list()) {
223 in
->item_recover_queue_front
.remove_myself();
224 file_recover_queue_front_size
--;
226 logger
->set(l_mdc_num_recovering_enqueued
, file_recover_queue_size
+ file_recover_queue_front_size
);
227 logger
->set(l_mdc_num_recovering_prioritized
, file_recover_queue_front_size
);
// Otherwise, if the inode is on neither list: pass the probed size and
// mtime to the locker, re-evaluate the file lock, and release the auth pin.
229 } else if (!_is_in_any_recover_queue(in
)) {
231 mds
->locker
->check_inode_max_size(in
, true, 0, size
, mtime
);
232 mds
->locker
->eval(in
, CEPH_LOCK_IFILE
);
233 in
->auth_unpin(this);