]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2012 Red Hat | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | ||
16 | #include "common/dout.h" | |
2a845540 | 17 | #include "common/likely.h" |
7c673cae | 18 | #include "common/HeartbeatMap.h" |
181888fb | 19 | |
7c673cae FG |
20 | #include "include/stringify.h" |
21 | #include "include/util.h" | |
22 | ||
7c673cae FG |
23 | #include "mon/MonClient.h" |
24 | #include "mds/MDLog.h" | |
25 | #include "mds/MDSRank.h" | |
26 | #include "mds/MDSMap.h" | |
27 | #include "mds/Locker.h" | |
28 | ||
29 | #include "Beacon.h" | |
30 | ||
91327a77 AA |
31 | #include <chrono> |
32 | ||
7c673cae FG |
33 | #define dout_context g_ceph_context |
34 | #define dout_subsys ceph_subsys_mds | |
35 | #undef dout_prefix | |
36 | #define dout_prefix *_dout << "mds.beacon." << name << ' ' | |
37 | ||
20effc67 TL |
38 | using std::map; |
39 | using std::string; | |
40 | ||
11fdf7f2 TL |
41 | using namespace std::chrono_literals; |
42 | ||
43 | Beacon::Beacon(CephContext *cct, MonClient *monc, std::string_view name) | |
91327a77 AA |
44 | : |
45 | Dispatcher(cct), | |
11fdf7f2 | 46 | beacon_interval(g_conf()->mds_beacon_interval), |
91327a77 | 47 | monc(monc), |
522d829b TL |
48 | name(name), |
49 | compat(MDSMap::get_compat_set_all()) | |
7c673cae | 50 | { |
7c673cae FG |
51 | } |
52 | ||
7c673cae FG |
53 | Beacon::~Beacon() |
54 | { | |
91327a77 | 55 | shutdown(); |
7c673cae FG |
56 | } |
57 | ||
91327a77 AA |
58 | void Beacon::shutdown() |
59 | { | |
60 | std::unique_lock<std::mutex> lock(mutex); | |
61 | if (!finished) { | |
62 | finished = true; | |
63 | lock.unlock(); | |
81eedcae TL |
64 | if (sender.joinable()) |
65 | sender.join(); | |
91327a77 AA |
66 | } |
67 | } | |
7c673cae | 68 | |
11fdf7f2 | 69 | void Beacon::init(const MDSMap &mdsmap) |
7c673cae | 70 | { |
11fdf7f2 | 71 | std::unique_lock lock(mutex); |
7c673cae FG |
72 | |
73 | _notify_mdsmap(mdsmap); | |
7c673cae | 74 | |
91327a77 AA |
75 | sender = std::thread([this]() { |
76 | std::unique_lock<std::mutex> lock(mutex); | |
77 | std::condition_variable c; // no one wakes us | |
78 | while (!finished) { | |
79 | auto now = clock::now(); | |
80 | auto since = std::chrono::duration<double>(now-last_send).count(); | |
81 | auto interval = beacon_interval; | |
82 | if (since >= interval*.90) { | |
a8e16298 TL |
83 | if (!_send()) { |
84 | interval = 0.5; /* 500ms */ | |
85 | } | |
91327a77 AA |
86 | } else { |
87 | interval -= since; | |
88 | } | |
89 | dout(20) << "sender thread waiting interval " << interval << "s" << dendl; | |
11fdf7f2 | 90 | c.wait_for(lock, interval*1s); |
91327a77 AA |
91 | } |
92 | }); | |
7c673cae FG |
93 | } |
94 | ||
9f95a23c | 95 | bool Beacon::ms_can_fast_dispatch2(const cref_t<Message>& m) const |
7c673cae | 96 | { |
91327a77 | 97 | return m->get_type() == MSG_MDS_BEACON; |
7c673cae FG |
98 | } |
99 | ||
9f95a23c | 100 | void Beacon::ms_fast_dispatch2(const ref_t<Message>& m) |
91327a77 | 101 | { |
11fdf7f2 TL |
102 | bool handled = ms_dispatch2(m); |
103 | ceph_assert(handled); | |
91327a77 | 104 | } |
7c673cae | 105 | |
9f95a23c | 106 | bool Beacon::ms_dispatch2(const ref_t<Message>& m) |
7c673cae FG |
107 | { |
108 | if (m->get_type() == MSG_MDS_BEACON) { | |
109 | if (m->get_connection()->get_peer_type() == CEPH_ENTITY_TYPE_MON) { | |
9f95a23c | 110 | handle_mds_beacon(ref_cast<MMDSBeacon>(m)); |
7c673cae FG |
111 | } |
112 | return true; | |
113 | } | |
114 | ||
115 | return false; | |
116 | } | |
117 | ||
118 | ||
119 | /** | |
120 | * Update lagginess state based on response from remote MDSMonitor | |
121 | * | |
122 | * This function puts the passed message before returning | |
123 | */ | |
9f95a23c | 124 | void Beacon::handle_mds_beacon(const cref_t<MMDSBeacon> &m) |
7c673cae | 125 | { |
11fdf7f2 | 126 | std::unique_lock lock(mutex); |
7c673cae FG |
127 | |
128 | version_t seq = m->get_seq(); | |
129 | ||
130 | // update lab | |
91327a77 AA |
131 | auto it = seq_stamp.find(seq); |
132 | if (it != seq_stamp.end()) { | |
133 | auto now = clock::now(); | |
134 | ||
135 | last_acked_stamp = it->second; | |
136 | auto rtt = std::chrono::duration<double>(now - last_acked_stamp).count(); | |
137 | ||
138 | dout(5) << "received beacon reply " << ceph_mds_state_name(m->get_state()) << " seq " << m->get_seq() << " rtt " << rtt << dendl; | |
139 | ||
11fdf7f2 | 140 | if (laggy && rtt < g_conf()->mds_beacon_grace) { |
91327a77 AA |
141 | dout(0) << " MDS is no longer laggy" << dendl; |
142 | laggy = false; | |
143 | last_laggy = now; | |
7c673cae FG |
144 | } |
145 | ||
146 | // clean up seq_stamp map | |
91327a77 | 147 | seq_stamp.erase(seq_stamp.begin(), ++it); |
7c673cae FG |
148 | |
149 | // Wake a waiter up if present | |
91327a77 | 150 | cvar.notify_all(); |
7c673cae | 151 | } else { |
91327a77 AA |
152 | dout(1) << "discarding unexpected beacon reply " << ceph_mds_state_name(m->get_state()) |
153 | << " seq " << m->get_seq() << " dne" << dendl; | |
7c673cae FG |
154 | } |
155 | } | |
156 | ||
157 | ||
158 | void Beacon::send() | |
159 | { | |
11fdf7f2 | 160 | std::unique_lock lock(mutex); |
7c673cae FG |
161 | _send(); |
162 | } | |
163 | ||
164 | ||
165 | void Beacon::send_and_wait(const double duration) | |
166 | { | |
11fdf7f2 | 167 | std::unique_lock lock(mutex); |
7c673cae | 168 | _send(); |
91327a77 | 169 | auto awaiting_seq = last_seq; |
7c673cae FG |
170 | dout(20) << __func__ << ": awaiting " << awaiting_seq |
171 | << " for up to " << duration << "s" << dendl; | |
172 | ||
91327a77 AA |
173 | auto start = clock::now(); |
174 | while (!seq_stamp.empty() && seq_stamp.begin()->first <= awaiting_seq) { | |
175 | auto now = clock::now(); | |
176 | auto s = duration*.95-std::chrono::duration<double>(now-start).count(); | |
177 | if (s < 0) break; | |
11fdf7f2 | 178 | cvar.wait_for(lock, s*1s); |
7c673cae | 179 | } |
7c673cae FG |
180 | } |
181 | ||
182 | ||
183 | /** | |
184 | * Call periodically, or when you have updated the desired state | |
185 | */ | |
a8e16298 | 186 | bool Beacon::_send() |
7c673cae | 187 | { |
91327a77 AA |
188 | auto now = clock::now(); |
189 | auto since = std::chrono::duration<double>(now-last_acked_stamp).count(); | |
7c673cae FG |
190 | |
191 | if (!cct->get_heartbeat_map()->is_healthy()) { | |
192 | /* If anything isn't progressing, let avoid sending a beacon so that | |
193 | * the MDS will consider us laggy */ | |
91327a77 | 194 | dout(0) << "Skipping beacon heartbeat to monitors (last acked " << since << "s ago); MDS internal heartbeat is not healthy!" << dendl; |
a8e16298 | 195 | return false; |
7c673cae FG |
196 | } |
197 | ||
198 | ++last_seq; | |
91327a77 | 199 | dout(5) << "Sending beacon " << ceph_mds_state_name(want_state) << " seq " << last_seq << dendl; |
7c673cae | 200 | |
91327a77 | 201 | seq_stamp[last_seq] = now; |
7c673cae | 202 | |
11fdf7f2 | 203 | ceph_assert(want_state != MDSMap::STATE_NULL); |
7c673cae | 204 | |
9f95a23c | 205 | auto beacon = make_message<MMDSBeacon>( |
7c673cae FG |
206 | monc->get_fsid(), mds_gid_t(monc->get_global_id()), |
207 | name, | |
208 | epoch, | |
209 | want_state, | |
210 | last_seq, | |
211 | CEPH_FEATURES_SUPPORTED_DEFAULT); | |
7c673cae FG |
212 | beacon->set_health(health); |
213 | beacon->set_compat(compat); | |
9f95a23c | 214 | beacon->set_fs(g_conf().get_val<std::string>("mds_join_fs")); |
7c673cae FG |
215 | // piggyback the sys info on beacon msg |
216 | if (want_state == MDSMap::STATE_BOOT) { | |
217 | map<string, string> sys_info; | |
218 | collect_sys_info(&sys_info, cct); | |
11fdf7f2 | 219 | sys_info["addr"] = stringify(monc->get_myaddrs()); |
7c673cae FG |
220 | beacon->set_sys_info(sys_info); |
221 | } | |
11fdf7f2 | 222 | monc->send_mon_message(beacon.detach()); |
91327a77 | 223 | last_send = now; |
a8e16298 | 224 | return true; |
7c673cae FG |
225 | } |
226 | ||
227 | /** | |
228 | * Call this when there is a new MDSMap available | |
229 | */ | |
11fdf7f2 | 230 | void Beacon::notify_mdsmap(const MDSMap &mdsmap) |
7c673cae | 231 | { |
11fdf7f2 | 232 | std::unique_lock lock(mutex); |
7c673cae FG |
233 | |
234 | _notify_mdsmap(mdsmap); | |
235 | } | |
236 | ||
11fdf7f2 | 237 | void Beacon::_notify_mdsmap(const MDSMap &mdsmap) |
7c673cae | 238 | { |
11fdf7f2 | 239 | ceph_assert(mdsmap.get_epoch() >= epoch); |
7c673cae | 240 | |
522d829b | 241 | if (mdsmap.get_epoch() >= epoch) { |
11fdf7f2 | 242 | epoch = mdsmap.get_epoch(); |
7c673cae FG |
243 | } |
244 | } | |
245 | ||
246 | ||
247 | bool Beacon::is_laggy() | |
248 | { | |
11fdf7f2 | 249 | std::unique_lock lock(mutex); |
7c673cae | 250 | |
91327a77 AA |
251 | auto now = clock::now(); |
252 | auto since = std::chrono::duration<double>(now-last_acked_stamp).count(); | |
11fdf7f2 | 253 | if (since > g_conf()->mds_beacon_grace) { |
91327a77 | 254 | if (!laggy) { |
11fdf7f2 TL |
255 | dout(1) << "MDS connection to Monitors appears to be laggy; " << since |
256 | << "s since last acked beacon" << dendl; | |
91327a77 AA |
257 | } |
258 | laggy = true; | |
7c673cae FG |
259 | return true; |
260 | } | |
261 | return false; | |
262 | } | |
263 | ||
9f95a23c | 264 | void Beacon::set_want_state(const MDSMap &mdsmap, MDSMap::DaemonState newstate) |
7c673cae | 265 | { |
11fdf7f2 | 266 | std::unique_lock lock(mutex); |
7c673cae FG |
267 | |
268 | // Update mdsmap epoch atomically with updating want_state, so that when | |
269 | // we send a beacon with the new want state it has the latest epoch, and | |
270 | // once we have updated to the latest epoch, we are not sending out | |
271 | // a stale want_state (i.e. one from before making it through MDSMap | |
272 | // handling) | |
273 | _notify_mdsmap(mdsmap); | |
274 | ||
275 | if (want_state != newstate) { | |
91327a77 | 276 | dout(5) << __func__ << ": " |
7c673cae FG |
277 | << ceph_mds_state_name(want_state) << " -> " |
278 | << ceph_mds_state_name(newstate) << dendl; | |
279 | want_state = newstate; | |
280 | } | |
281 | } | |
282 | ||
283 | ||
284 | /** | |
285 | * We are 'shown' an MDS briefly in order to update | |
286 | * some health metrics that we will send in the next | |
287 | * beacon. | |
288 | */ | |
289 | void Beacon::notify_health(MDSRank const *mds) | |
290 | { | |
11fdf7f2 | 291 | std::unique_lock lock(mutex); |
7c673cae FG |
292 | if (!mds) { |
293 | // No MDS rank held | |
294 | return; | |
295 | } | |
296 | ||
297 | // I'm going to touch this MDS, so it must be locked | |
9f95a23c | 298 | ceph_assert(ceph_mutex_is_locked_by_me(mds->mds_lock)); |
7c673cae FG |
299 | |
300 | health.metrics.clear(); | |
301 | ||
2a845540 TL |
302 | if (unlikely(g_conf().get_val<bool>("mds_inject_health_dummy"))) { |
303 | MDSHealthMetric m(MDS_HEALTH_DUMMY, HEALTH_ERR, std::string("dummy")); | |
304 | health.metrics.push_back(m); | |
305 | } | |
306 | ||
7c673cae FG |
307 | // Detect presence of entries in DamageTable |
308 | if (!mds->damage_table.empty()) { | |
309 | MDSHealthMetric m(MDS_HEALTH_DAMAGE, HEALTH_ERR, std::string( | |
310 | "Metadata damage detected")); | |
311 | health.metrics.push_back(m); | |
312 | } | |
313 | ||
314 | // Detect MDS_HEALTH_TRIM condition | |
f91f0fd5 | 315 | // Indicates MDS is not trimming promptly |
7c673cae | 316 | { |
f91f0fd5 | 317 | if (mds->mdlog->get_num_segments() > (size_t)(g_conf()->mds_log_max_segments * g_conf().get_val<double>("mds_log_warn_factor"))) { |
f67539c2 TL |
318 | CachedStackStringStream css; |
319 | *css << "Behind on trimming (" << mds->mdlog->get_num_segments() | |
11fdf7f2 | 320 | << "/" << g_conf()->mds_log_max_segments << ")"; |
7c673cae | 321 | |
f67539c2 | 322 | MDSHealthMetric m(MDS_HEALTH_TRIM, HEALTH_WARN, css->strv()); |
7c673cae | 323 | m.metadata["num_segments"] = stringify(mds->mdlog->get_num_segments()); |
11fdf7f2 | 324 | m.metadata["max_segments"] = stringify(g_conf()->mds_log_max_segments); |
7c673cae FG |
325 | health.metrics.push_back(m); |
326 | } | |
327 | } | |
328 | ||
329 | // Detect clients failing to respond to modifications to capabilities in | |
330 | // CLIENT_CAPS messages. | |
331 | { | |
9f95a23c TL |
332 | auto&& late_clients = mds->locker->get_late_revoking_clients(mds->mdsmap->get_session_timeout()); |
333 | std::vector<MDSHealthMetric> late_cap_metrics; | |
7c673cae | 334 | |
9f95a23c | 335 | for (const auto& client : late_clients) { |
7c673cae FG |
336 | // client_t is equivalent to session.info.inst.name.num |
337 | // Construct an entity_name_t to lookup into SessionMap | |
9f95a23c | 338 | entity_name_t ename(CEPH_ENTITY_TYPE_CLIENT, client.v); |
7c673cae FG |
339 | Session const *s = mds->sessionmap.get_session(ename); |
340 | if (s == NULL) { | |
341 | // Shouldn't happen, but not worth crashing if it does as this is | |
342 | // just health-reporting code. | |
9f95a23c | 343 | derr << "Client ID without session: " << client.v << dendl; |
7c673cae FG |
344 | continue; |
345 | } | |
346 | ||
f67539c2 TL |
347 | CachedStackStringStream css; |
348 | *css << "Client " << s->get_human_name() << " failing to respond to capability release"; | |
349 | MDSHealthMetric m(MDS_HEALTH_CLIENT_LATE_RELEASE, HEALTH_WARN, css->strv()); | |
9f95a23c TL |
350 | m.metadata["client_id"] = stringify(client.v); |
351 | late_cap_metrics.emplace_back(std::move(m)); | |
7c673cae FG |
352 | } |
353 | ||
11fdf7f2 | 354 | if (late_cap_metrics.size() <= (size_t)g_conf()->mds_health_summarize_threshold) { |
9f95a23c TL |
355 | auto&& m = late_cap_metrics; |
356 | health.metrics.insert(std::end(health.metrics), std::cbegin(m), std::cend(m)); | |
7c673cae | 357 | } else { |
f67539c2 TL |
358 | CachedStackStringStream css; |
359 | *css << "Many clients (" << late_cap_metrics.size() | |
7c673cae | 360 | << ") failing to respond to capability release"; |
f67539c2 | 361 | MDSHealthMetric m(MDS_HEALTH_CLIENT_LATE_RELEASE_MANY, HEALTH_WARN, css->strv()); |
7c673cae | 362 | m.metadata["client_count"] = stringify(late_cap_metrics.size()); |
9f95a23c | 363 | health.metrics.push_back(std::move(m)); |
7c673cae FG |
364 | } |
365 | } | |
366 | ||
367 | // Detect clients failing to generate cap releases from CEPH_SESSION_RECALL_STATE | |
368 | // messages. May be due to buggy client or resource-hogging application. | |
369 | // | |
370 | // Detect clients failing to advance their old_client_tid | |
371 | { | |
20effc67 | 372 | std::set<Session*> sessions; |
7c673cae FG |
373 | mds->sessionmap.get_client_session_set(sessions); |
374 | ||
f91f0fd5 | 375 | const auto min_caps_working_set = g_conf().get_val<uint64_t>("mds_min_caps_working_set"); |
11fdf7f2 TL |
376 | const auto recall_warning_threshold = g_conf().get_val<Option::size_t>("mds_recall_warning_threshold"); |
377 | const auto max_completed_requests = g_conf()->mds_max_completed_requests; | |
378 | const auto max_completed_flushes = g_conf()->mds_max_completed_flushes; | |
9f95a23c TL |
379 | std::vector<MDSHealthMetric> late_recall_metrics; |
380 | std::vector<MDSHealthMetric> large_completed_requests_metrics; | |
91327a77 | 381 | for (auto& session : sessions) { |
f91f0fd5 | 382 | const uint64_t num_caps = session->get_num_caps(); |
11fdf7f2 | 383 | const uint64_t recall_caps = session->get_recall_caps(); |
f91f0fd5 | 384 | if (recall_caps > recall_warning_threshold && num_caps > min_caps_working_set) { |
a8e16298 TL |
385 | dout(2) << "Session " << *session << |
386 | " is not releasing caps fast enough. Recalled caps at " << recall_caps | |
387 | << " > " << recall_warning_threshold << " (mds_recall_warning_threshold)." << dendl; | |
f67539c2 TL |
388 | CachedStackStringStream css; |
389 | *css << "Client " << session->get_human_name() << " failing to respond to cache pressure"; | |
390 | MDSHealthMetric m(MDS_HEALTH_CLIENT_RECALL, HEALTH_WARN, css->strv()); | |
a8e16298 | 391 | m.metadata["client_id"] = stringify(session->get_client()); |
9f95a23c | 392 | late_recall_metrics.emplace_back(std::move(m)); |
7c673cae FG |
393 | } |
394 | if ((session->get_num_trim_requests_warnings() > 0 && | |
a8e16298 | 395 | session->get_num_completed_requests() >= max_completed_requests) || |
7c673cae | 396 | (session->get_num_trim_flushes_warnings() > 0 && |
a8e16298 | 397 | session->get_num_completed_flushes() >= max_completed_flushes)) { |
f67539c2 TL |
398 | CachedStackStringStream css; |
399 | *css << "Client " << session->get_human_name() << " failing to advance its oldest client/flush tid. "; | |
400 | MDSHealthMetric m(MDS_HEALTH_CLIENT_OLDEST_TID, HEALTH_WARN, css->strv()); | |
11fdf7f2 | 401 | m.metadata["client_id"] = stringify(session->get_client()); |
9f95a23c | 402 | large_completed_requests_metrics.emplace_back(std::move(m)); |
7c673cae FG |
403 | } |
404 | } | |
405 | ||
11fdf7f2 | 406 | if (late_recall_metrics.size() <= (size_t)g_conf()->mds_health_summarize_threshold) { |
9f95a23c TL |
407 | auto&& m = late_recall_metrics; |
408 | health.metrics.insert(std::end(health.metrics), std::cbegin(m), std::cend(m)); | |
7c673cae | 409 | } else { |
f67539c2 TL |
410 | CachedStackStringStream css; |
411 | *css << "Many clients (" << late_recall_metrics.size() | |
7c673cae | 412 | << ") failing to respond to cache pressure"; |
f67539c2 | 413 | MDSHealthMetric m(MDS_HEALTH_CLIENT_RECALL_MANY, HEALTH_WARN, css->strv()); |
7c673cae FG |
414 | m.metadata["client_count"] = stringify(late_recall_metrics.size()); |
415 | health.metrics.push_back(m); | |
416 | late_recall_metrics.clear(); | |
417 | } | |
418 | ||
11fdf7f2 | 419 | if (large_completed_requests_metrics.size() <= (size_t)g_conf()->mds_health_summarize_threshold) { |
9f95a23c TL |
420 | auto&& m = large_completed_requests_metrics; |
421 | health.metrics.insert(std::end(health.metrics), std::cbegin(m), std::cend(m)); | |
7c673cae | 422 | } else { |
f67539c2 TL |
423 | CachedStackStringStream css; |
424 | *css << "Many clients (" << large_completed_requests_metrics.size() | |
7c673cae | 425 | << ") failing to advance their oldest client/flush tid"; |
f67539c2 | 426 | MDSHealthMetric m(MDS_HEALTH_CLIENT_OLDEST_TID_MANY, HEALTH_WARN, css->strv()); |
7c673cae FG |
427 | m.metadata["client_count"] = stringify(large_completed_requests_metrics.size()); |
428 | health.metrics.push_back(m); | |
429 | large_completed_requests_metrics.clear(); | |
430 | } | |
431 | } | |
432 | ||
433 | // Detect MDS_HEALTH_SLOW_REQUEST condition | |
434 | { | |
435 | int slow = mds->get_mds_slow_req_count(); | |
7c673cae | 436 | if (slow) { |
91327a77 | 437 | dout(20) << slow << " slow request found" << dendl; |
f67539c2 TL |
438 | CachedStackStringStream css; |
439 | *css << slow << " slow requests are blocked > " << g_conf()->mds_op_complaint_time << " secs"; | |
7c673cae | 440 | |
f67539c2 | 441 | MDSHealthMetric m(MDS_HEALTH_SLOW_REQUEST, HEALTH_WARN, css->strv()); |
7c673cae FG |
442 | health.metrics.push_back(m); |
443 | } | |
444 | } | |
445 | ||
91327a77 | 446 | { |
11fdf7f2 | 447 | auto complaint_time = g_conf()->osd_op_complaint_time; |
91327a77 AA |
448 | auto now = clock::now(); |
449 | auto cutoff = now - ceph::make_timespan(complaint_time); | |
450 | ||
451 | std::string count; | |
452 | ceph::coarse_mono_time oldest; | |
453 | if (MDSIOContextBase::check_ios_in_flight(cutoff, count, oldest)) { | |
454 | dout(20) << count << " slow metadata IOs found" << dendl; | |
455 | ||
456 | auto oldest_secs = std::chrono::duration<double>(now - oldest).count(); | |
f67539c2 TL |
457 | CachedStackStringStream css; |
458 | *css << count << " slow metadata IOs are blocked > " << complaint_time | |
91327a77 AA |
459 | << " secs, oldest blocked for " << (int64_t)oldest_secs << " secs"; |
460 | ||
f67539c2 | 461 | MDSHealthMetric m(MDS_HEALTH_SLOW_METADATA_IO, HEALTH_WARN, css->strv()); |
91327a77 AA |
462 | health.metrics.push_back(m); |
463 | } | |
464 | } | |
465 | ||
7c673cae FG |
466 | // Report a health warning if we are readonly |
467 | if (mds->mdcache->is_readonly()) { | |
468 | MDSHealthMetric m(MDS_HEALTH_READ_ONLY, HEALTH_WARN, | |
469 | "MDS in read-only mode"); | |
470 | health.metrics.push_back(m); | |
471 | } | |
472 | ||
473 | // Report if we have significantly exceeded our cache size limit | |
181888fb | 474 | if (mds->mdcache->cache_overfull()) { |
f67539c2 TL |
475 | CachedStackStringStream css; |
476 | *css << "MDS cache is too large (" << bytes2str(mds->mdcache->cache_size()) | |
181888fb | 477 | << "/" << bytes2str(mds->mdcache->cache_limit_memory()) << "); " |
7c673cae FG |
478 | << mds->mdcache->num_inodes_with_caps << " inodes in use by clients, " |
479 | << mds->mdcache->get_num_strays() << " stray files"; | |
480 | ||
f67539c2 | 481 | MDSHealthMetric m(MDS_HEALTH_CACHE_OVERSIZED, HEALTH_WARN, css->strv()); |
7c673cae FG |
482 | health.metrics.push_back(m); |
483 | } | |
484 | } | |
485 | ||
486 | MDSMap::DaemonState Beacon::get_want_state() const | |
487 | { | |
11fdf7f2 | 488 | std::unique_lock lock(mutex); |
7c673cae FG |
489 | return want_state; |
490 | } | |
491 |