]> git.proxmox.com Git - ceph.git/blame - ceph/src/mon/HealthMonitor.cc
update sources to 12.2.8
[ceph.git] / ceph / src / mon / HealthMonitor.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2013 Inktank, Inc
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
7c673cae
FG
15#include <stdlib.h>
16#include <limits.h>
224ce89b
WB
17#include <sstream>
18#include <boost/regex.hpp>
7c673cae 19
7c673cae 20#include "include/assert.h"
224ce89b 21#include "include/stringify.h"
7c673cae
FG
22
23#include "mon/Monitor.h"
24#include "mon/HealthService.h"
25#include "mon/HealthMonitor.h"
26#include "mon/DataHealthService.h"
27
28#include "messages/MMonHealth.h"
224ce89b
WB
29#include "messages/MMonHealthChecks.h"
30
7c673cae 31#include "common/Formatter.h"
7c673cae
FG
32
33#define dout_subsys ceph_subsys_mon
34#undef dout_prefix
35#define dout_prefix _prefix(_dout, mon, this)
36static ostream& _prefix(std::ostream *_dout, const Monitor *mon,
37 const HealthMonitor *hmon) {
38 return *_dout << "mon." << mon->name << "@" << mon->rank
224ce89b
WB
39 << "(" << mon->get_state_name() << ").health ";
40}
41
42HealthMonitor::HealthMonitor(Monitor *m, Paxos *p, const string& service_name)
43 : PaxosService(m, p, service_name) {
7c673cae
FG
44}
45
46void HealthMonitor::init()
47{
48 dout(10) << __func__ << dendl;
224ce89b
WB
49}
50
51void HealthMonitor::create_initial()
52{
53 dout(10) << __func__ << dendl;
54}
55
56void HealthMonitor::update_from_paxos(bool *need_bootstrap)
57{
58 version = get_last_committed();
59 dout(10) << __func__ << dendl;
60 load_health();
61
62 bufferlist qbl;
63 mon->store->get(service_name, "quorum", qbl);
64 if (qbl.length()) {
65 auto p = qbl.begin();
66 ::decode(quorum_checks, p);
67 } else {
68 quorum_checks.clear();
69 }
70
71 bufferlist lbl;
72 mon->store->get(service_name, "leader", lbl);
73 if (lbl.length()) {
74 auto p = lbl.begin();
75 ::decode(leader_checks, p);
76 } else {
77 leader_checks.clear();
78 }
7c673cae 79
224ce89b
WB
80 dout(20) << "dump:";
81 JSONFormatter jf(true);
82 jf.open_object_section("health");
83 jf.open_object_section("quorum_health");
84 for (auto& p : quorum_checks) {
85 string s = string("mon.") + stringify(p.first);
86 jf.dump_object(s.c_str(), p.second);
7c673cae 87 }
224ce89b
WB
88 jf.close_section();
89 jf.dump_object("leader_health", leader_checks);
90 jf.close_section();
91 jf.flush(*_dout);
92 *_dout << dendl;
7c673cae
FG
93}
94
224ce89b 95void HealthMonitor::create_pending()
7c673cae 96{
224ce89b 97 dout(10) << " " << version << dendl;
7c673cae
FG
98}
99
224ce89b
WB
100void HealthMonitor::encode_pending(MonitorDBStore::TransactionRef t)
101{
102 ++version;
103 dout(10) << " " << version << dendl;
104 put_last_committed(t, version);
105
106 bufferlist qbl;
107 ::encode(quorum_checks, qbl);
108 t->put(service_name, "quorum", qbl);
109 bufferlist lbl;
110 ::encode(leader_checks, lbl);
111 t->put(service_name, "leader", lbl);
112
113 health_check_map_t pending_health;
114
115 // combine per-mon details carefully...
116 map<string,set<string>> names; // code -> <mon names>
117 for (auto p : quorum_checks) {
118 for (auto q : p.second.checks) {
119 names[q.first].insert(mon->monmap->get_name(p.first));
120 }
121 pending_health.merge(p.second);
7c673cae 122 }
c07f9fc5 123 for (auto &p : pending_health.checks) {
224ce89b
WB
124 p.second.summary = boost::regex_replace(
125 p.second.summary,
c07f9fc5
FG
126 boost::regex("%hasorhave%"),
127 names[p.first].size() > 1 ? "have" : "has");
224ce89b
WB
128 p.second.summary = boost::regex_replace(
129 p.second.summary,
130 boost::regex("%names%"), stringify(names[p.first]));
131 p.second.summary = boost::regex_replace(
132 p.second.summary,
133 boost::regex("%plurals%"),
134 names[p.first].size() > 1 ? "s" : "");
135 p.second.summary = boost::regex_replace(
136 p.second.summary,
137 boost::regex("%isorare%"),
138 names[p.first].size() > 1 ? "are" : "is");
139 }
140
141 pending_health.merge(leader_checks);
142 encode_health(pending_health, t);
7c673cae
FG
143}
144
224ce89b
WB
145version_t HealthMonitor::get_trim_to()
146{
147 // we don't actually need *any* old states, but keep a few.
148 if (version > 5) {
149 return version - 5;
7c673cae 150 }
224ce89b 151 return 0;
7c673cae
FG
152}
153
224ce89b 154bool HealthMonitor::preprocess_query(MonOpRequestRef op)
7c673cae 155{
c07f9fc5
FG
156 return false;
157}
158
159bool HealthMonitor::prepare_update(MonOpRequestRef op)
160{
161 Message *m = op->get_req();
162 dout(7) << "prepare_update " << *m
163 << " from " << m->get_orig_source_inst() << dendl;
164 switch (m->get_type()) {
224ce89b
WB
165 case MSG_MON_HEALTH:
166 {
167 MMonHealth *hm = static_cast<MMonHealth*>(op->get_req());
168 int service_type = hm->get_service_type();
169 if (services.count(service_type) == 0) {
170 dout(1) << __func__ << " service type " << service_type
171 << " not registered -- drop message!" << dendl;
172 return false;
173 }
174 return services[service_type]->service_dispatch(op);
175 }
224ce89b 176 case MSG_MON_HEALTH_CHECKS:
c07f9fc5
FG
177 return prepare_health_checks(op);
178 default:
179 return false;
224ce89b 180 }
224ce89b
WB
181}
182
c07f9fc5 183bool HealthMonitor::prepare_health_checks(MonOpRequestRef op)
224ce89b
WB
184{
185 MMonHealthChecks *m = static_cast<MMonHealthChecks*>(op->get_req());
c07f9fc5
FG
186 // no need to check if it's changed, the peon has done so
187 quorum_checks[m->get_source().num()] = std::move(m->health_checks);
224ce89b
WB
188 return true;
189}
190
191void HealthMonitor::tick()
192{
193 if (!is_active()) {
194 return;
195 }
196 dout(10) << __func__ << dendl;
197 bool changed = false;
198 if (check_member_health()) {
199 changed = true;
200 }
c07f9fc5
FG
201 if (!mon->is_leader()) {
202 return;
203 }
204 if (check_leader_health()) {
205 changed = true;
7c673cae 206 }
224ce89b
WB
207 if (changed) {
208 propose_pending();
209 }
210}
211
212bool HealthMonitor::check_member_health()
213{
214 dout(20) << __func__ << dendl;
215 bool changed = false;
7c673cae 216
224ce89b
WB
217 // snapshot of usage
218 DataStats stats;
219 get_fs_stats(stats.fs_stats, g_conf->mon_data.c_str());
220 map<string,uint64_t> extra;
221 uint64_t store_size = mon->store->get_estimated_size(extra);
222 assert(store_size > 0);
223 stats.store_stats.bytes_total = store_size;
224 stats.store_stats.bytes_sst = extra["sst"];
225 stats.store_stats.bytes_log = extra["log"];
226 stats.store_stats.bytes_misc = extra["misc"];
227 stats.last_update = ceph_clock_now();
228 dout(10) << __func__ << " avail " << stats.fs_stats.avail_percent << "%"
1adf2230
AA
229 << " total " << byte_u_t(stats.fs_stats.byte_total)
230 << ", used " << byte_u_t(stats.fs_stats.byte_used)
231 << ", avail " << byte_u_t(stats.fs_stats.byte_avail) << dendl;
224ce89b
WB
232
233 // MON_DISK_{LOW,CRIT,BIG}
234 health_check_map_t next;
235 if (stats.fs_stats.avail_percent <= g_conf->mon_data_avail_crit) {
236 stringstream ss, ss2;
237 ss << "mon%plurals% %names% %isorare% very low on available space";
238 auto& d = next.add("MON_DISK_CRIT", HEALTH_ERR, ss.str());
239 ss2 << "mon." << mon->name << " has " << stats.fs_stats.avail_percent
240 << "% avail";
241 d.detail.push_back(ss2.str());
242 } else if (stats.fs_stats.avail_percent <= g_conf->mon_data_avail_warn) {
243 stringstream ss, ss2;
244 ss << "mon%plurals% %names% %isorare% low on available space";
c07f9fc5 245 auto& d = next.add("MON_DISK_LOW", HEALTH_WARN, ss.str());
224ce89b
WB
246 ss2 << "mon." << mon->name << " has " << stats.fs_stats.avail_percent
247 << "% avail";
248 d.detail.push_back(ss2.str());
249 }
250 if (stats.store_stats.bytes_total >= g_conf->mon_data_size_warn) {
251 stringstream ss, ss2;
252 ss << "mon%plurals% %names% %isorare% using a lot of disk space";
253 auto& d = next.add("MON_DISK_BIG", HEALTH_WARN, ss.str());
254 ss2 << "mon." << mon->name << " is "
1adf2230 255 << byte_u_t(stats.store_stats.bytes_total)
224ce89b 256 << " >= mon_data_size_warn ("
1adf2230 257 << byte_u_t(g_conf->mon_data_size_warn) << ")";
224ce89b 258 d.detail.push_back(ss2.str());
7c673cae
FG
259 }
260
224ce89b
WB
261 // OSD_NO_DOWN_OUT_INTERVAL
262 {
263 // Warn if 'mon_osd_down_out_interval' is set to zero.
264 // Having this option set to zero on the leader acts much like the
265 // 'noout' flag. It's hard to figure out what's going wrong with clusters
266 // without the 'noout' flag set but acting like that just the same, so
267 // we report a HEALTH_WARN in case this option is set to zero.
268 // This is an ugly hack to get the warning out, but until we find a way
269 // to spread global options throughout the mon cluster and have all mons
270 // using a base set of the same options, we need to work around this sort
271 // of things.
272 // There's also the obvious drawback that if this is set on a single
273 // monitor on a 3-monitor cluster, this warning will only be shown every
274 // third monitor connection.
275 if (g_conf->mon_warn_on_osd_down_out_interval_zero &&
276 g_conf->mon_osd_down_out_interval == 0) {
277 ostringstream ss, ds;
c07f9fc5 278 ss << "mon%plurals% %names% %hasorhave% mon_osd_down_out_interval set to 0";
224ce89b
WB
279 auto& d = next.add("OSD_NO_DOWN_OUT_INTERVAL", HEALTH_WARN, ss.str());
280 ds << "mon." << mon->name << " has mon_osd_down_out_interval set to 0";
281 d.detail.push_back(ds.str());
282 }
283 }
284
c07f9fc5
FG
285 auto p = quorum_checks.find(mon->rank);
286 if (p == quorum_checks.end()) {
287 if (next.empty()) {
288 return false;
289 }
290 } else {
291 if (p->second == next) {
292 return false;
293 }
294 }
295
296 if (mon->is_leader()) {
297 // prepare to propose
298 quorum_checks[mon->rank] = next;
299 changed = true;
300 } else {
1adf2230
AA
301 // tell the leader, but only if the quorum is luminous
302 if (mon->quorum_mon_features.contains_all(
303 ceph::features::mon::FEATURE_LUMINOUS)) {
304 mon->messenger->send_message(new MMonHealthChecks(next),
305 mon->monmap->get_inst(mon->get_leader()));
306 }
c07f9fc5
FG
307 }
308
224ce89b 309 return changed;
7c673cae
FG
310}
311
224ce89b
WB
312bool HealthMonitor::check_leader_health()
313{
314 dout(20) << __func__ << dendl;
315 bool changed = false;
316
317 // prune quorum_health
318 {
319 auto& qset = mon->get_quorum();
320 auto p = quorum_checks.begin();
321 while (p != quorum_checks.end()) {
322 if (qset.count(p->first) == 0) {
323 p = quorum_checks.erase(p);
324 changed = true;
325 } else {
326 ++p;
327 }
328 }
329 }
330
331 health_check_map_t next;
332
333 // MON_DOWN
334 {
335 int max = mon->monmap->size();
336 int actual = mon->get_quorum().size();
337 if (actual < max) {
338 ostringstream ss;
339 ss << (max-actual) << "/" << max << " mons down, quorum "
340 << mon->get_quorum_names();
341 auto& d = next.add("MON_DOWN", HEALTH_WARN, ss.str());
342 set<int> q = mon->get_quorum();
343 for (int i=0; i<max; i++) {
344 if (q.count(i) == 0) {
345 ostringstream ss;
346 ss << "mon." << mon->monmap->get_name(i) << " (rank " << i
347 << ") addr " << mon->monmap->get_addr(i)
348 << " is down (out of quorum)";
349 d.detail.push_back(ss.str());
350 }
351 }
352 }
353 }
354
355 // MON_CLOCK_SKEW
356 if (!mon->timecheck_skews.empty()) {
357 list<string> warns;
358 list<string> details;
359 for (map<entity_inst_t,double>::iterator i = mon->timecheck_skews.begin();
360 i != mon->timecheck_skews.end(); ++i) {
361 entity_inst_t inst = i->first;
362 double skew = i->second;
363 double latency = mon->timecheck_latencies[inst];
364 string name = mon->monmap->get_name(inst.addr);
365 ostringstream tcss;
366 health_status_t tcstatus = mon->timecheck_status(tcss, skew, latency);
367 if (tcstatus != HEALTH_OK) {
368 warns.push_back(name);
369 ostringstream tmp_ss;
370 tmp_ss << "mon." << name
371 << " addr " << inst.addr << " " << tcss.str()
372 << " (latency " << latency << "s)";
373 details.push_back(tmp_ss.str());
374 }
375 }
376 if (!warns.empty()) {
377 ostringstream ss;
378 ss << "clock skew detected on";
379 while (!warns.empty()) {
380 ss << " mon." << warns.front();
381 warns.pop_front();
382 if (!warns.empty())
383 ss << ",";
384 }
c07f9fc5 385 auto& d = next.add("MON_CLOCK_SKEW", HEALTH_WARN, ss.str());
224ce89b
WB
386 d.detail.swap(details);
387 }
388 }
389
390 if (next != leader_checks) {
391 changed = true;
392 leader_checks = next;
393 }
394 return changed;
395}