// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2013 Inktank, Inc
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation.  See file COPYING.
 */

#include "include/ceph_assert.h"
#include "include/common_fwd.h"
#include "include/stringify.h"

#include "mon/Monitor.h"
#include "mon/HealthMonitor.h"

#include "messages/MMonHealthChecks.h"

#include "common/Formatter.h"

#define dout_subsys ceph_subsys_mon
#undef dout_prefix
#define dout_prefix _prefix(_dout, mon, this)

using namespace TOPNSPC::common;

using namespace std::literals;

using std::list;
using std::map;
using std::ostream;
using std::ostringstream;
using std::set;
using std::string;
using std::stringstream;
using std::unique_ptr;

using ceph::bufferlist;
using ceph::Formatter;
using ceph::JSONFormatter;
using ceph::mono_clock;
using ceph::mono_time;
using ceph::parse_timespan;
using ceph::timespan_str;

static ostream& _prefix(std::ostream *_dout, const Monitor &mon,
                        const HealthMonitor *hmon) {
  return *_dout << "mon." << mon.name << "@" << mon.rank
                << "(" << mon.get_state_name() << ").health ";
}

HealthMonitor::HealthMonitor(Monitor &m, Paxos &p, const string& service_name)
  : PaxosService(m, p, service_name) {
}

void HealthMonitor::init()
{
  dout(10) << __func__ << dendl;
}

void HealthMonitor::create_initial()
{
  dout(10) << __func__ << dendl;
}

void HealthMonitor::update_from_paxos(bool *need_bootstrap)
{
  version = get_last_committed();
  dout(10) << __func__ << dendl;
  load_health();

  bufferlist qbl;
  mon.store->get(service_name, "quorum", qbl);
  if (qbl.length()) {
    auto p = qbl.cbegin();
    decode(quorum_checks, p);
  } else {
    quorum_checks.clear();
  }

  bufferlist lbl;
  mon.store->get(service_name, "leader", lbl);
  if (lbl.length()) {
    auto p = lbl.cbegin();
    decode(leader_checks, p);
  } else {
    leader_checks.clear();
  }

  bufferlist bl;
  mon.store->get(service_name, "mutes", bl);
  if (bl.length()) {
    auto p = bl.cbegin();
    decode(mutes, p);
  } else {
    mutes.clear();
  }

  dout(20) << "dump:";
  JSONFormatter jf(true);
  jf.open_object_section("health");
  jf.open_object_section("quorum_health");
  for (auto& p : quorum_checks) {
    string s = string("mon.") + stringify(p.first);
    jf.dump_object(s.c_str(), p.second);
  }
  jf.close_section();
  jf.dump_object("leader_health", leader_checks);
  jf.close_section();
  jf.flush(*_dout);
  *_dout << dendl;
}

void HealthMonitor::create_pending()
{
  dout(10) << " " << version << dendl;
  pending_mutes = mutes;
}

void HealthMonitor::encode_pending(MonitorDBStore::TransactionRef t)
{
  ++version;
  dout(10) << " " << version << dendl;
  put_last_committed(t, version);

  bufferlist qbl;
  encode(quorum_checks, qbl);
  t->put(service_name, "quorum", qbl);

  bufferlist lbl;
  encode(leader_checks, lbl);
  t->put(service_name, "leader", lbl);

  bufferlist bl;
  encode(pending_mutes, bl);
  t->put(service_name, "mutes", bl);

  health_check_map_t pending_health;

  // combine per-mon details carefully...
  map<string,set<string>> names;  // code -> <mon names>
  for (auto p : quorum_checks) {
    for (auto q : p.second.checks) {
      names[q.first].insert(mon.monmap->get_name(p.first));
    }
    pending_health.merge(p.second);
  }
  // expand grammar placeholders in the merged summaries
  for (auto& p : pending_health.checks) {
    p.second.summary = std::regex_replace(
      p.second.summary,
      std::regex("%hasorhave%"),
      names[p.first].size() > 1 ? "have" : "has");
    p.second.summary = std::regex_replace(
      p.second.summary,
      std::regex("%names%"), stringify(names[p.first]));
    p.second.summary = std::regex_replace(
      p.second.summary,
      std::regex("%plurals%"),
      names[p.first].size() > 1 ? "s" : "");
    p.second.summary = std::regex_replace(
      p.second.summary,
      std::regex("%isorare%"),
      names[p.first].size() > 1 ? "are" : "is");
  }

  pending_health.merge(leader_checks);
  encode_health(pending_health, t);
}

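// Worked example of the placeholder expansion above (illustrative values,
// not from a live cluster): if mons "a" and "b" both report MON_DISK_LOW
// with the templated summary
//   "mon%plurals% %names% %isorare% low on available space"
// then names["MON_DISK_LOW"] == {"a","b"} and the merged summary expands to
//   "mons a,b are low on available space"
// while a single reporter would yield "mon a is low on available space".
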
version_t HealthMonitor::get_trim_to() const
{
  // we don't actually need *any* old states, but keep a few.
  if (version > 5) {
    return version - 5;
  }
  return 0;
}

bool HealthMonitor::preprocess_query(MonOpRequestRef op)
{
  auto m = op->get_req<PaxosServiceMessage>();
  switch (m->get_type()) {
  case MSG_MON_COMMAND:
    return preprocess_command(op);
  case MSG_MON_HEALTH_CHECKS:
    return false;
  default:
    mon.no_reply(op);
    derr << "Unhandled message type " << m->get_type() << dendl;
    return true;
  }
}

bool HealthMonitor::prepare_update(MonOpRequestRef op)
{
  Message *m = op->get_req();
  dout(7) << "prepare_update " << *m
          << " from " << m->get_orig_source_inst() << dendl;
  switch (m->get_type()) {
  case MSG_MON_HEALTH_CHECKS:
    return prepare_health_checks(op);
  case MSG_MON_COMMAND:
    return prepare_command(op);
  default:
    return false;
  }
}

bool HealthMonitor::preprocess_command(MonOpRequestRef op)
{
  auto m = op->get_req<MMonCommand>();
  std::stringstream ss;
  bufferlist rdata;

  cmdmap_t cmdmap;
  if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) {
    string rs = ss.str();
    mon.reply_command(op, -EINVAL, rs, rdata, get_last_committed());
    return true;
  }

  MonSession *session = op->get_session();
  if (!session) {
    mon.reply_command(op, -EACCES, "access denied", rdata,
                      get_last_committed());
    return true;
  }
  // more sanity checks
  try {
    string format;
    cmd_getval(cmdmap, "format", format);
    string prefix;
    cmd_getval(cmdmap, "prefix", prefix);
  } catch (const bad_cmd_get& e) {
    mon.reply_command(op, -EINVAL, e.what(), rdata, get_last_committed());
    return true;
  }
  return false;
}

bool HealthMonitor::prepare_command(MonOpRequestRef op)
{
  auto m = op->get_req<MMonCommand>();

  std::stringstream ss;
  bufferlist rdata;

  cmdmap_t cmdmap;
  if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) {
    string rs = ss.str();
    mon.reply_command(op, -EINVAL, rs, rdata, get_last_committed());
    return true;
  }

  MonSession *session = op->get_session();
  if (!session) {
    mon.reply_command(op, -EACCES, "access denied", rdata, get_last_committed());
    return true;
  }

  string format = cmd_getval_or<string>(cmdmap, "format", "plain");
  boost::scoped_ptr<Formatter> f(Formatter::create(format));

  string prefix;
  cmd_getval(cmdmap, "prefix", prefix);

  int r = 0;

  if (prefix == "health mute") {
    string code;
    bool sticky = false;
    if (!cmd_getval(cmdmap, "code", code) ||
        code == "") {
      r = -EINVAL;
      ss << "must specify an alert code to mute";
      goto out;
    }
    cmd_getval(cmdmap, "sticky", sticky);
    string ttl_str;
    utime_t ttl;
    std::chrono::seconds secs;
    if (cmd_getval(cmdmap, "ttl", ttl_str)) {
      try {
        secs = parse_timespan(ttl_str);
        if (secs == 0s) {
          throw std::invalid_argument("timespan = 0");
        }
      } catch (const std::invalid_argument& e) {
        ss << "invalid duration: " << ttl_str << " (" << e.what() << ")";
        r = -EINVAL;
        goto out;
      }
      ttl = ceph_clock_now();
      ttl += std::chrono::duration<double>(secs).count();
    }
    health_check_map_t all;
    gather_all_health_checks(&all);
    string summary;
    int64_t count = 0;
    if (!sticky) {
      auto p = all.checks.find(code);
      if (p == all.checks.end()) {
        r = -ENOENT;
        ss << "health alert " << code << " is not currently raised";
        goto out;
      }
      count = p->second.count;
      summary = p->second.summary;
    }
    auto& m = pending_mutes[code];
    m.code = code;
    m.ttl = ttl;
    m.sticky = sticky;
    m.summary = summary;
    m.count = count;
  } else if (prefix == "health unmute") {
    string code;
    if (cmd_getval(cmdmap, "code", code)) {
      pending_mutes.erase(code);
    } else {
      pending_mutes.clear();
    }
  } else {
    ss << "Command '" << prefix << "' not implemented!";
    r = -ENOSYS;
  }

out:
  dout(4) << __func__ << " done, r=" << r << dendl;
  /* Compose response */
  string rs;
  getline(ss, rs);

  if (r >= 0) {
    // success.. delay reply
    wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, r, rs,
                                          get_last_committed() + 1));
    return true;
  } else {
    // reply immediately
    mon.reply_command(op, r, rs, rdata, get_last_committed());
    return false;
  }
}

bool HealthMonitor::prepare_health_checks(MonOpRequestRef op)
{
  auto m = op->get_req<MMonHealthChecks>();
  // no need to check if it's changed, the peon has done so
  quorum_checks[m->get_source().num()] = std::move(m->health_checks);
  return true;
}

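// Flow sketch (a summary of the code, not additional behavior): each peon
// computes its local checks in check_member_health() and, when they change,
// sends them to the leader in an MMonHealthChecks message.  That message
// lands here on the leader, which files the map under the sender's rank, so
// quorum_checks ends up holding one health_check_map_t per quorum member.
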
void HealthMonitor::tick()
{
  if (!is_active()) {
    return;
  }
  dout(10) << __func__ << dendl;
  bool changed = false;
  if (check_member_health()) {
    changed = true;
  }
  if (!mon.is_leader()) {
    return;
  }
  if (check_leader_health()) {
    changed = true;
  }
  if (check_mutes()) {
    changed = true;
  }
  if (changed) {
    propose_pending();
  }
}

bool HealthMonitor::check_mutes()
{
  bool changed = false;
  auto now = ceph_clock_now();
  health_check_map_t all;
  gather_all_health_checks(&all);
  auto p = pending_mutes.begin();
  while (p != pending_mutes.end()) {
    if (p->second.ttl != utime_t() &&
        p->second.ttl <= now) {
      mon.clog->info() << "Health alert mute " << p->first
                       << " cleared (passed TTL " << p->second.ttl << ")";
      p = pending_mutes.erase(p);
      changed = true;
      continue;
    }
    if (!p->second.sticky) {
      auto q = all.checks.find(p->first);
      if (q == all.checks.end()) {
        mon.clog->info() << "Health alert mute " << p->first
                         << " cleared (health alert cleared)";
        p = pending_mutes.erase(p);
        changed = true;
        continue;
      }
      if (p->second.count) {
        // count-based mute
        if (q->second.count > p->second.count) {
          mon.clog->info() << "Health alert mute " << p->first
                           << " cleared (count increased from " << p->second.count
                           << " to " << q->second.count << ")";
          p = pending_mutes.erase(p);
          changed = true;
          continue;
        }
        if (q->second.count < p->second.count) {
          // ratchet down the mute
          dout(10) << __func__ << " mute " << p->first << " count "
                   << p->second.count << " -> " << q->second.count
                   << dendl;
          p->second.count = q->second.count;
          changed = true;
        }
      } else {
        // summary-based mute
        if (p->second.summary != q->second.summary) {
          mon.clog->info() << "Health alert mute " << p->first
                           << " cleared (summary changed)";
          p = pending_mutes.erase(p);
          changed = true;
          continue;
        }
      }
    }
    ++p;
  }
  return changed;
}

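// Example of the clearing rules above (hypothetical alert and counts): a
// non-sticky mute taken while an alert has count=2 survives as long as the
// count stays at or below 2, is ratcheted down if the count drops, and is
// cleared as soon as the count rises above 2 or the alert itself goes away.
// A sticky mute, in contrast, persists until its TTL passes or it is
// explicitly unmuted.
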
void HealthMonitor::gather_all_health_checks(health_check_map_t *all)
{
  for (auto& svc : mon.paxos_service) {
    all->merge(svc->get_health_checks());
  }
}

health_status_t HealthMonitor::get_health_status(
  bool want_detail,
  Formatter *f,
  std::string *plain,
  const char *sep1,
  const char *sep2)
{
  health_check_map_t all;
  gather_all_health_checks(&all);
  health_status_t r = HEALTH_OK;
  for (auto& p : all.checks) {
    if (!mutes.count(p.first)) {
      if (r > p.second.severity) {
        r = p.second.severity;
      }
    }
  }
  if (f) {
    f->open_object_section("health");
    f->dump_stream("status") << r;
    f->open_object_section("checks");
    for (auto& p : all.checks) {
      f->open_object_section(p.first.c_str());
      p.second.dump(f, want_detail);
      f->dump_bool("muted", mutes.count(p.first));
      f->close_section();
    }
    f->close_section();
    f->open_array_section("mutes");
    for (auto& p : mutes) {
      f->dump_object("mute", p.second);
    }
    f->close_section();
    f->close_section();
  } else {
    auto now = ceph_clock_now();
    // one-liner: HEALTH_FOO[ thing1[; thing2 ...]]
    string summary;
    for (auto& p : all.checks) {
      if (!mutes.count(p.first)) {
        if (!summary.empty()) {
          summary += sep2;
        }
        summary += p.second.summary;
      }
    }
    *plain = stringify(r);
    if (summary.size()) {
      *plain += sep1;
      *plain += summary;
    }
    if (!mutes.empty()) {
      if (summary.size()) {
        *plain += sep2;
      } else {
        *plain += sep1;
      }
      *plain += "(muted:";
      for (auto& p : mutes) {
        *plain += " ";
        *plain += p.first;
        if (p.second.ttl > now) {
          auto left = p.second.ttl;
          left -= now;
          *plain += "("s + utimespan_str(left) + ")";
        }
      }
      *plain += ")";
    }
    if (want_detail) {
      *plain += "\n";
      for (auto& p : all.checks) {
        auto q = mutes.find(p.first);
        if (q != mutes.end()) {
          *plain += "(MUTED";
          if (q->second.ttl != utime_t()) {
            if (q->second.ttl > now) {
              auto left = q->second.ttl;
              left -= now;
              *plain += ", TTL ";
              *plain += utimespan_str(left);
            }
          }
          if (q->second.sticky) {
            *plain += ", STICKY";
          }
          *plain += ") ";
        }
        *plain += "["s + short_health_string(p.second.severity) + "] " +
          p.first + ": " + p.second.summary + "\n";
        for (auto& d : p.second.detail) {
          *plain += "    ";
          *plain += d;
          *plain += "\n";
        }
      }
    }
  }
  return r;
}

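// Illustrative plain-mode output (hypothetical checks; assumes the caller
// passes sep1 = " " and sep2 = "; "):
//   HEALTH_WARN 1 osds down; (muted: MON_DISK_LOW(1h))
// and with want_detail, each check gains a severity-tagged line plus
// indented detail lines, e.g.
//   [WRN] OSD_DOWN: 1 osds down
//       osd.3 is down
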
bool HealthMonitor::check_member_health()
{
  dout(20) << __func__ << dendl;
  bool changed = false;
  const auto max = g_conf().get_val<uint64_t>("mon_health_max_detail");

  // snapshot of our disk usage
  DataStats stats;
  get_fs_stats(stats.fs_stats, g_conf()->mon_data.c_str());
  map<string,uint64_t> extra;
  uint64_t store_size = mon.store->get_estimated_size(extra);
  ceph_assert(store_size > 0);
  stats.store_stats.bytes_total = store_size;
  stats.store_stats.bytes_sst = extra["sst"];
  stats.store_stats.bytes_log = extra["log"];
  stats.store_stats.bytes_misc = extra["misc"];
  stats.last_update = ceph_clock_now();
  dout(10) << __func__ << " avail " << stats.fs_stats.avail_percent << "%"
           << " total " << byte_u_t(stats.fs_stats.byte_total)
           << ", used " << byte_u_t(stats.fs_stats.byte_used)
           << ", avail " << byte_u_t(stats.fs_stats.byte_avail) << dendl;

  // MON_DISK_{LOW,CRIT,BIG}
  health_check_map_t next;
  if (stats.fs_stats.avail_percent <= g_conf()->mon_data_avail_crit) {
    stringstream ss, ss2;
    ss << "mon%plurals% %names% %isorare% very low on available space";
    auto& d = next.add("MON_DISK_CRIT", HEALTH_ERR, ss.str(), 1);
    ss2 << "mon." << mon.name << " has " << stats.fs_stats.avail_percent
        << "% avail";
    d.detail.push_back(ss2.str());
  } else if (stats.fs_stats.avail_percent <= g_conf()->mon_data_avail_warn) {
    stringstream ss, ss2;
    ss << "mon%plurals% %names% %isorare% low on available space";
    auto& d = next.add("MON_DISK_LOW", HEALTH_WARN, ss.str(), 1);
    ss2 << "mon." << mon.name << " has " << stats.fs_stats.avail_percent
        << "% avail";
    d.detail.push_back(ss2.str());
  }
  if (stats.store_stats.bytes_total >= g_conf()->mon_data_size_warn) {
    stringstream ss, ss2;
    ss << "mon%plurals% %names% %isorare% using a lot of disk space";
    auto& d = next.add("MON_DISK_BIG", HEALTH_WARN, ss.str(), 1);
    ss2 << "mon." << mon.name << " is "
        << byte_u_t(stats.store_stats.bytes_total)
        << " >= mon_data_size_warn ("
        << byte_u_t(g_conf()->mon_data_size_warn) << ")";
    d.detail.push_back(ss2.str());
  }

  // OSD_NO_DOWN_OUT_INTERVAL
  // Warn if 'mon_osd_down_out_interval' is set to zero.
  // Having this option set to zero on the leader acts much like the
  // 'noout' flag.  It's hard to figure out what's going wrong with clusters
  // without the 'noout' flag set but acting like that just the same, so
  // we report a HEALTH_WARN in case this option is set to zero.
  // This is an ugly hack to get the warning out, but until we find a way
  // to spread global options throughout the mon cluster and have all mons
  // using a base set of the same options, we need to work around this sort
  // of things.
  // There's also the obvious drawback that if this is set on a single
  // monitor on a 3-monitor cluster, this warning will only be shown every
  // third monitor connection.
  if (g_conf()->mon_warn_on_osd_down_out_interval_zero &&
      g_conf()->mon_osd_down_out_interval == 0) {
    ostringstream ss, ds;
    ss << "mon%plurals% %names% %hasorhave% mon_osd_down_out_interval set to 0";
    auto& d = next.add("OSD_NO_DOWN_OUT_INTERVAL", HEALTH_WARN, ss.str(), 1);
    ds << "mon." << mon.name << " has mon_osd_down_out_interval set to 0";
    d.detail.push_back(ds.str());
  }

  // AUTH_INSECURE_GLOBAL_ID_RECLAIM
  if (g_conf().get_val<bool>("mon_warn_on_insecure_global_id_reclaim") &&
      g_conf().get_val<bool>("auth_allow_insecure_global_id_reclaim")) {
    // Warn if there are any clients that are insecurely renewing their global_id
    std::lock_guard l(mon.session_map_lock);
    list<std::string> detail;
    for (auto p = mon.session_map.sessions.begin();
         p != mon.session_map.sessions.end();
         ++p) {
      if ((*p)->global_id_status == global_id_status_t::RECLAIM_INSECURE) {
        ostringstream ds;
        ds << (*p)->entity_name << " at " << (*p)->addrs
           << " is using insecure global_id reclaim";
        detail.push_back(ds.str());
        if (detail.size() >= max) {
          detail.push_back("...");
          break;
        }
      }
    }
    if (!detail.empty()) {
      ostringstream ss;
      ss << "client%plurals% %isorare% using insecure global_id reclaim";
      auto& d = next.add("AUTH_INSECURE_GLOBAL_ID_RECLAIM", HEALTH_WARN, ss.str(),
                         detail.size());
      d.detail.swap(detail);
    }
  }

  // AUTH_INSECURE_GLOBAL_ID_RECLAIM_ALLOWED
  if (g_conf().get_val<bool>("mon_warn_on_insecure_global_id_reclaim_allowed") &&
      g_conf().get_val<bool>("auth_allow_insecure_global_id_reclaim")) {
    ostringstream ss, ds;
    ss << "mon%plurals% %isorare% allowing insecure global_id reclaim";
    auto& d = next.add("AUTH_INSECURE_GLOBAL_ID_RECLAIM_ALLOWED", HEALTH_WARN, ss.str(), 1);
    ds << "mon." << mon.name << " has auth_allow_insecure_global_id_reclaim set to true";
    d.detail.push_back(ds.str());
  }

  auto p = quorum_checks.find(mon.rank);
  if (p == quorum_checks.end()) {
    if (next.empty()) {
      return false;
    }
  } else {
    if (p->second == next) {
      return false;
    }
  }

  if (mon.is_leader()) {
    // prepare to propose
    quorum_checks[mon.rank] = next;
    changed = true;
  } else {
    // tell the leader
    mon.send_mon_message(new MMonHealthChecks(next), mon.get_leader());
  }

  return changed;
}

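// The thresholds above come from mon config options; as a reference point,
// the shipped defaults (which a deployment may well override) are roughly
// mon_data_avail_warn = 30 (%), mon_data_avail_crit = 5 (%), and
// mon_data_size_warn = 15 GiB.
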
bool HealthMonitor::check_leader_health()
{
  dout(20) << __func__ << dendl;
  bool changed = false;

  // prune quorum_health
  {
    auto& qset = mon.get_quorum();
    auto p = quorum_checks.begin();
    while (p != quorum_checks.end()) {
      if (qset.count(p->first) == 0) {
        p = quorum_checks.erase(p);
        changed = true;
      } else {
        ++p;
      }
    }
  }

  health_check_map_t next;

  // DAEMON_OLD_VERSION
  if (g_conf().get_val<bool>("mon_warn_on_older_version")) {
    check_for_older_version(&next);
  }
  // MON_DOWN
  check_for_mon_down(&next);
  // MON_CLOCK_SKEW
  check_for_clock_skew(&next);
  // MON_MSGR2_NOT_ENABLED
  if (g_conf().get_val<bool>("mon_warn_on_msgr2_not_enabled")) {
    check_if_msgr2_enabled(&next);
  }

  if (next != leader_checks) {
    changed = true;
    leader_checks = next;
  }

  return changed;
}

void HealthMonitor::check_for_older_version(health_check_map_t *checks)
{
  static ceph::coarse_mono_time old_version_first_time =
    ceph::coarse_mono_clock::zero();

  auto now = ceph::coarse_mono_clock::now();
  if (ceph::coarse_mono_clock::is_zero(old_version_first_time)) {
    old_version_first_time = now;
  }
  const auto warn_delay = g_conf().get_val<std::chrono::seconds>("mon_warn_older_version_delay");
  if (now - old_version_first_time > warn_delay) {
    std::map<string, std::list<string>> all_versions;
    mon.get_all_versions(all_versions);
    if (all_versions.size() > 1) {
      dout(20) << __func__ << " all_versions=" << all_versions << dendl;
      // The last entry has the largest version
      dout(20) << __func__ << " highest version daemon count "
               << all_versions.rbegin()->second.size() << dendl;
      // Erase last element (the highest version running)
      all_versions.erase(all_versions.rbegin()->first);
      ceph_assert(all_versions.size() > 0);

      ostringstream ss;
      unsigned daemon_count = 0;
      for (auto& g : all_versions) {
        daemon_count += g.second.size();
      }
      int ver_count = all_versions.size();
      // a single old daemon implies a single old version
      ceph_assert(!(daemon_count == 1 && ver_count != 1));
      ss << "There " << (daemon_count == 1 ? "is a daemon" : "are daemons")
         << " running " << (ver_count > 1 ? "multiple old versions" : "an older version") << " of ceph";
      health_status_t status;
      if (ver_count > 1)
        status = HEALTH_ERR;
      else
        status = HEALTH_WARN;
      auto& d = checks->add("DAEMON_OLD_VERSION", status, ss.str(), all_versions.size());
      for (auto& g : all_versions) {
        ostringstream ds;
        for (auto& i : g.second) { // Daemon list
          ds << i << " ";
        }
        ds << (g.second.size() == 1 ? "is" : "are")
           << " running an older version of ceph: " << g.first;
        d.detail.push_back(ds.str());
      }
    } else {
      old_version_first_time = ceph::coarse_mono_clock::zero();
    }
  }
}

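// Example of the map-ordering trick above (illustrative version strings):
// with all_versions = { "16.2.7" -> [mon.a], "16.2.9" -> [mon.b, osd.0] },
// rbegin() names the largest version, "16.2.9"; erasing it leaves only the
// stragglers (here mon.a on 16.2.7), which become the DAEMON_OLD_VERSION
// detail lines.
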
void HealthMonitor::check_for_mon_down(health_check_map_t *checks)
{
  int max = mon.monmap->size();
  int actual = mon.get_quorum().size();
  const auto now = ceph::real_clock::now();
  if (actual < max &&
      now > mon.monmap->created.to_real_time() + g_conf().get_val<std::chrono::seconds>("mon_down_mkfs_grace")) {
    ostringstream ss;
    ss << (max-actual) << "/" << max << " mons down, quorum "
       << mon.get_quorum_names();
    auto& d = checks->add("MON_DOWN", HEALTH_WARN, ss.str(), max - actual);
    set<int> q = mon.get_quorum();
    for (int i=0; i<max; i++) {
      if (q.count(i) == 0) {
        ostringstream ss;
        ss << "mon." << mon.monmap->get_name(i) << " (rank " << i
           << ") addr " << mon.monmap->get_addrs(i)
           << " is down (out of quorum)";
        d.detail.push_back(ss.str());
      }
    }
  }
}

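// Illustrative rendering of the check above for a 3-mon cluster with mon.c
// out of quorum:
//   summary: "1/3 mons down, quorum a,b"
//   detail:  "mon.c (rank 2) addr <addrvec> is down (out of quorum)"
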
void HealthMonitor::check_for_clock_skew(health_check_map_t *checks)
{
  if (!mon.timecheck_skews.empty()) {
    list<string> warns;
    list<string> details;
    for (auto& i : mon.timecheck_skews) {
      double skew = i.second;
      double latency = mon.timecheck_latencies[i.first];
      string name = mon.monmap->get_name(i.first);
      ostringstream tcss;
      health_status_t tcstatus = mon.timecheck_status(tcss, skew, latency);
      if (tcstatus != HEALTH_OK) {
        warns.push_back(name);
        ostringstream tmp_ss;
        tmp_ss << "mon." << name << " " << tcss.str()
               << " (latency " << latency << "s)";
        details.push_back(tmp_ss.str());
      }
    }
    if (!warns.empty()) {
      ostringstream ss;
      ss << "clock skew detected on";
      while (!warns.empty()) {
        ss << " mon." << warns.front();
        warns.pop_front();
        if (!warns.empty()) {
          ss << ",";
        }
      }
      auto& d = checks->add("MON_CLOCK_SKEW", HEALTH_WARN, ss.str(), details.size());
      d.detail.swap(details);
    }
  }
}

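// Illustrative output of the skew check above: with excessive skew on mon.b
// and mon.c the summary reads "clock skew detected on mon.b, mon.c", and
// each detail line combines timecheck_status()'s message with the measured
// latency, along the lines of
//   "mon.b clock skew 0.6s > max 0.05s (latency 0.002s)".
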
void HealthMonitor::check_if_msgr2_enabled(health_check_map_t *checks)
{
  if (g_conf().get_val<bool>("ms_bind_msgr2") &&
      mon.monmap->get_required_features().contains_all(
        ceph::features::mon::FEATURE_NAUTILUS)) {
    list<string> details;
    for (auto& i : mon.monmap->mon_info) {
      if (!i.second.public_addrs.has_msgr2()) {
        ostringstream ds;
        ds << "mon." << i.first << " is not bound to a msgr2 port, only "
           << i.second.public_addrs;
        details.push_back(ds.str());
      }
    }
    if (!details.empty()) {
      ostringstream ss;
      ss << details.size() << " monitors have not enabled msgr2";
      auto &d = checks->add("MON_MSGR2_NOT_ENABLED", HEALTH_WARN, ss.str(),
                            details.size());
      d.detail.swap(details);
    }
  }
}