1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2016 John Spray <john.spray@redhat.com>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
14 #include <boost/tokenizer.hpp>
16 #include "messages/MMgrBeacon.h"
17 #include "messages/MMgrMap.h"
18 #include "messages/MMgrDigest.h"
20 #include "include/stringify.h"
21 #include "mgr/MgrContext.h"
22 #include "mgr/mgr_commands.h"
23 #include "OSDMonitor.h"
24 #include "ConfigMonitor.h"
25 #include "HealthMonitor.h"
27 #include "common/TextTable.h"
28 #include "include/stringify.h"
30 #include "MgrMonitor.h"
32 #define MGR_METADATA_PREFIX "mgr_metadata"
34 #define dout_subsys ceph_subsys_mon
36 #define dout_prefix _prefix(_dout, mon, map)
37 using namespace TOPNSPC::common
;
45 using std::ostringstream
;
49 using std::stringstream
;
53 using ceph::bufferlist
;
56 using ceph::ErasureCodeInterfaceRef
;
57 using ceph::ErasureCodeProfile
;
58 using ceph::Formatter
;
59 using ceph::JSONFormatter
;
60 using ceph::make_message
;
61 using ceph::mono_clock
;
62 using ceph::mono_time
;
64 static ostream
& _prefix(std::ostream
*_dout
, Monitor
&mon
,
65 const MgrMap
& mgrmap
) {
66 return *_dout
<< "mon." << mon
.name
<< "@" << mon
.rank
67 << "(" << mon
.get_state_name()
68 << ").mgr e" << mgrmap
.get_epoch() << " ";
71 // the system treats always_on_modules as if they provide built-in functionality
72 // by ensuring that they are always enabled.
73 const static std::map
<uint32_t, std::set
<std::string
>> always_on_modules
= {
75 CEPH_RELEASE_OCTOPUS
, {
89 CEPH_RELEASE_PACIFIC
, {
103 CEPH_RELEASE_QUINCY
, {
118 // Prefix for mon store of active mgr's command descriptions
119 const static std::string command_descs_prefix
= "mgr_command_descs";
121 const Option
*MgrMonitor::find_module_option(const string
& name
)
123 // we have two forms of names: "mgr/$module/$option" and
124 // localized "mgr/$module/$instance/$option". normalize to the
125 // former by stripping out $instance.
127 if (name
.substr(0, 4) != "mgr/") {
130 auto second_slash
= name
.find('/', 5);
131 if (second_slash
== std::string::npos
) {
134 auto third_slash
= name
.find('/', second_slash
+ 1);
135 if (third_slash
!= std::string::npos
) {
136 // drop the $instance part between the second and third slash
137 real_name
= name
.substr(0, second_slash
) + name
.substr(third_slash
);
141 auto p
= mgr_module_options
.find(real_name
);
142 if (p
!= mgr_module_options
.end()) {
148 version_t
MgrMonitor::get_trim_to() const
150 int64_t max
= g_conf().get_val
<int64_t>("mon_max_mgrmap_epochs");
151 if (map
.epoch
> max
) {
152 return map
.epoch
- max
;
157 void MgrMonitor::create_initial()
159 // Take a local copy of initial_modules for tokenizer to iterate over.
160 auto initial_modules
= g_conf().get_val
<std::string
>("mgr_initial_modules");
161 boost::tokenizer
<> tok(initial_modules
);
162 for (auto& m
: tok
) {
163 pending_map
.modules
.insert(m
);
165 pending_map
.always_on_modules
= always_on_modules
;
166 pending_command_descs
= mgr_commands
;
167 dout(10) << __func__
<< " initial modules " << pending_map
.modules
168 << ", always on modules " << pending_map
.get_always_on_modules()
169 << ", " << pending_command_descs
.size() << " commands"
173 void MgrMonitor::get_store_prefixes(std::set
<string
>& s
) const
175 s
.insert(service_name
);
176 s
.insert(command_descs_prefix
);
177 s
.insert(MGR_METADATA_PREFIX
);
180 void MgrMonitor::update_from_paxos(bool *need_bootstrap
)
182 version_t version
= get_last_committed();
183 if (version
!= map
.epoch
) {
184 dout(4) << "loading version " << version
<< dendl
;
187 int err
= get_version(version
, bl
);
188 ceph_assert(err
== 0);
190 bool old_available
= map
.get_available();
191 uint64_t old_gid
= map
.get_active_gid();
193 auto p
= bl
.cbegin();
196 dout(4) << "active server: " << map
.active_addrs
197 << "(" << map
.active_gid
<< ")" << dendl
;
199 ever_had_active_mgr
= get_value("ever_had_active_mgr");
204 first_seen_inactive
= utime_t();
206 first_seen_inactive
= ceph_clock_now();
212 || command_descs
.empty()
213 || (map
.get_available()
214 && (!old_available
|| old_gid
!= map
.get_active_gid()))) {
215 dout(4) << "mkfs or daemon transitioned to available, loading commands"
217 bufferlist loaded_commands
;
218 int r
= mon
.store
->get(command_descs_prefix
, "", loaded_commands
);
220 derr
<< "Failed to load mgr commands: " << cpp_strerror(r
) << dendl
;
222 auto p
= loaded_commands
.cbegin();
223 decode(command_descs
, p
);
228 // populate module options
229 mgr_module_options
.clear();
230 misc_option_strings
.clear();
231 for (auto& i
: map
.available_modules
) {
232 for (auto& j
: i
.module_options
) {
233 string name
= string("mgr/") + i
.name
+ "/" + j
.second
.name
;
234 auto p
= mgr_module_options
.emplace(
236 Option(name
, static_cast<Option::type_t
>(j
.second
.type
),
237 static_cast<Option::level_t
>(j
.second
.level
)));
238 Option
& opt
= p
.first
->second
;
239 opt
.set_flags(static_cast<Option::flag_t
>(j
.second
.flags
));
240 opt
.set_flag(Option::FLAG_MGR
);
241 opt
.set_description(j
.second
.desc
.c_str());
242 opt
.set_long_description(j
.second
.long_desc
.c_str());
243 for (auto& k
: j
.second
.tags
) {
244 opt
.add_tag(k
.c_str());
246 for (auto& k
: j
.second
.see_also
) {
247 if (i
.module_options
.count(k
)) {
248 // it's another module option
249 misc_option_strings
.push_back(string("mgr/") + i
.name
+ "/" + k
);
250 opt
.add_see_also(misc_option_strings
.back().c_str());
252 // it's a native option
253 opt
.add_see_also(k
.c_str());
256 Option::value_t v
, v2
;
258 if (j
.second
.default_value
.size() &&
259 !opt
.parse_value(j
.second
.default_value
, &v
, &err
)) {
262 if (j
.second
.min
.size() &&
263 j
.second
.max
.size() &&
264 !opt
.parse_value(j
.second
.min
, &v
, &err
) &&
265 !opt
.parse_value(j
.second
.max
, &v2
, &err
)) {
266 opt
.set_min_max(v
, v2
);
268 std::vector
<const char *> enum_allowed
;
269 for (auto& k
: j
.second
.enum_allowed
) {
270 enum_allowed
.push_back(k
.c_str());
272 opt
.set_enum_allowed(enum_allowed
);
275 // force ConfigMonitor to refresh, since it uses const Option *
276 // pointers into our mgr_module_options (which we just rebuilt).
277 mon
.configmon()->load_config();
279 if (!mon
.is_init()) {
280 // feed our pet MgrClient, unless we are in Monitor::[pre]init()
285 void MgrMonitor::prime_mgr_client()
287 dout(10) << __func__
<< dendl
;
288 mon
.mgr_client
.ms_dispatch2(make_message
<MMgrMap
>(map
));
291 void MgrMonitor::create_pending()
297 health_status_t
MgrMonitor::should_warn_about_mgr_down()
299 utime_t now
= ceph_clock_now();
300 // we warn if we have osds AND we've exceeded the grace period
301 // which means a new mon cluster can be HEALTH_OK indefinitely as long as
302 // no OSDs are ever created.
303 if (mon
.osdmon()->osdmap
.get_num_osds() > 0 &&
304 now
> mon
.monmap
->created
+ g_conf().get_val
<int64_t>("mon_mgr_mkfs_grace")) {
305 health_status_t level
= HEALTH_WARN
;
306 if (first_seen_inactive
!= utime_t() &&
307 now
- first_seen_inactive
> g_conf().get_val
<int64_t>("mon_mgr_inactive_grace")) {
315 void MgrMonitor::post_paxos_update()
317 // are we handling digest subscribers?
320 if (prev_health_checks
.empty()) {
321 prev_health_checks
.resize(mon
.paxos_service
.size());
324 ceph_assert(prev_health_checks
.size() == mon
.paxos_service
.size());
325 for (auto i
= 0u; i
< prev_health_checks
.size(); i
++) {
326 const auto& curr
= mon
.paxos_service
[i
]->get_health_checks();
327 if (!send
&& curr
!= prev_health_checks
[i
]) {
330 prev_health_checks
[i
] = curr
;
337 wait_for_active_ctx(new C_MonContext
{&mon
, [this](int) {
345 void MgrMonitor::encode_pending(MonitorDBStore::TransactionRef t
)
347 dout(10) << __func__
<< " " << pending_map
<< dendl
;
349 pending_map
.encode(bl
, mon
.get_quorum_con_features());
350 put_version(t
, pending_map
.epoch
, bl
);
351 put_last_committed(t
, pending_map
.epoch
);
353 for (auto& p
: pending_metadata
) {
354 dout(10) << __func__
<< " set metadata for " << p
.first
<< dendl
;
355 t
->put(MGR_METADATA_PREFIX
, p
.first
, p
.second
);
357 for (auto& name
: pending_metadata_rm
) {
358 dout(10) << __func__
<< " rm metadata for " << name
<< dendl
;
359 t
->erase(MGR_METADATA_PREFIX
, name
);
361 pending_metadata
.clear();
362 pending_metadata_rm
.clear();
364 health_check_map_t next
;
365 if (pending_map
.active_gid
== 0) {
366 auto level
= should_warn_about_mgr_down();
367 if (level
!= HEALTH_OK
) {
368 next
.add("MGR_DOWN", level
, "no active mgr", 0);
370 dout(10) << __func__
<< " no health warning (never active and new cluster)"
374 put_value(t
, "ever_had_active_mgr", 1);
376 encode_health(next
, t
);
378 if (pending_command_descs
.size()) {
379 dout(4) << __func__
<< " encoding " << pending_command_descs
.size()
380 << " command_descs" << dendl
;
381 for (auto& p
: pending_command_descs
) {
382 p
.set_flag(MonCommand::FLAG_MGR
);
385 encode(pending_command_descs
, bl
);
386 t
->put(command_descs_prefix
, "", bl
);
387 pending_command_descs
.clear();
391 bool MgrMonitor::check_caps(MonOpRequestRef op
, const uuid_d
& fsid
)
394 MonSession
*session
= op
->get_session();
397 if (!session
->is_capable("mgr", MON_CAP_X
)) {
398 dout(1) << __func__
<< " insufficient caps " << session
->caps
<< dendl
;
401 if (fsid
!= mon
.monmap
->fsid
) {
402 dout(1) << __func__
<< " op fsid " << fsid
403 << " != " << mon
.monmap
->fsid
<< dendl
;
409 bool MgrMonitor::preprocess_query(MonOpRequestRef op
)
411 auto m
= op
->get_req
<PaxosServiceMessage
>();
412 switch (m
->get_type()) {
414 return preprocess_beacon(op
);
415 case MSG_MON_COMMAND
:
417 return preprocess_command(op
);
418 } catch (const bad_cmd_get
& e
) {
420 mon
.reply_command(op
, -EINVAL
, e
.what(), bl
, get_last_committed());
426 derr
<< "Unhandled message type " << m
->get_type() << dendl
;
431 bool MgrMonitor::prepare_update(MonOpRequestRef op
)
433 auto m
= op
->get_req
<PaxosServiceMessage
>();
434 switch (m
->get_type()) {
436 return prepare_beacon(op
);
438 case MSG_MON_COMMAND
:
440 return prepare_command(op
);
441 } catch (const bad_cmd_get
& e
) {
443 mon
.reply_command(op
, -EINVAL
, e
.what(), bl
, get_last_committed());
449 derr
<< "Unhandled message type " << m
->get_type() << dendl
;
456 class C_Updated
: public Context
{
460 C_Updated(MgrMonitor
*a
, MonOpRequestRef c
) :
462 void finish(int r
) override
{
465 } else if (r
== -ECANCELED
) {
466 mm
->mon
.no_reply(op
);
468 mm
->dispatch(op
); // try again
473 bool MgrMonitor::preprocess_beacon(MonOpRequestRef op
)
475 auto m
= op
->get_req
<MMgrBeacon
>();
476 mon
.no_reply(op
); // we never reply to beacons
477 dout(4) << "beacon from " << m
->get_gid() << dendl
;
479 if (!check_caps(op
, m
->get_fsid())) {
480 // drop it on the floor
484 // always send this to the leader's prepare_beacon()
488 bool MgrMonitor::prepare_beacon(MonOpRequestRef op
)
490 auto m
= op
->get_req
<MMgrBeacon
>();
491 dout(4) << "beacon from " << m
->get_gid() << dendl
;
493 // See if we are seeing same name, new GID for the active daemon
494 if (m
->get_name() == pending_map
.active_name
495 && m
->get_gid() != pending_map
.active_gid
)
497 dout(4) << "Active daemon restart (mgr." << m
->get_name() << ")" << dendl
;
498 mon
.clog
->info() << "Active manager daemon " << m
->get_name()
500 if (!mon
.osdmon()->is_writeable()) {
501 dout(1) << __func__
<< ": waiting for osdmon writeable to"
502 " blocklist old instance." << dendl
;
503 mon
.osdmon()->wait_for_writeable(op
, new C_RetryMessage(this, op
));
509 // See if we are seeing same name, new GID for any standbys
510 for (const auto &i
: pending_map
.standbys
) {
511 const MgrMap::StandbyInfo
&s
= i
.second
;
512 if (s
.name
== m
->get_name() && s
.gid
!= m
->get_gid()) {
513 dout(4) << "Standby daemon restart (mgr." << m
->get_name() << ")" << dendl
;
514 mon
.clog
->debug() << "Standby manager daemon " << m
->get_name()
516 drop_standby(i
.first
);
521 last_beacon
[m
->get_gid()] = ceph::coarse_mono_clock::now();
523 // Track whether we modified pending_map
524 bool updated
= false;
526 if (pending_map
.active_gid
== m
->get_gid()) {
527 if (pending_map
.services
!= m
->get_services()) {
528 dout(4) << "updated services from mgr." << m
->get_name()
529 << ": " << m
->get_services() << dendl
;
530 pending_map
.services
= m
->get_services();
534 // A beacon from the currently active daemon
535 if (pending_map
.active_addrs
!= m
->get_server_addrs()) {
536 dout(4) << "learned address " << m
->get_server_addrs()
537 << " (was " << pending_map
.active_addrs
<< ")" << dendl
;
538 pending_map
.active_addrs
= m
->get_server_addrs();
542 if (pending_map
.get_available() != m
->get_available()) {
543 dout(4) << "available " << m
->get_gid() << dendl
;
544 mon
.clog
->info() << "Manager daemon " << pending_map
.active_name
545 << " is now available";
547 // This beacon should include command descriptions
548 pending_command_descs
= m
->get_command_descs();
549 if (pending_command_descs
.empty()) {
550 // This should not happen, but it also isn't fatal: we just
551 // won't successfully update our list of commands.
552 dout(4) << "First available beacon from " << pending_map
.active_name
553 << "(" << m
->get_gid() << ") does not include command descs"
556 dout(4) << "First available beacon from " << pending_map
.active_name
557 << "(" << m
->get_gid() << ") includes "
558 << pending_command_descs
.size() << " command descs" << dendl
;
561 pending_map
.available
= m
->get_available();
564 if (pending_map
.available_modules
!= m
->get_available_modules()) {
565 dout(4) << "available_modules " << m
->get_available_modules()
566 << " (was " << pending_map
.available_modules
<< ")" << dendl
;
567 pending_map
.available_modules
= m
->get_available_modules();
570 const auto& clients
= m
->get_clients();
571 if (pending_map
.clients
!= clients
) {
572 dout(4) << "active's RADOS clients " << clients
573 << " (was " << pending_map
.clients
<< ")" << dendl
;
574 pending_map
.clients
= clients
;
577 } else if (pending_map
.active_gid
== 0) {
578 // There is no currently active daemon, select this one.
579 if (pending_map
.standbys
.count(m
->get_gid())) {
580 drop_standby(m
->get_gid(), false);
582 dout(4) << "selecting new active " << m
->get_gid()
583 << " " << m
->get_name()
584 << " (was " << pending_map
.active_gid
<< " "
585 << pending_map
.active_name
<< ")" << dendl
;
586 pending_map
.active_gid
= m
->get_gid();
587 pending_map
.active_name
= m
->get_name();
588 pending_map
.active_change
= ceph_clock_now();
589 pending_map
.active_mgr_features
= m
->get_mgr_features();
590 pending_map
.available_modules
= m
->get_available_modules();
591 encode(m
->get_metadata(), pending_metadata
[m
->get_name()]);
592 pending_metadata_rm
.erase(m
->get_name());
594 mon
.clog
->info() << "Activating manager daemon "
595 << pending_map
.active_name
;
599 if (pending_map
.standbys
.count(m
->get_gid()) > 0) {
600 dout(10) << "from existing standby " << m
->get_gid() << dendl
;
601 if (pending_map
.standbys
[m
->get_gid()].available_modules
!=
602 m
->get_available_modules()) {
603 dout(10) << "existing standby " << m
->get_gid() << " available_modules "
604 << m
->get_available_modules() << " (was "
605 << pending_map
.standbys
[m
->get_gid()].available_modules
<< ")"
607 pending_map
.standbys
[m
->get_gid()].available_modules
=
608 m
->get_available_modules();
612 dout(10) << "new standby " << m
->get_gid() << dendl
;
613 mon
.clog
->debug() << "Standby manager daemon " << m
->get_name()
615 pending_map
.standbys
[m
->get_gid()] = {m
->get_gid(), m
->get_name(),
616 m
->get_available_modules(),
617 m
->get_mgr_features()};
618 encode(m
->get_metadata(), pending_metadata
[m
->get_name()]);
619 pending_metadata_rm
.erase(m
->get_name());
625 dout(4) << "updating map" << dendl
;
626 wait_for_finished_proposal(op
, new C_Updated(this, op
));
628 dout(10) << "no change" << dendl
;
634 void MgrMonitor::check_subs()
636 const std::string type
= "mgrmap";
637 if (mon
.session_map
.subs
.count(type
) == 0)
639 for (auto sub
: *(mon
.session_map
.subs
[type
])) {
644 void MgrMonitor::check_sub(Subscription
*sub
)
646 if (sub
->type
== "mgrmap") {
647 if (sub
->next
<= map
.get_epoch()) {
648 dout(20) << "Sending map to subscriber " << sub
->session
->con
649 << " " << sub
->session
->con
->get_peer_addr() << dendl
;
650 sub
->session
->con
->send_message2(make_message
<MMgrMap
>(map
));
652 mon
.session_map
.remove_sub(sub
);
654 sub
->next
= map
.get_epoch() + 1;
658 ceph_assert(sub
->type
== "mgrdigest");
659 if (sub
->next
== 0) {
660 // new registration; cancel previous timer
663 if (digest_event
== nullptr) {
670 * Handle digest subscriptions separately (outside of check_sub) because
671 * they are going to be periodic rather than version-driven.
673 void MgrMonitor::send_digests()
677 const std::string type
= "mgrdigest";
678 if (mon
.session_map
.subs
.count(type
) == 0) {
679 prev_health_checks
.clear();
684 // if paxos is currently not active, don't send a digest but reenable timer
687 dout(10) << __func__
<< dendl
;
689 for (auto sub
: *(mon
.session_map
.subs
[type
])) {
690 dout(10) << __func__
<< " sending digest to subscriber " << sub
->session
->con
691 << " " << sub
->session
->con
->get_peer_addr() << dendl
;
692 auto mdigest
= make_message
<MMgrDigest
>();
695 mon
.healthmon()->get_health_status(true, &f
, nullptr, nullptr, nullptr);
696 f
.flush(mdigest
->health_json
);
699 mon
.get_mon_status(&f
);
700 f
.flush(mdigest
->mon_status_json
);
703 sub
->session
->con
->send_message2(mdigest
);
707 digest_event
= mon
.timer
.add_event_after(
708 g_conf().get_val
<int64_t>("mon_mgr_digest_period"),
709 new C_MonContext
{&mon
, [this](int) {
714 void MgrMonitor::cancel_timer()
717 mon
.timer
.cancel_event(digest_event
);
718 digest_event
= nullptr;
722 void MgrMonitor::on_active()
724 if (!mon
.is_leader()) {
727 mon
.clog
->debug() << "mgrmap e" << map
.epoch
<< ": " << map
;
728 assert(HAVE_FEATURE(mon
.get_quorum_con_features(), SERVER_NAUTILUS
));
729 if (pending_map
.always_on_modules
== always_on_modules
) {
732 dout(4) << "always on modules changed, pending "
733 << pending_map
.always_on_modules
<< " != wanted "
734 << always_on_modules
<< dendl
;
735 pending_map
.always_on_modules
= always_on_modules
;
739 void MgrMonitor::tick()
741 if (!is_active() || !mon
.is_leader())
744 const auto now
= ceph::coarse_mono_clock::now();
746 const auto mgr_beacon_grace
=
747 g_conf().get_val
<std::chrono::seconds
>("mon_mgr_beacon_grace");
749 // Note that this is the mgr daemon's tick period, not ours (the
750 // beacon is sent with this period).
751 const auto mgr_tick_period
=
752 g_conf().get_val
<std::chrono::seconds
>("mgr_tick_period");
754 if (last_tick
!= ceph::coarse_mono_clock::time_point::min()
755 && (now
- last_tick
> (mgr_beacon_grace
- mgr_tick_period
))) {
756 // This case handles either local slowness (calls being delayed
757 // for whatever reason) or cluster election slowness (a long gap
758 // between calls while an election happened)
759 dout(4) << __func__
<< ": resetting beacon timeouts due to mon delay "
760 "(slow election?) of " << now
- last_tick
<< " seconds" << dendl
;
761 for (auto &i
: last_beacon
) {
768 // Populate any missing beacons (i.e. no beacon since MgrMonitor
769 // instantiation) with the current time, so that they will
770 // eventually look laggy if they fail to give us a beacon.
771 if (pending_map
.active_gid
!= 0
772 && last_beacon
.count(pending_map
.active_gid
) == 0) {
773 last_beacon
[pending_map
.active_gid
] = now
;
775 for (auto s
: pending_map
.standbys
) {
776 if (last_beacon
.count(s
.first
) == 0) {
777 last_beacon
[s
.first
] = now
;
781 // Cull standbys first so that any remaining standbys
782 // will be eligible to take over from the active if we cull him.
783 std::list
<uint64_t> dead_standbys
;
784 const auto cutoff
= now
- mgr_beacon_grace
;
785 for (const auto &i
: pending_map
.standbys
) {
786 auto last_beacon_time
= last_beacon
.at(i
.first
);
787 if (last_beacon_time
< cutoff
) {
788 dead_standbys
.push_back(i
.first
);
792 bool propose
= false;
794 for (auto i
: dead_standbys
) {
795 dout(4) << "Dropping laggy standby " << i
<< dendl
;
800 if (pending_map
.active_gid
!= 0
801 && last_beacon
.at(pending_map
.active_gid
) < cutoff
802 && mon
.osdmon()->is_writeable()) {
803 const std::string old_active_name
= pending_map
.active_name
;
806 dout(4) << "Dropping active" << pending_map
.active_gid
<< dendl
;
807 if (promote_standby()) {
808 dout(4) << "Promoted standby " << pending_map
.active_gid
<< dendl
;
809 mon
.clog
->info() << "Manager daemon " << old_active_name
810 << " is unresponsive, replacing it with standby"
811 << " daemon " << pending_map
.active_name
;
813 dout(4) << "Active is laggy but have no standbys to replace it" << dendl
;
814 mon
.clog
->info() << "Manager daemon " << old_active_name
815 << " is unresponsive. No standby daemons available.";
817 } else if (pending_map
.active_gid
== 0) {
818 if (promote_standby()) {
819 dout(4) << "Promoted standby " << pending_map
.active_gid
<< dendl
;
820 mon
.clog
->info() << "Activating manager daemon "
821 << pending_map
.active_name
;
826 if (!pending_map
.available
&&
827 !ever_had_active_mgr
&&
828 should_warn_about_mgr_down() != HEALTH_OK
) {
829 dout(10) << " exceeded mon_mgr_mkfs_grace "
830 << g_conf().get_val
<int64_t>("mon_mgr_mkfs_grace")
831 << " seconds" << dendl
;
836 if (mon
.monmap
->min_mon_release
>= ceph_release_t::octopus
&&
837 pending_map
.module_enabled("orchestrator_cli")) {
838 dout(10) << " disabling obsolete/renamed 'orchestrator_cli'" << dendl
;
839 // we don't need to enable 'orchestrator' because it's now always-on
840 pending_map
.modules
.erase("orchestrator_cli");
849 void MgrMonitor::on_restart()
851 // Clear out the leader-specific state.
853 last_tick
= ceph::coarse_mono_clock::now();
857 bool MgrMonitor::promote_standby()
859 ceph_assert(pending_map
.active_gid
== 0);
860 if (pending_map
.standbys
.size()) {
861 // Promote a replacement (arbitrary choice of standby)
862 auto replacement_gid
= pending_map
.standbys
.begin()->first
;
863 pending_map
.active_gid
= replacement_gid
;
864 pending_map
.active_name
= pending_map
.standbys
.at(replacement_gid
).name
;
865 pending_map
.available_modules
=
866 pending_map
.standbys
.at(replacement_gid
).available_modules
;
867 pending_map
.active_mgr_features
=
868 pending_map
.standbys
.at(replacement_gid
).mgr_features
;
869 pending_map
.available
= false;
870 pending_map
.active_addrs
= entity_addrvec_t();
871 pending_map
.active_change
= ceph_clock_now();
873 drop_standby(replacement_gid
, false);
881 void MgrMonitor::drop_active()
883 ceph_assert(mon
.osdmon()->is_writeable());
885 if (last_beacon
.count(pending_map
.active_gid
) > 0) {
886 last_beacon
.erase(pending_map
.active_gid
);
889 ceph_assert(pending_map
.active_gid
> 0);
890 auto until
= ceph_clock_now();
891 until
+= g_conf().get_val
<double>("mon_mgr_blocklist_interval");
892 dout(5) << "blocklisting previous mgr." << pending_map
.active_name
<< "."
893 << pending_map
.active_gid
<< " ("
894 << pending_map
.active_addrs
<< ")" << dendl
;
895 auto blocklist_epoch
= mon
.osdmon()->blocklist(pending_map
.active_addrs
, until
);
897 /* blocklist RADOS clients in use by the mgr */
898 for (const auto& a
: pending_map
.clients
) {
899 mon
.osdmon()->blocklist(a
, until
);
901 request_proposal(mon
.osdmon());
903 pending_metadata_rm
.insert(pending_map
.active_name
);
904 pending_metadata
.erase(pending_map
.active_name
);
905 pending_map
.active_name
= "";
906 pending_map
.active_gid
= 0;
907 pending_map
.active_change
= ceph_clock_now();
908 pending_map
.active_mgr_features
= 0;
909 pending_map
.available
= false;
910 pending_map
.active_addrs
= entity_addrvec_t();
911 pending_map
.services
.clear();
912 pending_map
.clients
.clear();
913 pending_map
.last_failure_osd_epoch
= blocklist_epoch
;
915 // So that when new active mgr subscribes to mgrdigest, it will
916 // get an immediate response instead of waiting for next timer
920 void MgrMonitor::drop_standby(uint64_t gid
, bool drop_meta
)
923 pending_metadata_rm
.insert(pending_map
.standbys
[gid
].name
);
924 pending_metadata
.erase(pending_map
.standbys
[gid
].name
);
926 pending_map
.standbys
.erase(gid
);
927 if (last_beacon
.count(gid
) > 0) {
928 last_beacon
.erase(gid
);
932 bool MgrMonitor::preprocess_command(MonOpRequestRef op
)
934 auto m
= op
->get_req
<MMonCommand
>();
935 std::stringstream ss
;
939 if (!cmdmap_from_json(m
->cmd
, &cmdmap
, ss
)) {
940 string rs
= ss
.str();
941 mon
.reply_command(op
, -EINVAL
, rs
, rdata
, get_last_committed());
945 MonSession
*session
= op
->get_session();
947 mon
.reply_command(op
, -EACCES
, "access denied", rdata
,
948 get_last_committed());
952 string format
= cmd_getval_or
<string
>(cmdmap
, "format", "plain");
953 boost::scoped_ptr
<Formatter
> f(Formatter::create(format
));
956 cmd_getval(cmdmap
, "prefix", prefix
);
959 if (prefix
== "mgr stat") {
961 f
.reset(Formatter::create(format
, "json-pretty", "json-pretty"));
963 f
->open_object_section("stat");
964 f
->dump_unsigned("epoch", map
.get_epoch());
965 f
->dump_bool("available", map
.get_available());
966 f
->dump_string("active_name", map
.get_active_name());
967 f
->dump_unsigned("num_standby", map
.get_num_standby());
970 } else if (prefix
== "mgr dump") {
972 f
.reset(Formatter::create(format
, "json-pretty", "json-pretty"));
974 int64_t epoch
= cmd_getval_or
<int64_t>(cmdmap
, "epoch", map
.get_epoch());
975 if (epoch
== (int64_t)map
.get_epoch()) {
976 f
->dump_object("mgrmap", map
);
979 int err
= get_version(epoch
, bl
);
980 if (err
== -ENOENT
) {
982 ss
<< "there is no map for epoch " << epoch
;
986 auto p
= bl
.cbegin();
988 f
->dump_object("mgrmap", m
);
991 } else if (prefix
== "mgr module ls") {
993 f
->open_object_section("modules");
995 f
->open_array_section("always_on_modules");
996 for (auto& p
: map
.get_always_on_modules()) {
997 f
->dump_string("module", p
);
1000 f
->open_array_section("enabled_modules");
1001 for (auto& p
: map
.modules
) {
1002 if (map
.get_always_on_modules().count(p
) > 0)
1004 // We only show the name for enabled modules. Any errors
1005 // etc. will show up as health checks.
1006 f
->dump_string("module", p
);
1009 f
->open_array_section("disabled_modules");
1010 for (auto& p
: map
.available_modules
) {
1011 if (map
.modules
.count(p
.name
) == 0 &&
1012 map
.get_always_on_modules().count(p
.name
) == 0) {
1013 // For disabled modules, we show the full info if the detail
1014 // parameter is enabled, to give a hint about whether enabling it will work
1024 tbl
.define_column("MODULE", TextTable::LEFT
, TextTable::LEFT
);
1025 tbl
.define_column(" ", TextTable::LEFT
, TextTable::LEFT
);
1027 for (auto& p
: map
.get_always_on_modules()) {
1029 tbl
<< "on (always on)";
1030 tbl
<< TextTable::endrow
;
1032 for (auto& p
: map
.modules
) {
1033 if (map
.get_always_on_modules().count(p
) > 0)
1037 tbl
<< TextTable::endrow
;
1039 for (auto& p
: map
.available_modules
) {
1040 if (map
.modules
.count(p
.name
) == 0 &&
1041 map
.get_always_on_modules().count(p
.name
) == 0) {
1044 tbl
<< TextTable::endrow
;
1047 rdata
.append(stringify(tbl
));
1049 } else if (prefix
== "mgr services") {
1051 f
.reset(Formatter::create(format
, "json-pretty", "json-pretty"));
1053 f
->open_object_section("services");
1054 for (const auto &i
: map
.services
) {
1055 f
->dump_string(i
.first
.c_str(), i
.second
);
1059 } else if (prefix
== "mgr metadata") {
1061 f
.reset(Formatter::create(format
, "json-pretty", "json-pretty"));
1064 cmd_getval(cmdmap
, "who", name
);
1065 if (name
.size() > 0 && !map
.have_name(name
)) {
1066 ss
<< "mgr." << name
<< " does not exist";
1071 f
->open_object_section("mgr_metadata");
1072 f
->dump_string("name", name
);
1073 r
= dump_metadata(name
, f
.get(), &ss
);
1079 f
->open_array_section("mgr_metadata");
1080 for (auto& i
: map
.get_all_names()) {
1081 f
->open_object_section("mgr");
1082 f
->dump_string("name", i
);
1083 r
= dump_metadata(i
, f
.get(), NULL
);
1084 if (r
== -EINVAL
|| r
== -ENOENT
) {
1085 // Drop error, continue to get other daemons' metadata
1086 dout(4) << "No metadata for mgr." << i
<< dendl
;
1097 } else if (prefix
== "mgr versions") {
1099 f
.reset(Formatter::create(format
, "json-pretty", "json-pretty"));
1101 count_metadata("ceph_version", f
.get());
1104 } else if (prefix
== "mgr count-metadata") {
1106 f
.reset(Formatter::create(format
, "json-pretty", "json-pretty"));
1109 cmd_getval(cmdmap
, "property", field
);
1110 count_metadata(field
, f
.get());
1120 mon
.reply_command(op
, r
, rs
, rdata
, get_last_committed());
1124 bool MgrMonitor::prepare_command(MonOpRequestRef op
)
1126 auto m
= op
->get_req
<MMonCommand
>();
1128 std::stringstream ss
;
1132 if (!cmdmap_from_json(m
->cmd
, &cmdmap
, ss
)) {
1133 string rs
= ss
.str();
1134 mon
.reply_command(op
, -EINVAL
, rs
, rdata
, get_last_committed());
1138 MonSession
*session
= op
->get_session();
1140 mon
.reply_command(op
, -EACCES
, "access denied", rdata
, get_last_committed());
1144 string format
= cmd_getval_or
<string
>(cmdmap
, "format", "plain");
1145 boost::scoped_ptr
<Formatter
> f(Formatter::create(format
));
1148 cmd_getval(cmdmap
, "prefix", prefix
);
1152 if (prefix
== "mgr fail") {
1154 if (!cmd_getval(cmdmap
, "who", who
)) {
1155 if (!map
.active_gid
) {
1156 ss
<< "Currently no active mgr";
1159 who
= map
.active_name
;
1163 uint64_t gid
= strict_strtol(who
.c_str(), 10, &err
);
1164 bool changed
= false;
1166 // Does not parse as a gid, treat it as a name
1167 if (pending_map
.active_name
== who
) {
1168 if (!mon
.osdmon()->is_writeable()) {
1169 mon
.osdmon()->wait_for_writeable(op
, new C_RetryMessage(this, op
));
1176 for (const auto &i
: pending_map
.standbys
) {
1177 if (i
.second
.name
== who
) {
1186 ss
<< "Daemon not found '" << who
<< "', already failed?";
1190 if (pending_map
.active_gid
== gid
) {
1191 if (!mon
.osdmon()->is_writeable()) {
1192 mon
.osdmon()->wait_for_writeable(op
, new C_RetryMessage(this, op
));
1197 } else if (pending_map
.standbys
.count(gid
) > 0) {
1201 ss
<< "Daemon not found '" << gid
<< "', already failed?";
1205 if (changed
&& pending_map
.active_gid
== 0) {
1208 } else if (prefix
== "mgr module enable") {
1210 cmd_getval(cmdmap
, "module", module
);
1211 if (module
.empty()) {
1215 if (pending_map
.get_always_on_modules().count(module
) > 0) {
1216 ss
<< "module '" << module
<< "' is already enabled (always-on)";
1220 cmd_getval_compat_cephbool(cmdmap
, "force", force
);
1221 if (!pending_map
.all_support_module(module
) &&
1223 ss
<< "all mgr daemons do not support module '" << module
<< "', pass "
1224 << "--force to force enablement";
1229 std::string can_run_error
;
1230 if (!force
&& !pending_map
.can_run_module(module
, &can_run_error
)) {
1231 ss
<< "module '" << module
<< "' reports that it cannot run on the active "
1232 "manager daemon: " << can_run_error
<< " (pass --force to force "
1238 if (pending_map
.module_enabled(module
)) {
1239 ss
<< "module '" << module
<< "' is already enabled";
1243 pending_map
.modules
.insert(module
);
1244 } else if (prefix
== "mgr module disable") {
1246 cmd_getval(cmdmap
, "module", module
);
1247 if (module
.empty()) {
1251 if (pending_map
.get_always_on_modules().count(module
) > 0) {
1252 ss
<< "module '" << module
<< "' cannot be disabled (always-on)";
1256 if (!pending_map
.module_enabled(module
)) {
1257 ss
<< "module '" << module
<< "' is already disabled";
1261 if (!pending_map
.modules
.count(module
)) {
1262 ss
<< "module '" << module
<< "' is not enabled";
1264 pending_map
.modules
.erase(module
);
1266 ss
<< "Command '" << prefix
<< "' not implemented!";
1271 dout(4) << __func__
<< " done, r=" << r
<< dendl
;
1272 /* Compose response */
1277 // success.. delay reply
1278 wait_for_finished_proposal(op
, new Monitor::C_Command(mon
, op
, r
, rs
,
1279 get_last_committed() + 1));
1282 // reply immediately
1283 mon
.reply_command(op
, r
, rs
, rdata
, get_last_committed());
1288 void MgrMonitor::init()
1290 if (digest_event
== nullptr) {
1291 send_digests(); // To get it to schedule its own event
1295 void MgrMonitor::on_shutdown()
1300 int MgrMonitor::load_metadata(const string
& name
, std::map
<string
, string
>& m
,
1304 int r
= mon
.store
->get(MGR_METADATA_PREFIX
, name
, bl
);
1308 auto p
= bl
.cbegin();
1311 catch (ceph::buffer::error
& e
) {
1313 *err
<< "mgr." << name
<< " metadata is corrupt";
1319 void MgrMonitor::count_metadata(const string
& field
, std::map
<string
,int> *out
)
1321 std::set
<string
> ls
= map
.get_all_names();
1322 for (auto& name
: ls
) {
1323 std::map
<string
,string
> meta
;
1324 load_metadata(name
, meta
, nullptr);
1325 auto p
= meta
.find(field
);
1326 if (p
== meta
.end()) {
1327 (*out
)["unknown"]++;
1329 (*out
)[p
->second
]++;
1334 void MgrMonitor::count_metadata(const string
& field
, Formatter
*f
)
1336 std::map
<string
,int> by_val
;
1337 count_metadata(field
, &by_val
);
1338 f
->open_object_section(field
.c_str());
1339 for (auto& p
: by_val
) {
1340 f
->dump_int(p
.first
.c_str(), p
.second
);
1345 void MgrMonitor::get_versions(std::map
<string
, list
<string
> > &versions
)
1347 std::set
<string
> ls
= map
.get_all_names();
1348 for (auto& name
: ls
) {
1349 std::map
<string
,string
> meta
;
1350 load_metadata(name
, meta
, nullptr);
1351 auto p
= meta
.find("ceph_version_short");
1352 if (p
== meta
.end()) continue;
1353 versions
[p
->second
].push_back(string("mgr.") + name
);
1357 int MgrMonitor::dump_metadata(const string
& name
, Formatter
*f
, ostream
*err
)
1359 std::map
<string
,string
> m
;
1360 if (int r
= load_metadata(name
, m
, err
))
1363 f
->dump_string(p
.first
.c_str(), p
.second
);
1368 void MgrMonitor::print_nodes(Formatter
*f
) const
1372 std::map
<string
, list
<string
> > mgrs
; // hostname => mgr
1373 auto ls
= map
.get_all_names();
1374 for (auto& name
: ls
) {
1375 std::map
<string
,string
> meta
;
1376 if (load_metadata(name
, meta
, nullptr)) {
1379 auto hostname
= meta
.find("hostname");
1380 if (hostname
== meta
.end()) {
1381 // not likely though
1384 mgrs
[hostname
->second
].push_back(name
);
1387 dump_services(f
, mgrs
, "mgr");
1390 const std::vector
<MonCommand
> &MgrMonitor::get_command_descs() const
1392 if (command_descs
.empty()) {
1393 // must have just upgraded; fallback to static commands
1394 return mgr_commands
;
1396 return command_descs
;