]>
git.proxmox.com Git - ceph.git/blob - ceph/src/mon/MgrMonitor.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2016 John Spray <john.spray@redhat.com>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
14 #include <boost/tokenizer.hpp>
16 #include "messages/MMgrBeacon.h"
17 #include "messages/MMgrMap.h"
18 #include "messages/MMgrDigest.h"
20 #include "PGStatService.h"
21 #include "include/stringify.h"
22 #include "mgr/MgrContext.h"
23 #include "OSDMonitor.h"
25 #include "MgrMonitor.h"
27 #define dout_subsys ceph_subsys_mon
29 #define dout_prefix _prefix(_dout, mon, map)
// Log-line prefix helper, wired in through the dout_prefix macro above.
// Writes "mon.<name>@<rank>(<state name>).mgr e<epoch> " to *_dout and
// returns the stream so the caller can keep appending to it.
30 static ostream
& _prefix(std::ostream
*_dout
, Monitor
*mon
,
31 const MgrMap
& mgrmap
) {
32 return *_dout
<< "mon." << mon
->name
<< "@" << mon
->rank
33 << "(" << mon
->get_state_name()
34 << ").mgr e" << mgrmap
.get_epoch() << " ";
// Seed pending_map.modules from the mgr_initial_modules config value,
// which is split into tokens with a default boost::tokenizer, then log
// the resulting module set.
38 void MgrMonitor::create_initial()
40 boost::tokenizer
<> tok(g_conf
->mgr_initial_modules
);
// NOTE(review): the loop header binding `m` to each token of `tok` is not
// visible in this listing.
42 pending_map
.modules
.insert(m
);
44 dout(10) << __func__
<< " initial modules " << pending_map
.modules
<< dendl
;
// Refresh in-memory state after a commit. When the last committed version
// differs from map.epoch, the stored blob for that version is fetched and
// read through a bufferlist iterator; the ever_had_active_mgr flag is
// re-read from the store, first_seen_inactive is updated, and the map is
// handed to the monitor's own MgrClient.
// NOTE(review): `need_bootstrap` is not referenced in any visible line.
47 void MgrMonitor::update_from_paxos(bool *need_bootstrap
)
49 version_t version
= get_last_committed();
50 if (version
!= map
.epoch
) {
51 dout(4) << "loading version " << version
<< dendl
;
54 int err
= get_version(version
, bl
);
// NOTE(review): handling of `err` and the decode call that consumes `p`
// are not visible in this listing.
57 bufferlist::iterator p
= bl
.begin();
60 dout(4) << "active server: " << map
.active_addr
61 << "(" << map
.active_gid
<< ")" << dendl
;
63 ever_had_active_mgr
= get_value("ever_had_active_mgr");
// first_seen_inactive is either cleared (default utime_t) or stamped with
// the current time; the condition selecting between the two assignments is
// not visible here.
68 first_seen_inactive
= utime_t();
70 first_seen_inactive
= ceph_clock_now();
76 // feed our pet MgrClient
77 mon
->mgr_client
.ms_dispatch(new MMgrMap(map
));
// NOTE(review): only the signature survives in this listing; the body is
// not visible. Presumably it derives pending_map from the committed map --
// confirm against the full file.
80 void MgrMonitor::create_pending()
// Decide which health level the absence of an active mgr deserves.
// Callers in this file compare the result against HEALTH_OK.
// NOTE(review): the return statements are not visible in this listing;
// presumably HEALTH_OK is returned when the outer condition is false and
// `level` is raised inside the inner `if` -- confirm.
86 health_status_t
MgrMonitor::should_warn_about_mgr_down()
88 utime_t now
= ceph_clock_now();
// Warn when either of the following holds:
90 // - we've ever had an active mgr, or
91 // - we have osds AND we've exceeded the grace period
92 // which means a new mon cluster can be HEALTH_OK indefinitely as long as
93 // no OSDs are ever created.
94 if (ever_had_active_mgr
||
95 (mon
->osdmon()->osdmap
.get_num_osds() > 0 &&
96 now
> mon
->monmap
->created
+ g_conf
->mon_mgr_mkfs_grace
)) {
97 health_status_t level
= HEALTH_WARN
;
// Second threshold: the mgr has been seen inactive (first_seen_inactive is
// set) for longer than mon_mgr_inactive_grace.
98 if (first_seen_inactive
!= utime_t() &&
99 now
- first_seen_inactive
> g_conf
->mon_mgr_inactive_grace
) {
// Serialise pending_map into transaction `t` as version pending_map.epoch,
// mark that epoch as last committed, and stage the health checks that go
// with it.
107 void MgrMonitor::encode_pending(MonitorDBStore::TransactionRef t
)
109 dout(10) << __func__
<< " " << pending_map
<< dendl
;
// Encoded with the quorum's connection features.
111 pending_map
.encode(bl
, mon
->get_quorum_con_features());
112 put_version(t
, pending_map
.epoch
, bl
);
113 put_last_committed(t
, pending_map
.epoch
);
// Health checks for the new epoch: MGR_DOWN is added only when there is no
// active gid and should_warn_about_mgr_down() returns something other than
// HEALTH_OK.
115 health_check_map_t next
;
116 if (pending_map
.active_gid
== 0) {
117 auto level
= should_warn_about_mgr_down();
118 if (level
!= HEALTH_OK
) {
119 next
.add("MGR_DOWN", level
, "no active mgr");
121 dout(10) << __func__
<< " no health warning (never active and new cluster)"
// Persist the "ever had an active mgr" flag.
// NOTE(review): the branch guarding this put_value is not visible;
// presumably it runs only when an active mgr exists -- confirm.
125 put_value(t
, "ever_had_active_mgr", 1);
127 encode_health(next
, t
);
// Validate that the sender of `op` may talk to the mgr service: the session
// must hold the "mgr" capability with MON_CAP_X, and `fsid` must equal the
// monmap's fsid. Each failed check is logged at level 1.
// NOTE(review): the return statements (and any null-session guard) are not
// visible in this listing.
130 bool MgrMonitor::check_caps(MonOpRequestRef op
, const uuid_d
& fsid
)
133 MonSession
*session
= op
->get_session();
136 if (!session
->is_capable("mgr", MON_CAP_X
)) {
137 dout(1) << __func__
<< " insufficient caps " << session
->caps
<< dendl
;
// Reject messages addressed to a different cluster.
140 if (fsid
!= mon
->monmap
->fsid
) {
141 dout(1) << __func__
<< " op fsid " << fsid
142 << " != " << mon
->monmap
->fsid
<< dendl
;
// Dispatch on message type: routes to preprocess_beacon() or
// preprocess_command(); any other type is logged via derr.
// NOTE(review): the case label preceding preprocess_beacon() and the
// default branch's return are not visible in this listing.
148 bool MgrMonitor::preprocess_query(MonOpRequestRef op
)
150 PaxosServiceMessage
*m
= static_cast<PaxosServiceMessage
*>(op
->get_req());
151 switch (m
->get_type()) {
153 return preprocess_beacon(op
);
154 case MSG_MON_COMMAND
:
155 return preprocess_command(op
);
158 derr
<< "Unhandled message type " << m
->get_type() << dendl
;
// Update-stage dispatch on message type: routes to prepare_beacon() or
// prepare_command(); any other type is logged via derr.
// NOTE(review): the case label preceding prepare_beacon() and the default
// branch's return are not visible in this listing.
163 bool MgrMonitor::prepare_update(MonOpRequestRef op
)
165 PaxosServiceMessage
*m
= static_cast<PaxosServiceMessage
*>(op
->get_req());
166 switch (m
->get_type()) {
168 return prepare_beacon(op
);
170 case MSG_MON_COMMAND
:
171 return prepare_command(op
);
175 derr
<< "Unhandled message type " << m
->get_type() << dendl
;
// Completion context that prepare_beacon() attaches to a proposal via
// wait_for_finished_proposal(). It holds the owning MgrMonitor (`mm`) and
// the originating request (`op`).
// NOTE(review): member declarations, the constructor's initialiser list and
// the first branch of finish() are not visible in this listing.
182 class C_Updated
: public Context
{
186 C_Updated(MgrMonitor
*a
, MonOpRequestRef c
) :
// finish(r): on -ECANCELED the monitor is told no reply will be sent for
// `op`; in the remaining branch the op is dispatched again.
188 void finish(int r
) override
{
191 } else if (r
== -ECANCELED
) {
192 mm
->mon
->no_reply(op
);
194 mm
->dispatch(op
); // try again
// First-stage beacon handling: log the sender's gid and run check_caps()
// against the beacon's fsid. Beacons that fail the check are dropped; per
// the comment below, the rest always go on to the leader's prepare_beacon().
// NOTE(review): the return statements are not visible in this listing.
199 bool MgrMonitor::preprocess_beacon(MonOpRequestRef op
)
201 MMgrBeacon
*m
= static_cast<MMgrBeacon
*>(op
->get_req());
202 dout(4) << "beacon from " << m
->get_gid() << dendl
;
204 if (!check_caps(op
, m
->get_fsid())) {
205 // drop it on the floor
209 // always send this to the leader's prepare_beacon()
// Apply a mgr beacon to pending_map. In order, the visible code:
//   1. detects a restart (same name, different gid) of the active daemon
//      or of a standby,
//   2. records the beacon time in last_beacon[gid],
//   3. updates the active daemon's address / availability / module list,
//      or selects the sender as the new active when none exists, or
//      refreshes/creates a standby entry,
//   4. waits for a proposal (C_Updated) when something changed.
// NOTE(review): several statements (assignments to `updated`, returns, the
// standby insertion) are not visible in this listing.
213 bool MgrMonitor::prepare_beacon(MonOpRequestRef op
)
215 MMgrBeacon
*m
= static_cast<MMgrBeacon
*>(op
->get_req());
216 dout(4) << "beacon from " << m
->get_gid() << dendl
;
218 // See if we are seeing same name, new GID for the active daemon
219 if (m
->get_name() == pending_map
.active_name
220 && m
->get_gid() != pending_map
.active_gid
)
222 dout(4) << "Active daemon restart (mgr." << m
->get_name() << ")" << dendl
;
223 mon
->clog
->info() << "Active manager daemon " << m
->get_name()
228 // See if we are seeing same name, new GID for any standbys
// NOTE(review): drop_standby() erases from pending_map.standbys, the
// container being iterated here. That is only safe if the loop exits right
// after the call; no `break` is visible in this listing -- confirm.
229 for (const auto &i
: pending_map
.standbys
) {
230 const StandbyInfo
&s
= i
.second
;
231 if (s
.name
== m
->get_name() && s
.gid
!= m
->get_gid()) {
232 dout(4) << "Standby daemon restart (mgr." << m
->get_name() << ")" << dendl
;
233 mon
->clog
->debug() << "Standby manager daemon " << m
->get_name()
235 drop_standby(i
.first
);
// Remember when this gid was last heard from; tick() compares these
// timestamps against the beacon grace cutoff.
240 last_beacon
[m
->get_gid()] = ceph::coarse_mono_clock::now();
242 // Track whether we modified pending_map
243 bool updated
= false;
245 if (pending_map
.active_gid
== m
->get_gid()) {
246 // A beacon from the currently active daemon
247 if (pending_map
.active_addr
!= m
->get_server_addr()) {
248 dout(4) << "learned address " << m
->get_server_addr()
249 << " (was " << pending_map
.active_addr
<< ")" << dendl
;
250 pending_map
.active_addr
= m
->get_server_addr();
// NOTE(review): the message logged below always says "is now available",
// although this branch runs on any change of the available flag.
254 if (pending_map
.get_available() != m
->get_available()) {
255 dout(4) << "available " << m
->get_gid() << dendl
;
256 mon
->clog
->info() << "Manager daemon " << pending_map
.active_name
257 << " is now available";
258 pending_map
.available
= m
->get_available();
261 if (pending_map
.available_modules
!= m
->get_available_modules()) {
262 dout(4) << "available_modules " << m
->get_available_modules()
263 << " (was " << pending_map
.available_modules
<< ")" << dendl
;
264 pending_map
.available_modules
= m
->get_available_modules();
267 } else if (pending_map
.active_gid
== 0) {
268 // There is no currently active daemon, select this one.
269 if (pending_map
.standbys
.count(m
->get_gid())) {
270 drop_standby(m
->get_gid());
// NOTE(review): this branch is only entered when active_gid == 0, so the
// "(was ...)" part of the message below can only ever print gid 0.
272 dout(4) << "selecting new active " << m
->get_gid()
273 << " " << m
->get_name()
274 << " (was " << pending_map
.active_gid
<< " "
275 << pending_map
.active_name
<< ")" << dendl
;
276 pending_map
.active_gid
= m
->get_gid();
277 pending_map
.active_name
= m
->get_name();
278 pending_map
.available_modules
= m
->get_available_modules();
280 mon
->clog
->info() << "Activating manager daemon "
281 << pending_map
.active_name
;
// Sender is neither the active daemon nor promotable: handle it as a
// standby, either refreshing an existing entry or recording a new one.
285 if (pending_map
.standbys
.count(m
->get_gid()) > 0) {
286 dout(10) << "from existing standby " << m
->get_gid() << dendl
;
287 if (pending_map
.standbys
[m
->get_gid()].available_modules
!=
288 m
->get_available_modules()) {
289 dout(10) << "existing standby " << m
->get_gid() << " available_modules "
290 << m
->get_available_modules() << " (was "
291 << pending_map
.standbys
[m
->get_gid()].available_modules
<< ")"
293 pending_map
.standbys
[m
->get_gid()].available_modules
=
294 m
->get_available_modules();
298 dout(10) << "new standby " << m
->get_gid() << dendl
;
299 mon
->clog
->debug() << "Standby manager daemon " << m
->get_name()
// When pending_map changed, completion is deferred to C_Updated once the
// proposal finishes; otherwise "no change" is logged.
306 dout(4) << "updating map" << dendl
;
307 wait_for_finished_proposal(op
, new C_Updated(this, op
));
309 dout(10) << "no change" << dendl
;
// Walk every "mgrmap" subscription held in the monitor's session map.
// NOTE(review): the early exit for "no subscriptions of this type" and the
// loop body are not visible in this listing; presumably each sub is passed
// to check_sub().
315 void MgrMonitor::check_subs()
317 const std::string type
= "mgrmap";
318 if (mon
->session_map
.subs
.count(type
) == 0)
320 for (auto sub
: *(mon
->session_map
.subs
[type
])) {
// Service a single subscription.
//  - "mgrmap": when the subscriber's `next` epoch is not newer than the
//    current map epoch, send it the map; afterwards the sub is either
//    removed or advanced to epoch + 1 (the selecting condition is not
//    visible in this listing).
//  - anything else is asserted to be "mgrdigest"; digests are timer-driven,
//    so this path only checks whether digest_event is unset.
325 void MgrMonitor::check_sub(Subscription
*sub
)
327 if (sub
->type
== "mgrmap") {
328 if (sub
->next
<= map
.get_epoch()) {
329 dout(20) << "Sending map to subscriber " << sub
->session
->con
330 << " " << sub
->session
->con
->get_peer_addr() << dendl
;
331 sub
->session
->con
->send_message(new MMgrMap(map
));
333 mon
->session_map
.remove_sub(sub
);
335 sub
->next
= map
.get_epoch() + 1;
339 assert(sub
->type
== "mgrdigest");
// NOTE(review): the action taken when no digest timer is pending is not
// visible here; presumably it calls send_digests().
340 if (digest_event
== nullptr) {
347 * Handle digest subscriptions separately (outside of check_sub) because
348 * they are going to be periodic rather than version-driven.
// For every "mgrdigest" subscriber, builds an MMgrDigest carrying the
// monitor's health status and mon status (each flushed from formatter `f`
// into the message) and sends it. It then installs a new digest_event and
// schedules it after mon_mgr_digest_period.
// NOTE(review): the declaration of `f`, the callback body and any early
// returns are not visible in this listing.
350 void MgrMonitor::send_digests()
357 dout(10) << __func__
<< dendl
;
359 const std::string type
= "mgrdigest";
360 if (mon
->session_map
.subs
.count(type
) == 0)
363 for (auto sub
: *(mon
->session_map
.subs
[type
])) {
364 dout(10) << __func__
<< " sending digest to subscriber " << sub
->session
->con
365 << " " << sub
->session
->con
->get_peer_addr() << dendl
;
366 MMgrDigest
*mdigest
= new MMgrDigest
;
// Health report -> mdigest->health_json
369 mon
->get_health_status(true, &f
, nullptr, nullptr, nullptr);
370 f
.flush(mdigest
->health_json
);
// Monitor status -> mdigest->mon_status_json
373 std::ostringstream ss
;
374 mon
->get_mon_status(&f
, ss
);
375 f
.flush(mdigest
->mon_status_json
);
378 sub
->session
->con
->send_message(mdigest
);
// Re-arm the periodic timer.
381 digest_event
= new C_MonContext(mon
, [this](int){
384 mon
->timer
.add_event_after(g_conf
->mon_mgr_digest_period
, digest_event
);
// Cancel the pending digest timer event and clear digest_event.
// NOTE(review): any guard around these statements (e.g. a null check on
// digest_event) is not visible in this listing.
387 void MgrMonitor::cancel_timer()
390 mon
->timer
.cancel_event(digest_event
);
391 digest_event
= nullptr;
// On the leader only, write the current mgrmap (epoch and contents) to the
// cluster log at debug level.
395 void MgrMonitor::on_active()
397 if (mon
->is_leader()) {
398 mon
->clog
->debug() << "mgrmap e" << map
.epoch
<< ": " << map
;
// Append a "no active mgr" entry to `summary` when map.active_gid is 0.
// The check is gated on the monmap requiring FEATURE_LUMINOUS and on an
// OSD feature test.
// NOTE(review): the feature constant passed to HAVE_FEATURE, the early
// return and the escalation assignment are not visible in this listing.
// `detail` and `cct` are not referenced in any visible line.
402 void MgrMonitor::get_health(
403 list
<pair
<health_status_t
,string
> >& summary
,
404 list
<pair
<health_status_t
,string
> > *detail
,
405 CephContext
*cct
) const
407 // start mgr warnings as soon as the mons and osds are all upgraded,
408 // but before the require_luminous osdmap flag is set. this way the
409 // user gets some warning before the osd flag is set and mgr is
410 // actually *required*.
411 if (!mon
->monmap
->get_required_features().contains_all(
412 ceph::features::mon::FEATURE_LUMINOUS
) ||
413 !HAVE_FEATURE(mon
->osdmon()->osdmap
.get_up_osd_features(),
418 if (map
.active_gid
== 0) {
419 auto level
= HEALTH_WARN
;
// NOTE(review): the comment below says "jewel" but the condition tests
// require_osd_release >= CEPH_RELEASE_LUMINOUS -- confirm which release
// the comment should name.
420 // do not escalate to ERR if they are still upgrading to jewel.
421 if (mon
->osdmon()->osdmap
.require_osd_release
>= CEPH_RELEASE_LUMINOUS
) {
422 utime_t now
= ceph_clock_now();
// Same inactivity threshold as should_warn_about_mgr_down().
423 if (first_seen_inactive
!= utime_t() &&
424 now
- first_seen_inactive
> g_conf
->mon_mgr_inactive_grace
) {
428 summary
.push_back(make_pair(level
, "no active mgr"));
// Periodic liveness check; proceeds only when this service is active and
// this monitor is the leader. Daemons whose last beacon is older than
// now - mon_mgr_beacon_grace are treated as laggy: standbys are collected
// for removal first, then the active daemon is checked and a standby is
// promoted when one is available.
// NOTE(review): the calls that actually drop daemons, the assignments to
// `propose` and the final propose step are not visible in this listing.
432 void MgrMonitor::tick()
434 if (!is_active() || !mon
->is_leader())
437 const auto now
= ceph::coarse_mono_clock::now();
438 const auto cutoff
= now
- std::chrono::seconds(g_conf
->mon_mgr_beacon_grace
);
440 // Populate any missing beacons (i.e. no beacon since MgrMonitor
441 // instantiation) with the current time, so that they will
442 // eventually look laggy if they fail to give us a beacon.
443 if (pending_map
.active_gid
!= 0
444 && last_beacon
.count(pending_map
.active_gid
) == 0) {
445 last_beacon
[pending_map
.active_gid
] = now
;
// NOTE(review): `auto s` copies every standby entry on each iteration;
// `const auto &s` would avoid the copy (the cull loop below already
// iterates by const reference).
447 for (auto s
: pending_map
.standbys
) {
448 if (last_beacon
.count(s
.first
) == 0) {
449 last_beacon
[s
.first
] = now
;
453 // Cull standbys first so that any remaining standbys
454 // will be eligible to take over from the active if we cull him.
// Gids are gathered in a separate list first rather than being erased
// while pending_map.standbys is being iterated.
455 std::list
<uint64_t> dead_standbys
;
456 for (const auto &i
: pending_map
.standbys
) {
457 auto last_beacon_time
= last_beacon
.at(i
.first
);
458 if (last_beacon_time
< cutoff
) {
459 dead_standbys
.push_back(i
.first
);
463 bool propose
= false;
465 for (auto i
: dead_standbys
) {
466 dout(4) << "Dropping laggy standby " << i
<< dendl
;
// Active daemon missed its beacon window.
471 if (pending_map
.active_gid
!= 0
472 && last_beacon
.at(pending_map
.active_gid
) < cutoff
) {
473 const std::string old_active_name
= pending_map
.active_name
;
// NOTE(review): "Dropping active" has no trailing space, so the gid is
// printed glued to the word "active" in the log line.
476 dout(4) << "Dropping active" << pending_map
.active_gid
<< dendl
;
477 if (promote_standby()) {
478 dout(4) << "Promoted standby " << pending_map
.active_gid
<< dendl
;
479 mon
->clog
->info() << "Manager daemon " << old_active_name
480 << " is unresponsive, replacing it with standby"
481 << " daemon " << pending_map
.active_name
;
483 dout(4) << "Active is laggy but have no standbys to replace it" << dendl
;
484 mon
->clog
->warn() << "Manager daemon " << old_active_name
485 << " is unresponsive. No standby daemons available.";
// No active daemon at all: try to promote a standby.
487 } else if (pending_map
.active_gid
== 0) {
488 if (promote_standby()) {
489 dout(4) << "Promoted standby " << pending_map
.active_gid
<< dendl
;
490 mon
->clog
->info() << "Activating manager daemon "
491 << pending_map
.active_name
;
// No available mgr and should_warn_about_mgr_down() asks for a warning.
496 if (!pending_map
.available
&&
497 should_warn_about_mgr_down() != HEALTH_OK
) {
498 dout(10) << " exceeded mon_mgr_mkfs_grace " << g_conf
->mon_mgr_mkfs_grace
499 << " seconds" << dendl
;
// NOTE(review): the statements of this function are not visible in this
// listing; only the original comment below survives.
508 void MgrMonitor::on_restart()
510 // Clear out the leader-specific state.
// Promote a standby to active in pending_map.
// Precondition (asserted): there is currently no active gid.
// When standbys exist, the first entry of the standbys map becomes active;
// its availability flag and address are reset and it is removed from the
// standby set. Callers in this file treat the bool result as "a standby
// was promoted".
// NOTE(review): the return statements are not visible in this listing.
515 bool MgrMonitor::promote_standby()
517 assert(pending_map
.active_gid
== 0);
518 if (pending_map
.standbys
.size()) {
519 // Promote a replacement (arbitrary choice of standby)
520 auto replacement_gid
= pending_map
.standbys
.begin()->first
;
521 pending_map
.active_gid
= replacement_gid
;
522 pending_map
.active_name
= pending_map
.standbys
.at(replacement_gid
).name
;
// The promoted daemon starts out unavailable and with an empty address;
// prepare_beacon() fills both in from its beacons.
523 pending_map
.available
= false;
524 pending_map
.active_addr
= entity_addr_t();
526 drop_standby(replacement_gid
);
// Clear the active daemon from pending_map: forget its last beacon time
// and reset name, gid, availability and address to their empty values.
533 void MgrMonitor::drop_active()
535 if (last_beacon
.count(pending_map
.active_gid
) > 0) {
536 last_beacon
.erase(pending_map
.active_gid
);
539 pending_map
.active_name
= "";
540 pending_map
.active_gid
= 0;
541 pending_map
.available
= false;
542 pending_map
.active_addr
= entity_addr_t();
// NOTE(review): the statement this comment describes is not visible in
// this listing; presumably it cancels the digest timer.
544 // So that when new active mgr subscribes to mgrdigest, it will
545 // get an immediate response instead of waiting for next timer
// Remove standby `gid` from pending_map.standbys and forget its last
// beacon timestamp, if one was recorded.
// Note: this mutates pending_map.standbys, so any iteration over that map
// in the caller is affected.
549 void MgrMonitor::drop_standby(uint64_t gid
)
551 pending_map
.standbys
.erase(gid
);
552 if (last_beacon
.count(gid
) > 0) {
553 last_beacon
.erase(gid
);
// Read-only mgr commands. Parses the JSON command into `cmdmap`, replying
// -EINVAL when parsing fails and -EACCES on the access-denied path, then
// serves:
//   "mgr dump"       - dump the current map, or the map stored for a
//                      requested epoch
//   "mgr module ls"  - list the enabled modules
// NOTE(review): declarations of rdata/format/prefix/epoch/bl/r/rs, the
// session check and the return statements are not visible in this listing.
558 bool MgrMonitor::preprocess_command(MonOpRequestRef op
)
560 MMonCommand
*m
= static_cast<MMonCommand
*>(op
->get_req());
561 std::stringstream ss
;
564 std::map
<std::string
, cmd_vartype
> cmdmap
;
565 if (!cmdmap_from_json(m
->cmd
, &cmdmap
, ss
)) {
566 string rs
= ss
.str();
567 mon
->reply_command(op
, -EINVAL
, rs
, rdata
, get_last_committed());
571 MonSession
*session
= m
->get_session();
573 mon
->reply_command(op
, -EACCES
, "access denied", rdata
,
574 get_last_committed());
// Output format defaults to "json-pretty" for these commands.
579 cmd_getval(g_ceph_context
, cmdmap
, "format", format
, string("json-pretty"));
580 boost::scoped_ptr
<Formatter
> f(Formatter::create(format
));
583 cmd_getval(g_ceph_context
, cmdmap
, "prefix", prefix
);
586 if (prefix
== "mgr dump") {
// "epoch" defaults to the current map epoch.
588 cmd_getval(g_ceph_context
, cmdmap
, "epoch", epoch
, (int64_t)map
.get_epoch());
589 if (epoch
== (int64_t)map
.get_epoch()) {
590 f
->dump_object("mgrmap", map
);
// Older epoch: load the stored blob for that version.
593 int err
= get_version(epoch
, bl
);
594 if (err
== -ENOENT
) {
596 ss
<< "there is no map for epoch " << epoch
;
// NOTE(review): `m` here is dumped as a "mgrmap" object, so it is
// presumably a locally decoded MgrMap that shadows the MMonCommand* `m`
// declared above -- its declaration is not visible; confirm.
602 f
->dump_object("mgrmap", m
);
605 } else if (prefix
== "mgr module ls") {
606 f
->open_array_section("modules");
607 for (auto& p
: map
.modules
) {
608 f
->dump_string("module", p
);
619 mon
->reply_command(op
, r
, rs
, rdata
, get_last_committed());
// Mutating mgr commands. Parses the JSON command into `cmdmap`, replying
// -EINVAL when parsing fails and -EACCES on the access-denied path, then
// handles:
//   "mgr fail <who>"            - <who> is a gid or a daemon name
//   "mgr module enable <mod>"   - optionally with --force
//   "mgr module disable <mod>"
// Any other prefix yields a "not implemented" message. On the success path
// the reply is delayed until the proposal finishes.
// NOTE(review): declarations of rdata/format/prefix/who/err/module/force/
// r/rs, the session check, the drop_* calls and the return statements are
// not visible in this listing.
623 bool MgrMonitor::prepare_command(MonOpRequestRef op
)
625 MMonCommand
*m
= static_cast<MMonCommand
*>(op
->get_req());
627 std::stringstream ss
;
630 std::map
<std::string
, cmd_vartype
> cmdmap
;
631 if (!cmdmap_from_json(m
->cmd
, &cmdmap
, ss
)) {
632 string rs
= ss
.str();
633 mon
->reply_command(op
, -EINVAL
, rs
, rdata
, get_last_committed());
637 MonSession
*session
= m
->get_session();
639 mon
->reply_command(op
, -EACCES
, "access denied", rdata
, get_last_committed());
// Output format defaults to "plain" for these commands.
644 cmd_getval(g_ceph_context
, cmdmap
, "format", format
, string("plain"));
645 boost::scoped_ptr
<Formatter
> f(Formatter::create(format
));
648 cmd_getval(g_ceph_context
, cmdmap
, "prefix", prefix
);
652 if (prefix
== "mgr fail") {
654 cmd_getval(g_ceph_context
, cmdmap
, "who", who
);
// NOTE(review): the parsed value is stored in a 64-bit gid; confirm that
// strict_strtol's return type covers the full gid range (a 64-bit parser
// such as strict_strtoll may be intended).
657 uint64_t gid
= strict_strtol(who
.c_str(), 10, &err
);
658 bool changed
= false;
660 // Does not parse as a gid, treat it as a name
661 if (pending_map
.active_name
== who
) {
666 for (const auto &i
: pending_map
.standbys
) {
667 if (i
.second
.name
== who
) {
676 ss
<< "Daemon not found '" << who
<< "', already failed?";
// Parsed as a gid: match it against the active daemon, then the standbys.
680 if (pending_map
.active_gid
== gid
) {
683 } else if (pending_map
.standbys
.count(gid
) > 0) {
687 ss
<< "Daemon not found '" << gid
<< "', already failed?";
// NOTE(review): the body is not visible; presumably promote_standby() is
// called when the active daemon was the one failed.
691 if (changed
&& pending_map
.active_gid
== 0) {
694 } else if (prefix
== "mgr module enable") {
696 cmd_getval(g_ceph_context
, cmdmap
, "module", module
);
697 if (module
.empty()) {
// Enabling is refused unless every mgr daemon supports the module or the
// caller passed --force.
702 cmd_getval(g_ceph_context
, cmdmap
, "force", force
);
703 if (!pending_map
.all_support_module(module
) &&
704 force
!= "--force") {
705 ss
<< "all mgr daemons do not support module '" << module
<< "', pass "
706 << "--force to force enablement";
710 pending_map
.modules
.insert(module
);
711 } else if (prefix
== "mgr module disable") {
713 cmd_getval(g_ceph_context
, cmdmap
, "module", module
);
714 if (module
.empty()) {
718 pending_map
.modules
.erase(module
);
720 ss
<< "Command '" << prefix
<< "' not implemented!";
725 dout(4) << __func__
<< " done, r=" << r
<< dendl
;
726 /* Compose response */
731 // success.. delay reply
// The delayed reply is tagged with the version the pending proposal will
// commit as (last committed + 1).
732 wait_for_finished_proposal(op
, new Monitor::C_Command(mon
, op
, r
, rs
,
733 get_last_committed() + 1));
// Otherwise reply immediately against the current committed version.
737 mon
->reply_command(op
, r
, rs
, rdata
, get_last_committed());
// Kick off the periodic digest cycle when no digest timer event is pending.
742 void MgrMonitor::init()
744 if (digest_event
== nullptr) {
745 send_digests(); // To get it to schedule its own event
749 void MgrMonitor::on_shutdown()