]>
git.proxmox.com Git - ceph.git/blob - ceph/src/mon/MgrMonitor.cc
863672afe7f5d7ccb29ec14a5a53bd6aa2bf32a5
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2016 John Spray <john.spray@redhat.com>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
14 #include "messages/MMgrBeacon.h"
15 #include "messages/MMgrMap.h"
16 #include "messages/MMgrDigest.h"
18 #include "PGStatService.h"
19 #include "include/stringify.h"
20 #include "mgr/MgrContext.h"
21 #include "OSDMonitor.h"
23 #include "MgrMonitor.h"
25 #define dout_subsys ceph_subsys_mon
27 #define dout_prefix _prefix(_dout, mon, map)
28 static ostream
& _prefix(std::ostream
*_dout
, Monitor
*mon
,
29 const MgrMap
& mgrmap
) {
30 return *_dout
<< "mon." << mon
->name
<< "@" << mon
->rank
31 << "(" << mon
->get_state_name()
32 << ").mgr e" << mgrmap
.get_epoch() << " ";
36 void MgrMonitor::create_initial()
40 void MgrMonitor::update_from_paxos(bool *need_bootstrap
)
42 version_t version
= get_last_committed();
43 if (version
!= map
.epoch
) {
44 dout(4) << "loading version " << version
<< dendl
;
47 int err
= get_version(version
, bl
);
50 bufferlist::iterator p
= bl
.begin();
53 dout(4) << "active server: " << map
.active_addr
54 << "(" << map
.active_gid
<< ")" << dendl
;
57 first_seen_inactive
= utime_t();
59 first_seen_inactive
= ceph_clock_now();
65 // feed our pet MgrClient
66 mon
->mgr_client
.ms_dispatch(new MMgrMap(map
));
69 void MgrMonitor::create_pending()
75 void MgrMonitor::encode_pending(MonitorDBStore::TransactionRef t
)
77 dout(10) << __func__
<< " " << pending_map
<< dendl
;
79 pending_map
.encode(bl
, mon
->get_quorum_con_features());
80 put_version(t
, pending_map
.epoch
, bl
);
81 put_last_committed(t
, pending_map
.epoch
);
84 bool MgrMonitor::check_caps(MonOpRequestRef op
, const uuid_d
& fsid
)
87 MonSession
*session
= op
->get_session();
90 if (!session
->is_capable("mgr", MON_CAP_X
)) {
91 dout(1) << __func__
<< " insufficient caps " << session
->caps
<< dendl
;
94 if (fsid
!= mon
->monmap
->fsid
) {
95 dout(1) << __func__
<< " op fsid " << fsid
96 << " != " << mon
->monmap
->fsid
<< dendl
;
102 bool MgrMonitor::preprocess_query(MonOpRequestRef op
)
104 PaxosServiceMessage
*m
= static_cast<PaxosServiceMessage
*>(op
->get_req());
105 switch (m
->get_type()) {
107 return preprocess_beacon(op
);
108 case MSG_MON_COMMAND
:
109 return preprocess_command(op
);
112 derr
<< "Unhandled message type " << m
->get_type() << dendl
;
117 bool MgrMonitor::prepare_update(MonOpRequestRef op
)
119 PaxosServiceMessage
*m
= static_cast<PaxosServiceMessage
*>(op
->get_req());
120 switch (m
->get_type()) {
122 return prepare_beacon(op
);
124 case MSG_MON_COMMAND
:
125 return prepare_command(op
);
129 derr
<< "Unhandled message type " << m
->get_type() << dendl
;
136 class C_Updated
: public Context
{
140 C_Updated(MgrMonitor
*a
, MonOpRequestRef c
) :
142 void finish(int r
) override
{
145 } else if (r
== -ECANCELED
) {
146 mm
->mon
->no_reply(op
);
148 mm
->dispatch(op
); // try again
153 bool MgrMonitor::preprocess_beacon(MonOpRequestRef op
)
155 MMgrBeacon
*m
= static_cast<MMgrBeacon
*>(op
->get_req());
156 dout(4) << "beacon from " << m
->get_gid() << dendl
;
158 if (!check_caps(op
, m
->get_fsid())) {
159 // drop it on the floor
163 // always send this to the leader's prepare_beacon()
167 bool MgrMonitor::prepare_beacon(MonOpRequestRef op
)
169 MMgrBeacon
*m
= static_cast<MMgrBeacon
*>(op
->get_req());
170 dout(4) << "beacon from " << m
->get_gid() << dendl
;
172 // See if we are seeing same name, new GID for the active daemon
173 if (m
->get_name() == pending_map
.active_name
174 && m
->get_gid() != pending_map
.active_gid
)
176 dout(4) << "Active daemon restart (mgr." << m
->get_name() << ")" << dendl
;
180 // See if we are seeing same name, new GID for any standbys
181 for (const auto &i
: pending_map
.standbys
) {
182 const StandbyInfo
&s
= i
.second
;
183 if (s
.name
== m
->get_name() && s
.gid
!= m
->get_gid()) {
184 dout(4) << "Standby daemon restart (mgr." << m
->get_name() << ")" << dendl
;
185 drop_standby(i
.first
);
190 last_beacon
[m
->get_gid()] = ceph::coarse_mono_clock::now();
192 // Track whether we modified pending_map
193 bool updated
= false;
195 if (pending_map
.active_gid
== m
->get_gid()) {
196 // A beacon from the currently active daemon
197 if (pending_map
.active_addr
!= m
->get_server_addr()) {
198 dout(4) << "learned address " << m
->get_server_addr()
199 << " (was " << pending_map
.active_addr
<< ")" << dendl
;
200 pending_map
.active_addr
= m
->get_server_addr();
204 if (pending_map
.get_available() != m
->get_available()) {
205 dout(4) << "available " << m
->get_gid() << dendl
;
206 pending_map
.available
= m
->get_available();
209 } else if (pending_map
.active_gid
== 0) {
210 // There is no currently active daemon, select this one.
211 if (pending_map
.standbys
.count(m
->get_gid())) {
212 drop_standby(m
->get_gid());
214 dout(4) << "selecting new active " << m
->get_gid()
215 << " " << m
->get_name()
216 << " (was " << pending_map
.active_gid
<< " "
217 << pending_map
.active_name
<< ")" << dendl
;
218 pending_map
.active_gid
= m
->get_gid();
219 pending_map
.active_name
= m
->get_name();
223 if (pending_map
.standbys
.count(m
->get_gid()) > 0) {
224 dout(10) << "from existing standby " << m
->get_gid() << dendl
;
226 dout(10) << "new standby " << m
->get_gid() << dendl
;
227 pending_map
.standbys
[m
->get_gid()] = {m
->get_gid(), m
->get_name()};
233 dout(4) << "updating map" << dendl
;
234 wait_for_finished_proposal(op
, new C_Updated(this, op
));
236 dout(10) << "no change" << dendl
;
242 void MgrMonitor::check_subs()
244 const std::string type
= "mgrmap";
245 if (mon
->session_map
.subs
.count(type
) == 0)
247 for (auto sub
: *(mon
->session_map
.subs
[type
])) {
252 void MgrMonitor::check_sub(Subscription
*sub
)
254 if (sub
->type
== "mgrmap") {
255 if (sub
->next
<= map
.get_epoch()) {
256 dout(20) << "Sending map to subscriber " << sub
->session
->con
<< dendl
;
257 sub
->session
->con
->send_message(new MMgrMap(map
));
259 mon
->session_map
.remove_sub(sub
);
261 sub
->next
= map
.get_epoch() + 1;
265 assert(sub
->type
== "mgrdigest");
266 if (digest_event
== nullptr) {
273 * Handle digest subscriptions separately (outside of check_sub) because
274 * they are going to be periodic rather than version-driven.
276 void MgrMonitor::send_digests()
284 const std::string type
= "mgrdigest";
285 if (mon
->session_map
.subs
.count(type
) == 0)
288 for (auto sub
: *(mon
->session_map
.subs
[type
])) {
289 MMgrDigest
*mdigest
= new MMgrDigest
;
292 std::list
<std::string
> health_strs
;
293 mon
->get_health(health_strs
, nullptr, &f
);
294 f
.flush(mdigest
->health_json
);
297 std::ostringstream ss
;
298 mon
->get_mon_status(&f
, ss
);
299 f
.flush(mdigest
->mon_status_json
);
302 sub
->session
->con
->send_message(mdigest
);
305 digest_event
= new C_MonContext(mon
, [this](int){
308 mon
->timer
.add_event_after(g_conf
->mon_mgr_digest_period
, digest_event
);
311 void MgrMonitor::cancel_timer()
314 mon
->timer
.cancel_event(digest_event
);
315 digest_event
= nullptr;
319 void MgrMonitor::on_active()
321 if (mon
->is_leader())
322 mon
->clog
->info() << "mgrmap e" << map
.epoch
<< ": " << map
;
325 void MgrMonitor::get_health(
326 list
<pair
<health_status_t
,string
> >& summary
,
327 list
<pair
<health_status_t
,string
> > *detail
,
328 CephContext
*cct
) const
330 // start mgr warnings as soon as the mons and osds are all upgraded,
331 // but before the require_luminous osdmap flag is set. this way the
332 // user gets some warning before the osd flag is set and mgr is
333 // actually *required*.
334 if (!mon
->monmap
->get_required_features().contains_all(
335 ceph::features::mon::FEATURE_LUMINOUS
) ||
336 !HAVE_FEATURE(mon
->osdmon()->osdmap
.get_up_osd_features(),
341 if (!map
.available
) {
342 auto level
= HEALTH_WARN
;
343 // do not escalate to ERR if they are still upgrading to jewel.
344 if (mon
->osdmon()->osdmap
.require_osd_release
>= CEPH_RELEASE_LUMINOUS
) {
345 utime_t now
= ceph_clock_now();
346 if (first_seen_inactive
!= utime_t() &&
347 now
- first_seen_inactive
> g_conf
->mon_mgr_inactive_grace
) {
351 summary
.push_back(make_pair(level
, "no active mgr"));
355 void MgrMonitor::tick()
357 if (!is_active() || !mon
->is_leader())
360 const auto now
= ceph::coarse_mono_clock::now();
361 const auto cutoff
= now
- std::chrono::seconds(g_conf
->mon_mgr_beacon_grace
);
363 // Populate any missing beacons (i.e. no beacon since MgrMonitor
364 // instantiation) with the current time, so that they will
365 // eventually look laggy if they fail to give us a beacon.
366 if (pending_map
.active_gid
!= 0
367 && last_beacon
.count(pending_map
.active_gid
) == 0) {
368 last_beacon
[pending_map
.active_gid
] = now
;
370 for (auto s
: pending_map
.standbys
) {
371 if (last_beacon
.count(s
.first
) == 0) {
372 last_beacon
[s
.first
] = now
;
376 // Cull standbys first so that any remaining standbys
377 // will be eligible to take over from the active if we cull him.
378 std::list
<uint64_t> dead_standbys
;
379 for (const auto &i
: pending_map
.standbys
) {
380 auto last_beacon_time
= last_beacon
.at(i
.first
);
381 if (last_beacon_time
< cutoff
) {
382 dead_standbys
.push_back(i
.first
);
386 bool propose
= false;
388 for (auto i
: dead_standbys
) {
389 dout(4) << "Dropping laggy standby " << i
<< dendl
;
394 if (pending_map
.active_gid
!= 0
395 && last_beacon
.at(pending_map
.active_gid
) < cutoff
) {
399 dout(4) << "Dropping active" << pending_map
.active_gid
<< dendl
;
400 if (promote_standby()) {
401 dout(4) << "Promoted standby " << pending_map
.active_gid
<< dendl
;
403 dout(4) << "Active is laggy but have no standbys to replace it" << dendl
;
405 } else if (pending_map
.active_gid
== 0) {
406 if (promote_standby()) {
407 dout(4) << "Promoted standby " << pending_map
.active_gid
<< dendl
;
417 bool MgrMonitor::promote_standby()
419 assert(pending_map
.active_gid
== 0);
420 if (pending_map
.standbys
.size()) {
421 // Promote a replacement (arbitrary choice of standby)
422 auto replacement_gid
= pending_map
.standbys
.begin()->first
;
423 pending_map
.active_gid
= replacement_gid
;
424 pending_map
.active_name
= pending_map
.standbys
.at(replacement_gid
).name
;
425 pending_map
.available
= false;
426 pending_map
.active_addr
= entity_addr_t();
428 drop_standby(replacement_gid
);
435 void MgrMonitor::drop_active()
437 if (last_beacon
.count(pending_map
.active_gid
) > 0) {
438 last_beacon
.erase(pending_map
.active_gid
);
441 pending_map
.active_name
= "";
442 pending_map
.active_gid
= 0;
443 pending_map
.available
= false;
444 pending_map
.active_addr
= entity_addr_t();
447 void MgrMonitor::drop_standby(uint64_t gid
)
449 pending_map
.standbys
.erase(gid
);
450 if (last_beacon
.count(gid
) > 0) {
451 last_beacon
.erase(gid
);
456 bool MgrMonitor::preprocess_command(MonOpRequestRef op
)
458 MMonCommand
*m
= static_cast<MMonCommand
*>(op
->get_req());
459 std::stringstream ss
;
462 std::map
<std::string
, cmd_vartype
> cmdmap
;
463 if (!cmdmap_from_json(m
->cmd
, &cmdmap
, ss
)) {
464 string rs
= ss
.str();
465 mon
->reply_command(op
, -EINVAL
, rs
, rdata
, get_last_committed());
469 MonSession
*session
= m
->get_session();
471 mon
->reply_command(op
, -EACCES
, "access denied", rdata
,
472 get_last_committed());
477 cmd_getval(g_ceph_context
, cmdmap
, "format", format
, string("json-pretty"));
478 boost::scoped_ptr
<Formatter
> f(Formatter::create(format
));
481 cmd_getval(g_ceph_context
, cmdmap
, "prefix", prefix
);
484 if (prefix
== "mgr dump") {
486 cmd_getval(g_ceph_context
, cmdmap
, "epoch", epoch
, (int64_t)map
.get_epoch());
487 if (epoch
== (int64_t)map
.get_epoch()) {
488 f
->dump_object("mgrmap", map
);
491 int err
= get_version(epoch
, bl
);
492 if (err
== -ENOENT
) {
494 ss
<< "there is no map for epoch " << epoch
;
500 f
->dump_object("mgrmap", m
);
510 mon
->reply_command(op
, r
, rs
, rdata
, get_last_committed());
514 bool MgrMonitor::prepare_command(MonOpRequestRef op
)
516 MMonCommand
*m
= static_cast<MMonCommand
*>(op
->get_req());
518 std::stringstream ss
;
521 std::map
<std::string
, cmd_vartype
> cmdmap
;
522 if (!cmdmap_from_json(m
->cmd
, &cmdmap
, ss
)) {
523 string rs
= ss
.str();
524 mon
->reply_command(op
, -EINVAL
, rs
, rdata
, get_last_committed());
528 MonSession
*session
= m
->get_session();
530 mon
->reply_command(op
, -EACCES
, "access denied", rdata
, get_last_committed());
535 cmd_getval(g_ceph_context
, cmdmap
, "prefix", prefix
);
539 if (prefix
== "mgr fail") {
541 cmd_getval(g_ceph_context
, cmdmap
, "who", who
);
544 uint64_t gid
= strict_strtol(who
.c_str(), 10, &err
);
545 bool changed
= false;
547 // Does not parse as a gid, treat it as a name
548 if (pending_map
.active_name
== who
) {
553 for (const auto &i
: pending_map
.standbys
) {
554 if (i
.second
.name
== who
) {
563 ss
<< "Daemon not found '" << who
<< "', already failed?";
567 if (pending_map
.active_gid
== gid
) {
570 } else if (pending_map
.standbys
.count(gid
) > 0) {
574 ss
<< "Daemon not found '" << gid
<< "', already failed?";
578 if (changed
&& pending_map
.active_gid
== 0) {
585 dout(4) << __func__
<< " done, r=" << r
<< dendl
;
586 /* Compose response */
591 // success.. delay reply
592 wait_for_finished_proposal(op
, new Monitor::C_Command(mon
, op
, r
, rs
,
593 get_last_committed() + 1));
597 mon
->reply_command(op
, r
, rs
, rdata
, get_last_committed());
602 void MgrMonitor::init()
604 if (digest_event
== nullptr) {
605 send_digests(); // To get it to schedule its own event
609 void MgrMonitor::on_shutdown()