]> git.proxmox.com Git - ceph.git/blame - ceph/src/mon/MgrMonitor.cc
update sources to 12.2.7
[ceph.git] / ceph / src / mon / MgrMonitor.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2016 John Spray <john.spray@redhat.com>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 */
13
224ce89b
WB
14#include <boost/tokenizer.hpp>
15
7c673cae
FG
16#include "messages/MMgrBeacon.h"
17#include "messages/MMgrMap.h"
18#include "messages/MMgrDigest.h"
19
31f18b77 20#include "PGStatService.h"
7c673cae
FG
21#include "include/stringify.h"
22#include "mgr/MgrContext.h"
c07f9fc5 23#include "mgr/mgr_commands.h"
7c673cae
FG
24#include "OSDMonitor.h"
25
26#include "MgrMonitor.h"
27
c07f9fc5
FG
28#define MGR_METADATA_PREFIX "mgr_metadata"
29
7c673cae
FG
30#define dout_subsys ceph_subsys_mon
31#undef dout_prefix
32#define dout_prefix _prefix(_dout, mon, map)
33static ostream& _prefix(std::ostream *_dout, Monitor *mon,
34 const MgrMap& mgrmap) {
35 return *_dout << "mon." << mon->name << "@" << mon->rank
36 << "(" << mon->get_state_name()
37 << ").mgr e" << mgrmap.get_epoch() << " ";
38}
39
c07f9fc5
FG
40// Prefix for mon store of active mgr's command descriptions
41const static std::string command_descs_prefix = "mgr_command_descs";
42
31f18b77 43
b32b8144
FG
44version_t MgrMonitor::get_trim_to()
45{
46 int64_t max = g_conf->get_val<int64_t>("mon_max_mgrmap_epochs");
47 if (map.epoch > max) {
48 return map.epoch - max;
49 }
50 return 0;
51}
52
7c673cae
FG
53void MgrMonitor::create_initial()
54{
3efd9988
FG
55 // Take a local copy of initial_modules for tokenizer to iterate over.
56 auto initial_modules = g_conf->get_val<std::string>("mgr_initial_modules");
57 boost::tokenizer<> tok(initial_modules);
224ce89b
WB
58 for (auto& m : tok) {
59 pending_map.modules.insert(m);
60 }
c07f9fc5
FG
61 pending_command_descs = mgr_commands;
62 dout(10) << __func__ << " initial modules " << pending_map.modules
63 << ", " << pending_command_descs.size() << " commands"
64 << dendl;
7c673cae
FG
65}
66
3efd9988
FG
67void MgrMonitor::get_store_prefixes(std::set<string>& s)
68{
69 s.insert(service_name);
70 s.insert(command_descs_prefix);
71 s.insert(MGR_METADATA_PREFIX);
72}
73
7c673cae
FG
74void MgrMonitor::update_from_paxos(bool *need_bootstrap)
75{
76 version_t version = get_last_committed();
77 if (version != map.epoch) {
78 dout(4) << "loading version " << version << dendl;
79
80 bufferlist bl;
81 int err = get_version(version, bl);
82 assert(err == 0);
83
c07f9fc5
FG
84 bool old_available = map.get_available();
85 uint64_t old_gid = map.get_active_gid();
86
7c673cae
FG
87 bufferlist::iterator p = bl.begin();
88 map.decode(p);
89
90 dout(4) << "active server: " << map.active_addr
91 << "(" << map.active_gid << ")" << dendl;
92
224ce89b
WB
93 ever_had_active_mgr = get_value("ever_had_active_mgr");
94
95 load_health();
96
7c673cae
FG
97 if (map.available) {
98 first_seen_inactive = utime_t();
99 } else {
100 first_seen_inactive = ceph_clock_now();
101 }
102
103 check_subs();
c07f9fc5
FG
104
105 if (version == 1
3efd9988
FG
106 || command_descs.empty()
107 || (map.get_available()
108 && (!old_available || old_gid != map.get_active_gid()))) {
c07f9fc5
FG
109 dout(4) << "mkfs or daemon transitioned to available, loading commands"
110 << dendl;
111 bufferlist loaded_commands;
112 int r = mon->store->get(command_descs_prefix, "", loaded_commands);
113 if (r < 0) {
114 derr << "Failed to load mgr commands: " << cpp_strerror(r) << dendl;
115 } else {
116 auto p = loaded_commands.begin();
117 ::decode(command_descs, p);
118 }
119 }
7c673cae
FG
120 }
121
122 // feed our pet MgrClient
123 mon->mgr_client.ms_dispatch(new MMgrMap(map));
124}
125
126void MgrMonitor::create_pending()
127{
128 pending_map = map;
129 pending_map.epoch++;
3efd9988
FG
130
131 if (map.get_epoch() == 1 &&
132 command_descs.empty() &&
133 pending_command_descs.empty()) {
134 // we've been through the initial map and we haven't populated the
135 // command_descs vector. This likely means we came from kraken, where
136 // we wouldn't populate the vector, nor would we write it to disk, on
137 // create_initial().
138 create_initial();
139 }
7c673cae
FG
140}
141
224ce89b
WB
142health_status_t MgrMonitor::should_warn_about_mgr_down()
143{
144 utime_t now = ceph_clock_now();
145 // we warn if
146 // - we've ever had an active mgr, or
147 // - we have osds AND we've exceeded the grace period
148 // which means a new mon cluster and be HEALTH_OK indefinitely as long as
149 // no OSDs are ever created.
150 if (ever_had_active_mgr ||
151 (mon->osdmon()->osdmap.get_num_osds() > 0 &&
3efd9988 152 now > mon->monmap->created + g_conf->get_val<int64_t>("mon_mgr_mkfs_grace"))) {
224ce89b
WB
153 health_status_t level = HEALTH_WARN;
154 if (first_seen_inactive != utime_t() &&
3efd9988 155 now - first_seen_inactive > g_conf->get_val<int64_t>("mon_mgr_inactive_grace")) {
224ce89b
WB
156 level = HEALTH_ERR;
157 }
158 return level;
159 }
160 return HEALTH_OK;
161}
162
7c673cae
FG
163void MgrMonitor::encode_pending(MonitorDBStore::TransactionRef t)
164{
165 dout(10) << __func__ << " " << pending_map << dendl;
166 bufferlist bl;
167 pending_map.encode(bl, mon->get_quorum_con_features());
168 put_version(t, pending_map.epoch, bl);
169 put_last_committed(t, pending_map.epoch);
224ce89b 170
c07f9fc5
FG
171 for (auto& p : pending_metadata) {
172 dout(10) << __func__ << " set metadata for " << p.first << dendl;
173 t->put(MGR_METADATA_PREFIX, p.first, p.second);
174 }
175 for (auto& name : pending_metadata_rm) {
176 dout(10) << __func__ << " rm metadata for " << name << dendl;
177 t->erase(MGR_METADATA_PREFIX, name);
178 }
179 pending_metadata.clear();
180 pending_metadata_rm.clear();
181
224ce89b
WB
182 health_check_map_t next;
183 if (pending_map.active_gid == 0) {
184 auto level = should_warn_about_mgr_down();
185 if (level != HEALTH_OK) {
186 next.add("MGR_DOWN", level, "no active mgr");
187 } else {
188 dout(10) << __func__ << " no health warning (never active and new cluster)"
189 << dendl;
190 }
191 } else {
192 put_value(t, "ever_had_active_mgr", 1);
193 }
194 encode_health(next, t);
c07f9fc5
FG
195
196 if (pending_command_descs.size()) {
197 dout(4) << __func__ << " encoding " << pending_command_descs.size()
198 << " command_descs" << dendl;
199 for (auto& p : pending_command_descs) {
200 p.set_flag(MonCommand::FLAG_MGR);
201 }
202 bufferlist bl;
203 ::encode(pending_command_descs, bl);
204 t->put(command_descs_prefix, "", bl);
205 pending_command_descs.clear();
206 }
7c673cae
FG
207}
208
209bool MgrMonitor::check_caps(MonOpRequestRef op, const uuid_d& fsid)
210{
211 // check permissions
212 MonSession *session = op->get_session();
213 if (!session)
214 return false;
215 if (!session->is_capable("mgr", MON_CAP_X)) {
216 dout(1) << __func__ << " insufficient caps " << session->caps << dendl;
217 return false;
218 }
219 if (fsid != mon->monmap->fsid) {
220 dout(1) << __func__ << " op fsid " << fsid
221 << " != " << mon->monmap->fsid << dendl;
222 return false;
223 }
224 return true;
225}
226
227bool MgrMonitor::preprocess_query(MonOpRequestRef op)
228{
229 PaxosServiceMessage *m = static_cast<PaxosServiceMessage*>(op->get_req());
230 switch (m->get_type()) {
231 case MSG_MGR_BEACON:
232 return preprocess_beacon(op);
233 case MSG_MON_COMMAND:
234 return preprocess_command(op);
235 default:
236 mon->no_reply(op);
237 derr << "Unhandled message type " << m->get_type() << dendl;
238 return true;
239 }
240}
241
242bool MgrMonitor::prepare_update(MonOpRequestRef op)
243{
244 PaxosServiceMessage *m = static_cast<PaxosServiceMessage*>(op->get_req());
245 switch (m->get_type()) {
246 case MSG_MGR_BEACON:
247 return prepare_beacon(op);
248
249 case MSG_MON_COMMAND:
250 return prepare_command(op);
251
252 default:
253 mon->no_reply(op);
254 derr << "Unhandled message type " << m->get_type() << dendl;
255 return true;
256 }
257}
258
259
260
261class C_Updated : public Context {
262 MgrMonitor *mm;
263 MonOpRequestRef op;
264public:
265 C_Updated(MgrMonitor *a, MonOpRequestRef c) :
266 mm(a), op(c) {}
267 void finish(int r) override {
268 if (r >= 0) {
269 // Success
270 } else if (r == -ECANCELED) {
271 mm->mon->no_reply(op);
272 } else {
273 mm->dispatch(op); // try again
274 }
275 }
276};
277
278bool MgrMonitor::preprocess_beacon(MonOpRequestRef op)
279{
280 MMgrBeacon *m = static_cast<MMgrBeacon*>(op->get_req());
94b18763 281 mon->no_reply(op); // we never reply to beacons
7c673cae
FG
282 dout(4) << "beacon from " << m->get_gid() << dendl;
283
284 if (!check_caps(op, m->get_fsid())) {
285 // drop it on the floor
286 return true;
287 }
288
289 // always send this to the leader's prepare_beacon()
290 return false;
291}
292
293bool MgrMonitor::prepare_beacon(MonOpRequestRef op)
294{
295 MMgrBeacon *m = static_cast<MMgrBeacon*>(op->get_req());
296 dout(4) << "beacon from " << m->get_gid() << dendl;
297
298 // See if we are seeing same name, new GID for the active daemon
299 if (m->get_name() == pending_map.active_name
300 && m->get_gid() != pending_map.active_gid)
301 {
302 dout(4) << "Active daemon restart (mgr." << m->get_name() << ")" << dendl;
224ce89b
WB
303 mon->clog->info() << "Active manager daemon " << m->get_name()
304 << " restarted";
7c673cae
FG
305 drop_active();
306 }
307
308 // See if we are seeing same name, new GID for any standbys
309 for (const auto &i : pending_map.standbys) {
310 const StandbyInfo &s = i.second;
311 if (s.name == m->get_name() && s.gid != m->get_gid()) {
312 dout(4) << "Standby daemon restart (mgr." << m->get_name() << ")" << dendl;
224ce89b
WB
313 mon->clog->debug() << "Standby manager daemon " << m->get_name()
314 << " restarted";
7c673cae
FG
315 drop_standby(i.first);
316 break;
317 }
318 }
319
31f18b77 320 last_beacon[m->get_gid()] = ceph::coarse_mono_clock::now();
7c673cae
FG
321
322 // Track whether we modified pending_map
323 bool updated = false;
324
325 if (pending_map.active_gid == m->get_gid()) {
3efd9988
FG
326 if (pending_map.services != m->get_services()) {
327 dout(4) << "updated services from mgr." << m->get_name()
328 << ": " << m->get_services() << dendl;
329 pending_map.services = m->get_services();
330 updated = true;
331 }
332
7c673cae
FG
333 // A beacon from the currently active daemon
334 if (pending_map.active_addr != m->get_server_addr()) {
335 dout(4) << "learned address " << m->get_server_addr()
336 << " (was " << pending_map.active_addr << ")" << dendl;
337 pending_map.active_addr = m->get_server_addr();
338 updated = true;
339 }
340
341 if (pending_map.get_available() != m->get_available()) {
342 dout(4) << "available " << m->get_gid() << dendl;
224ce89b
WB
343 mon->clog->info() << "Manager daemon " << pending_map.active_name
344 << " is now available";
c07f9fc5
FG
345
346 // This beacon should include command descriptions
347 pending_command_descs = m->get_command_descs();
348 if (pending_command_descs.empty()) {
349 // This should not happen, but it also isn't fatal: we just
350 // won't successfully update our list of commands.
351 dout(4) << "First available beacon from " << pending_map.active_name
352 << "(" << m->get_gid() << ") does not include command descs"
353 << dendl;
354 } else {
355 dout(4) << "First available beacon from " << pending_map.active_name
356 << "(" << m->get_gid() << ") includes "
357 << pending_command_descs.size() << " command descs" << dendl;
358 }
359
7c673cae
FG
360 pending_map.available = m->get_available();
361 updated = true;
362 }
224ce89b
WB
363 if (pending_map.available_modules != m->get_available_modules()) {
364 dout(4) << "available_modules " << m->get_available_modules()
365 << " (was " << pending_map.available_modules << ")" << dendl;
366 pending_map.available_modules = m->get_available_modules();
367 updated = true;
368 }
7c673cae
FG
369 } else if (pending_map.active_gid == 0) {
370 // There is no currently active daemon, select this one.
371 if (pending_map.standbys.count(m->get_gid())) {
181888fb 372 drop_standby(m->get_gid(), false);
7c673cae
FG
373 }
374 dout(4) << "selecting new active " << m->get_gid()
375 << " " << m->get_name()
376 << " (was " << pending_map.active_gid << " "
377 << pending_map.active_name << ")" << dendl;
378 pending_map.active_gid = m->get_gid();
379 pending_map.active_name = m->get_name();
224ce89b 380 pending_map.available_modules = m->get_available_modules();
c07f9fc5
FG
381 ::encode(m->get_metadata(), pending_metadata[m->get_name()]);
382 pending_metadata_rm.erase(m->get_name());
224ce89b
WB
383
384 mon->clog->info() << "Activating manager daemon "
385 << pending_map.active_name;
7c673cae
FG
386
387 updated = true;
388 } else {
389 if (pending_map.standbys.count(m->get_gid()) > 0) {
390 dout(10) << "from existing standby " << m->get_gid() << dendl;
224ce89b
WB
391 if (pending_map.standbys[m->get_gid()].available_modules !=
392 m->get_available_modules()) {
393 dout(10) << "existing standby " << m->get_gid() << " available_modules "
394 << m->get_available_modules() << " (was "
395 << pending_map.standbys[m->get_gid()].available_modules << ")"
396 << dendl;
397 pending_map.standbys[m->get_gid()].available_modules =
398 m->get_available_modules();
399 updated = true;
400 }
7c673cae
FG
401 } else {
402 dout(10) << "new standby " << m->get_gid() << dendl;
224ce89b
WB
403 mon->clog->debug() << "Standby manager daemon " << m->get_name()
404 << " started";
c07f9fc5
FG
405 pending_map.standbys[m->get_gid()] = {m->get_gid(), m->get_name(),
406 m->get_available_modules()};
407 ::encode(m->get_metadata(), pending_metadata[m->get_name()]);
408 pending_metadata_rm.erase(m->get_name());
7c673cae
FG
409 updated = true;
410 }
411 }
412
413 if (updated) {
414 dout(4) << "updating map" << dendl;
415 wait_for_finished_proposal(op, new C_Updated(this, op));
416 } else {
417 dout(10) << "no change" << dendl;
418 }
419
420 return updated;
421}
422
423void MgrMonitor::check_subs()
424{
425 const std::string type = "mgrmap";
426 if (mon->session_map.subs.count(type) == 0)
427 return;
428 for (auto sub : *(mon->session_map.subs[type])) {
429 check_sub(sub);
430 }
431}
432
433void MgrMonitor::check_sub(Subscription *sub)
434{
435 if (sub->type == "mgrmap") {
436 if (sub->next <= map.get_epoch()) {
224ce89b
WB
437 dout(20) << "Sending map to subscriber " << sub->session->con
438 << " " << sub->session->con->get_peer_addr() << dendl;
7c673cae
FG
439 sub->session->con->send_message(new MMgrMap(map));
440 if (sub->onetime) {
441 mon->session_map.remove_sub(sub);
442 } else {
443 sub->next = map.get_epoch() + 1;
444 }
445 }
446 } else {
447 assert(sub->type == "mgrdigest");
c07f9fc5
FG
448 if (sub->next == 0) {
449 // new registration; cancel previous timer
450 cancel_timer();
451 }
31f18b77 452 if (digest_event == nullptr) {
7c673cae
FG
453 send_digests();
454 }
455 }
456}
457
458/**
459 * Handle digest subscriptions separately (outside of check_sub) because
460 * they are going to be periodic rather than version-driven.
461 */
462void MgrMonitor::send_digests()
463{
31f18b77
FG
464 cancel_timer();
465
7c673cae
FG
466 const std::string type = "mgrdigest";
467 if (mon->session_map.subs.count(type) == 0)
468 return;
469
b32b8144
FG
470 if (!is_active()) {
471 // if paxos is currently not active, don't send a digest but reenable timer
472 goto timer;
473 }
474 dout(10) << __func__ << dendl;
475
7c673cae 476 for (auto sub : *(mon->session_map.subs[type])) {
224ce89b
WB
477 dout(10) << __func__ << " sending digest to subscriber " << sub->session->con
478 << " " << sub->session->con->get_peer_addr() << dendl;
7c673cae
FG
479 MMgrDigest *mdigest = new MMgrDigest;
480
481 JSONFormatter f;
224ce89b 482 mon->get_health_status(true, &f, nullptr, nullptr, nullptr);
7c673cae
FG
483 f.flush(mdigest->health_json);
484 f.reset();
485
486 std::ostringstream ss;
487 mon->get_mon_status(&f, ss);
488 f.flush(mdigest->mon_status_json);
489 f.reset();
490
491 sub->session->con->send_message(mdigest);
492 }
493
b32b8144 494timer:
3efd9988
FG
495 digest_event = mon->timer.add_event_after(
496 g_conf->get_val<int64_t>("mon_mgr_digest_period"),
497 new C_MonContext(mon, [this](int) {
7c673cae 498 send_digests();
3efd9988 499 }));
31f18b77
FG
500}
501
502void MgrMonitor::cancel_timer()
503{
504 if (digest_event) {
505 mon->timer.cancel_event(digest_event);
506 digest_event = nullptr;
507 }
7c673cae
FG
508}
509
510void MgrMonitor::on_active()
511{
224ce89b
WB
512 if (mon->is_leader()) {
513 mon->clog->debug() << "mgrmap e" << map.epoch << ": " << map;
514 }
7c673cae
FG
515}
516
517void MgrMonitor::get_health(
518 list<pair<health_status_t,string> >& summary,
519 list<pair<health_status_t,string> > *detail,
520 CephContext *cct) const
521{
522 // start mgr warnings as soon as the mons and osds are all upgraded,
523 // but before the require_luminous osdmap flag is set. this way the
524 // user gets some warning before the osd flag is set and mgr is
525 // actually *required*.
526 if (!mon->monmap->get_required_features().contains_all(
527 ceph::features::mon::FEATURE_LUMINOUS) ||
528 !HAVE_FEATURE(mon->osdmon()->osdmap.get_up_osd_features(),
529 SERVER_LUMINOUS)) {
530 return;
531 }
532
224ce89b 533 if (map.active_gid == 0) {
7c673cae
FG
534 auto level = HEALTH_WARN;
535 // do not escalate to ERR if they are still upgrading to jewel.
31f18b77 536 if (mon->osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS) {
7c673cae
FG
537 utime_t now = ceph_clock_now();
538 if (first_seen_inactive != utime_t() &&
3efd9988 539 now - first_seen_inactive > g_conf->get_val<int64_t>("mon_mgr_inactive_grace")) {
7c673cae
FG
540 level = HEALTH_ERR;
541 }
542 }
543 summary.push_back(make_pair(level, "no active mgr"));
544 }
545}
546
547void MgrMonitor::tick()
548{
549 if (!is_active() || !mon->is_leader())
550 return;
551
31f18b77 552 const auto now = ceph::coarse_mono_clock::now();
3efd9988
FG
553
554 const auto mgr_beacon_grace = std::chrono::seconds(
555 g_conf->get_val<int64_t>("mon_mgr_beacon_grace"));
556
557 // Note that this is the mgr daemon's tick period, not ours (the
558 // beacon is sent with this period).
559 const auto mgr_tick_period = std::chrono::seconds(
560 g_conf->get_val<int64_t>("mgr_tick_period"));
561
562 if (last_tick != ceph::coarse_mono_clock::time_point::min()
563 && (now - last_tick > (mgr_beacon_grace - mgr_tick_period))) {
564 // This case handles either local slowness (calls being delayed
565 // for whatever reason) or cluster election slowness (a long gap
566 // between calls while an election happened)
567 dout(4) << __func__ << ": resetting beacon timeouts due to mon delay "
568 "(slow election?) of " << now - last_tick << " seconds" << dendl;
569 for (auto &i : last_beacon) {
570 i.second = now;
571 }
572 }
573
574 last_tick = now;
7c673cae
FG
575
576 // Populate any missing beacons (i.e. no beacon since MgrMonitor
577 // instantiation) with the current time, so that they will
578 // eventually look laggy if they fail to give us a beacon.
579 if (pending_map.active_gid != 0
580 && last_beacon.count(pending_map.active_gid) == 0) {
581 last_beacon[pending_map.active_gid] = now;
582 }
583 for (auto s : pending_map.standbys) {
584 if (last_beacon.count(s.first) == 0) {
585 last_beacon[s.first] = now;
586 }
587 }
588
589 // Cull standbys first so that any remaining standbys
590 // will be eligible to take over from the active if we cull him.
591 std::list<uint64_t> dead_standbys;
3efd9988 592 const auto cutoff = now - mgr_beacon_grace;
7c673cae
FG
593 for (const auto &i : pending_map.standbys) {
594 auto last_beacon_time = last_beacon.at(i.first);
595 if (last_beacon_time < cutoff) {
596 dead_standbys.push_back(i.first);
597 }
598 }
599
600 bool propose = false;
601
602 for (auto i : dead_standbys) {
603 dout(4) << "Dropping laggy standby " << i << dendl;
604 drop_standby(i);
605 propose = true;
606 }
607
608 if (pending_map.active_gid != 0
609 && last_beacon.at(pending_map.active_gid) < cutoff) {
224ce89b 610 const std::string old_active_name = pending_map.active_name;
7c673cae
FG
611 drop_active();
612 propose = true;
613 dout(4) << "Dropping active" << pending_map.active_gid << dendl;
614 if (promote_standby()) {
615 dout(4) << "Promoted standby " << pending_map.active_gid << dendl;
224ce89b
WB
616 mon->clog->info() << "Manager daemon " << old_active_name
617 << " is unresponsive, replacing it with standby"
618 << " daemon " << pending_map.active_name;
7c673cae
FG
619 } else {
620 dout(4) << "Active is laggy but have no standbys to replace it" << dendl;
28e407b8 621 mon->clog->info() << "Manager daemon " << old_active_name
224ce89b 622 << " is unresponsive. No standby daemons available.";
7c673cae
FG
623 }
624 } else if (pending_map.active_gid == 0) {
625 if (promote_standby()) {
626 dout(4) << "Promoted standby " << pending_map.active_gid << dendl;
224ce89b 627 mon->clog->info() << "Activating manager daemon "
3efd9988 628 << pending_map.active_name;
7c673cae
FG
629 propose = true;
630 }
631 }
632
224ce89b 633 if (!pending_map.available &&
c07f9fc5 634 !ever_had_active_mgr &&
224ce89b 635 should_warn_about_mgr_down() != HEALTH_OK) {
3efd9988
FG
636 dout(10) << " exceeded mon_mgr_mkfs_grace "
637 << g_conf->get_val<int64_t>("mon_mgr_mkfs_grace")
638 << " seconds" << dendl;
224ce89b
WB
639 propose = true;
640 }
641
7c673cae
FG
642 if (propose) {
643 propose_pending();
644 }
645}
646
224ce89b
WB
647void MgrMonitor::on_restart()
648{
649 // Clear out the leader-specific state.
650 last_beacon.clear();
3efd9988 651 last_tick = ceph::coarse_mono_clock::now();
224ce89b
WB
652}
653
654
7c673cae
FG
655bool MgrMonitor::promote_standby()
656{
657 assert(pending_map.active_gid == 0);
658 if (pending_map.standbys.size()) {
659 // Promote a replacement (arbitrary choice of standby)
660 auto replacement_gid = pending_map.standbys.begin()->first;
661 pending_map.active_gid = replacement_gid;
662 pending_map.active_name = pending_map.standbys.at(replacement_gid).name;
663 pending_map.available = false;
664 pending_map.active_addr = entity_addr_t();
665
181888fb
FG
666 drop_standby(replacement_gid, false);
667
7c673cae
FG
668 return true;
669 } else {
670 return false;
671 }
672}
673
674void MgrMonitor::drop_active()
675{
676 if (last_beacon.count(pending_map.active_gid) > 0) {
677 last_beacon.erase(pending_map.active_gid);
678 }
679
c07f9fc5
FG
680 pending_metadata_rm.insert(pending_map.active_name);
681 pending_metadata.erase(pending_map.active_name);
7c673cae
FG
682 pending_map.active_name = "";
683 pending_map.active_gid = 0;
684 pending_map.available = false;
685 pending_map.active_addr = entity_addr_t();
3efd9988 686 pending_map.services.clear();
224ce89b
WB
687
688 // So that when new active mgr subscribes to mgrdigest, it will
689 // get an immediate response instead of waiting for next timer
690 cancel_timer();
7c673cae
FG
691}
692
181888fb 693void MgrMonitor::drop_standby(uint64_t gid, bool drop_meta)
7c673cae 694{
181888fb
FG
695 if (drop_meta) {
696 pending_metadata_rm.insert(pending_map.standbys[gid].name);
697 pending_metadata.erase(pending_map.standbys[gid].name);
698 }
7c673cae
FG
699 pending_map.standbys.erase(gid);
700 if (last_beacon.count(gid) > 0) {
701 last_beacon.erase(gid);
702 }
7c673cae
FG
703}
704
705bool MgrMonitor::preprocess_command(MonOpRequestRef op)
706{
31f18b77
FG
707 MMonCommand *m = static_cast<MMonCommand*>(op->get_req());
708 std::stringstream ss;
709 bufferlist rdata;
710
711 std::map<std::string, cmd_vartype> cmdmap;
712 if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) {
713 string rs = ss.str();
714 mon->reply_command(op, -EINVAL, rs, rdata, get_last_committed());
715 return true;
716 }
7c673cae 717
31f18b77
FG
718 MonSession *session = m->get_session();
719 if (!session) {
720 mon->reply_command(op, -EACCES, "access denied", rdata,
721 get_last_committed());
722 return true;
723 }
724
725 string format;
726 cmd_getval(g_ceph_context, cmdmap, "format", format, string("json-pretty"));
727 boost::scoped_ptr<Formatter> f(Formatter::create(format));
728
729 string prefix;
730 cmd_getval(g_ceph_context, cmdmap, "prefix", prefix);
731 int r = 0;
732
733 if (prefix == "mgr dump") {
734 int64_t epoch = 0;
735 cmd_getval(g_ceph_context, cmdmap, "epoch", epoch, (int64_t)map.get_epoch());
736 if (epoch == (int64_t)map.get_epoch()) {
737 f->dump_object("mgrmap", map);
738 } else {
739 bufferlist bl;
740 int err = get_version(epoch, bl);
741 if (err == -ENOENT) {
742 r = -ENOENT;
743 ss << "there is no map for epoch " << epoch;
744 goto reply;
745 }
746 MgrMap m;
747 auto p = bl.begin();
748 m.decode(p);
749 f->dump_object("mgrmap", m);
750 }
751 f->flush(rdata);
224ce89b 752 } else if (prefix == "mgr module ls") {
3efd9988
FG
753 f->open_object_section("modules");
754 {
755 f->open_array_section("enabled_modules");
756 for (auto& p : map.modules) {
757 f->dump_string("module", p);
758 }
759 f->close_section();
760 f->open_array_section("disabled_modules");
761 for (auto& p : map.available_modules) {
762 if (map.modules.count(p) == 0) {
763 f->dump_string("module", p);
764 }
765 }
766 f->close_section();
767 }
768 f->close_section();
769 f->flush(rdata);
770 } else if (prefix == "mgr services") {
771 f->open_object_section("services");
772 for (const auto &i : map.services) {
773 f->dump_string(i.first.c_str(), i.second);
224ce89b
WB
774 }
775 f->close_section();
776 f->flush(rdata);
c07f9fc5
FG
777 } else if (prefix == "mgr metadata") {
778 string name;
779 cmd_getval(g_ceph_context, cmdmap, "id", name);
780 if (name.size() > 0 && !map.have_name(name)) {
781 ss << "mgr." << name << " does not exist";
782 r = -ENOENT;
783 goto reply;
784 }
785 string format;
786 cmd_getval(g_ceph_context, cmdmap, "format", format);
787 boost::scoped_ptr<Formatter> f(Formatter::create(format, "json-pretty", "json-pretty"));
788 if (name.size()) {
789 f->open_object_section("mgr_metadata");
790 f->dump_string("id", name);
791 r = dump_metadata(name, f.get(), &ss);
792 if (r < 0)
793 goto reply;
794 f->close_section();
795 } else {
796 r = 0;
797 f->open_array_section("mgr_metadata");
798 for (auto& i : map.get_all_names()) {
799 f->open_object_section("mgr");
800 f->dump_string("id", i);
801 r = dump_metadata(i, f.get(), NULL);
802 if (r == -EINVAL || r == -ENOENT) {
803 // Drop error, continue to get other daemons' metadata
804 dout(4) << "No metadata for mgr." << i << dendl;
805 r = 0;
806 } else if (r < 0) {
807 // Unexpected error
808 goto reply;
809 }
810 f->close_section();
811 }
812 f->close_section();
813 }
814 f->flush(rdata);
815 } else if (prefix == "mgr versions") {
816 if (!f)
817 f.reset(Formatter::create("json-pretty"));
818 count_metadata("ceph_version", f.get());
819 f->flush(rdata);
820 r = 0;
821 } else if (prefix == "mgr count-metadata") {
822 if (!f)
823 f.reset(Formatter::create("json-pretty"));
824 string field;
825 cmd_getval(g_ceph_context, cmdmap, "property", field);
826 count_metadata(field, f.get());
827 f->flush(rdata);
828 r = 0;
31f18b77
FG
829 } else {
830 return false;
831 }
832
833reply:
834 string rs;
835 getline(ss, rs);
836 mon->reply_command(op, r, rs, rdata, get_last_committed());
837 return true;
7c673cae
FG
838}
839
840bool MgrMonitor::prepare_command(MonOpRequestRef op)
841{
842 MMonCommand *m = static_cast<MMonCommand*>(op->get_req());
843
844 std::stringstream ss;
845 bufferlist rdata;
846
847 std::map<std::string, cmd_vartype> cmdmap;
848 if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) {
849 string rs = ss.str();
850 mon->reply_command(op, -EINVAL, rs, rdata, get_last_committed());
851 return true;
852 }
853
854 MonSession *session = m->get_session();
855 if (!session) {
856 mon->reply_command(op, -EACCES, "access denied", rdata, get_last_committed());
857 return true;
858 }
859
224ce89b
WB
860 string format;
861 cmd_getval(g_ceph_context, cmdmap, "format", format, string("plain"));
862 boost::scoped_ptr<Formatter> f(Formatter::create(format));
863
7c673cae
FG
864 string prefix;
865 cmd_getval(g_ceph_context, cmdmap, "prefix", prefix);
866
867 int r = 0;
868
869 if (prefix == "mgr fail") {
870 string who;
871 cmd_getval(g_ceph_context, cmdmap, "who", who);
872
873 std::string err;
874 uint64_t gid = strict_strtol(who.c_str(), 10, &err);
875 bool changed = false;
876 if (!err.empty()) {
877 // Does not parse as a gid, treat it as a name
878 if (pending_map.active_name == who) {
879 drop_active();
880 changed = true;
881 } else {
882 gid = 0;
883 for (const auto &i : pending_map.standbys) {
884 if (i.second.name == who) {
885 gid = i.first;
886 break;
887 }
888 }
889 if (gid != 0) {
890 drop_standby(gid);
891 changed = true;
892 } else {
893 ss << "Daemon not found '" << who << "', already failed?";
894 }
895 }
896 } else {
897 if (pending_map.active_gid == gid) {
898 drop_active();
899 changed = true;
900 } else if (pending_map.standbys.count(gid) > 0) {
901 drop_standby(gid);
902 changed = true;
903 } else {
904 ss << "Daemon not found '" << gid << "', already failed?";
905 }
906 }
907
908 if (changed && pending_map.active_gid == 0) {
909 promote_standby();
910 }
224ce89b
WB
911 } else if (prefix == "mgr module enable") {
912 string module;
913 cmd_getval(g_ceph_context, cmdmap, "module", module);
914 if (module.empty()) {
915 r = -EINVAL;
916 goto out;
917 }
918 string force;
919 cmd_getval(g_ceph_context, cmdmap, "force", force);
920 if (!pending_map.all_support_module(module) &&
921 force != "--force") {
922 ss << "all mgr daemons do not support module '" << module << "', pass "
923 << "--force to force enablement";
924 r = -ENOENT;
925 goto out;
926 }
927 pending_map.modules.insert(module);
928 } else if (prefix == "mgr module disable") {
929 string module;
930 cmd_getval(g_ceph_context, cmdmap, "module", module);
931 if (module.empty()) {
932 r = -EINVAL;
933 goto out;
934 }
935 pending_map.modules.erase(module);
7c673cae 936 } else {
224ce89b 937 ss << "Command '" << prefix << "' not implemented!";
7c673cae
FG
938 r = -ENOSYS;
939 }
940
224ce89b 941out:
7c673cae
FG
942 dout(4) << __func__ << " done, r=" << r << dendl;
943 /* Compose response */
944 string rs;
945 getline(ss, rs);
946
947 if (r >= 0) {
948 // success.. delay reply
949 wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, r, rs,
950 get_last_committed() + 1));
951 return true;
952 } else {
953 // reply immediately
954 mon->reply_command(op, r, rs, rdata, get_last_committed());
955 return false;
956 }
957}
958
959void MgrMonitor::init()
960{
31f18b77 961 if (digest_event == nullptr) {
7c673cae
FG
962 send_digests(); // To get it to schedule its own event
963 }
964}
965
966void MgrMonitor::on_shutdown()
967{
31f18b77 968 cancel_timer();
7c673cae
FG
969}
970
c07f9fc5
FG
971int MgrMonitor::load_metadata(const string& name, std::map<string, string>& m,
972 ostream *err)
973{
974 bufferlist bl;
975 int r = mon->store->get(MGR_METADATA_PREFIX, name, bl);
976 if (r < 0)
977 return r;
978 try {
979 bufferlist::iterator p = bl.begin();
980 ::decode(m, p);
981 }
982 catch (buffer::error& e) {
983 if (err)
984 *err << "mgr." << name << " metadata is corrupt";
985 return -EIO;
986 }
987 return 0;
988}
989
990void MgrMonitor::count_metadata(const string& field, std::map<string,int> *out)
991{
992 std::set<string> ls = map.get_all_names();
993 for (auto& name : ls) {
994 std::map<string,string> meta;
995 load_metadata(name, meta, nullptr);
996 auto p = meta.find(field);
997 if (p == meta.end()) {
998 (*out)["unknown"]++;
999 } else {
1000 (*out)[p->second]++;
1001 }
1002 }
1003}
1004
1005void MgrMonitor::count_metadata(const string& field, Formatter *f)
1006{
1007 std::map<string,int> by_val;
1008 count_metadata(field, &by_val);
1009 f->open_object_section(field.c_str());
1010 for (auto& p : by_val) {
1011 f->dump_int(p.first.c_str(), p.second);
1012 }
1013 f->close_section();
1014}
1015
1016int MgrMonitor::dump_metadata(const string& name, Formatter *f, ostream *err)
1017{
1018 std::map<string,string> m;
1019 if (int r = load_metadata(name, m, err))
1020 return r;
1021 for (auto& p : m) {
1022 f->dump_string(p.first.c_str(), p.second);
1023 }
1024 return 0;
1025}
31f18b77 1026
d2e6a577
FG
1027const std::vector<MonCommand> &MgrMonitor::get_command_descs() const
1028{
1029 if (command_descs.empty()) {
1030 // must have just upgraded; fallback to static commands
1031 return mgr_commands;
1032 } else {
1033 return command_descs;
1034 }
1035}