]> git.proxmox.com Git - ceph.git/blame - ceph/src/mon/MgrMonitor.cc
update sources to 12.2.2
[ceph.git] / ceph / src / mon / MgrMonitor.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2016 John Spray <john.spray@redhat.com>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 */
13
224ce89b
WB
14#include <boost/tokenizer.hpp>
15
7c673cae
FG
16#include "messages/MMgrBeacon.h"
17#include "messages/MMgrMap.h"
18#include "messages/MMgrDigest.h"
19
31f18b77 20#include "PGStatService.h"
7c673cae
FG
21#include "include/stringify.h"
22#include "mgr/MgrContext.h"
c07f9fc5 23#include "mgr/mgr_commands.h"
7c673cae
FG
24#include "OSDMonitor.h"
25
26#include "MgrMonitor.h"
27
c07f9fc5
FG
28#define MGR_METADATA_PREFIX "mgr_metadata"
29
7c673cae
FG
30#define dout_subsys ceph_subsys_mon
31#undef dout_prefix
32#define dout_prefix _prefix(_dout, mon, map)
33static ostream& _prefix(std::ostream *_dout, Monitor *mon,
34 const MgrMap& mgrmap) {
35 return *_dout << "mon." << mon->name << "@" << mon->rank
36 << "(" << mon->get_state_name()
37 << ").mgr e" << mgrmap.get_epoch() << " ";
38}
39
c07f9fc5
FG
40// Prefix for mon store of active mgr's command descriptions
41const static std::string command_descs_prefix = "mgr_command_descs";
42
31f18b77 43
7c673cae
FG
44void MgrMonitor::create_initial()
45{
3efd9988
FG
46 // Take a local copy of initial_modules for tokenizer to iterate over.
47 auto initial_modules = g_conf->get_val<std::string>("mgr_initial_modules");
48 boost::tokenizer<> tok(initial_modules);
224ce89b
WB
49 for (auto& m : tok) {
50 pending_map.modules.insert(m);
51 }
c07f9fc5
FG
52 pending_command_descs = mgr_commands;
53 dout(10) << __func__ << " initial modules " << pending_map.modules
54 << ", " << pending_command_descs.size() << " commands"
55 << dendl;
7c673cae
FG
56}
57
3efd9988
FG
58void MgrMonitor::get_store_prefixes(std::set<string>& s)
59{
60 s.insert(service_name);
61 s.insert(command_descs_prefix);
62 s.insert(MGR_METADATA_PREFIX);
63}
64
7c673cae
FG
65void MgrMonitor::update_from_paxos(bool *need_bootstrap)
66{
67 version_t version = get_last_committed();
68 if (version != map.epoch) {
69 dout(4) << "loading version " << version << dendl;
70
71 bufferlist bl;
72 int err = get_version(version, bl);
73 assert(err == 0);
74
c07f9fc5
FG
75 bool old_available = map.get_available();
76 uint64_t old_gid = map.get_active_gid();
77
7c673cae
FG
78 bufferlist::iterator p = bl.begin();
79 map.decode(p);
80
81 dout(4) << "active server: " << map.active_addr
82 << "(" << map.active_gid << ")" << dendl;
83
224ce89b
WB
84 ever_had_active_mgr = get_value("ever_had_active_mgr");
85
86 load_health();
87
7c673cae
FG
88 if (map.available) {
89 first_seen_inactive = utime_t();
90 } else {
91 first_seen_inactive = ceph_clock_now();
92 }
93
94 check_subs();
c07f9fc5
FG
95
96 if (version == 1
3efd9988
FG
97 || command_descs.empty()
98 || (map.get_available()
99 && (!old_available || old_gid != map.get_active_gid()))) {
c07f9fc5
FG
100 dout(4) << "mkfs or daemon transitioned to available, loading commands"
101 << dendl;
102 bufferlist loaded_commands;
103 int r = mon->store->get(command_descs_prefix, "", loaded_commands);
104 if (r < 0) {
105 derr << "Failed to load mgr commands: " << cpp_strerror(r) << dendl;
106 } else {
107 auto p = loaded_commands.begin();
108 ::decode(command_descs, p);
109 }
110 }
7c673cae
FG
111 }
112
113 // feed our pet MgrClient
114 mon->mgr_client.ms_dispatch(new MMgrMap(map));
115}
116
117void MgrMonitor::create_pending()
118{
119 pending_map = map;
120 pending_map.epoch++;
3efd9988
FG
121
122 if (map.get_epoch() == 1 &&
123 command_descs.empty() &&
124 pending_command_descs.empty()) {
125 // we've been through the initial map and we haven't populated the
126 // command_descs vector. This likely means we came from kraken, where
127 // we wouldn't populate the vector, nor would we write it to disk, on
128 // create_initial().
129 create_initial();
130 }
7c673cae
FG
131}
132
224ce89b
WB
133health_status_t MgrMonitor::should_warn_about_mgr_down()
134{
135 utime_t now = ceph_clock_now();
136 // we warn if
137 // - we've ever had an active mgr, or
138 // - we have osds AND we've exceeded the grace period
139 // which means a new mon cluster and be HEALTH_OK indefinitely as long as
140 // no OSDs are ever created.
141 if (ever_had_active_mgr ||
142 (mon->osdmon()->osdmap.get_num_osds() > 0 &&
3efd9988 143 now > mon->monmap->created + g_conf->get_val<int64_t>("mon_mgr_mkfs_grace"))) {
224ce89b
WB
144 health_status_t level = HEALTH_WARN;
145 if (first_seen_inactive != utime_t() &&
3efd9988 146 now - first_seen_inactive > g_conf->get_val<int64_t>("mon_mgr_inactive_grace")) {
224ce89b
WB
147 level = HEALTH_ERR;
148 }
149 return level;
150 }
151 return HEALTH_OK;
152}
153
7c673cae
FG
154void MgrMonitor::encode_pending(MonitorDBStore::TransactionRef t)
155{
156 dout(10) << __func__ << " " << pending_map << dendl;
157 bufferlist bl;
158 pending_map.encode(bl, mon->get_quorum_con_features());
159 put_version(t, pending_map.epoch, bl);
160 put_last_committed(t, pending_map.epoch);
224ce89b 161
c07f9fc5
FG
162 for (auto& p : pending_metadata) {
163 dout(10) << __func__ << " set metadata for " << p.first << dendl;
164 t->put(MGR_METADATA_PREFIX, p.first, p.second);
165 }
166 for (auto& name : pending_metadata_rm) {
167 dout(10) << __func__ << " rm metadata for " << name << dendl;
168 t->erase(MGR_METADATA_PREFIX, name);
169 }
170 pending_metadata.clear();
171 pending_metadata_rm.clear();
172
224ce89b
WB
173 health_check_map_t next;
174 if (pending_map.active_gid == 0) {
175 auto level = should_warn_about_mgr_down();
176 if (level != HEALTH_OK) {
177 next.add("MGR_DOWN", level, "no active mgr");
178 } else {
179 dout(10) << __func__ << " no health warning (never active and new cluster)"
180 << dendl;
181 }
182 } else {
183 put_value(t, "ever_had_active_mgr", 1);
184 }
185 encode_health(next, t);
c07f9fc5
FG
186
187 if (pending_command_descs.size()) {
188 dout(4) << __func__ << " encoding " << pending_command_descs.size()
189 << " command_descs" << dendl;
190 for (auto& p : pending_command_descs) {
191 p.set_flag(MonCommand::FLAG_MGR);
192 }
193 bufferlist bl;
194 ::encode(pending_command_descs, bl);
195 t->put(command_descs_prefix, "", bl);
196 pending_command_descs.clear();
197 }
7c673cae
FG
198}
199
200bool MgrMonitor::check_caps(MonOpRequestRef op, const uuid_d& fsid)
201{
202 // check permissions
203 MonSession *session = op->get_session();
204 if (!session)
205 return false;
206 if (!session->is_capable("mgr", MON_CAP_X)) {
207 dout(1) << __func__ << " insufficient caps " << session->caps << dendl;
208 return false;
209 }
210 if (fsid != mon->monmap->fsid) {
211 dout(1) << __func__ << " op fsid " << fsid
212 << " != " << mon->monmap->fsid << dendl;
213 return false;
214 }
215 return true;
216}
217
218bool MgrMonitor::preprocess_query(MonOpRequestRef op)
219{
220 PaxosServiceMessage *m = static_cast<PaxosServiceMessage*>(op->get_req());
221 switch (m->get_type()) {
222 case MSG_MGR_BEACON:
223 return preprocess_beacon(op);
224 case MSG_MON_COMMAND:
225 return preprocess_command(op);
226 default:
227 mon->no_reply(op);
228 derr << "Unhandled message type " << m->get_type() << dendl;
229 return true;
230 }
231}
232
233bool MgrMonitor::prepare_update(MonOpRequestRef op)
234{
235 PaxosServiceMessage *m = static_cast<PaxosServiceMessage*>(op->get_req());
236 switch (m->get_type()) {
237 case MSG_MGR_BEACON:
238 return prepare_beacon(op);
239
240 case MSG_MON_COMMAND:
241 return prepare_command(op);
242
243 default:
244 mon->no_reply(op);
245 derr << "Unhandled message type " << m->get_type() << dendl;
246 return true;
247 }
248}
249
250
251
252class C_Updated : public Context {
253 MgrMonitor *mm;
254 MonOpRequestRef op;
255public:
256 C_Updated(MgrMonitor *a, MonOpRequestRef c) :
257 mm(a), op(c) {}
258 void finish(int r) override {
259 if (r >= 0) {
260 // Success
261 } else if (r == -ECANCELED) {
262 mm->mon->no_reply(op);
263 } else {
264 mm->dispatch(op); // try again
265 }
266 }
267};
268
269bool MgrMonitor::preprocess_beacon(MonOpRequestRef op)
270{
271 MMgrBeacon *m = static_cast<MMgrBeacon*>(op->get_req());
272 dout(4) << "beacon from " << m->get_gid() << dendl;
273
274 if (!check_caps(op, m->get_fsid())) {
275 // drop it on the floor
276 return true;
277 }
278
279 // always send this to the leader's prepare_beacon()
280 return false;
281}
282
283bool MgrMonitor::prepare_beacon(MonOpRequestRef op)
284{
285 MMgrBeacon *m = static_cast<MMgrBeacon*>(op->get_req());
286 dout(4) << "beacon from " << m->get_gid() << dendl;
287
288 // See if we are seeing same name, new GID for the active daemon
289 if (m->get_name() == pending_map.active_name
290 && m->get_gid() != pending_map.active_gid)
291 {
292 dout(4) << "Active daemon restart (mgr." << m->get_name() << ")" << dendl;
224ce89b
WB
293 mon->clog->info() << "Active manager daemon " << m->get_name()
294 << " restarted";
7c673cae
FG
295 drop_active();
296 }
297
298 // See if we are seeing same name, new GID for any standbys
299 for (const auto &i : pending_map.standbys) {
300 const StandbyInfo &s = i.second;
301 if (s.name == m->get_name() && s.gid != m->get_gid()) {
302 dout(4) << "Standby daemon restart (mgr." << m->get_name() << ")" << dendl;
224ce89b
WB
303 mon->clog->debug() << "Standby manager daemon " << m->get_name()
304 << " restarted";
7c673cae
FG
305 drop_standby(i.first);
306 break;
307 }
308 }
309
31f18b77 310 last_beacon[m->get_gid()] = ceph::coarse_mono_clock::now();
7c673cae
FG
311
312 // Track whether we modified pending_map
313 bool updated = false;
314
315 if (pending_map.active_gid == m->get_gid()) {
3efd9988
FG
316 if (pending_map.services != m->get_services()) {
317 dout(4) << "updated services from mgr." << m->get_name()
318 << ": " << m->get_services() << dendl;
319 pending_map.services = m->get_services();
320 updated = true;
321 }
322
7c673cae
FG
323 // A beacon from the currently active daemon
324 if (pending_map.active_addr != m->get_server_addr()) {
325 dout(4) << "learned address " << m->get_server_addr()
326 << " (was " << pending_map.active_addr << ")" << dendl;
327 pending_map.active_addr = m->get_server_addr();
328 updated = true;
329 }
330
331 if (pending_map.get_available() != m->get_available()) {
332 dout(4) << "available " << m->get_gid() << dendl;
224ce89b
WB
333 mon->clog->info() << "Manager daemon " << pending_map.active_name
334 << " is now available";
c07f9fc5
FG
335
336 // This beacon should include command descriptions
337 pending_command_descs = m->get_command_descs();
338 if (pending_command_descs.empty()) {
339 // This should not happen, but it also isn't fatal: we just
340 // won't successfully update our list of commands.
341 dout(4) << "First available beacon from " << pending_map.active_name
342 << "(" << m->get_gid() << ") does not include command descs"
343 << dendl;
344 } else {
345 dout(4) << "First available beacon from " << pending_map.active_name
346 << "(" << m->get_gid() << ") includes "
347 << pending_command_descs.size() << " command descs" << dendl;
348 }
349
7c673cae
FG
350 pending_map.available = m->get_available();
351 updated = true;
352 }
224ce89b
WB
353 if (pending_map.available_modules != m->get_available_modules()) {
354 dout(4) << "available_modules " << m->get_available_modules()
355 << " (was " << pending_map.available_modules << ")" << dendl;
356 pending_map.available_modules = m->get_available_modules();
357 updated = true;
358 }
7c673cae
FG
359 } else if (pending_map.active_gid == 0) {
360 // There is no currently active daemon, select this one.
361 if (pending_map.standbys.count(m->get_gid())) {
181888fb 362 drop_standby(m->get_gid(), false);
7c673cae
FG
363 }
364 dout(4) << "selecting new active " << m->get_gid()
365 << " " << m->get_name()
366 << " (was " << pending_map.active_gid << " "
367 << pending_map.active_name << ")" << dendl;
368 pending_map.active_gid = m->get_gid();
369 pending_map.active_name = m->get_name();
224ce89b 370 pending_map.available_modules = m->get_available_modules();
c07f9fc5
FG
371 ::encode(m->get_metadata(), pending_metadata[m->get_name()]);
372 pending_metadata_rm.erase(m->get_name());
224ce89b
WB
373
374 mon->clog->info() << "Activating manager daemon "
375 << pending_map.active_name;
7c673cae
FG
376
377 updated = true;
378 } else {
379 if (pending_map.standbys.count(m->get_gid()) > 0) {
380 dout(10) << "from existing standby " << m->get_gid() << dendl;
224ce89b
WB
381 if (pending_map.standbys[m->get_gid()].available_modules !=
382 m->get_available_modules()) {
383 dout(10) << "existing standby " << m->get_gid() << " available_modules "
384 << m->get_available_modules() << " (was "
385 << pending_map.standbys[m->get_gid()].available_modules << ")"
386 << dendl;
387 pending_map.standbys[m->get_gid()].available_modules =
388 m->get_available_modules();
389 updated = true;
390 }
7c673cae
FG
391 } else {
392 dout(10) << "new standby " << m->get_gid() << dendl;
224ce89b
WB
393 mon->clog->debug() << "Standby manager daemon " << m->get_name()
394 << " started";
c07f9fc5
FG
395 pending_map.standbys[m->get_gid()] = {m->get_gid(), m->get_name(),
396 m->get_available_modules()};
397 ::encode(m->get_metadata(), pending_metadata[m->get_name()]);
398 pending_metadata_rm.erase(m->get_name());
7c673cae
FG
399 updated = true;
400 }
401 }
402
403 if (updated) {
404 dout(4) << "updating map" << dendl;
405 wait_for_finished_proposal(op, new C_Updated(this, op));
406 } else {
407 dout(10) << "no change" << dendl;
408 }
409
410 return updated;
411}
412
413void MgrMonitor::check_subs()
414{
415 const std::string type = "mgrmap";
416 if (mon->session_map.subs.count(type) == 0)
417 return;
418 for (auto sub : *(mon->session_map.subs[type])) {
419 check_sub(sub);
420 }
421}
422
423void MgrMonitor::check_sub(Subscription *sub)
424{
425 if (sub->type == "mgrmap") {
426 if (sub->next <= map.get_epoch()) {
224ce89b
WB
427 dout(20) << "Sending map to subscriber " << sub->session->con
428 << " " << sub->session->con->get_peer_addr() << dendl;
7c673cae
FG
429 sub->session->con->send_message(new MMgrMap(map));
430 if (sub->onetime) {
431 mon->session_map.remove_sub(sub);
432 } else {
433 sub->next = map.get_epoch() + 1;
434 }
435 }
436 } else {
437 assert(sub->type == "mgrdigest");
c07f9fc5
FG
438 if (sub->next == 0) {
439 // new registration; cancel previous timer
440 cancel_timer();
441 }
31f18b77 442 if (digest_event == nullptr) {
7c673cae
FG
443 send_digests();
444 }
445 }
446}
447
448/**
449 * Handle digest subscriptions separately (outside of check_sub) because
450 * they are going to be periodic rather than version-driven.
451 */
452void MgrMonitor::send_digests()
453{
31f18b77
FG
454 cancel_timer();
455
456 if (!is_active()) {
457 return;
458 }
224ce89b 459 dout(10) << __func__ << dendl;
7c673cae
FG
460
461 const std::string type = "mgrdigest";
462 if (mon->session_map.subs.count(type) == 0)
463 return;
464
465 for (auto sub : *(mon->session_map.subs[type])) {
224ce89b
WB
466 dout(10) << __func__ << " sending digest to subscriber " << sub->session->con
467 << " " << sub->session->con->get_peer_addr() << dendl;
7c673cae
FG
468 MMgrDigest *mdigest = new MMgrDigest;
469
470 JSONFormatter f;
224ce89b 471 mon->get_health_status(true, &f, nullptr, nullptr, nullptr);
7c673cae
FG
472 f.flush(mdigest->health_json);
473 f.reset();
474
475 std::ostringstream ss;
476 mon->get_mon_status(&f, ss);
477 f.flush(mdigest->mon_status_json);
478 f.reset();
479
480 sub->session->con->send_message(mdigest);
481 }
482
3efd9988
FG
483 digest_event = mon->timer.add_event_after(
484 g_conf->get_val<int64_t>("mon_mgr_digest_period"),
485 new C_MonContext(mon, [this](int) {
7c673cae 486 send_digests();
3efd9988 487 }));
31f18b77
FG
488}
489
490void MgrMonitor::cancel_timer()
491{
492 if (digest_event) {
493 mon->timer.cancel_event(digest_event);
494 digest_event = nullptr;
495 }
7c673cae
FG
496}
497
498void MgrMonitor::on_active()
499{
224ce89b
WB
500 if (mon->is_leader()) {
501 mon->clog->debug() << "mgrmap e" << map.epoch << ": " << map;
502 }
7c673cae
FG
503}
504
505void MgrMonitor::get_health(
506 list<pair<health_status_t,string> >& summary,
507 list<pair<health_status_t,string> > *detail,
508 CephContext *cct) const
509{
510 // start mgr warnings as soon as the mons and osds are all upgraded,
511 // but before the require_luminous osdmap flag is set. this way the
512 // user gets some warning before the osd flag is set and mgr is
513 // actually *required*.
514 if (!mon->monmap->get_required_features().contains_all(
515 ceph::features::mon::FEATURE_LUMINOUS) ||
516 !HAVE_FEATURE(mon->osdmon()->osdmap.get_up_osd_features(),
517 SERVER_LUMINOUS)) {
518 return;
519 }
520
224ce89b 521 if (map.active_gid == 0) {
7c673cae
FG
522 auto level = HEALTH_WARN;
523 // do not escalate to ERR if they are still upgrading to jewel.
31f18b77 524 if (mon->osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS) {
7c673cae
FG
525 utime_t now = ceph_clock_now();
526 if (first_seen_inactive != utime_t() &&
3efd9988 527 now - first_seen_inactive > g_conf->get_val<int64_t>("mon_mgr_inactive_grace")) {
7c673cae
FG
528 level = HEALTH_ERR;
529 }
530 }
531 summary.push_back(make_pair(level, "no active mgr"));
532 }
533}
534
535void MgrMonitor::tick()
536{
537 if (!is_active() || !mon->is_leader())
538 return;
539
31f18b77 540 const auto now = ceph::coarse_mono_clock::now();
3efd9988
FG
541
542 const auto mgr_beacon_grace = std::chrono::seconds(
543 g_conf->get_val<int64_t>("mon_mgr_beacon_grace"));
544
545 // Note that this is the mgr daemon's tick period, not ours (the
546 // beacon is sent with this period).
547 const auto mgr_tick_period = std::chrono::seconds(
548 g_conf->get_val<int64_t>("mgr_tick_period"));
549
550 if (last_tick != ceph::coarse_mono_clock::time_point::min()
551 && (now - last_tick > (mgr_beacon_grace - mgr_tick_period))) {
552 // This case handles either local slowness (calls being delayed
553 // for whatever reason) or cluster election slowness (a long gap
554 // between calls while an election happened)
555 dout(4) << __func__ << ": resetting beacon timeouts due to mon delay "
556 "(slow election?) of " << now - last_tick << " seconds" << dendl;
557 for (auto &i : last_beacon) {
558 i.second = now;
559 }
560 }
561
562 last_tick = now;
7c673cae
FG
563
564 // Populate any missing beacons (i.e. no beacon since MgrMonitor
565 // instantiation) with the current time, so that they will
566 // eventually look laggy if they fail to give us a beacon.
567 if (pending_map.active_gid != 0
568 && last_beacon.count(pending_map.active_gid) == 0) {
569 last_beacon[pending_map.active_gid] = now;
570 }
571 for (auto s : pending_map.standbys) {
572 if (last_beacon.count(s.first) == 0) {
573 last_beacon[s.first] = now;
574 }
575 }
576
577 // Cull standbys first so that any remaining standbys
578 // will be eligible to take over from the active if we cull him.
579 std::list<uint64_t> dead_standbys;
3efd9988 580 const auto cutoff = now - mgr_beacon_grace;
7c673cae
FG
581 for (const auto &i : pending_map.standbys) {
582 auto last_beacon_time = last_beacon.at(i.first);
583 if (last_beacon_time < cutoff) {
584 dead_standbys.push_back(i.first);
585 }
586 }
587
588 bool propose = false;
589
590 for (auto i : dead_standbys) {
591 dout(4) << "Dropping laggy standby " << i << dendl;
592 drop_standby(i);
593 propose = true;
594 }
595
596 if (pending_map.active_gid != 0
597 && last_beacon.at(pending_map.active_gid) < cutoff) {
224ce89b 598 const std::string old_active_name = pending_map.active_name;
7c673cae
FG
599 drop_active();
600 propose = true;
601 dout(4) << "Dropping active" << pending_map.active_gid << dendl;
602 if (promote_standby()) {
603 dout(4) << "Promoted standby " << pending_map.active_gid << dendl;
224ce89b
WB
604 mon->clog->info() << "Manager daemon " << old_active_name
605 << " is unresponsive, replacing it with standby"
606 << " daemon " << pending_map.active_name;
7c673cae
FG
607 } else {
608 dout(4) << "Active is laggy but have no standbys to replace it" << dendl;
224ce89b
WB
609 mon->clog->warn() << "Manager daemon " << old_active_name
610 << " is unresponsive. No standby daemons available.";
7c673cae
FG
611 }
612 } else if (pending_map.active_gid == 0) {
613 if (promote_standby()) {
614 dout(4) << "Promoted standby " << pending_map.active_gid << dendl;
224ce89b 615 mon->clog->info() << "Activating manager daemon "
3efd9988 616 << pending_map.active_name;
7c673cae
FG
617 propose = true;
618 }
619 }
620
224ce89b 621 if (!pending_map.available &&
c07f9fc5 622 !ever_had_active_mgr &&
224ce89b 623 should_warn_about_mgr_down() != HEALTH_OK) {
3efd9988
FG
624 dout(10) << " exceeded mon_mgr_mkfs_grace "
625 << g_conf->get_val<int64_t>("mon_mgr_mkfs_grace")
626 << " seconds" << dendl;
224ce89b
WB
627 propose = true;
628 }
629
7c673cae
FG
630 if (propose) {
631 propose_pending();
632 }
633}
634
224ce89b
WB
635void MgrMonitor::on_restart()
636{
637 // Clear out the leader-specific state.
638 last_beacon.clear();
3efd9988 639 last_tick = ceph::coarse_mono_clock::now();
224ce89b
WB
640}
641
642
7c673cae
FG
643bool MgrMonitor::promote_standby()
644{
645 assert(pending_map.active_gid == 0);
646 if (pending_map.standbys.size()) {
647 // Promote a replacement (arbitrary choice of standby)
648 auto replacement_gid = pending_map.standbys.begin()->first;
649 pending_map.active_gid = replacement_gid;
650 pending_map.active_name = pending_map.standbys.at(replacement_gid).name;
651 pending_map.available = false;
652 pending_map.active_addr = entity_addr_t();
653
181888fb
FG
654 drop_standby(replacement_gid, false);
655
7c673cae
FG
656 return true;
657 } else {
658 return false;
659 }
660}
661
662void MgrMonitor::drop_active()
663{
664 if (last_beacon.count(pending_map.active_gid) > 0) {
665 last_beacon.erase(pending_map.active_gid);
666 }
667
c07f9fc5
FG
668 pending_metadata_rm.insert(pending_map.active_name);
669 pending_metadata.erase(pending_map.active_name);
7c673cae
FG
670 pending_map.active_name = "";
671 pending_map.active_gid = 0;
672 pending_map.available = false;
673 pending_map.active_addr = entity_addr_t();
3efd9988 674 pending_map.services.clear();
224ce89b
WB
675
676 // So that when new active mgr subscribes to mgrdigest, it will
677 // get an immediate response instead of waiting for next timer
678 cancel_timer();
7c673cae
FG
679}
680
181888fb 681void MgrMonitor::drop_standby(uint64_t gid, bool drop_meta)
7c673cae 682{
181888fb
FG
683 if (drop_meta) {
684 pending_metadata_rm.insert(pending_map.standbys[gid].name);
685 pending_metadata.erase(pending_map.standbys[gid].name);
686 }
7c673cae
FG
687 pending_map.standbys.erase(gid);
688 if (last_beacon.count(gid) > 0) {
689 last_beacon.erase(gid);
690 }
7c673cae
FG
691}
692
693bool MgrMonitor::preprocess_command(MonOpRequestRef op)
694{
31f18b77
FG
695 MMonCommand *m = static_cast<MMonCommand*>(op->get_req());
696 std::stringstream ss;
697 bufferlist rdata;
698
699 std::map<std::string, cmd_vartype> cmdmap;
700 if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) {
701 string rs = ss.str();
702 mon->reply_command(op, -EINVAL, rs, rdata, get_last_committed());
703 return true;
704 }
7c673cae 705
31f18b77
FG
706 MonSession *session = m->get_session();
707 if (!session) {
708 mon->reply_command(op, -EACCES, "access denied", rdata,
709 get_last_committed());
710 return true;
711 }
712
713 string format;
714 cmd_getval(g_ceph_context, cmdmap, "format", format, string("json-pretty"));
715 boost::scoped_ptr<Formatter> f(Formatter::create(format));
716
717 string prefix;
718 cmd_getval(g_ceph_context, cmdmap, "prefix", prefix);
719 int r = 0;
720
721 if (prefix == "mgr dump") {
722 int64_t epoch = 0;
723 cmd_getval(g_ceph_context, cmdmap, "epoch", epoch, (int64_t)map.get_epoch());
724 if (epoch == (int64_t)map.get_epoch()) {
725 f->dump_object("mgrmap", map);
726 } else {
727 bufferlist bl;
728 int err = get_version(epoch, bl);
729 if (err == -ENOENT) {
730 r = -ENOENT;
731 ss << "there is no map for epoch " << epoch;
732 goto reply;
733 }
734 MgrMap m;
735 auto p = bl.begin();
736 m.decode(p);
737 f->dump_object("mgrmap", m);
738 }
739 f->flush(rdata);
224ce89b 740 } else if (prefix == "mgr module ls") {
3efd9988
FG
741 f->open_object_section("modules");
742 {
743 f->open_array_section("enabled_modules");
744 for (auto& p : map.modules) {
745 f->dump_string("module", p);
746 }
747 f->close_section();
748 f->open_array_section("disabled_modules");
749 for (auto& p : map.available_modules) {
750 if (map.modules.count(p) == 0) {
751 f->dump_string("module", p);
752 }
753 }
754 f->close_section();
755 }
756 f->close_section();
757 f->flush(rdata);
758 } else if (prefix == "mgr services") {
759 f->open_object_section("services");
760 for (const auto &i : map.services) {
761 f->dump_string(i.first.c_str(), i.second);
224ce89b
WB
762 }
763 f->close_section();
764 f->flush(rdata);
c07f9fc5
FG
765 } else if (prefix == "mgr metadata") {
766 string name;
767 cmd_getval(g_ceph_context, cmdmap, "id", name);
768 if (name.size() > 0 && !map.have_name(name)) {
769 ss << "mgr." << name << " does not exist";
770 r = -ENOENT;
771 goto reply;
772 }
773 string format;
774 cmd_getval(g_ceph_context, cmdmap, "format", format);
775 boost::scoped_ptr<Formatter> f(Formatter::create(format, "json-pretty", "json-pretty"));
776 if (name.size()) {
777 f->open_object_section("mgr_metadata");
778 f->dump_string("id", name);
779 r = dump_metadata(name, f.get(), &ss);
780 if (r < 0)
781 goto reply;
782 f->close_section();
783 } else {
784 r = 0;
785 f->open_array_section("mgr_metadata");
786 for (auto& i : map.get_all_names()) {
787 f->open_object_section("mgr");
788 f->dump_string("id", i);
789 r = dump_metadata(i, f.get(), NULL);
790 if (r == -EINVAL || r == -ENOENT) {
791 // Drop error, continue to get other daemons' metadata
792 dout(4) << "No metadata for mgr." << i << dendl;
793 r = 0;
794 } else if (r < 0) {
795 // Unexpected error
796 goto reply;
797 }
798 f->close_section();
799 }
800 f->close_section();
801 }
802 f->flush(rdata);
803 } else if (prefix == "mgr versions") {
804 if (!f)
805 f.reset(Formatter::create("json-pretty"));
806 count_metadata("ceph_version", f.get());
807 f->flush(rdata);
808 r = 0;
809 } else if (prefix == "mgr count-metadata") {
810 if (!f)
811 f.reset(Formatter::create("json-pretty"));
812 string field;
813 cmd_getval(g_ceph_context, cmdmap, "property", field);
814 count_metadata(field, f.get());
815 f->flush(rdata);
816 r = 0;
31f18b77
FG
817 } else {
818 return false;
819 }
820
821reply:
822 string rs;
823 getline(ss, rs);
824 mon->reply_command(op, r, rs, rdata, get_last_committed());
825 return true;
7c673cae
FG
826}
827
828bool MgrMonitor::prepare_command(MonOpRequestRef op)
829{
830 MMonCommand *m = static_cast<MMonCommand*>(op->get_req());
831
832 std::stringstream ss;
833 bufferlist rdata;
834
835 std::map<std::string, cmd_vartype> cmdmap;
836 if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) {
837 string rs = ss.str();
838 mon->reply_command(op, -EINVAL, rs, rdata, get_last_committed());
839 return true;
840 }
841
842 MonSession *session = m->get_session();
843 if (!session) {
844 mon->reply_command(op, -EACCES, "access denied", rdata, get_last_committed());
845 return true;
846 }
847
224ce89b
WB
848 string format;
849 cmd_getval(g_ceph_context, cmdmap, "format", format, string("plain"));
850 boost::scoped_ptr<Formatter> f(Formatter::create(format));
851
7c673cae
FG
852 string prefix;
853 cmd_getval(g_ceph_context, cmdmap, "prefix", prefix);
854
855 int r = 0;
856
857 if (prefix == "mgr fail") {
858 string who;
859 cmd_getval(g_ceph_context, cmdmap, "who", who);
860
861 std::string err;
862 uint64_t gid = strict_strtol(who.c_str(), 10, &err);
863 bool changed = false;
864 if (!err.empty()) {
865 // Does not parse as a gid, treat it as a name
866 if (pending_map.active_name == who) {
867 drop_active();
868 changed = true;
869 } else {
870 gid = 0;
871 for (const auto &i : pending_map.standbys) {
872 if (i.second.name == who) {
873 gid = i.first;
874 break;
875 }
876 }
877 if (gid != 0) {
878 drop_standby(gid);
879 changed = true;
880 } else {
881 ss << "Daemon not found '" << who << "', already failed?";
882 }
883 }
884 } else {
885 if (pending_map.active_gid == gid) {
886 drop_active();
887 changed = true;
888 } else if (pending_map.standbys.count(gid) > 0) {
889 drop_standby(gid);
890 changed = true;
891 } else {
892 ss << "Daemon not found '" << gid << "', already failed?";
893 }
894 }
895
896 if (changed && pending_map.active_gid == 0) {
897 promote_standby();
898 }
224ce89b
WB
899 } else if (prefix == "mgr module enable") {
900 string module;
901 cmd_getval(g_ceph_context, cmdmap, "module", module);
902 if (module.empty()) {
903 r = -EINVAL;
904 goto out;
905 }
906 string force;
907 cmd_getval(g_ceph_context, cmdmap, "force", force);
908 if (!pending_map.all_support_module(module) &&
909 force != "--force") {
910 ss << "all mgr daemons do not support module '" << module << "', pass "
911 << "--force to force enablement";
912 r = -ENOENT;
913 goto out;
914 }
915 pending_map.modules.insert(module);
916 } else if (prefix == "mgr module disable") {
917 string module;
918 cmd_getval(g_ceph_context, cmdmap, "module", module);
919 if (module.empty()) {
920 r = -EINVAL;
921 goto out;
922 }
923 pending_map.modules.erase(module);
7c673cae 924 } else {
224ce89b 925 ss << "Command '" << prefix << "' not implemented!";
7c673cae
FG
926 r = -ENOSYS;
927 }
928
224ce89b 929out:
7c673cae
FG
930 dout(4) << __func__ << " done, r=" << r << dendl;
931 /* Compose response */
932 string rs;
933 getline(ss, rs);
934
935 if (r >= 0) {
936 // success.. delay reply
937 wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, r, rs,
938 get_last_committed() + 1));
939 return true;
940 } else {
941 // reply immediately
942 mon->reply_command(op, r, rs, rdata, get_last_committed());
943 return false;
944 }
945}
946
947void MgrMonitor::init()
948{
31f18b77 949 if (digest_event == nullptr) {
7c673cae
FG
950 send_digests(); // To get it to schedule its own event
951 }
952}
953
954void MgrMonitor::on_shutdown()
955{
31f18b77 956 cancel_timer();
7c673cae
FG
957}
958
c07f9fc5
FG
959int MgrMonitor::load_metadata(const string& name, std::map<string, string>& m,
960 ostream *err)
961{
962 bufferlist bl;
963 int r = mon->store->get(MGR_METADATA_PREFIX, name, bl);
964 if (r < 0)
965 return r;
966 try {
967 bufferlist::iterator p = bl.begin();
968 ::decode(m, p);
969 }
970 catch (buffer::error& e) {
971 if (err)
972 *err << "mgr." << name << " metadata is corrupt";
973 return -EIO;
974 }
975 return 0;
976}
977
978void MgrMonitor::count_metadata(const string& field, std::map<string,int> *out)
979{
980 std::set<string> ls = map.get_all_names();
981 for (auto& name : ls) {
982 std::map<string,string> meta;
983 load_metadata(name, meta, nullptr);
984 auto p = meta.find(field);
985 if (p == meta.end()) {
986 (*out)["unknown"]++;
987 } else {
988 (*out)[p->second]++;
989 }
990 }
991}
992
993void MgrMonitor::count_metadata(const string& field, Formatter *f)
994{
995 std::map<string,int> by_val;
996 count_metadata(field, &by_val);
997 f->open_object_section(field.c_str());
998 for (auto& p : by_val) {
999 f->dump_int(p.first.c_str(), p.second);
1000 }
1001 f->close_section();
1002}
1003
1004int MgrMonitor::dump_metadata(const string& name, Formatter *f, ostream *err)
1005{
1006 std::map<string,string> m;
1007 if (int r = load_metadata(name, m, err))
1008 return r;
1009 for (auto& p : m) {
1010 f->dump_string(p.first.c_str(), p.second);
1011 }
1012 return 0;
1013}
31f18b77 1014
d2e6a577
FG
1015const std::vector<MonCommand> &MgrMonitor::get_command_descs() const
1016{
1017 if (command_descs.empty()) {
1018 // must have just upgraded; fallback to static commands
1019 return mgr_commands;
1020 } else {
1021 return command_descs;
1022 }
1023}