]> git.proxmox.com Git - ceph.git/blame - ceph/src/mon/MgrMonitor.cc
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / mon / MgrMonitor.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2016 John Spray <john.spray@redhat.com>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 */
13
224ce89b
WB
14#include <boost/tokenizer.hpp>
15
7c673cae
FG
16#include "messages/MMgrBeacon.h"
17#include "messages/MMgrMap.h"
18#include "messages/MMgrDigest.h"
19
7c673cae
FG
20#include "include/stringify.h"
21#include "mgr/MgrContext.h"
c07f9fc5 22#include "mgr/mgr_commands.h"
7c673cae 23#include "OSDMonitor.h"
11fdf7f2 24#include "ConfigMonitor.h"
7c673cae
FG
25
26#include "MgrMonitor.h"
27
c07f9fc5
FG
28#define MGR_METADATA_PREFIX "mgr_metadata"
29
7c673cae
FG
30#define dout_subsys ceph_subsys_mon
31#undef dout_prefix
32#define dout_prefix _prefix(_dout, mon, map)
33static ostream& _prefix(std::ostream *_dout, Monitor *mon,
34 const MgrMap& mgrmap) {
35 return *_dout << "mon." << mon->name << "@" << mon->rank
36 << "(" << mon->get_state_name()
37 << ").mgr e" << mgrmap.get_epoch() << " ";
38}
39
11fdf7f2
TL
40// the system treats always_on_modules as if they provide built-in functionality
41// by ensuring that they are always enabled.
42const static std::map<uint32_t, std::set<std::string>> always_on_modules = {
43 {
44 CEPH_RELEASE_NAUTILUS, {
45 "crash",
46 "status",
47 "progress",
48 "balancer",
49 "devicehealth",
50 "orchestrator_cli",
51 "volumes",
52 }
53 }
54};
55
c07f9fc5
FG
// Mon store prefix under which the active mgr's command descriptions live.
static const std::string command_descs_prefix("mgr_command_descs");
58
11fdf7f2
TL
59const Option *MgrMonitor::find_module_option(const string& name)
60{
61 // we have two forms of names: "mgr/$module/$option" and
62 // localized "mgr/$module/$instance/$option". normalize to the
63 // former by stripping out $instance.
64 string real_name;
65 if (name.substr(0, 4) != "mgr/") {
66 return nullptr;
67 }
68 auto second_slash = name.find('/', 5);
69 if (second_slash == std::string::npos) {
70 return nullptr;
71 }
72 auto third_slash = name.find('/', second_slash + 1);
73 if (third_slash != std::string::npos) {
74 // drop the $instance part between the second and third slash
75 real_name = name.substr(0, second_slash) + name.substr(third_slash);
76 } else {
77 real_name = name;
78 }
79 auto p = mgr_module_options.find(real_name);
80 if (p != mgr_module_options.end()) {
81 return &p->second;
82 }
83 return nullptr;
84}
31f18b77 85
11fdf7f2 86version_t MgrMonitor::get_trim_to() const
b32b8144 87{
11fdf7f2 88 int64_t max = g_conf().get_val<int64_t>("mon_max_mgrmap_epochs");
b32b8144
FG
89 if (map.epoch > max) {
90 return map.epoch - max;
91 }
92 return 0;
93}
94
7c673cae
FG
95void MgrMonitor::create_initial()
96{
3efd9988 97 // Take a local copy of initial_modules for tokenizer to iterate over.
11fdf7f2 98 auto initial_modules = g_conf().get_val<std::string>("mgr_initial_modules");
3efd9988 99 boost::tokenizer<> tok(initial_modules);
224ce89b
WB
100 for (auto& m : tok) {
101 pending_map.modules.insert(m);
102 }
11fdf7f2 103 pending_map.always_on_modules = always_on_modules;
c07f9fc5
FG
104 pending_command_descs = mgr_commands;
105 dout(10) << __func__ << " initial modules " << pending_map.modules
11fdf7f2
TL
106 << ", always on modules " << pending_map.get_always_on_modules()
107 << ", " << pending_command_descs.size() << " commands"
c07f9fc5 108 << dendl;
7c673cae
FG
109}
110
11fdf7f2 111void MgrMonitor::get_store_prefixes(std::set<string>& s) const
3efd9988
FG
112{
113 s.insert(service_name);
114 s.insert(command_descs_prefix);
115 s.insert(MGR_METADATA_PREFIX);
116}
117
7c673cae
FG
118void MgrMonitor::update_from_paxos(bool *need_bootstrap)
119{
120 version_t version = get_last_committed();
121 if (version != map.epoch) {
122 dout(4) << "loading version " << version << dendl;
123
124 bufferlist bl;
125 int err = get_version(version, bl);
11fdf7f2 126 ceph_assert(err == 0);
7c673cae 127
c07f9fc5
FG
128 bool old_available = map.get_available();
129 uint64_t old_gid = map.get_active_gid();
130
11fdf7f2 131 auto p = bl.cbegin();
7c673cae
FG
132 map.decode(p);
133
11fdf7f2 134 dout(4) << "active server: " << map.active_addrs
7c673cae
FG
135 << "(" << map.active_gid << ")" << dendl;
136
224ce89b
WB
137 ever_had_active_mgr = get_value("ever_had_active_mgr");
138
139 load_health();
140
7c673cae
FG
141 if (map.available) {
142 first_seen_inactive = utime_t();
143 } else {
144 first_seen_inactive = ceph_clock_now();
145 }
146
147 check_subs();
c07f9fc5
FG
148
149 if (version == 1
3efd9988
FG
150 || command_descs.empty()
151 || (map.get_available()
152 && (!old_available || old_gid != map.get_active_gid()))) {
c07f9fc5
FG
153 dout(4) << "mkfs or daemon transitioned to available, loading commands"
154 << dendl;
155 bufferlist loaded_commands;
156 int r = mon->store->get(command_descs_prefix, "", loaded_commands);
157 if (r < 0) {
158 derr << "Failed to load mgr commands: " << cpp_strerror(r) << dendl;
159 } else {
11fdf7f2
TL
160 auto p = loaded_commands.cbegin();
161 decode(command_descs, p);
162 }
163 }
164 }
165
166 // populate module options
167 mgr_module_options.clear();
168 misc_option_strings.clear();
169 for (auto& i : map.available_modules) {
170 for (auto& j : i.module_options) {
171 string name = string("mgr/") + i.name + "/" + j.second.name;
172 auto p = mgr_module_options.emplace(
173 name,
174 Option(name, static_cast<Option::type_t>(j.second.type),
175 static_cast<Option::level_t>(j.second.level)));
176 Option& opt = p.first->second;
177 opt.set_flags(static_cast<Option::flag_t>(j.second.flags));
178 opt.set_flag(Option::FLAG_MGR);
179 opt.set_description(j.second.desc.c_str());
180 opt.set_long_description(j.second.long_desc.c_str());
181 for (auto& k : j.second.tags) {
182 opt.add_tag(k.c_str());
183 }
184 for (auto& k : j.second.see_also) {
185 if (i.module_options.count(k)) {
186 // it's another module option
187 misc_option_strings.push_back(string("mgr/") + i.name + "/" + k);
188 opt.add_see_also(misc_option_strings.back().c_str());
189 } else {
190 // it's a native option
191 opt.add_see_also(k.c_str());
192 }
193 }
194 Option::value_t v, v2;
195 std::string err;
196 if (j.second.default_value.size() &&
197 !opt.parse_value(j.second.default_value, &v, &err)) {
198 opt.set_default(v);
199 }
200 if (j.second.min.size() &&
201 j.second.max.size() &&
202 !opt.parse_value(j.second.min, &v, &err) &&
203 !opt.parse_value(j.second.max, &v2, &err)) {
204 opt.set_min_max(v, v2);
c07f9fc5 205 }
11fdf7f2
TL
206 std::vector<const char *> enum_allowed;
207 for (auto& k : j.second.enum_allowed) {
208 enum_allowed.push_back(k.c_str());
209 }
210 opt.set_enum_allowed(enum_allowed);
c07f9fc5 211 }
7c673cae 212 }
11fdf7f2
TL
213 // force ConfigMonitor to refresh, since it uses const Option *
214 // pointers into our mgr_module_options (which we just rebuilt).
215 mon->configmon()->load_config();
7c673cae 216
11fdf7f2
TL
217 if (!mon->is_init()) {
218 // feed our pet MgrClient, unless we are in Monitor::[pre]init()
219 prime_mgr_client();
220 }
221}
222
223void MgrMonitor::prime_mgr_client()
224{
225 dout(10) << __func__ << dendl;
7c673cae
FG
226 mon->mgr_client.ms_dispatch(new MMgrMap(map));
227}
228
229void MgrMonitor::create_pending()
230{
231 pending_map = map;
232 pending_map.epoch++;
233}
234
224ce89b
WB
235health_status_t MgrMonitor::should_warn_about_mgr_down()
236{
237 utime_t now = ceph_clock_now();
238 // we warn if
239 // - we've ever had an active mgr, or
240 // - we have osds AND we've exceeded the grace period
241 // which means a new mon cluster and be HEALTH_OK indefinitely as long as
242 // no OSDs are ever created.
243 if (ever_had_active_mgr ||
244 (mon->osdmon()->osdmap.get_num_osds() > 0 &&
11fdf7f2 245 now > mon->monmap->created + g_conf().get_val<int64_t>("mon_mgr_mkfs_grace"))) {
224ce89b
WB
246 health_status_t level = HEALTH_WARN;
247 if (first_seen_inactive != utime_t() &&
11fdf7f2 248 now - first_seen_inactive > g_conf().get_val<int64_t>("mon_mgr_inactive_grace")) {
224ce89b
WB
249 level = HEALTH_ERR;
250 }
251 return level;
252 }
253 return HEALTH_OK;
254}
255
11fdf7f2
TL
256void MgrMonitor::post_paxos_update()
257{
258 // are we handling digest subscribers?
259 if (digest_event) {
260 bool send = false;
261 if (prev_health_checks.empty()) {
262 prev_health_checks.resize(mon->paxos_service.size());
263 send = true;
264 }
265 ceph_assert(prev_health_checks.size() == mon->paxos_service.size());
266 for (auto i = 0u; i < prev_health_checks.size(); i++) {
267 const auto& curr = mon->paxos_service[i]->get_health_checks();
268 if (!send && curr != prev_health_checks[i]) {
269 send = true;
270 }
271 prev_health_checks[i] = curr;
272 }
273 if (send) {
274 if (is_active()) {
275 send_digests();
276 } else {
277 cancel_timer();
278 wait_for_active_ctx(new C_MonContext(mon, [this](int) {
279 send_digests();
280 }));
281 }
282 }
283 }
284}
285
7c673cae
FG
286void MgrMonitor::encode_pending(MonitorDBStore::TransactionRef t)
287{
288 dout(10) << __func__ << " " << pending_map << dendl;
289 bufferlist bl;
290 pending_map.encode(bl, mon->get_quorum_con_features());
291 put_version(t, pending_map.epoch, bl);
292 put_last_committed(t, pending_map.epoch);
224ce89b 293
c07f9fc5
FG
294 for (auto& p : pending_metadata) {
295 dout(10) << __func__ << " set metadata for " << p.first << dendl;
296 t->put(MGR_METADATA_PREFIX, p.first, p.second);
297 }
298 for (auto& name : pending_metadata_rm) {
299 dout(10) << __func__ << " rm metadata for " << name << dendl;
300 t->erase(MGR_METADATA_PREFIX, name);
301 }
302 pending_metadata.clear();
303 pending_metadata_rm.clear();
304
224ce89b
WB
305 health_check_map_t next;
306 if (pending_map.active_gid == 0) {
307 auto level = should_warn_about_mgr_down();
308 if (level != HEALTH_OK) {
309 next.add("MGR_DOWN", level, "no active mgr");
310 } else {
311 dout(10) << __func__ << " no health warning (never active and new cluster)"
312 << dendl;
313 }
314 } else {
315 put_value(t, "ever_had_active_mgr", 1);
316 }
317 encode_health(next, t);
c07f9fc5
FG
318
319 if (pending_command_descs.size()) {
320 dout(4) << __func__ << " encoding " << pending_command_descs.size()
321 << " command_descs" << dendl;
322 for (auto& p : pending_command_descs) {
323 p.set_flag(MonCommand::FLAG_MGR);
324 }
325 bufferlist bl;
11fdf7f2 326 encode(pending_command_descs, bl);
c07f9fc5
FG
327 t->put(command_descs_prefix, "", bl);
328 pending_command_descs.clear();
329 }
7c673cae
FG
330}
331
332bool MgrMonitor::check_caps(MonOpRequestRef op, const uuid_d& fsid)
333{
334 // check permissions
335 MonSession *session = op->get_session();
336 if (!session)
337 return false;
338 if (!session->is_capable("mgr", MON_CAP_X)) {
339 dout(1) << __func__ << " insufficient caps " << session->caps << dendl;
340 return false;
341 }
342 if (fsid != mon->monmap->fsid) {
343 dout(1) << __func__ << " op fsid " << fsid
344 << " != " << mon->monmap->fsid << dendl;
345 return false;
346 }
347 return true;
348}
349
350bool MgrMonitor::preprocess_query(MonOpRequestRef op)
351{
352 PaxosServiceMessage *m = static_cast<PaxosServiceMessage*>(op->get_req());
353 switch (m->get_type()) {
354 case MSG_MGR_BEACON:
355 return preprocess_beacon(op);
356 case MSG_MON_COMMAND:
f64942e4
AA
357 try {
358 return preprocess_command(op);
11fdf7f2 359 } catch (const bad_cmd_get& e) {
f64942e4
AA
360 bufferlist bl;
361 mon->reply_command(op, -EINVAL, e.what(), bl, get_last_committed());
362 return true;
363 }
364
7c673cae
FG
365 default:
366 mon->no_reply(op);
367 derr << "Unhandled message type " << m->get_type() << dendl;
368 return true;
369 }
370}
371
372bool MgrMonitor::prepare_update(MonOpRequestRef op)
373{
374 PaxosServiceMessage *m = static_cast<PaxosServiceMessage*>(op->get_req());
375 switch (m->get_type()) {
376 case MSG_MGR_BEACON:
377 return prepare_beacon(op);
378
379 case MSG_MON_COMMAND:
f64942e4
AA
380 try {
381 return prepare_command(op);
11fdf7f2 382 } catch (const bad_cmd_get& e) {
f64942e4
AA
383 bufferlist bl;
384 mon->reply_command(op, -EINVAL, e.what(), bl, get_last_committed());
385 return true;
386 }
7c673cae
FG
387
388 default:
389 mon->no_reply(op);
390 derr << "Unhandled message type " << m->get_type() << dendl;
391 return true;
392 }
393}
394
395
396
397class C_Updated : public Context {
398 MgrMonitor *mm;
399 MonOpRequestRef op;
400public:
401 C_Updated(MgrMonitor *a, MonOpRequestRef c) :
402 mm(a), op(c) {}
403 void finish(int r) override {
404 if (r >= 0) {
405 // Success
406 } else if (r == -ECANCELED) {
407 mm->mon->no_reply(op);
408 } else {
409 mm->dispatch(op); // try again
410 }
411 }
412};
413
414bool MgrMonitor::preprocess_beacon(MonOpRequestRef op)
415{
416 MMgrBeacon *m = static_cast<MMgrBeacon*>(op->get_req());
94b18763 417 mon->no_reply(op); // we never reply to beacons
7c673cae
FG
418 dout(4) << "beacon from " << m->get_gid() << dendl;
419
420 if (!check_caps(op, m->get_fsid())) {
421 // drop it on the floor
422 return true;
423 }
424
425 // always send this to the leader's prepare_beacon()
426 return false;
427}
428
429bool MgrMonitor::prepare_beacon(MonOpRequestRef op)
430{
431 MMgrBeacon *m = static_cast<MMgrBeacon*>(op->get_req());
432 dout(4) << "beacon from " << m->get_gid() << dendl;
433
434 // See if we are seeing same name, new GID for the active daemon
435 if (m->get_name() == pending_map.active_name
436 && m->get_gid() != pending_map.active_gid)
437 {
438 dout(4) << "Active daemon restart (mgr." << m->get_name() << ")" << dendl;
224ce89b
WB
439 mon->clog->info() << "Active manager daemon " << m->get_name()
440 << " restarted";
7c673cae
FG
441 drop_active();
442 }
443
444 // See if we are seeing same name, new GID for any standbys
445 for (const auto &i : pending_map.standbys) {
11fdf7f2 446 const MgrMap::StandbyInfo &s = i.second;
7c673cae
FG
447 if (s.name == m->get_name() && s.gid != m->get_gid()) {
448 dout(4) << "Standby daemon restart (mgr." << m->get_name() << ")" << dendl;
224ce89b
WB
449 mon->clog->debug() << "Standby manager daemon " << m->get_name()
450 << " restarted";
7c673cae
FG
451 drop_standby(i.first);
452 break;
453 }
454 }
455
31f18b77 456 last_beacon[m->get_gid()] = ceph::coarse_mono_clock::now();
7c673cae
FG
457
458 // Track whether we modified pending_map
459 bool updated = false;
460
461 if (pending_map.active_gid == m->get_gid()) {
3efd9988
FG
462 if (pending_map.services != m->get_services()) {
463 dout(4) << "updated services from mgr." << m->get_name()
464 << ": " << m->get_services() << dendl;
465 pending_map.services = m->get_services();
466 updated = true;
467 }
468
7c673cae 469 // A beacon from the currently active daemon
11fdf7f2
TL
470 if (pending_map.active_addrs != m->get_server_addrs()) {
471 dout(4) << "learned address " << m->get_server_addrs()
472 << " (was " << pending_map.active_addrs << ")" << dendl;
473 pending_map.active_addrs = m->get_server_addrs();
7c673cae
FG
474 updated = true;
475 }
476
477 if (pending_map.get_available() != m->get_available()) {
478 dout(4) << "available " << m->get_gid() << dendl;
224ce89b
WB
479 mon->clog->info() << "Manager daemon " << pending_map.active_name
480 << " is now available";
c07f9fc5
FG
481
482 // This beacon should include command descriptions
483 pending_command_descs = m->get_command_descs();
484 if (pending_command_descs.empty()) {
485 // This should not happen, but it also isn't fatal: we just
486 // won't successfully update our list of commands.
487 dout(4) << "First available beacon from " << pending_map.active_name
488 << "(" << m->get_gid() << ") does not include command descs"
489 << dendl;
490 } else {
491 dout(4) << "First available beacon from " << pending_map.active_name
492 << "(" << m->get_gid() << ") includes "
493 << pending_command_descs.size() << " command descs" << dendl;
494 }
495
7c673cae
FG
496 pending_map.available = m->get_available();
497 updated = true;
498 }
224ce89b
WB
499 if (pending_map.available_modules != m->get_available_modules()) {
500 dout(4) << "available_modules " << m->get_available_modules()
501 << " (was " << pending_map.available_modules << ")" << dendl;
502 pending_map.available_modules = m->get_available_modules();
503 updated = true;
504 }
7c673cae
FG
505 } else if (pending_map.active_gid == 0) {
506 // There is no currently active daemon, select this one.
507 if (pending_map.standbys.count(m->get_gid())) {
181888fb 508 drop_standby(m->get_gid(), false);
7c673cae
FG
509 }
510 dout(4) << "selecting new active " << m->get_gid()
511 << " " << m->get_name()
512 << " (was " << pending_map.active_gid << " "
513 << pending_map.active_name << ")" << dendl;
514 pending_map.active_gid = m->get_gid();
515 pending_map.active_name = m->get_name();
11fdf7f2 516 pending_map.active_change = ceph_clock_now();
224ce89b 517 pending_map.available_modules = m->get_available_modules();
11fdf7f2 518 encode(m->get_metadata(), pending_metadata[m->get_name()]);
c07f9fc5 519 pending_metadata_rm.erase(m->get_name());
224ce89b
WB
520
521 mon->clog->info() << "Activating manager daemon "
522 << pending_map.active_name;
7c673cae
FG
523
524 updated = true;
525 } else {
526 if (pending_map.standbys.count(m->get_gid()) > 0) {
527 dout(10) << "from existing standby " << m->get_gid() << dendl;
224ce89b
WB
528 if (pending_map.standbys[m->get_gid()].available_modules !=
529 m->get_available_modules()) {
530 dout(10) << "existing standby " << m->get_gid() << " available_modules "
531 << m->get_available_modules() << " (was "
532 << pending_map.standbys[m->get_gid()].available_modules << ")"
533 << dendl;
534 pending_map.standbys[m->get_gid()].available_modules =
535 m->get_available_modules();
536 updated = true;
537 }
7c673cae
FG
538 } else {
539 dout(10) << "new standby " << m->get_gid() << dendl;
224ce89b
WB
540 mon->clog->debug() << "Standby manager daemon " << m->get_name()
541 << " started";
c07f9fc5
FG
542 pending_map.standbys[m->get_gid()] = {m->get_gid(), m->get_name(),
543 m->get_available_modules()};
11fdf7f2 544 encode(m->get_metadata(), pending_metadata[m->get_name()]);
c07f9fc5 545 pending_metadata_rm.erase(m->get_name());
7c673cae
FG
546 updated = true;
547 }
548 }
549
550 if (updated) {
551 dout(4) << "updating map" << dendl;
552 wait_for_finished_proposal(op, new C_Updated(this, op));
553 } else {
554 dout(10) << "no change" << dendl;
555 }
556
557 return updated;
558}
559
560void MgrMonitor::check_subs()
561{
562 const std::string type = "mgrmap";
563 if (mon->session_map.subs.count(type) == 0)
564 return;
565 for (auto sub : *(mon->session_map.subs[type])) {
566 check_sub(sub);
567 }
568}
569
570void MgrMonitor::check_sub(Subscription *sub)
571{
572 if (sub->type == "mgrmap") {
573 if (sub->next <= map.get_epoch()) {
224ce89b
WB
574 dout(20) << "Sending map to subscriber " << sub->session->con
575 << " " << sub->session->con->get_peer_addr() << dendl;
7c673cae
FG
576 sub->session->con->send_message(new MMgrMap(map));
577 if (sub->onetime) {
578 mon->session_map.remove_sub(sub);
579 } else {
580 sub->next = map.get_epoch() + 1;
581 }
582 }
583 } else {
11fdf7f2 584 ceph_assert(sub->type == "mgrdigest");
c07f9fc5
FG
585 if (sub->next == 0) {
586 // new registration; cancel previous timer
587 cancel_timer();
588 }
31f18b77 589 if (digest_event == nullptr) {
7c673cae
FG
590 send_digests();
591 }
592 }
593}
594
595/**
596 * Handle digest subscriptions separately (outside of check_sub) because
597 * they are going to be periodic rather than version-driven.
598 */
599void MgrMonitor::send_digests()
600{
31f18b77
FG
601 cancel_timer();
602
7c673cae 603 const std::string type = "mgrdigest";
11fdf7f2
TL
604 if (mon->session_map.subs.count(type) == 0) {
605 prev_health_checks.clear();
7c673cae 606 return;
11fdf7f2 607 }
7c673cae 608
b32b8144
FG
609 if (!is_active()) {
610 // if paxos is currently not active, don't send a digest but reenable timer
611 goto timer;
612 }
613 dout(10) << __func__ << dendl;
614
7c673cae 615 for (auto sub : *(mon->session_map.subs[type])) {
224ce89b
WB
616 dout(10) << __func__ << " sending digest to subscriber " << sub->session->con
617 << " " << sub->session->con->get_peer_addr() << dendl;
7c673cae
FG
618 MMgrDigest *mdigest = new MMgrDigest;
619
620 JSONFormatter f;
224ce89b 621 mon->get_health_status(true, &f, nullptr, nullptr, nullptr);
7c673cae
FG
622 f.flush(mdigest->health_json);
623 f.reset();
624
625 std::ostringstream ss;
626 mon->get_mon_status(&f, ss);
627 f.flush(mdigest->mon_status_json);
628 f.reset();
629
630 sub->session->con->send_message(mdigest);
631 }
632
b32b8144 633timer:
3efd9988 634 digest_event = mon->timer.add_event_after(
11fdf7f2 635 g_conf().get_val<int64_t>("mon_mgr_digest_period"),
3efd9988 636 new C_MonContext(mon, [this](int) {
7c673cae 637 send_digests();
3efd9988 638 }));
31f18b77
FG
639}
640
641void MgrMonitor::cancel_timer()
642{
643 if (digest_event) {
644 mon->timer.cancel_event(digest_event);
645 digest_event = nullptr;
646 }
7c673cae
FG
647}
648
649void MgrMonitor::on_active()
650{
224ce89b
WB
651 if (mon->is_leader()) {
652 mon->clog->debug() << "mgrmap e" << map.epoch << ": " << map;
7c673cae 653
11fdf7f2
TL
654 if (HAVE_FEATURE(mon->get_quorum_con_features(), SERVER_NAUTILUS) &&
655 pending_map.always_on_modules != always_on_modules) {
656 pending_map.always_on_modules = always_on_modules;
657 dout(4) << "always on modules changed, pending "
658 << pending_map.get_always_on_modules()
659 << " != wanted " << always_on_modules << dendl;
660 propose_pending();
7c673cae 661 }
7c673cae
FG
662 }
663}
664
665void MgrMonitor::tick()
666{
667 if (!is_active() || !mon->is_leader())
668 return;
669
31f18b77 670 const auto now = ceph::coarse_mono_clock::now();
3efd9988 671
11fdf7f2
TL
672 const auto mgr_beacon_grace =
673 g_conf().get_val<std::chrono::seconds>("mon_mgr_beacon_grace");
3efd9988
FG
674
675 // Note that this is the mgr daemon's tick period, not ours (the
676 // beacon is sent with this period).
11fdf7f2
TL
677 const auto mgr_tick_period =
678 g_conf().get_val<std::chrono::seconds>("mgr_tick_period");
3efd9988
FG
679
680 if (last_tick != ceph::coarse_mono_clock::time_point::min()
681 && (now - last_tick > (mgr_beacon_grace - mgr_tick_period))) {
682 // This case handles either local slowness (calls being delayed
683 // for whatever reason) or cluster election slowness (a long gap
684 // between calls while an election happened)
685 dout(4) << __func__ << ": resetting beacon timeouts due to mon delay "
686 "(slow election?) of " << now - last_tick << " seconds" << dendl;
687 for (auto &i : last_beacon) {
688 i.second = now;
689 }
690 }
691
692 last_tick = now;
7c673cae
FG
693
694 // Populate any missing beacons (i.e. no beacon since MgrMonitor
695 // instantiation) with the current time, so that they will
696 // eventually look laggy if they fail to give us a beacon.
697 if (pending_map.active_gid != 0
698 && last_beacon.count(pending_map.active_gid) == 0) {
699 last_beacon[pending_map.active_gid] = now;
700 }
701 for (auto s : pending_map.standbys) {
702 if (last_beacon.count(s.first) == 0) {
703 last_beacon[s.first] = now;
704 }
705 }
706
707 // Cull standbys first so that any remaining standbys
708 // will be eligible to take over from the active if we cull him.
709 std::list<uint64_t> dead_standbys;
3efd9988 710 const auto cutoff = now - mgr_beacon_grace;
7c673cae
FG
711 for (const auto &i : pending_map.standbys) {
712 auto last_beacon_time = last_beacon.at(i.first);
713 if (last_beacon_time < cutoff) {
714 dead_standbys.push_back(i.first);
715 }
716 }
717
718 bool propose = false;
719
720 for (auto i : dead_standbys) {
721 dout(4) << "Dropping laggy standby " << i << dendl;
722 drop_standby(i);
723 propose = true;
724 }
725
726 if (pending_map.active_gid != 0
727 && last_beacon.at(pending_map.active_gid) < cutoff) {
224ce89b 728 const std::string old_active_name = pending_map.active_name;
7c673cae
FG
729 drop_active();
730 propose = true;
731 dout(4) << "Dropping active" << pending_map.active_gid << dendl;
732 if (promote_standby()) {
733 dout(4) << "Promoted standby " << pending_map.active_gid << dendl;
224ce89b
WB
734 mon->clog->info() << "Manager daemon " << old_active_name
735 << " is unresponsive, replacing it with standby"
736 << " daemon " << pending_map.active_name;
7c673cae
FG
737 } else {
738 dout(4) << "Active is laggy but have no standbys to replace it" << dendl;
28e407b8 739 mon->clog->info() << "Manager daemon " << old_active_name
224ce89b 740 << " is unresponsive. No standby daemons available.";
7c673cae
FG
741 }
742 } else if (pending_map.active_gid == 0) {
743 if (promote_standby()) {
744 dout(4) << "Promoted standby " << pending_map.active_gid << dendl;
224ce89b 745 mon->clog->info() << "Activating manager daemon "
3efd9988 746 << pending_map.active_name;
7c673cae
FG
747 propose = true;
748 }
749 }
750
224ce89b 751 if (!pending_map.available &&
c07f9fc5 752 !ever_had_active_mgr &&
224ce89b 753 should_warn_about_mgr_down() != HEALTH_OK) {
3efd9988 754 dout(10) << " exceeded mon_mgr_mkfs_grace "
11fdf7f2 755 << g_conf().get_val<int64_t>("mon_mgr_mkfs_grace")
3efd9988 756 << " seconds" << dendl;
224ce89b
WB
757 propose = true;
758 }
759
7c673cae
FG
760 if (propose) {
761 propose_pending();
762 }
763}
764
224ce89b
WB
765void MgrMonitor::on_restart()
766{
767 // Clear out the leader-specific state.
768 last_beacon.clear();
3efd9988 769 last_tick = ceph::coarse_mono_clock::now();
224ce89b
WB
770}
771
772
7c673cae
FG
773bool MgrMonitor::promote_standby()
774{
11fdf7f2 775 ceph_assert(pending_map.active_gid == 0);
7c673cae
FG
776 if (pending_map.standbys.size()) {
777 // Promote a replacement (arbitrary choice of standby)
778 auto replacement_gid = pending_map.standbys.begin()->first;
779 pending_map.active_gid = replacement_gid;
780 pending_map.active_name = pending_map.standbys.at(replacement_gid).name;
781 pending_map.available = false;
11fdf7f2
TL
782 pending_map.active_addrs = entity_addrvec_t();
783 pending_map.active_change = ceph_clock_now();
7c673cae 784
181888fb
FG
785 drop_standby(replacement_gid, false);
786
7c673cae
FG
787 return true;
788 } else {
789 return false;
790 }
791}
792
793void MgrMonitor::drop_active()
794{
795 if (last_beacon.count(pending_map.active_gid) > 0) {
796 last_beacon.erase(pending_map.active_gid);
797 }
798
c07f9fc5
FG
799 pending_metadata_rm.insert(pending_map.active_name);
800 pending_metadata.erase(pending_map.active_name);
7c673cae
FG
801 pending_map.active_name = "";
802 pending_map.active_gid = 0;
11fdf7f2 803 pending_map.active_change = ceph_clock_now();
7c673cae 804 pending_map.available = false;
11fdf7f2 805 pending_map.active_addrs = entity_addrvec_t();
3efd9988 806 pending_map.services.clear();
224ce89b
WB
807
808 // So that when new active mgr subscribes to mgrdigest, it will
809 // get an immediate response instead of waiting for next timer
810 cancel_timer();
7c673cae
FG
811}
812
181888fb 813void MgrMonitor::drop_standby(uint64_t gid, bool drop_meta)
7c673cae 814{
181888fb
FG
815 if (drop_meta) {
816 pending_metadata_rm.insert(pending_map.standbys[gid].name);
817 pending_metadata.erase(pending_map.standbys[gid].name);
818 }
7c673cae
FG
819 pending_map.standbys.erase(gid);
820 if (last_beacon.count(gid) > 0) {
821 last_beacon.erase(gid);
822 }
7c673cae
FG
823}
824
825bool MgrMonitor::preprocess_command(MonOpRequestRef op)
826{
31f18b77
FG
827 MMonCommand *m = static_cast<MMonCommand*>(op->get_req());
828 std::stringstream ss;
829 bufferlist rdata;
830
11fdf7f2 831 cmdmap_t cmdmap;
31f18b77
FG
832 if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) {
833 string rs = ss.str();
834 mon->reply_command(op, -EINVAL, rs, rdata, get_last_committed());
835 return true;
836 }
7c673cae 837
11fdf7f2 838 MonSession *session = op->get_session();
31f18b77
FG
839 if (!session) {
840 mon->reply_command(op, -EACCES, "access denied", rdata,
841 get_last_committed());
842 return true;
843 }
844
845 string format;
11fdf7f2 846 cmd_getval(g_ceph_context, cmdmap, "format", format, string("json-pretty"));
31f18b77
FG
847 boost::scoped_ptr<Formatter> f(Formatter::create(format));
848
849 string prefix;
11fdf7f2 850 cmd_getval(g_ceph_context, cmdmap, "prefix", prefix);
31f18b77
FG
851 int r = 0;
852
853 if (prefix == "mgr dump") {
854 int64_t epoch = 0;
11fdf7f2 855 cmd_getval(g_ceph_context, cmdmap, "epoch", epoch, (int64_t)map.get_epoch());
31f18b77
FG
856 if (epoch == (int64_t)map.get_epoch()) {
857 f->dump_object("mgrmap", map);
858 } else {
859 bufferlist bl;
860 int err = get_version(epoch, bl);
861 if (err == -ENOENT) {
862 r = -ENOENT;
863 ss << "there is no map for epoch " << epoch;
864 goto reply;
865 }
866 MgrMap m;
11fdf7f2 867 auto p = bl.cbegin();
31f18b77
FG
868 m.decode(p);
869 f->dump_object("mgrmap", m);
870 }
871 f->flush(rdata);
224ce89b 872 } else if (prefix == "mgr module ls") {
3efd9988
FG
873 f->open_object_section("modules");
874 {
875 f->open_array_section("enabled_modules");
876 for (auto& p : map.modules) {
11fdf7f2
TL
877 if (map.get_always_on_modules().count(p) > 0)
878 continue;
879 // We only show the name for enabled modules. The any errors
880 // etc will show up as a health checks.
3efd9988
FG
881 f->dump_string("module", p);
882 }
883 f->close_section();
884 f->open_array_section("disabled_modules");
885 for (auto& p : map.available_modules) {
11fdf7f2
TL
886 if (map.modules.count(p.name) == 0 &&
887 map.get_always_on_modules().count(p.name) == 0) {
888 // For disabled modules, we show the full info, to
889 // give a hint about whether enabling it will work
890 p.dump(f.get());
3efd9988
FG
891 }
892 }
893 f->close_section();
894 }
895 f->close_section();
896 f->flush(rdata);
897 } else if (prefix == "mgr services") {
898 f->open_object_section("services");
899 for (const auto &i : map.services) {
900 f->dump_string(i.first.c_str(), i.second);
224ce89b
WB
901 }
902 f->close_section();
903 f->flush(rdata);
c07f9fc5
FG
904 } else if (prefix == "mgr metadata") {
905 string name;
11fdf7f2 906 cmd_getval(g_ceph_context, cmdmap, "who", name);
c07f9fc5
FG
907 if (name.size() > 0 && !map.have_name(name)) {
908 ss << "mgr." << name << " does not exist";
909 r = -ENOENT;
910 goto reply;
911 }
912 string format;
11fdf7f2 913 cmd_getval(g_ceph_context, cmdmap, "format", format);
c07f9fc5
FG
914 boost::scoped_ptr<Formatter> f(Formatter::create(format, "json-pretty", "json-pretty"));
915 if (name.size()) {
916 f->open_object_section("mgr_metadata");
11fdf7f2 917 f->dump_string("name", name);
c07f9fc5
FG
918 r = dump_metadata(name, f.get(), &ss);
919 if (r < 0)
920 goto reply;
921 f->close_section();
922 } else {
923 r = 0;
924 f->open_array_section("mgr_metadata");
925 for (auto& i : map.get_all_names()) {
926 f->open_object_section("mgr");
11fdf7f2 927 f->dump_string("name", i);
c07f9fc5
FG
928 r = dump_metadata(i, f.get(), NULL);
929 if (r == -EINVAL || r == -ENOENT) {
930 // Drop error, continue to get other daemons' metadata
931 dout(4) << "No metadata for mgr." << i << dendl;
932 r = 0;
933 } else if (r < 0) {
934 // Unexpected error
935 goto reply;
936 }
937 f->close_section();
938 }
939 f->close_section();
940 }
941 f->flush(rdata);
942 } else if (prefix == "mgr versions") {
943 if (!f)
944 f.reset(Formatter::create("json-pretty"));
945 count_metadata("ceph_version", f.get());
946 f->flush(rdata);
947 r = 0;
948 } else if (prefix == "mgr count-metadata") {
949 if (!f)
950 f.reset(Formatter::create("json-pretty"));
951 string field;
11fdf7f2 952 cmd_getval(g_ceph_context, cmdmap, "property", field);
c07f9fc5
FG
953 count_metadata(field, f.get());
954 f->flush(rdata);
955 r = 0;
31f18b77
FG
956 } else {
957 return false;
958 }
959
960reply:
961 string rs;
962 getline(ss, rs);
963 mon->reply_command(op, r, rs, rdata, get_last_committed());
964 return true;
7c673cae
FG
965}
966
967bool MgrMonitor::prepare_command(MonOpRequestRef op)
968{
969 MMonCommand *m = static_cast<MMonCommand*>(op->get_req());
970
971 std::stringstream ss;
972 bufferlist rdata;
973
11fdf7f2 974 cmdmap_t cmdmap;
7c673cae
FG
975 if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) {
976 string rs = ss.str();
977 mon->reply_command(op, -EINVAL, rs, rdata, get_last_committed());
978 return true;
979 }
980
11fdf7f2 981 MonSession *session = op->get_session();
7c673cae
FG
982 if (!session) {
983 mon->reply_command(op, -EACCES, "access denied", rdata, get_last_committed());
984 return true;
985 }
986
224ce89b 987 string format;
11fdf7f2 988 cmd_getval(g_ceph_context, cmdmap, "format", format, string("plain"));
224ce89b
WB
989 boost::scoped_ptr<Formatter> f(Formatter::create(format));
990
7c673cae 991 string prefix;
11fdf7f2 992 cmd_getval(g_ceph_context, cmdmap, "prefix", prefix);
7c673cae
FG
993
994 int r = 0;
995
996 if (prefix == "mgr fail") {
997 string who;
11fdf7f2 998 cmd_getval(g_ceph_context, cmdmap, "who", who);
7c673cae
FG
999
1000 std::string err;
1001 uint64_t gid = strict_strtol(who.c_str(), 10, &err);
1002 bool changed = false;
1003 if (!err.empty()) {
1004 // Does not parse as a gid, treat it as a name
1005 if (pending_map.active_name == who) {
1006 drop_active();
1007 changed = true;
1008 } else {
1009 gid = 0;
1010 for (const auto &i : pending_map.standbys) {
1011 if (i.second.name == who) {
1012 gid = i.first;
1013 break;
1014 }
1015 }
1016 if (gid != 0) {
1017 drop_standby(gid);
1018 changed = true;
1019 } else {
1020 ss << "Daemon not found '" << who << "', already failed?";
1021 }
1022 }
1023 } else {
1024 if (pending_map.active_gid == gid) {
1025 drop_active();
1026 changed = true;
1027 } else if (pending_map.standbys.count(gid) > 0) {
1028 drop_standby(gid);
1029 changed = true;
1030 } else {
1031 ss << "Daemon not found '" << gid << "', already failed?";
1032 }
1033 }
1034
1035 if (changed && pending_map.active_gid == 0) {
1036 promote_standby();
1037 }
224ce89b
WB
1038 } else if (prefix == "mgr module enable") {
1039 string module;
11fdf7f2 1040 cmd_getval(g_ceph_context, cmdmap, "module", module);
224ce89b
WB
1041 if (module.empty()) {
1042 r = -EINVAL;
1043 goto out;
1044 }
11fdf7f2
TL
1045 if (pending_map.get_always_on_modules().count(module) > 0) {
1046 ss << "module '" << module << "' is already enabled (always-on)";
1047 goto out;
1048 }
224ce89b 1049 string force;
11fdf7f2 1050 cmd_getval(g_ceph_context, cmdmap, "force", force);
224ce89b
WB
1051 if (!pending_map.all_support_module(module) &&
1052 force != "--force") {
1053 ss << "all mgr daemons do not support module '" << module << "', pass "
1054 << "--force to force enablement";
1055 r = -ENOENT;
1056 goto out;
1057 }
11fdf7f2
TL
1058
1059 std::string can_run_error;
1060 if (force != "--force" && !pending_map.can_run_module(module, &can_run_error)) {
1061 ss << "module '" << module << "' reports that it cannot run on the active "
1062 "manager daemon: " << can_run_error << " (pass --force to force "
1063 "enablement)";
1064 r = -ENOENT;
1065 goto out;
1066 }
1067
1068 if (pending_map.module_enabled(module)) {
1069 ss << "module '" << module << "' is already enabled";
1070 r = 0;
1071 goto out;
1072 }
224ce89b
WB
1073 pending_map.modules.insert(module);
1074 } else if (prefix == "mgr module disable") {
1075 string module;
11fdf7f2 1076 cmd_getval(g_ceph_context, cmdmap, "module", module);
224ce89b
WB
1077 if (module.empty()) {
1078 r = -EINVAL;
1079 goto out;
1080 }
11fdf7f2
TL
1081 if (pending_map.get_always_on_modules().count(module) > 0) {
1082 ss << "module '" << module << "' cannot be disabled (always-on)";
1083 r = -EINVAL;
1084 goto out;
1085 }
1086 if (!pending_map.module_enabled(module)) {
1087 ss << "module '" << module << "' is already disabled";
1088 r = 0;
1089 goto out;
1090 }
1091 if (!pending_map.any_supports_module(module)) {
1092 ss << "module '" << module << "' does not exist";
1093 }
224ce89b 1094 pending_map.modules.erase(module);
7c673cae 1095 } else {
224ce89b 1096 ss << "Command '" << prefix << "' not implemented!";
7c673cae
FG
1097 r = -ENOSYS;
1098 }
1099
224ce89b 1100out:
7c673cae
FG
1101 dout(4) << __func__ << " done, r=" << r << dendl;
1102 /* Compose response */
1103 string rs;
1104 getline(ss, rs);
1105
1106 if (r >= 0) {
1107 // success.. delay reply
1108 wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, r, rs,
1109 get_last_committed() + 1));
1110 return true;
1111 } else {
1112 // reply immediately
1113 mon->reply_command(op, r, rs, rdata, get_last_committed());
1114 return false;
1115 }
1116}
1117
1118void MgrMonitor::init()
1119{
31f18b77 1120 if (digest_event == nullptr) {
7c673cae
FG
1121 send_digests(); // To get it to schedule its own event
1122 }
1123}
1124
1125void MgrMonitor::on_shutdown()
1126{
31f18b77 1127 cancel_timer();
7c673cae
FG
1128}
1129
c07f9fc5 1130int MgrMonitor::load_metadata(const string& name, std::map<string, string>& m,
11fdf7f2 1131 ostream *err) const
c07f9fc5
FG
1132{
1133 bufferlist bl;
1134 int r = mon->store->get(MGR_METADATA_PREFIX, name, bl);
1135 if (r < 0)
1136 return r;
1137 try {
11fdf7f2
TL
1138 auto p = bl.cbegin();
1139 decode(m, p);
c07f9fc5
FG
1140 }
1141 catch (buffer::error& e) {
1142 if (err)
1143 *err << "mgr." << name << " metadata is corrupt";
1144 return -EIO;
1145 }
1146 return 0;
1147}
1148
1149void MgrMonitor::count_metadata(const string& field, std::map<string,int> *out)
1150{
1151 std::set<string> ls = map.get_all_names();
1152 for (auto& name : ls) {
1153 std::map<string,string> meta;
1154 load_metadata(name, meta, nullptr);
1155 auto p = meta.find(field);
1156 if (p == meta.end()) {
1157 (*out)["unknown"]++;
1158 } else {
1159 (*out)[p->second]++;
1160 }
1161 }
1162}
1163
1164void MgrMonitor::count_metadata(const string& field, Formatter *f)
1165{
1166 std::map<string,int> by_val;
1167 count_metadata(field, &by_val);
1168 f->open_object_section(field.c_str());
1169 for (auto& p : by_val) {
1170 f->dump_int(p.first.c_str(), p.second);
1171 }
1172 f->close_section();
1173}
1174
1175int MgrMonitor::dump_metadata(const string& name, Formatter *f, ostream *err)
1176{
1177 std::map<string,string> m;
1178 if (int r = load_metadata(name, m, err))
1179 return r;
1180 for (auto& p : m) {
1181 f->dump_string(p.first.c_str(), p.second);
1182 }
1183 return 0;
1184}
31f18b77 1185
11fdf7f2
TL
1186void MgrMonitor::print_nodes(Formatter *f) const
1187{
1188 ceph_assert(f);
1189
1190 std::map<string, list<string> > mgrs; // hostname => mgr
1191 auto ls = map.get_all_names();
1192 for (auto& name : ls) {
1193 std::map<string,string> meta;
1194 if (load_metadata(name, meta, nullptr)) {
1195 continue;
1196 }
1197 auto hostname = meta.find("hostname");
1198 if (hostname == meta.end()) {
1199 // not likely though
1200 continue;
1201 }
1202 mgrs[hostname->second].push_back(name);
1203 }
1204
1205 dump_services(f, mgrs, "mgr");
1206}
1207
d2e6a577
FG
1208const std::vector<MonCommand> &MgrMonitor::get_command_descs() const
1209{
1210 if (command_descs.empty()) {
1211 // must have just upgraded; fallback to static commands
1212 return mgr_commands;
1213 } else {
1214 return command_descs;
1215 }
1216}