]> git.proxmox.com Git - ceph.git/blame - ceph/src/mon/MDSMonitor.cc
bump version to 12.2.11-pve1
[ceph.git] / ceph / src / mon / MDSMonitor.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#include <sstream>
16#include <boost/utility.hpp>
224ce89b 17#include <boost/regex.hpp>
7c673cae
FG
18
19#include "MDSMonitor.h"
20#include "FSCommands.h"
21#include "Monitor.h"
22#include "MonitorDBStore.h"
23#include "OSDMonitor.h"
24#include "PGMonitor.h"
25
26#include "common/strtol.h"
27#include "common/perf_counters.h"
28#include "common/config.h"
29#include "common/cmdparse.h"
30#include "messages/MMDSMap.h"
31#include "messages/MFSMap.h"
32#include "messages/MFSMapUser.h"
33#include "messages/MMDSLoadTargets.h"
34#include "messages/MMonCommand.h"
35#include "messages/MGenericMessage.h"
36
37#include "include/assert.h"
38#include "include/str_list.h"
39#include "include/stringify.h"
40#include "mds/mdstypes.h"
41#include "Session.h"
42
43#define dout_subsys ceph_subsys_mon
44#undef dout_prefix
28e407b8
AA
45#define dout_prefix _prefix(_dout, mon, get_fsmap())
46static ostream& _prefix(std::ostream *_dout, Monitor *mon, const FSMap& fsmap) {
7c673cae
FG
47 return *_dout << "mon." << mon->name << "@" << mon->rank
48 << "(" << mon->get_state_name()
49 << ").mds e" << fsmap.get_epoch() << " ";
50}
51
3efd9988
FG
52static const string MDS_METADATA_PREFIX("mds_metadata");
53static const string MDS_HEALTH_PREFIX("mds_health");
54
55
7c673cae
FG
56/*
57 * Specialized implementation of cmd_getval to allow us to parse
58 * out strongly-typedef'd types
59 */
60template<> bool cmd_getval(CephContext *cct, const cmdmap_t& cmdmap,
31f18b77 61 const std::string& k, mds_gid_t &val)
7c673cae
FG
62{
63 return cmd_getval(cct, cmdmap, k, (int64_t&)val);
64}
65
66template<> bool cmd_getval(CephContext *cct, const cmdmap_t& cmdmap,
31f18b77 67 const std::string& k, mds_rank_t &val)
7c673cae
FG
68{
69 return cmd_getval(cct, cmdmap, k, (int64_t&)val);
70}
71
72template<> bool cmd_getval(CephContext *cct, const cmdmap_t& cmdmap,
31f18b77 73 const std::string& k, MDSMap::DaemonState &val)
7c673cae
FG
74{
75 return cmd_getval(cct, cmdmap, k, (int64_t&)val);
76}
77
7c673cae
FG
78// my methods
79
28e407b8 80void MDSMonitor::print_map(const FSMap &m, int dbl)
7c673cae
FG
81{
82 dout(dbl) << "print_map\n";
83 m.print(*_dout);
84 *_dout << dendl;
85}
86
87// service methods
88void MDSMonitor::create_initial()
89{
90 dout(10) << "create_initial" << dendl;
91}
92
3efd9988
FG
93void MDSMonitor::get_store_prefixes(std::set<string>& s)
94{
95 s.insert(service_name);
96 s.insert(MDS_METADATA_PREFIX);
97 s.insert(MDS_HEALTH_PREFIX);
98}
7c673cae
FG
99
100void MDSMonitor::update_from_paxos(bool *need_bootstrap)
101{
102 version_t version = get_last_committed();
28e407b8 103 if (version == get_fsmap().epoch)
7c673cae
FG
104 return;
105
106 dout(10) << __func__ << " version " << version
28e407b8
AA
107 << ", my e " << get_fsmap().epoch << dendl;
108 assert(version > get_fsmap().epoch);
7c673cae 109
224ce89b
WB
110 load_health();
111
7c673cae
FG
112 // read and decode
113 bufferlist fsmap_bl;
114 fsmap_bl.clear();
115 int err = get_version(version, fsmap_bl);
116 assert(err == 0);
117
118 assert(fsmap_bl.length() > 0);
119 dout(10) << __func__ << " got " << version << dendl;
28e407b8 120 PaxosFSMap::decode(fsmap_bl);
7c673cae
FG
121
122 // new map
91327a77 123 dout(0) << "new map" << dendl;
28e407b8 124 print_map(get_fsmap(), 0);
7c673cae 125 if (!g_conf->mon_mds_skip_sanity) {
28e407b8 126 get_fsmap().sanity();
7c673cae
FG
127 }
128
129 check_subs();
130 update_logger();
131}
132
133void MDSMonitor::init()
134{
135 (void)load_metadata(pending_metadata);
136}
137
138void MDSMonitor::create_pending()
139{
28e407b8 140 auto &fsmap = PaxosFSMap::create_pending();
7c673cae 141
3efd9988 142 if (mon->osdmon()->is_readable()) {
28e407b8
AA
143 const auto &osdmap = mon->osdmon()->osdmap;
144 fsmap.sanitize([&osdmap](int64_t pool){return osdmap.have_pg_pool(pool);});
3efd9988
FG
145 }
146
28e407b8 147 dout(10) << "create_pending e" << fsmap.epoch << dendl;
7c673cae
FG
148}
149
150void MDSMonitor::encode_pending(MonitorDBStore::TransactionRef t)
151{
28e407b8
AA
152 auto &pending = get_pending_fsmap_writeable();
153 auto &epoch = pending.epoch;
7c673cae 154
28e407b8 155 dout(10) << "encode_pending e" << epoch << dendl;
7c673cae
FG
156
157 // print map iff 'debug mon = 30' or higher
28e407b8 158 print_map(get_pending_fsmap(), 30);
7c673cae 159 if (!g_conf->mon_mds_skip_sanity) {
28e407b8 160 pending.sanity();
7c673cae
FG
161 }
162
163 // Set 'modified' on maps modified this epoch
28e407b8
AA
164 for (auto &p : pending.filesystems) {
165 if (p.second->mds_map.epoch == epoch) {
166 p.second->mds_map.modified = ceph_clock_now();
7c673cae
FG
167 }
168 }
169
170 // apply to paxos
28e407b8
AA
171 assert(get_last_committed() + 1 == pending.epoch);
172 bufferlist pending_bl;
173 pending.encode(pending_bl, mon->get_quorum_con_features());
7c673cae
FG
174
175 /* put everything in the transaction */
28e407b8
AA
176 put_version(t, pending.epoch, pending_bl);
177 put_last_committed(t, pending.epoch);
7c673cae
FG
178
179 // Encode MDSHealth data
180 for (std::map<uint64_t, MDSHealth>::iterator i = pending_daemon_health.begin();
181 i != pending_daemon_health.end(); ++i) {
182 bufferlist bl;
183 i->second.encode(bl);
184 t->put(MDS_HEALTH_PREFIX, stringify(i->first), bl);
185 }
186
187 for (std::set<uint64_t>::iterator i = pending_daemon_health_rm.begin();
188 i != pending_daemon_health_rm.end(); ++i) {
189 t->erase(MDS_HEALTH_PREFIX, stringify(*i));
190 }
191 pending_daemon_health_rm.clear();
1adf2230 192 remove_from_metadata(pending, t);
224ce89b
WB
193
194 // health
195 health_check_map_t new_checks;
28e407b8 196 const auto &info_map = pending.get_mds_info();
224ce89b
WB
197 for (const auto &i : info_map) {
198 const auto &gid = i.first;
199 const auto &info = i.second;
200 if (pending_daemon_health_rm.count(gid)) {
201 continue;
202 }
203 MDSHealth health;
204 auto p = pending_daemon_health.find(gid);
205 if (p != pending_daemon_health.end()) {
206 health = p->second;
207 } else {
208 bufferlist bl;
209 mon->store->get(MDS_HEALTH_PREFIX, stringify(gid), bl);
210 if (!bl.length()) {
211 derr << "Missing health data for MDS " << gid << dendl;
212 continue;
213 }
214 bufferlist::iterator bl_i = bl.begin();
215 health.decode(bl_i);
216 }
217 for (const auto &metric : health.metrics) {
d2e6a577 218 const int rank = info.rank;
224ce89b
WB
219 health_check_t *check = &new_checks.get_or_add(
220 mds_metric_name(metric.type),
221 metric.sev,
222 mds_metric_summary(metric.type));
223 ostringstream ss;
224 ss << "mds" << info.name << "(mds." << rank << "): " << metric.message;
28e407b8
AA
225 bool first = true;
226 for (auto &p : metric.metadata) {
227 if (first) {
228 ss << " ";
229 } else {
224ce89b 230 ss << ", ";
28e407b8
AA
231 }
232 ss << p.first << ": " << p.second;
233 first = false;
224ce89b
WB
234 }
235 check->detail.push_back(ss.str());
236 }
237 }
28e407b8 238 pending.get_health_checks(&new_checks);
224ce89b
WB
239 for (auto& p : new_checks.checks) {
240 p.second.summary = boost::regex_replace(
241 p.second.summary,
242 boost::regex("%num%"),
243 stringify(p.second.detail.size()));
244 p.second.summary = boost::regex_replace(
245 p.second.summary,
246 boost::regex("%plurals%"),
247 p.second.detail.size() > 1 ? "s" : "");
248 p.second.summary = boost::regex_replace(
249 p.second.summary,
250 boost::regex("%isorare%"),
251 p.second.detail.size() > 1 ? "are" : "is");
181888fb
FG
252 p.second.summary = boost::regex_replace(
253 p.second.summary,
254 boost::regex("%hasorhave%"),
255 p.second.detail.size() > 1 ? "have" : "has");
224ce89b
WB
256 }
257 encode_health(new_checks, t);
7c673cae
FG
258}
259
260version_t MDSMonitor::get_trim_to()
261{
262 version_t floor = 0;
263 if (g_conf->mon_mds_force_trim_to > 0 &&
264 g_conf->mon_mds_force_trim_to < (int)get_last_committed()) {
265 floor = g_conf->mon_mds_force_trim_to;
266 dout(10) << __func__ << " explicit mon_mds_force_trim_to = "
267 << floor << dendl;
268 }
269
270 unsigned max = g_conf->mon_max_mdsmap_epochs;
271 version_t last = get_last_committed();
272
273 if (last - get_first_committed() > max && floor < last - max)
274 return last - max;
275 return floor;
276}
277
278void MDSMonitor::update_logger()
279{
280 dout(10) << "update_logger" << dendl;
281
28e407b8
AA
282 const auto &fsmap = get_fsmap();
283
7c673cae
FG
284 uint64_t up = 0;
285 uint64_t in = 0;
286 uint64_t failed = 0;
287 for (const auto &i : fsmap.filesystems) {
288 const MDSMap &mds_map = i.second->mds_map;
289
290 up += mds_map.get_num_up_mds();
291 in += mds_map.get_num_in_mds();
292 failed += mds_map.get_num_failed_mds();
293 }
294 mon->cluster_logger->set(l_cluster_num_mds_up, up);
295 mon->cluster_logger->set(l_cluster_num_mds_in, in);
296 mon->cluster_logger->set(l_cluster_num_mds_failed, failed);
297 mon->cluster_logger->set(l_cluster_mds_epoch, fsmap.get_epoch());
298}
299
300bool MDSMonitor::preprocess_query(MonOpRequestRef op)
301{
302 op->mark_mdsmon_event(__func__);
303 PaxosServiceMessage *m = static_cast<PaxosServiceMessage*>(op->get_req());
304 dout(10) << "preprocess_query " << *m << " from " << m->get_orig_source_inst() << dendl;
305
306 switch (m->get_type()) {
307
308 case MSG_MDS_BEACON:
309 return preprocess_beacon(op);
310
311 case MSG_MON_COMMAND:
f64942e4
AA
312 try {
313 return preprocess_command(op);
314 }
315 catch (const bad_cmd_get& e) {
316 bufferlist bl;
317 mon->reply_command(op, -EINVAL, e.what(), bl, get_last_committed());
318 return true;
319 }
7c673cae
FG
320
321 case MSG_MDS_OFFLOAD_TARGETS:
322 return preprocess_offload_targets(op);
323
324 default:
325 ceph_abort();
326 return true;
327 }
328}
329
330void MDSMonitor::_note_beacon(MMDSBeacon *m)
331{
332 mds_gid_t gid = mds_gid_t(m->get_global_id());
333 version_t seq = m->get_seq();
334
91327a77 335 dout(5) << "_note_beacon " << *m << " noting time" << dendl;
1adf2230
AA
336 auto &beacon = last_beacon[gid];
337 beacon.stamp = mono_clock::now();
338 beacon.seq = seq;
7c673cae
FG
339}
340
341bool MDSMonitor::preprocess_beacon(MonOpRequestRef op)
342{
343 op->mark_mdsmon_event(__func__);
344 MMDSBeacon *m = static_cast<MMDSBeacon*>(op->get_req());
345 MDSMap::DaemonState state = m->get_state();
346 mds_gid_t gid = m->get_global_id();
347 version_t seq = m->get_seq();
348 MDSMap::mds_info_t info;
349 epoch_t effective_epoch = 0;
350
1adf2230 351 const auto &fsmap = get_fsmap();
28e407b8 352
7c673cae
FG
353 // check privileges, ignore if fails
354 MonSession *session = m->get_session();
355 assert(session);
356 if (!session->is_capable("mds", MON_CAP_X)) {
357 dout(0) << "preprocess_beacon got MMDSBeacon from entity with insufficient privileges "
358 << session->caps << dendl;
359 goto ignore;
360 }
361
362 if (m->get_fsid() != mon->monmap->fsid) {
363 dout(0) << "preprocess_beacon on fsid " << m->get_fsid() << " != " << mon->monmap->fsid << dendl;
364 goto ignore;
365 }
366
91327a77 367 dout(5) << "preprocess_beacon " << *m
7c673cae
FG
368 << " from " << m->get_orig_source_inst()
369 << " " << m->get_compat()
370 << dendl;
371
372 // make sure the address has a port
373 if (m->get_orig_source_addr().get_port() == 0) {
374 dout(1) << " ignoring boot message without a port" << dendl;
375 goto ignore;
376 }
377
378 // check compat
379 if (!m->get_compat().writeable(fsmap.compat)) {
380 dout(1) << " mds " << m->get_source_inst() << " can't write to fsmap " << fsmap.compat << dendl;
381 goto ignore;
382 }
383
384 // fw to leader?
28e407b8 385 if (!is_leader())
7c673cae
FG
386 return false;
387
388 // booted, but not in map?
28e407b8 389 if (!fsmap.gid_exists(gid)) {
7c673cae
FG
390 if (state != MDSMap::STATE_BOOT) {
391 dout(7) << "mds_beacon " << *m << " is not in fsmap (state "
392 << ceph_mds_state_name(state) << ")" << dendl;
393
1adf2230
AA
394 /* We can't send an MDSMap this MDS was a part of because we no longer
395 * know which FS it was part of. Nor does this matter. Sending an empty
396 * MDSMap is sufficient for getting the MDS to respawn.
397 */
7c673cae
FG
398 MDSMap null_map;
399 null_map.epoch = fsmap.epoch;
400 null_map.compat = fsmap.compat;
401 mon->send_reply(op, new MMDSMap(mon->monmap->fsid, &null_map));
402 return true;
403 } else {
404 return false; // not booted yet.
405 }
406 }
407 dout(10) << __func__ << ": GID exists in map: " << gid << dendl;
28e407b8 408 info = fsmap.get_info_gid(gid);
7c673cae
FG
409
410 // old seq?
411 if (info.state_seq > seq) {
412 dout(7) << "mds_beacon " << *m << " has old seq, ignoring" << dendl;
413 goto ignore;
414 }
415
416 // Work out the latest epoch that this daemon should have seen
417 {
28e407b8 418 fs_cluster_id_t fscid = fsmap.mds_roles.at(gid);
7c673cae 419 if (fscid == FS_CLUSTER_ID_NONE) {
28e407b8 420 effective_epoch = fsmap.standby_epochs.at(gid);
7c673cae 421 } else {
28e407b8 422 effective_epoch = fsmap.get_filesystem(fscid)->mds_map.epoch;
7c673cae
FG
423 }
424 if (effective_epoch != m->get_last_epoch_seen()) {
425 dout(10) << "mds_beacon " << *m
426 << " ignoring requested state, because mds hasn't seen latest map" << dendl;
427 goto reply;
428 }
429 }
430
431 if (info.laggy()) {
432 _note_beacon(m);
433 return false; // no longer laggy, need to update map.
434 }
435 if (state == MDSMap::STATE_BOOT) {
436 // ignore, already booted.
437 goto ignore;
438 }
439 // is there a state change here?
440 if (info.state != state) {
441 // legal state change?
442 if ((info.state == MDSMap::STATE_STANDBY ||
443 info.state == MDSMap::STATE_STANDBY_REPLAY) && state > 0) {
444 dout(10) << "mds_beacon mds can't activate itself (" << ceph_mds_state_name(info.state)
445 << " -> " << ceph_mds_state_name(state) << ")" << dendl;
446 goto reply;
447 }
448
449 if ((state == MDSMap::STATE_STANDBY || state == MDSMap::STATE_STANDBY_REPLAY)
450 && info.rank != MDS_RANK_NONE)
451 {
452 dout(4) << "mds_beacon MDS can't go back into standby after taking rank: "
453 "held rank " << info.rank << " while requesting state "
454 << ceph_mds_state_name(state) << dendl;
455 goto reply;
456 }
457
458 _note_beacon(m);
459 return false;
460 }
461
462 // Comparing known daemon health with m->get_health()
463 // and return false (i.e. require proposal) if they
464 // do not match, to update our stored
465 if (!(pending_daemon_health[gid] == m->get_health())) {
91327a77 466 dout(10) << __func__ << " health metrics for gid " << gid << " were updated" << dendl;
7c673cae
FG
467 _note_beacon(m);
468 return false;
469 }
470
471 reply:
472 // note time and reply
473 assert(effective_epoch > 0);
474 _note_beacon(m);
475 mon->send_reply(op,
476 new MMDSBeacon(mon->monmap->fsid, m->get_global_id(), m->get_name(),
477 effective_epoch, state, seq,
478 CEPH_FEATURES_SUPPORTED_DEFAULT));
479 return true;
480
481 ignore:
482 // I won't reply this beacon, drop it.
483 mon->no_reply(op);
484 return true;
485}
486
487bool MDSMonitor::preprocess_offload_targets(MonOpRequestRef op)
488{
489 op->mark_mdsmon_event(__func__);
490 MMDSLoadTargets *m = static_cast<MMDSLoadTargets*>(op->get_req());
491 dout(10) << "preprocess_offload_targets " << *m << " from " << m->get_orig_source() << dendl;
28e407b8 492
1adf2230 493 const auto &fsmap = get_fsmap();
7c673cae
FG
494
495 // check privileges, ignore message if fails
496 MonSession *session = m->get_session();
497 if (!session)
1adf2230 498 goto ignore;
7c673cae
FG
499 if (!session->is_capable("mds", MON_CAP_X)) {
500 dout(0) << "preprocess_offload_targets got MMDSLoadTargets from entity with insufficient caps "
501 << session->caps << dendl;
1adf2230 502 goto ignore;
7c673cae
FG
503 }
504
505 if (fsmap.gid_exists(m->global_id) &&
506 m->targets == fsmap.get_info_gid(m->global_id).export_targets)
1adf2230 507 goto ignore;
7c673cae
FG
508
509 return false;
510
1adf2230
AA
511 ignore:
512 mon->no_reply(op);
7c673cae
FG
513 return true;
514}
515
516
517bool MDSMonitor::prepare_update(MonOpRequestRef op)
518{
519 op->mark_mdsmon_event(__func__);
520 PaxosServiceMessage *m = static_cast<PaxosServiceMessage*>(op->get_req());
521 dout(7) << "prepare_update " << *m << dendl;
522
523 switch (m->get_type()) {
524
525 case MSG_MDS_BEACON:
526 return prepare_beacon(op);
527
528 case MSG_MON_COMMAND:
f64942e4
AA
529 try {
530 return prepare_command(op);
531 }
532 catch (const bad_cmd_get& e) {
533 bufferlist bl;
534 mon->reply_command(op, -EINVAL, e.what(), bl, get_last_committed());
535 return true;
536 }
7c673cae
FG
537
538 case MSG_MDS_OFFLOAD_TARGETS:
539 return prepare_offload_targets(op);
540
541 default:
542 ceph_abort();
543 }
544
545 return true;
546}
547
548bool MDSMonitor::prepare_beacon(MonOpRequestRef op)
549{
550 op->mark_mdsmon_event(__func__);
551 MMDSBeacon *m = static_cast<MMDSBeacon*>(op->get_req());
552 // -- this is an update --
553 dout(12) << "prepare_beacon " << *m << " from " << m->get_orig_source_inst() << dendl;
554 entity_addr_t addr = m->get_orig_source_inst().addr;
555 mds_gid_t gid = m->get_global_id();
556 MDSMap::DaemonState state = m->get_state();
557 version_t seq = m->get_seq();
558
28e407b8
AA
559 auto &pending = get_pending_fsmap_writeable();
560
91327a77 561 dout(15) << __func__ << " got health from gid " << gid << " with " << m->get_health().metrics.size() << " metrics." << dendl;
7c673cae
FG
562
563 // Calculate deltas of health metrics created and removed
564 // Do this by type rather than MDSHealthMetric equality, because messages can
565 // change a lot when they include e.g. a number of items.
566 const auto &old_health = pending_daemon_health[gid].metrics;
567 const auto &new_health = m->get_health().metrics;
568
569 std::set<mds_metric_t> old_types;
570 for (const auto &i : old_health) {
571 old_types.insert(i.type);
572 }
573
574 std::set<mds_metric_t> new_types;
575 for (const auto &i : new_health) {
576 new_types.insert(i.type);
577 }
578
579 for (const auto &new_metric: new_health) {
580 if (old_types.count(new_metric.type) == 0) {
28e407b8
AA
581 dout(10) << "MDS health message (" << m->get_orig_source_inst().name
582 << "): " << new_metric.sev << " " << new_metric.message << dendl;
7c673cae
FG
583 }
584 }
585
586 // Log the disappearance of health messages at INFO
587 for (const auto &old_metric : old_health) {
588 if (new_types.count(old_metric.type) == 0) {
589 mon->clog->info() << "MDS health message cleared ("
590 << m->get_orig_source_inst().name << "): " << old_metric.message;
591 }
592 }
593
594 // Store health
595 pending_daemon_health[gid] = m->get_health();
596
597 // boot?
598 if (state == MDSMap::STATE_BOOT) {
599 // zap previous instance of this name?
600 if (g_conf->mds_enforce_unique_name) {
601 bool failed_mds = false;
28e407b8 602 while (mds_gid_t existing = pending.find_mds_gid_by_name(m->get_name())) {
7c673cae
FG
603 if (!mon->osdmon()->is_writeable()) {
604 mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
605 return false;
606 }
d2e6a577 607 const MDSMap::mds_info_t &existing_info =
28e407b8 608 pending.get_info_gid(existing);
d2e6a577 609 mon->clog->info() << existing_info.human_name() << " restarted";
1adf2230 610 fail_mds_gid(pending, existing);
7c673cae
FG
611 failed_mds = true;
612 }
613 if (failed_mds) {
614 assert(mon->osdmon()->is_writeable());
615 request_proposal(mon->osdmon());
616 }
617 }
618
619 // Add this daemon to the map
28e407b8 620 if (pending.mds_roles.count(gid) == 0) {
7c673cae
FG
621 MDSMap::mds_info_t new_info;
622 new_info.global_id = gid;
623 new_info.name = m->get_name();
624 new_info.addr = addr;
625 new_info.mds_features = m->get_mds_features();
626 new_info.state = MDSMap::STATE_STANDBY;
627 new_info.state_seq = seq;
628 new_info.standby_for_rank = m->get_standby_for_rank();
629 new_info.standby_for_name = m->get_standby_for_name();
630 new_info.standby_for_fscid = m->get_standby_for_fscid();
631 new_info.standby_replay = m->get_standby_replay();
28e407b8 632 pending.insert(new_info);
7c673cae
FG
633 }
634
635 // Resolve standby_for_name to a rank
28e407b8 636 const MDSMap::mds_info_t &info = pending.get_info_gid(gid);
7c673cae 637 if (!info.standby_for_name.empty()) {
28e407b8 638 const MDSMap::mds_info_t *leaderinfo = pending.find_by_name(
7c673cae
FG
639 info.standby_for_name);
640 if (leaderinfo && (leaderinfo->rank >= 0)) {
28e407b8 641 const auto &fscid = pending.mds_roles.at(leaderinfo->global_id);
7c673cae 642
28e407b8 643 pending.modify_daemon(gid, [fscid, leaderinfo](
7c673cae
FG
644 MDSMap::mds_info_t *info) {
645 info->standby_for_rank = leaderinfo->rank;
646 info->standby_for_fscid = fscid;
647 });
648 }
649 }
650
651 // initialize the beacon timer
1adf2230
AA
652 auto &beacon = last_beacon[gid];
653 beacon.stamp = mono_clock::now();
654 beacon.seq = seq;
7c673cae
FG
655
656 // new incompat?
28e407b8
AA
657 if (!pending.compat.writeable(m->get_compat())) {
658 dout(10) << " fsmap " << pending.compat
7c673cae
FG
659 << " can't write to new mds' " << m->get_compat()
660 << ", updating fsmap and killing old mds's"
661 << dendl;
28e407b8 662 pending.update_compat(m->get_compat());
7c673cae
FG
663 }
664
665 update_metadata(m->get_global_id(), m->get_sys_info());
666 } else {
667 // state update
91327a77
AA
668
669 if (!pending.gid_exists(gid)) {
670 /* gid has been removed from pending, send null map */
671 dout(5) << "mds_beacon " << *m << " is not in fsmap (state "
672 << ceph_mds_state_name(state) << ")" << dendl;
673
674 /* We can't send an MDSMap this MDS was a part of because we no longer
675 * know which FS it was part of. Nor does this matter. Sending an empty
676 * MDSMap is sufficient for getting the MDS to respawn.
677 */
678 wait_for_finished_proposal(op, new FunctionContext([op, this](int r){
679 if (r >= 0) {
680 const auto& fsmap = get_fsmap();
681 MDSMap null_map;
682 null_map.epoch = fsmap.epoch;
683 null_map.compat = fsmap.compat;
684 mon->send_reply(op, new MMDSMap(mon->monmap->fsid, &null_map));
685 } else {
686 dispatch(op); // try again
687 }
688 }));
689 return true;
690 }
691
28e407b8 692 const MDSMap::mds_info_t &info = pending.get_info_gid(gid);
7c673cae
FG
693 // Old MDS daemons don't mention that they're standby replay until
694 // after they've sent their boot beacon, so update this field.
695 if (info.standby_replay != m->get_standby_replay()) {
28e407b8 696 pending.modify_daemon(info.global_id, [&m](
7c673cae
FG
697 MDSMap::mds_info_t *i)
698 {
699 i->standby_replay = m->get_standby_replay();
700 });
701 }
702
f64942e4
AA
703 if (info.state == MDSMap::STATE_STOPPING &&
704 state != MDSMap::STATE_STOPPING &&
705 state != MDSMap::STATE_STOPPED) {
7c673cae
FG
706 // we can't transition to any other states from STOPPING
707 dout(0) << "got beacon for MDS in STATE_STOPPING, ignoring requested state change"
708 << dendl;
709 _note_beacon(m);
710 return true;
711 }
712
713 if (info.laggy()) {
91327a77 714 dout(1) << "prepare_beacon clearing laggy flag on " << addr << dendl;
28e407b8 715 pending.modify_daemon(info.global_id, [](MDSMap::mds_info_t *info)
7c673cae
FG
716 {
717 info->clear_laggy();
718 }
719 );
720 }
721
91327a77 722 dout(5) << "prepare_beacon mds." << info.rank
7c673cae
FG
723 << " " << ceph_mds_state_name(info.state)
724 << " -> " << ceph_mds_state_name(state)
725 << " standby_for_rank=" << m->get_standby_for_rank()
726 << dendl;
727 if (state == MDSMap::STATE_STOPPED) {
28e407b8
AA
728 const auto fscid = pending.mds_roles.at(gid);
729 const auto &fs = pending.get_filesystem(fscid);
181888fb 730
d2e6a577
FG
731 mon->clog->info() << info.human_name() << " finished "
732 << "deactivating rank " << info.rank << " in filesystem "
733 << fs->mds_map.fs_name << " (now has "
181888fb 734 << fs->mds_map.get_num_in_mds() - 1 << " ranks)";
d2e6a577 735
28e407b8 736 auto erased = pending.stop(gid);
7c673cae
FG
737 erased.push_back(gid);
738
739 for (const auto &erased_gid : erased) {
740 last_beacon.erase(erased_gid);
741 if (pending_daemon_health.count(erased_gid)) {
742 pending_daemon_health.erase(erased_gid);
743 pending_daemon_health_rm.insert(erased_gid);
744 }
745 }
d2e6a577
FG
746
747
7c673cae
FG
748 } else if (state == MDSMap::STATE_DAMAGED) {
749 if (!mon->osdmon()->is_writeable()) {
91327a77 750 dout(1) << __func__ << ": DAMAGED from rank " << info.rank
7c673cae
FG
751 << " waiting for osdmon writeable to blacklist it" << dendl;
752 mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
753 return false;
754 }
755
756 // Record this MDS rank as damaged, so that other daemons
757 // won't try to run it.
91327a77 758 dout(0) << __func__ << ": marking rank "
7c673cae
FG
759 << info.rank << " damaged" << dendl;
760
761 utime_t until = ceph_clock_now();
b32b8144 762 until += g_conf->get_val<double>("mon_mds_blacklist_interval");
7c673cae
FG
763 const auto blacklist_epoch = mon->osdmon()->blacklist(info.addr, until);
764 request_proposal(mon->osdmon());
28e407b8 765 pending.damaged(gid, blacklist_epoch);
7c673cae
FG
766 last_beacon.erase(gid);
767
768 // Respond to MDS, so that it knows it can continue to shut down
769 mon->send_reply(op,
770 new MMDSBeacon(
771 mon->monmap->fsid, m->get_global_id(),
28e407b8 772 m->get_name(), pending.get_epoch(), state, seq,
7c673cae
FG
773 CEPH_FEATURES_SUPPORTED_DEFAULT));
774 } else if (state == MDSMap::STATE_DNE) {
775 if (!mon->osdmon()->is_writeable()) {
91327a77 776 dout(1) << __func__ << ": DNE from rank " << info.rank
7c673cae
FG
777 << " waiting for osdmon writeable to blacklist it" << dendl;
778 mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
779 return false;
780 }
781
1adf2230 782 fail_mds_gid(pending, gid);
7c673cae
FG
783 assert(mon->osdmon()->is_writeable());
784 request_proposal(mon->osdmon());
785
786 // Respond to MDS, so that it knows it can continue to shut down
787 mon->send_reply(op,
788 new MMDSBeacon(
789 mon->monmap->fsid, m->get_global_id(),
28e407b8 790 m->get_name(), pending.get_epoch(), state, seq,
7c673cae
FG
791 CEPH_FEATURES_SUPPORTED_DEFAULT));
792 } else if (info.state == MDSMap::STATE_STANDBY && state != info.state) {
793 // Standby daemons should never modify their own
794 // state. Reject any attempts to do so.
795 derr << "standby " << gid << " attempted to change state to "
796 << ceph_mds_state_name(state) << ", rejecting" << dendl;
797 return true;
798 } else if (info.state != MDSMap::STATE_STANDBY && state != info.state &&
799 !MDSMap::state_transition_valid(info.state, state)) {
800 // Validate state transitions for daemons that hold a rank
801 derr << "daemon " << gid << " (rank " << info.rank << ") "
802 << "reported invalid state transition "
803 << ceph_mds_state_name(info.state) << " -> "
804 << ceph_mds_state_name(state) << dendl;
805 return true;
806 } else {
b32b8144 807 if (info.state != MDSMap::STATE_ACTIVE && state == MDSMap::STATE_ACTIVE) {
28e407b8
AA
808 const auto &fscid = pending.mds_roles.at(gid);
809 const auto &fs = pending.get_filesystem(fscid);
d2e6a577
FG
810 mon->clog->info() << info.human_name() << " is now active in "
811 << "filesystem " << fs->mds_map.fs_name << " as rank "
812 << info.rank;
813 }
b32b8144
FG
814
815 // Made it through special cases and validations, record the
816 // daemon's reported state to the FSMap.
28e407b8 817 pending.modify_daemon(gid, [state, seq](MDSMap::mds_info_t *info) {
b32b8144
FG
818 info->state = state;
819 info->state_seq = seq;
820 });
7c673cae
FG
821 }
822 }
823
91327a77 824 dout(5) << "prepare_beacon pending map now:" << dendl;
28e407b8 825 print_map(pending);
7c673cae
FG
826
827 wait_for_finished_proposal(op, new FunctionContext([op, this](int r){
828 if (r >= 0)
829 _updated(op); // success
830 else if (r == -ECANCELED) {
831 mon->no_reply(op);
832 } else {
833 dispatch(op); // try again
834 }
835 }));
836
837 return true;
838}
839
840bool MDSMonitor::prepare_offload_targets(MonOpRequestRef op)
841{
28e407b8
AA
842 auto &pending = get_pending_fsmap_writeable();
843
7c673cae
FG
844 op->mark_mdsmon_event(__func__);
845 MMDSLoadTargets *m = static_cast<MMDSLoadTargets*>(op->get_req());
846 mds_gid_t gid = m->global_id;
28e407b8 847 if (pending.gid_has_rank(gid)) {
7c673cae 848 dout(10) << "prepare_offload_targets " << gid << " " << m->targets << dendl;
28e407b8 849 pending.update_export_targets(gid, m->targets);
7c673cae
FG
850 } else {
851 dout(10) << "prepare_offload_targets " << gid << " not in map" << dendl;
852 }
91327a77 853 mon->no_reply(op);
7c673cae
FG
854 return true;
855}
856
857bool MDSMonitor::should_propose(double& delay)
858{
859 // delegate to PaxosService to assess whether we should propose
860 return PaxosService::should_propose(delay);
861}
862
863void MDSMonitor::_updated(MonOpRequestRef op)
864{
28e407b8 865 const auto &fsmap = get_fsmap();
7c673cae
FG
866 op->mark_mdsmon_event(__func__);
867 MMDSBeacon *m = static_cast<MMDSBeacon*>(op->get_req());
868 dout(10) << "_updated " << m->get_orig_source() << " " << *m << dendl;
d2e6a577 869 mon->clog->debug() << m->get_orig_source_inst() << " "
7c673cae
FG
870 << ceph_mds_state_name(m->get_state());
871
872 if (m->get_state() == MDSMap::STATE_STOPPED) {
873 // send the map manually (they're out of the map, so they won't get it automatic)
874 MDSMap null_map;
875 null_map.epoch = fsmap.epoch;
876 null_map.compat = fsmap.compat;
877 mon->send_reply(op, new MMDSMap(mon->monmap->fsid, &null_map));
878 } else {
879 mon->send_reply(op, new MMDSBeacon(mon->monmap->fsid,
880 m->get_global_id(),
881 m->get_name(),
882 fsmap.get_epoch(),
883 m->get_state(),
884 m->get_seq(),
885 CEPH_FEATURES_SUPPORTED_DEFAULT));
886 }
887}
888
889void MDSMonitor::on_active()
890{
891 tick();
892 update_logger();
893
28e407b8
AA
894 if (is_leader()) {
895 mon->clog->debug() << "fsmap " << get_fsmap();
224ce89b 896 }
7c673cae
FG
897}
898
899void MDSMonitor::get_health(list<pair<health_status_t, string> >& summary,
900 list<pair<health_status_t, string> > *detail,
901 CephContext* cct) const
902{
28e407b8
AA
903 const auto &fsmap = get_fsmap();
904
7c673cae
FG
905 fsmap.get_health(summary, detail);
906
907 // For each MDS GID...
28e407b8 908 const auto &info_map = fsmap.get_mds_info();
7c673cae
FG
909 for (const auto &i : info_map) {
910 const auto &gid = i.first;
911 const auto &info = i.second;
912
913 // Decode MDSHealth
914 bufferlist bl;
915 mon->store->get(MDS_HEALTH_PREFIX, stringify(gid), bl);
916 if (!bl.length()) {
917 derr << "Missing health data for MDS " << gid << dendl;
918 continue;
919 }
920 MDSHealth health;
921 bufferlist::iterator bl_i = bl.begin();
922 health.decode(bl_i);
923
924 for (const auto &metric : health.metrics) {
d2e6a577 925 const int rank = info.rank;
7c673cae
FG
926 std::ostringstream message;
927 message << "mds" << rank << ": " << metric.message;
928 summary.push_back(std::make_pair(metric.sev, message.str()));
929
930 if (detail) {
931 // There is no way for us to clealy associate detail entries with summary entries (#7192), so
932 // we duplicate the summary message in the detail string and tag the metadata on.
933 std::ostringstream detail_message;
934 detail_message << message.str();
935 if (metric.metadata.size()) {
936 detail_message << "(";
937 auto k = metric.metadata.begin();
938 while (k != metric.metadata.end()) {
939 detail_message << k->first << ": " << k->second;
940 if (boost::next(k) != metric.metadata.end()) {
941 detail_message << ", ";
942 }
943 ++k;
944 }
945 detail_message << ")";
946 }
947 detail->push_back(std::make_pair(metric.sev, detail_message.str()));
948 }
949 }
950 }
951}
952
953void MDSMonitor::dump_info(Formatter *f)
954{
955 f->open_object_section("fsmap");
28e407b8 956 get_fsmap().dump(f);
7c673cae
FG
957 f->close_section();
958
959 f->dump_unsigned("mdsmap_first_committed", get_first_committed());
960 f->dump_unsigned("mdsmap_last_committed", get_last_committed());
961}
962
963bool MDSMonitor::preprocess_command(MonOpRequestRef op)
964{
965 op->mark_mdsmon_event(__func__);
966 MMonCommand *m = static_cast<MMonCommand*>(op->get_req());
967 int r = -1;
968 bufferlist rdata;
969 stringstream ss, ds;
970
971 map<string, cmd_vartype> cmdmap;
1adf2230 972 const auto &fsmap = get_fsmap();
28e407b8 973
7c673cae
FG
974 if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) {
975 // ss has reason for failure
976 string rs = ss.str();
977 mon->reply_command(op, -EINVAL, rs, rdata, get_last_committed());
978 return true;
979 }
980
981 string prefix;
982 cmd_getval(g_ceph_context, cmdmap, "prefix", prefix);
983 string format;
984 cmd_getval(g_ceph_context, cmdmap, "format", format, string("plain"));
1adf2230 985 std::unique_ptr<Formatter> f(Formatter::create(format));
7c673cae
FG
986
987 MonSession *session = m->get_session();
988 if (!session) {
989 mon->reply_command(op, -EACCES, "access denied", rdata, get_last_committed());
990 return true;
991 }
992
993 if (prefix == "mds stat") {
994 if (f) {
995 f->open_object_section("mds_stat");
996 dump_info(f.get());
997 f->close_section();
998 f->flush(ds);
999 } else {
1000 ds << fsmap;
1001 }
1002 r = 0;
1003 } else if (prefix == "mds dump") {
1004 int64_t epocharg;
1005 epoch_t epoch;
1006
28e407b8
AA
1007 const FSMap *fsmapp = &get_fsmap();
1008 FSMap dummy;
7c673cae
FG
1009 if (cmd_getval(g_ceph_context, cmdmap, "epoch", epocharg)) {
1010 epoch = epocharg;
1011 bufferlist b;
1012 int err = get_version(epoch, b);
1013 if (err == -ENOENT) {
7c673cae 1014 r = -ENOENT;
28e407b8 1015 goto out;
7c673cae
FG
1016 } else {
1017 assert(err == 0);
1018 assert(b.length());
28e407b8
AA
1019 dummy.decode(b);
1020 fsmapp = &dummy;
7c673cae
FG
1021 }
1022 }
c07f9fc5 1023
28e407b8
AA
1024 stringstream ds;
1025 const MDSMap *mdsmapp = nullptr;
1026 MDSMap blank;
1027 blank.epoch = fsmapp->epoch;
1028 if (fsmapp->legacy_client_fscid != FS_CLUSTER_ID_NONE) {
1029 mdsmapp = &fsmapp->filesystems.at(fsmapp->legacy_client_fscid)->mds_map;
1030 } else {
1031 mdsmapp = &blank;
7c673cae 1032 }
28e407b8
AA
1033 if (f != NULL) {
1034 f->open_object_section("mdsmap");
1035 mdsmapp->dump(f.get());
1036 f->close_section();
1037 f->flush(ds);
1038 r = 0;
1039 } else {
1040 mdsmapp->print(ds);
1041 r = 0;
1042 }
1043
1044 rdata.append(ds);
1045 ss << "dumped fsmap epoch " << fsmapp->get_epoch();
7c673cae
FG
1046 } else if (prefix == "fs dump") {
1047 int64_t epocharg;
1048 epoch_t epoch;
1049
1adf2230 1050 const FSMap *fsmapp = &fsmap;
28e407b8 1051 FSMap dummy;
7c673cae
FG
1052 if (cmd_getval(g_ceph_context, cmdmap, "epoch", epocharg)) {
1053 epoch = epocharg;
1054 bufferlist b;
1055 int err = get_version(epoch, b);
1056 if (err == -ENOENT) {
7c673cae 1057 r = -ENOENT;
28e407b8 1058 goto out;
7c673cae
FG
1059 } else {
1060 assert(err == 0);
1061 assert(b.length());
28e407b8
AA
1062 dummy.decode(b);
1063 fsmapp = &dummy;
7c673cae
FG
1064 }
1065 }
c07f9fc5 1066
28e407b8
AA
1067 stringstream ds;
1068 if (f != NULL) {
1069 f->open_object_section("fsmap");
1070 fsmapp->dump(f.get());
1071 f->close_section();
1072 f->flush(ds);
1073 r = 0;
1074 } else {
1075 fsmapp->print(ds);
1076 r = 0;
7c673cae 1077 }
28e407b8
AA
1078
1079 rdata.append(ds);
1080 ss << "dumped fsmap epoch " << fsmapp->get_epoch();
7c673cae
FG
1081 } else if (prefix == "mds metadata") {
1082 if (!f)
1083 f.reset(Formatter::create("json-pretty"));
1084
1085 string who;
1086 bool all = !cmd_getval(g_ceph_context, cmdmap, "who", who);
1087 dout(1) << "all = " << all << dendl;
1088 if (all) {
1089 r = 0;
1090 // Dump all MDSs' metadata
1091 const auto all_info = fsmap.get_mds_info();
1092
1093 f->open_array_section("mds_metadata");
1094 for(const auto &i : all_info) {
1095 const auto &info = i.second;
1096
1097 f->open_object_section("mds");
1098 f->dump_string("name", info.name);
1099 std::ostringstream get_err;
1adf2230 1100 r = dump_metadata(fsmap, info.name, f.get(), get_err);
7c673cae
FG
1101 if (r == -EINVAL || r == -ENOENT) {
1102 // Drop error, list what metadata we do have
1103 dout(1) << get_err.str() << dendl;
1104 r = 0;
1105 } else if (r != 0) {
1106 derr << "Unexpected error reading metadata: " << cpp_strerror(r)
1107 << dendl;
1108 ss << get_err.str();
c07f9fc5 1109 f->close_section();
7c673cae
FG
1110 break;
1111 }
1112 f->close_section();
1113 }
1114 f->close_section();
1115 } else {
1116 // Dump a single daemon's metadata
1117 f->open_object_section("mds_metadata");
1adf2230 1118 r = dump_metadata(fsmap, who, f.get(), ss);
7c673cae
FG
1119 f->close_section();
1120 }
1121 f->flush(ds);
31f18b77
FG
1122 } else if (prefix == "mds versions") {
1123 if (!f)
1124 f.reset(Formatter::create("json-pretty"));
1125 count_metadata("ceph_version", f.get());
1126 f->flush(ds);
1127 r = 0;
1128 } else if (prefix == "mds count-metadata") {
1129 if (!f)
1130 f.reset(Formatter::create("json-pretty"));
1131 string field;
1132 cmd_getval(g_ceph_context, cmdmap, "property", field);
1133 count_metadata(field, f.get());
1134 f->flush(ds);
1135 r = 0;
7c673cae
FG
1136 } else if (prefix == "mds getmap") {
1137 epoch_t e;
1138 int64_t epocharg;
1139 bufferlist b;
1140 if (cmd_getval(g_ceph_context, cmdmap, "epoch", epocharg)) {
1141 e = epocharg;
1142 int err = get_version(e, b);
1143 if (err == -ENOENT) {
1144 r = -ENOENT;
1145 } else {
1146 assert(err == 0);
1147 assert(b.length());
1148 FSMap mm;
1149 mm.decode(b);
1150 mm.encode(rdata, m->get_connection()->get_features());
1151 ss << "got fsmap epoch " << mm.get_epoch();
1152 r = 0;
1153 }
1154 } else {
1155 fsmap.encode(rdata, m->get_connection()->get_features());
1156 ss << "got fsmap epoch " << fsmap.get_epoch();
1157 r = 0;
1158 }
1159 } else if (prefix == "mds compat show") {
1160 if (f) {
1161 f->open_object_section("mds_compat");
1162 fsmap.compat.dump(f.get());
1163 f->close_section();
1164 f->flush(ds);
1165 } else {
1166 ds << fsmap.compat;
1167 }
1168 r = 0;
1169 } else if (prefix == "fs get") {
1170 string fs_name;
1171 cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name);
28e407b8 1172 const auto &fs = fsmap.get_filesystem(fs_name);
7c673cae
FG
1173 if (fs == nullptr) {
1174 ss << "filesystem '" << fs_name << "' not found";
1175 r = -ENOENT;
1176 } else {
1177 if (f != nullptr) {
1178 f->open_object_section("filesystem");
1179 fs->dump(f.get());
1180 f->close_section();
1181 f->flush(ds);
1182 r = 0;
1183 } else {
1184 fs->print(ds);
1185 r = 0;
1186 }
1187 }
1188 } else if (prefix == "fs ls") {
1189 if (f) {
1190 f->open_array_section("filesystems");
1adf2230
AA
1191 for (const auto &p : fsmap.filesystems) {
1192 const auto &fs = p.second;
1193 f->open_object_section("filesystem");
1194 {
1195 const MDSMap &mds_map = fs->mds_map;
1196 f->dump_string("name", mds_map.fs_name);
1197 /* Output both the names and IDs of pools, for use by
1198 * humans and machines respectively */
1199 f->dump_string("metadata_pool", mon->osdmon()->osdmap.get_pool_name(
1200 mds_map.metadata_pool));
1201 f->dump_int("metadata_pool_id", mds_map.metadata_pool);
1202 f->open_array_section("data_pool_ids");
1203 for (const auto &id : mds_map.data_pools) {
1204 f->dump_int("data_pool_id", id);
1205 }
1206 f->close_section();
7c673cae 1207
1adf2230
AA
1208 f->open_array_section("data_pools");
1209 for (const auto &id : mds_map.data_pools) {
1210 const auto &name = mon->osdmon()->osdmap.get_pool_name(id);
1211 f->dump_string("data_pool", name);
7c673cae
FG
1212 }
1213 f->close_section();
1214 }
1adf2230 1215 f->close_section();
7c673cae
FG
1216 }
1217 f->close_section();
1218 f->flush(ds);
1219 } else {
28e407b8
AA
1220 for (const auto &p : fsmap.filesystems) {
1221 const auto &fs = p.second;
7c673cae
FG
1222 const MDSMap &mds_map = fs->mds_map;
1223 const string &md_pool_name = mon->osdmon()->osdmap.get_pool_name(
1224 mds_map.metadata_pool);
1225
1226 ds << "name: " << mds_map.fs_name << ", metadata pool: "
1227 << md_pool_name << ", data pools: [";
1adf2230
AA
1228 for (const auto &id : mds_map.data_pools) {
1229 const string &pool_name = mon->osdmon()->osdmap.get_pool_name(id);
7c673cae
FG
1230 ds << pool_name << " ";
1231 }
1232 ds << "]" << std::endl;
1233 }
1234
1235 if (fsmap.filesystems.empty()) {
1236 ds << "No filesystems enabled" << std::endl;
1237 }
1238 }
1239 r = 0;
1240 }
1241
28e407b8 1242out:
7c673cae
FG
1243 if (r != -1) {
1244 rdata.append(ds);
1245 string rs;
1246 getline(ss, rs);
1247 mon->reply_command(op, r, rs, rdata, get_last_committed());
1248 return true;
1249 } else
1250 return false;
1251}
1252
1adf2230 1253bool MDSMonitor::fail_mds_gid(FSMap &fsmap, mds_gid_t gid)
7c673cae 1254{
1adf2230 1255 const MDSMap::mds_info_t &info = fsmap.get_info_gid(gid);
91327a77 1256 dout(1) << "fail_mds_gid " << gid << " mds." << info.name << " role " << info.rank << dendl;
7c673cae
FG
1257
1258 epoch_t blacklist_epoch = 0;
1259 if (info.rank >= 0 && info.state != MDSMap::STATE_STANDBY_REPLAY) {
1260 utime_t until = ceph_clock_now();
b32b8144 1261 until += g_conf->get_val<double>("mon_mds_blacklist_interval");
7c673cae
FG
1262 blacklist_epoch = mon->osdmon()->blacklist(info.addr, until);
1263 }
1264
1adf2230 1265 fsmap.erase(gid, blacklist_epoch);
7c673cae
FG
1266 last_beacon.erase(gid);
1267 if (pending_daemon_health.count(gid)) {
1268 pending_daemon_health.erase(gid);
1269 pending_daemon_health_rm.insert(gid);
1270 }
1271
1272 return blacklist_epoch != 0;
1273}
1274
1adf2230 1275mds_gid_t MDSMonitor::gid_from_arg(const FSMap &fsmap, const std::string &arg, std::ostream &ss)
7c673cae
FG
1276{
1277 // Try parsing as a role
1278 mds_role_t role;
1279 std::ostringstream ignore_err; // Don't spam 'ss' with parse_role errors
1adf2230 1280 int r = fsmap.parse_role(arg, &role, ignore_err);
7c673cae
FG
1281 if (r == 0) {
1282 // See if a GID is assigned to this role
28e407b8 1283 const auto &fs = fsmap.get_filesystem(role.fscid);
7c673cae
FG
1284 assert(fs != nullptr); // parse_role ensures it exists
1285 if (fs->mds_map.is_up(role.rank)) {
1286 dout(10) << __func__ << ": validated rank/GID " << role
1287 << " as a rank" << dendl;
1288 return fs->mds_map.get_mds_info(role.rank).global_id;
1289 }
1290 }
1291
1292 // Try parsing as a gid
1293 std::string err;
1294 unsigned long long maybe_gid = strict_strtoll(arg.c_str(), 10, &err);
1295 if (!err.empty()) {
1296 // Not a role or a GID, try as a daemon name
28e407b8 1297 const MDSMap::mds_info_t *mds_info = fsmap.find_by_name(arg);
7c673cae
FG
1298 if (!mds_info) {
1299 ss << "MDS named '" << arg
1300 << "' does not exist, or is not up";
1301 return MDS_GID_NONE;
1302 }
1303 dout(10) << __func__ << ": resolved MDS name '" << arg
1304 << "' to GID " << mds_info->global_id << dendl;
1305 return mds_info->global_id;
1306 } else {
1307 // Not a role, but parses as a an integer, might be a GID
1308 dout(10) << __func__ << ": treating MDS reference '" << arg
1309 << "' as an integer " << maybe_gid << dendl;
31f18b77 1310
28e407b8 1311 if (fsmap.gid_exists(mds_gid_t(maybe_gid))) {
31f18b77 1312 return mds_gid_t(maybe_gid);
7c673cae
FG
1313 }
1314 }
1315
1316 dout(1) << __func__ << ": rank/GID " << arg
1317 << " not a existent rank or GID" << dendl;
1318 return MDS_GID_NONE;
1319}
1320
1adf2230
AA
1321int MDSMonitor::fail_mds(FSMap &fsmap, std::ostream &ss,
1322 const std::string &arg, MDSMap::mds_info_t *failed_info)
7c673cae 1323{
d2e6a577
FG
1324 assert(failed_info != nullptr);
1325
1adf2230 1326 mds_gid_t gid = gid_from_arg(fsmap, arg, ss);
7c673cae
FG
1327 if (gid == MDS_GID_NONE) {
1328 return 0;
1329 }
1330 if (!mon->osdmon()->is_writeable()) {
1331 return -EAGAIN;
1332 }
d2e6a577
FG
1333
1334 // Take a copy of the info before removing the MDS from the map,
1335 // so that the caller knows which mds (if any) they ended up removing.
1adf2230 1336 *failed_info = fsmap.get_info_gid(gid);
d2e6a577 1337
1adf2230 1338 fail_mds_gid(fsmap, gid);
7c673cae
FG
1339 ss << "failed mds gid " << gid;
1340 assert(mon->osdmon()->is_writeable());
1341 request_proposal(mon->osdmon());
1342 return 0;
1343}
1344
1345bool MDSMonitor::prepare_command(MonOpRequestRef op)
1346{
1347 op->mark_mdsmon_event(__func__);
1348 MMonCommand *m = static_cast<MMonCommand*>(op->get_req());
1349 int r = -EINVAL;
1350 stringstream ss;
1351 bufferlist rdata;
1352
1353 map<string, cmd_vartype> cmdmap;
1354 if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) {
1355 string rs = ss.str();
1356 mon->reply_command(op, -EINVAL, rs, rdata, get_last_committed());
1357 return true;
1358 }
1359
1360 string prefix;
1361 cmd_getval(g_ceph_context, cmdmap, "prefix", prefix);
1362
1363 /* Refuse access if message not associated with a valid session */
1364 MonSession *session = m->get_session();
1365 if (!session) {
1366 mon->reply_command(op, -EACCES, "access denied", rdata, get_last_committed());
1367 return true;
1368 }
1369
28e407b8
AA
1370 auto &pending = get_pending_fsmap_writeable();
1371
c07f9fc5 1372 bool batched_propose = false;
28e407b8 1373 for (const auto &h : handlers) {
7c673cae 1374 if (h->can_handle(prefix)) {
c07f9fc5
FG
1375 batched_propose = h->batched_propose();
1376 if (batched_propose) {
1377 paxos->plug();
1378 }
28e407b8 1379 r = h->handle(mon, pending, op, cmdmap, ss);
c07f9fc5
FG
1380 if (batched_propose) {
1381 paxos->unplug();
1382 }
1383
7c673cae
FG
1384 if (r == -EAGAIN) {
1385 // message has been enqueued for retry; return.
1386 dout(4) << __func__ << " enqueue for retry by prepare_command" << dendl;
1387 return false;
1388 } else {
1389 if (r == 0) {
1390 // On successful updates, print the updated map
28e407b8 1391 print_map(pending);
7c673cae
FG
1392 }
1393 // Successful or not, we're done: respond.
1394 goto out;
1395 }
1396 }
1397 }
1398
1adf2230 1399 r = filesystem_command(pending, op, prefix, cmdmap, ss);
7c673cae
FG
1400 if (r >= 0) {
1401 goto out;
1402 } else if (r == -EAGAIN) {
1403 // Do not reply, the message has been enqueued for retry
1404 dout(4) << __func__ << " enqueue for retry by filesystem_command" << dendl;
1405 return false;
1406 } else if (r != -ENOSYS) {
1407 goto out;
1408 }
1409
1410 // Only handle legacy commands if there is a filesystem configured
28e407b8
AA
1411 if (pending.legacy_client_fscid == FS_CLUSTER_ID_NONE) {
1412 if (pending.filesystems.size() == 0) {
7c673cae
FG
1413 ss << "No filesystem configured: use `ceph fs new` to create a filesystem";
1414 } else {
1415 ss << "No filesystem set for use with legacy commands";
1416 }
1417 r = -EINVAL;
1418 goto out;
1419 }
1420
1adf2230 1421 r = legacy_filesystem_command(pending, op, prefix, cmdmap, ss);
7c673cae
FG
1422
1423 if (r == -ENOSYS && ss.str().empty()) {
1424 ss << "unrecognized command";
1425 }
1426
1427out:
1428 dout(4) << __func__ << " done, r=" << r << dendl;
1429 /* Compose response */
1430 string rs;
1431 getline(ss, rs);
1432
1433 if (r >= 0) {
1434 // success.. delay reply
1435 wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, r, rs,
1436 get_last_committed() + 1));
c07f9fc5
FG
1437 if (batched_propose) {
1438 force_immediate_propose();
1439 }
7c673cae
FG
1440 return true;
1441 } else {
1442 // reply immediately
1443 mon->reply_command(op, r, rs, rdata, get_last_committed());
1444 return false;
1445 }
1446}
1447
7c673cae 1448int MDSMonitor::filesystem_command(
1adf2230 1449 FSMap &fsmap,
7c673cae
FG
1450 MonOpRequestRef op,
1451 std::string const &prefix,
1452 map<string, cmd_vartype> &cmdmap,
1453 std::stringstream &ss)
1454{
1455 dout(4) << __func__ << " prefix='" << prefix << "'" << dendl;
1456 op->mark_mdsmon_event(__func__);
1457 int r = 0;
1458 string whostr;
1459 cmd_getval(g_ceph_context, cmdmap, "who", whostr);
1460
1461 if (prefix == "mds stop" ||
1462 prefix == "mds deactivate") {
7c673cae 1463 mds_role_t role;
1adf2230 1464 r = fsmap.parse_role(whostr, &role, ss);
7c673cae
FG
1465 if (r < 0 ) {
1466 return r;
1467 }
1adf2230 1468 const auto &fs = fsmap.get_filesystem(role.fscid);
7c673cae
FG
1469
1470 if (!fs->mds_map.is_active(role.rank)) {
1471 r = -EEXIST;
1472 ss << "mds." << role << " not active ("
1473 << ceph_mds_state_name(fs->mds_map.get_state(role.rank)) << ")";
1474 } else if (fs->mds_map.get_root() == role.rank ||
1475 fs->mds_map.get_tableserver() == role.rank) {
1476 r = -EINVAL;
1477 ss << "can't tell the root (" << fs->mds_map.get_root()
1478 << ") or tableserver (" << fs->mds_map.get_tableserver()
1479 << ") to deactivate";
31f18b77
FG
1480 } else if (role.rank != fs->mds_map.get_last_in_mds()) {
1481 r = -EINVAL;
1482 ss << "mds." << role << " doesn't have the max rank ("
1483 << fs->mds_map.get_last_in_mds() << ")";
7c673cae
FG
1484 } else if (fs->mds_map.get_num_in_mds() <= size_t(fs->mds_map.get_max_mds())) {
1485 r = -EBUSY;
1486 ss << "must decrease max_mds or else MDS will immediately reactivate";
1487 } else {
1488 r = 0;
1489 mds_gid_t gid = fs->mds_map.up.at(role.rank);
1490 ss << "telling mds." << role << " "
1adf2230 1491 << fsmap.get_info_gid(gid).addr << " to deactivate";
7c673cae 1492
1adf2230 1493 fsmap.modify_daemon(gid, [](MDSMap::mds_info_t *info) {
7c673cae
FG
1494 info->state = MDSMap::STATE_STOPPING;
1495 });
1496 }
1497 } else if (prefix == "mds set_state") {
1498 mds_gid_t gid;
1499 if (!cmd_getval(g_ceph_context, cmdmap, "gid", gid)) {
1500 ss << "error parsing 'gid' value '"
1501 << cmd_vartype_stringify(cmdmap["gid"]) << "'";
1502 return -EINVAL;
1503 }
1504 MDSMap::DaemonState state;
1505 if (!cmd_getval(g_ceph_context, cmdmap, "state", state)) {
1506 ss << "error parsing 'state' string value '"
1507 << cmd_vartype_stringify(cmdmap["state"]) << "'";
1508 return -EINVAL;
1509 }
1adf2230
AA
1510 if (fsmap.gid_exists(gid)) {
1511 fsmap.modify_daemon(gid, [state](MDSMap::mds_info_t *info) {
7c673cae
FG
1512 info->state = state;
1513 });
1514 ss << "set mds gid " << gid << " to state " << state << " "
1515 << ceph_mds_state_name(state);
1516 return 0;
1517 }
1518 } else if (prefix == "mds fail") {
1519 string who;
1520 cmd_getval(g_ceph_context, cmdmap, "who", who);
d2e6a577
FG
1521
1522 MDSMap::mds_info_t failed_info;
1adf2230 1523 r = fail_mds(fsmap, ss, who, &failed_info);
7c673cae
FG
1524 if (r < 0 && r == -EAGAIN) {
1525 mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
1526 return -EAGAIN; // don't propose yet; wait for message to be retried
d2e6a577
FG
1527 } else if (r == 0) {
1528 // Only log if we really did something (not when was already gone)
1529 if (failed_info.global_id != MDS_GID_NONE) {
1530 mon->clog->info() << failed_info.human_name() << " marked failed by "
1531 << op->get_session()->entity_name;
1532 }
7c673cae
FG
1533 }
1534 } else if (prefix == "mds rm") {
1535 mds_gid_t gid;
1536 if (!cmd_getval(g_ceph_context, cmdmap, "gid", gid)) {
1537 ss << "error parsing 'gid' value '"
1538 << cmd_vartype_stringify(cmdmap["gid"]) << "'";
1539 return -EINVAL;
1540 }
1adf2230 1541 if (!fsmap.gid_exists(gid)) {
7c673cae
FG
1542 ss << "mds gid " << gid << " dne";
1543 r = 0;
1544 } else {
1adf2230 1545 const auto &info = fsmap.get_info_gid(gid);
28e407b8 1546 MDSMap::DaemonState state = info.state;
7c673cae 1547 if (state > 0) {
28e407b8
AA
1548 ss << "cannot remove active mds." << info.name
1549 << " rank " << info.rank;
7c673cae
FG
1550 return -EBUSY;
1551 } else {
1adf2230 1552 fsmap.erase(gid, {});
7c673cae
FG
1553 ss << "removed mds gid " << gid;
1554 return 0;
1555 }
1556 }
1557 } else if (prefix == "mds rmfailed") {
1558 string confirm;
1559 if (!cmd_getval(g_ceph_context, cmdmap, "confirm", confirm) ||
1560 confirm != "--yes-i-really-mean-it") {
1561 ss << "WARNING: this can make your filesystem inaccessible! "
1562 "Add --yes-i-really-mean-it if you are sure you wish to continue.";
1563 return -EPERM;
1564 }
1565
1566 std::string role_str;
1567 cmd_getval(g_ceph_context, cmdmap, "who", role_str);
1568 mds_role_t role;
1adf2230 1569 int r = fsmap.parse_role(role_str, &role, ss);
7c673cae
FG
1570 if (r < 0) {
1571 ss << "invalid role '" << role_str << "'";
1572 return -EINVAL;
1573 }
1574
1adf2230 1575 fsmap.modify_filesystem(
7c673cae
FG
1576 role.fscid,
1577 [role](std::shared_ptr<Filesystem> fs)
1578 {
1579 fs->mds_map.failed.erase(role.rank);
1580 });
1581
1582 ss << "removed failed mds." << role;
1583 return 0;
1584 } else if (prefix == "mds compat rm_compat") {
1585 int64_t f;
1586 if (!cmd_getval(g_ceph_context, cmdmap, "feature", f)) {
1587 ss << "error parsing feature value '"
1588 << cmd_vartype_stringify(cmdmap["feature"]) << "'";
1589 return -EINVAL;
1590 }
1adf2230 1591 if (fsmap.compat.compat.contains(f)) {
7c673cae 1592 ss << "removing compat feature " << f;
1adf2230 1593 CompatSet modified = fsmap.compat;
7c673cae 1594 modified.compat.remove(f);
1adf2230 1595 fsmap.update_compat(modified);
7c673cae 1596 } else {
1adf2230 1597 ss << "compat feature " << f << " not present in " << fsmap.compat;
7c673cae
FG
1598 }
1599 r = 0;
1600 } else if (prefix == "mds compat rm_incompat") {
1601 int64_t f;
1602 if (!cmd_getval(g_ceph_context, cmdmap, "feature", f)) {
1603 ss << "error parsing feature value '"
1604 << cmd_vartype_stringify(cmdmap["feature"]) << "'";
1605 return -EINVAL;
1606 }
1adf2230 1607 if (fsmap.compat.incompat.contains(f)) {
7c673cae 1608 ss << "removing incompat feature " << f;
1adf2230 1609 CompatSet modified = fsmap.compat;
7c673cae 1610 modified.incompat.remove(f);
1adf2230 1611 fsmap.update_compat(modified);
7c673cae 1612 } else {
1adf2230 1613 ss << "incompat feature " << f << " not present in " << fsmap.compat;
7c673cae
FG
1614 }
1615 r = 0;
1616 } else if (prefix == "mds repaired") {
1617 std::string role_str;
1618 cmd_getval(g_ceph_context, cmdmap, "rank", role_str);
1619 mds_role_t role;
1adf2230 1620 r = fsmap.parse_role(role_str, &role, ss);
7c673cae
FG
1621 if (r < 0) {
1622 return r;
1623 }
1624
1adf2230 1625 bool modified = fsmap.undamaged(role.fscid, role.rank);
7c673cae 1626 if (modified) {
91327a77 1627 dout(1) << "repaired: restoring rank " << role << dendl;
7c673cae 1628 } else {
91327a77 1629 dout(1) << "repaired: no-op on rank " << role << dendl;
7c673cae
FG
1630 }
1631
1632 r = 0;
1633 } else {
1634 return -ENOSYS;
1635 }
1636
1637 return r;
1638}
1639
1640/**
1641 * Helper to legacy_filesystem_command
1642 */
1adf2230 1643void MDSMonitor::modify_legacy_filesystem(FSMap &fsmap,
7c673cae
FG
1644 std::function<void(std::shared_ptr<Filesystem> )> fn)
1645{
1adf2230
AA
1646 fsmap.modify_filesystem(
1647 fsmap.legacy_client_fscid,
7c673cae
FG
1648 fn
1649 );
1650}
1651
1652
1653
1654/**
1655 * Handle a command that affects the filesystem (i.e. a filesystem
1656 * must exist for the command to act upon).
1657 *
1658 * @retval 0 Command was successfully handled and has side effects
1659 * @retval -EAGAIN Messages has been requeued for retry
1660 * @retval -ENOSYS Unknown command
1661 * @retval < 0 An error has occurred; **ss** may have been set.
1662 */
1663int MDSMonitor::legacy_filesystem_command(
1adf2230 1664 FSMap &fsmap,
7c673cae
FG
1665 MonOpRequestRef op,
1666 std::string const &prefix,
1667 map<string, cmd_vartype> &cmdmap,
1668 std::stringstream &ss)
1669{
1670 dout(4) << __func__ << " prefix='" << prefix << "'" << dendl;
1671 op->mark_mdsmon_event(__func__);
1672 int r = 0;
1673 string whostr;
1674 cmd_getval(g_ceph_context, cmdmap, "who", whostr);
1675
1adf2230 1676 assert (fsmap.legacy_client_fscid != FS_CLUSTER_ID_NONE);
7c673cae
FG
1677
1678 if (prefix == "mds set_max_mds") {
1679 // NOTE: deprecated by "fs set max_mds"
1680 int64_t maxmds;
1681 if (!cmd_getval(g_ceph_context, cmdmap, "maxmds", maxmds) || maxmds <= 0) {
1682 return -EINVAL;
1683 }
1684
1685 const MDSMap& mdsmap =
1adf2230 1686 fsmap.filesystems.at(fsmap.legacy_client_fscid)->mds_map;
7c673cae
FG
1687
1688 if (!mdsmap.allows_multimds() &&
1689 maxmds > mdsmap.get_max_mds() &&
1690 maxmds > 1) {
1691 ss << "multi-MDS clusters are not enabled; set 'allow_multimds' to enable";
1692 return -EINVAL;
1693 }
1694
1695 if (maxmds > MAX_MDS) {
1696 ss << "may not have more than " << MAX_MDS << " MDS ranks";
1697 return -EINVAL;
1698 }
1699
1adf2230 1700 modify_legacy_filesystem(fsmap,
7c673cae
FG
1701 [maxmds](std::shared_ptr<Filesystem> fs)
1702 {
1703 fs->mds_map.set_max_mds(maxmds);
1704 });
1705
1706 r = 0;
1707 ss << "max_mds = " << maxmds;
1708 } else if (prefix == "mds cluster_down") {
1709 // NOTE: deprecated by "fs set cluster_down"
1adf2230 1710 modify_legacy_filesystem(fsmap,
7c673cae
FG
1711 [](std::shared_ptr<Filesystem> fs)
1712 {
1713 fs->mds_map.set_flag(CEPH_MDSMAP_DOWN);
1714 });
1715 ss << "marked fsmap DOWN";
1716 r = 0;
1717 } else if (prefix == "mds cluster_up") {
1718 // NOTE: deprecated by "fs set cluster_up"
1adf2230 1719 modify_legacy_filesystem(fsmap,
7c673cae
FG
1720 [](std::shared_ptr<Filesystem> fs)
1721 {
1722 fs->mds_map.clear_flag(CEPH_MDSMAP_DOWN);
1723 });
1724 ss << "unmarked fsmap DOWN";
1725 r = 0;
1726 } else {
1727 return -ENOSYS;
1728 }
1729
1730 return r;
1731}
1732
1733
1734void MDSMonitor::check_subs()
1735{
1736 std::list<std::string> types;
1737
1738 // Subscriptions may be to "mdsmap" (MDS and legacy clients),
1739 // "mdsmap.<namespace>", or to "fsmap" for the full state of all
1740 // filesystems. Build a list of all the types we service
1741 // subscriptions for.
1742 types.push_back("fsmap");
1743 types.push_back("fsmap.user");
1744 types.push_back("mdsmap");
28e407b8
AA
1745 for (const auto &p : get_fsmap().filesystems) {
1746 const auto &fscid = p.first;
7c673cae
FG
1747 std::ostringstream oss;
1748 oss << "mdsmap." << fscid;
1749 types.push_back(oss.str());
1750 }
1751
1752 for (const auto &type : types) {
1753 if (mon->session_map.subs.count(type) == 0)
1754 continue;
1755 xlist<Subscription*>::iterator p = mon->session_map.subs[type]->begin();
1756 while (!p.end()) {
1757 Subscription *sub = *p;
1758 ++p;
1759 check_sub(sub);
1760 }
1761 }
1762}
1763
1764
1765void MDSMonitor::check_sub(Subscription *sub)
1766{
1767 dout(20) << __func__ << ": " << sub->type << dendl;
1768
28e407b8
AA
1769 const auto &fsmap = get_fsmap();
1770
7c673cae
FG
1771 if (sub->type == "fsmap") {
1772 if (sub->next <= fsmap.get_epoch()) {
1773 sub->session->con->send_message(new MFSMap(mon->monmap->fsid, fsmap));
1774 if (sub->onetime) {
1775 mon->session_map.remove_sub(sub);
1776 } else {
1777 sub->next = fsmap.get_epoch() + 1;
1778 }
1779 }
1780 } else if (sub->type == "fsmap.user") {
1781 if (sub->next <= fsmap.get_epoch()) {
1782 FSMapUser fsmap_u;
1783 fsmap_u.epoch = fsmap.get_epoch();
1784 fsmap_u.legacy_client_fscid = fsmap.legacy_client_fscid;
28e407b8
AA
1785 for (const auto &p : fsmap.filesystems) {
1786 FSMapUser::fs_info_t& fs_info = fsmap_u.filesystems[p.second->fscid];
1787 fs_info.cid = p.second->fscid;
1788 fs_info.name = p.second->mds_map.fs_name;
7c673cae
FG
1789 }
1790 sub->session->con->send_message(new MFSMapUser(mon->monmap->fsid, fsmap_u));
1791 if (sub->onetime) {
1792 mon->session_map.remove_sub(sub);
1793 } else {
1794 sub->next = fsmap.get_epoch() + 1;
1795 }
1796 }
1797 } else if (sub->type.compare(0, 6, "mdsmap") == 0) {
1798 if (sub->next > fsmap.get_epoch()) {
1799 return;
1800 }
1801
1802 const bool is_mds = sub->session->inst.name.is_mds();
1803 mds_gid_t mds_gid = MDS_GID_NONE;
1804 fs_cluster_id_t fscid = FS_CLUSTER_ID_NONE;
1805 if (is_mds) {
1806 // What (if any) namespace are you assigned to?
1807 auto mds_info = fsmap.get_mds_info();
1adf2230
AA
1808 for (const auto &p : mds_info) {
1809 if (p.second.addr == sub->session->inst.addr) {
1810 mds_gid = p.first;
7c673cae
FG
1811 fscid = fsmap.mds_roles.at(mds_gid);
1812 }
1813 }
1814 } else {
1815 // You're a client. Did you request a particular
1816 // namespace?
1817 if (sub->type.find("mdsmap.") == 0) {
1818 auto namespace_id_str = sub->type.substr(std::string("mdsmap.").size());
1819 dout(10) << __func__ << ": namespace_id " << namespace_id_str << dendl;
1820 std::string err;
1821 fscid = strict_strtoll(namespace_id_str.c_str(), 10, &err);
1822 if (!err.empty()) {
1823 // Client asked for a non-existent namespace, send them nothing
1824 dout(1) << "Invalid client subscription '" << sub->type
1825 << "'" << dendl;
1826 return;
1827 }
1828 if (fsmap.filesystems.count(fscid) == 0) {
1829 // Client asked for a non-existent namespace, send them nothing
1830 // TODO: something more graceful for when a client has a filesystem
1831 // mounted, and the fileysstem is deleted. Add a "shut down you fool"
1832 // flag to MMDSMap?
1833 dout(1) << "Client subscribed to non-existent namespace '" <<
1834 fscid << "'" << dendl;
1835 return;
1836 }
1837 } else {
1838 // Unqualified request for "mdsmap": give it the one marked
1839 // for use by legacy clients.
1840 if (fsmap.legacy_client_fscid != FS_CLUSTER_ID_NONE) {
1841 fscid = fsmap.legacy_client_fscid;
1842 } else {
1843 dout(1) << "Client subscribed for legacy filesystem but "
1844 "none is configured" << dendl;
1845 return;
1846 }
1847 }
1848 }
1849 dout(10) << __func__ << ": is_mds=" << is_mds << ", fscid= " << fscid << dendl;
1850
1851 // Work out the effective latest epoch
28e407b8 1852 const MDSMap *mds_map = nullptr;
7c673cae
FG
1853 MDSMap null_map;
1854 null_map.compat = fsmap.compat;
1855 if (fscid == FS_CLUSTER_ID_NONE) {
1856 // For a client, we should have already dropped out
1857 assert(is_mds);
1858
28e407b8
AA
1859 auto it = fsmap.standby_daemons.find(mds_gid);
1860 if (it != fsmap.standby_daemons.end()) {
7c673cae 1861 // For an MDS, we need to feed it an MDSMap with its own state in
28e407b8
AA
1862 null_map.mds_info[mds_gid] = it->second;
1863 null_map.epoch = fsmap.standby_epochs.at(mds_gid);
7c673cae
FG
1864 } else {
1865 null_map.epoch = fsmap.epoch;
1866 }
1867 mds_map = &null_map;
1868 } else {
1869 // Check the effective epoch
28e407b8 1870 mds_map = &fsmap.get_filesystem(fscid)->mds_map;
7c673cae
FG
1871 }
1872
1873 assert(mds_map != nullptr);
1874 dout(10) << __func__ << " selected MDS map epoch " <<
1875 mds_map->epoch << " for namespace " << fscid << " for subscriber "
1876 << sub->session->inst.name << " who wants epoch " << sub->next << dendl;
1877
1878 if (sub->next > mds_map->epoch) {
1879 return;
1880 }
1881 auto msg = new MMDSMap(mon->monmap->fsid, mds_map);
1882
1883 sub->session->con->send_message(msg);
1884 if (sub->onetime) {
1885 mon->session_map.remove_sub(sub);
1886 } else {
1887 sub->next = mds_map->get_epoch() + 1;
1888 }
1889 }
1890}
1891
1892
1893void MDSMonitor::update_metadata(mds_gid_t gid,
1894 const map<string, string>& metadata)
1895{
1896 if (metadata.empty()) {
1897 return;
1898 }
1899 pending_metadata[gid] = metadata;
1900
1901 MonitorDBStore::TransactionRef t = paxos->get_pending_transaction();
1902 bufferlist bl;
1903 ::encode(pending_metadata, bl);
1904 t->put(MDS_METADATA_PREFIX, "last_metadata", bl);
1905 paxos->trigger_propose();
1906}
1907
1adf2230 1908void MDSMonitor::remove_from_metadata(const FSMap &fsmap, MonitorDBStore::TransactionRef t)
7c673cae
FG
1909{
1910 bool update = false;
1adf2230
AA
1911 for (auto it = pending_metadata.begin(); it != pending_metadata.end(); ) {
1912 if (!fsmap.gid_exists(it->first)) {
1913 it = pending_metadata.erase(it);
7c673cae
FG
1914 update = true;
1915 } else {
1adf2230 1916 ++it;
7c673cae
FG
1917 }
1918 }
1919 if (!update)
1920 return;
1921 bufferlist bl;
1922 ::encode(pending_metadata, bl);
1923 t->put(MDS_METADATA_PREFIX, "last_metadata", bl);
1924}
1925
1926int MDSMonitor::load_metadata(map<mds_gid_t, Metadata>& m)
1927{
1928 bufferlist bl;
1929 int r = mon->store->get(MDS_METADATA_PREFIX, "last_metadata", bl);
1930 if (r) {
1931 dout(1) << "Unable to load 'last_metadata'" << dendl;
1932 return r;
1933 }
1934
1935 bufferlist::iterator it = bl.begin();
1936 ::decode(m, it);
1937 return 0;
1938}
1939
1adf2230 1940void MDSMonitor::count_metadata(const std::string &field, map<string,int> *out)
31f18b77 1941{
31f18b77
FG
1942 map<mds_gid_t,Metadata> meta;
1943 load_metadata(meta);
1944 for (auto& p : meta) {
1945 auto q = p.second.find(field);
1946 if (q == p.second.end()) {
c07f9fc5 1947 (*out)["unknown"]++;
31f18b77 1948 } else {
c07f9fc5 1949 (*out)[q->second]++;
31f18b77
FG
1950 }
1951 }
c07f9fc5
FG
1952}
1953
1adf2230 1954void MDSMonitor::count_metadata(const std::string &field, Formatter *f)
c07f9fc5
FG
1955{
1956 map<string,int> by_val;
1957 count_metadata(field, &by_val);
31f18b77
FG
1958 f->open_object_section(field.c_str());
1959 for (auto& p : by_val) {
1960 f->dump_int(p.first.c_str(), p.second);
1961 }
1962 f->close_section();
1963}
1964
1adf2230
AA
1965int MDSMonitor::dump_metadata(const FSMap& fsmap, const std::string &who,
1966 Formatter *f, ostream& err)
7c673cae
FG
1967{
1968 assert(f);
1969
1adf2230 1970 mds_gid_t gid = gid_from_arg(fsmap, who, err);
7c673cae
FG
1971 if (gid == MDS_GID_NONE) {
1972 return -EINVAL;
1973 }
1974
1975 map<mds_gid_t, Metadata> metadata;
1976 if (int r = load_metadata(metadata)) {
1977 err << "Unable to load 'last_metadata'";
1978 return r;
1979 }
1980
1981 if (!metadata.count(gid)) {
1982 return -ENOENT;
1983 }
1984 const Metadata& m = metadata[gid];
1985 for (Metadata::const_iterator p = m.begin(); p != m.end(); ++p) {
1986 f->dump_string(p->first.c_str(), p->second);
1987 }
1988 return 0;
1989}
1990
1991int MDSMonitor::print_nodes(Formatter *f)
1992{
1993 assert(f);
1994
1adf2230
AA
1995 const auto &fsmap = get_fsmap();
1996
7c673cae
FG
1997 map<mds_gid_t, Metadata> metadata;
1998 if (int r = load_metadata(metadata)) {
1999 return r;
2000 }
2001
2002 map<string, list<int> > mdses; // hostname => rank
1adf2230
AA
2003 for (const auto &p : metadata) {
2004 const mds_gid_t& gid = p.first;
2005 const Metadata& m = p.second;
7c673cae
FG
2006 Metadata::const_iterator hostname = m.find("hostname");
2007 if (hostname == m.end()) {
2008 // not likely though
2009 continue;
2010 }
1adf2230 2011 if (!fsmap.gid_exists(gid)) {
7c673cae
FG
2012 dout(5) << __func__ << ": GID " << gid << " not existent" << dendl;
2013 continue;
2014 }
1adf2230 2015 const MDSMap::mds_info_t& mds_info = fsmap.get_info_gid(gid);
7c673cae
FG
2016 // FIXME: include filesystem name with rank here
2017 mdses[hostname->second].push_back(mds_info.rank);
2018 }
2019
2020 dump_services(f, mdses, "mds");
2021 return 0;
2022}
2023
2024/**
2025 * If a cluster is undersized (with respect to max_mds), then
2026 * attempt to find daemons to grow it.
2027 */
1adf2230 2028bool MDSMonitor::maybe_expand_cluster(FSMap &fsmap, fs_cluster_id_t fscid)
7c673cae 2029{
1adf2230
AA
2030 auto fs = fsmap.get_filesystem(fscid);
2031 auto &mds_map = fs->mds_map;
7c673cae
FG
2032
2033 if (fs->mds_map.test_flag(CEPH_MDSMAP_DOWN)) {
1adf2230 2034 return false;
7c673cae
FG
2035 }
2036
1adf2230
AA
2037 int in = mds_map.get_num_in_mds();
2038 int max = mds_map.get_max_mds();
2039
2040 dout(20) << __func__ << " in " << in << " max " << max << dendl;
2041
2042 if (in < max) {
7c673cae
FG
2043 mds_rank_t mds = mds_rank_t(0);
2044 string name;
1adf2230 2045 while (mds_map.is_in(mds)) {
7c673cae
FG
2046 mds++;
2047 }
1adf2230 2048 mds_gid_t newgid = fsmap.find_replacement_for({fscid, mds},
7c673cae
FG
2049 name, g_conf->mon_force_standby_active);
2050 if (newgid == MDS_GID_NONE) {
1adf2230 2051 return false;
7c673cae
FG
2052 }
2053
1adf2230 2054 const auto &new_info = fsmap.get_info_gid(newgid);
d2e6a577 2055 dout(1) << "assigned standby " << new_info.addr
7c673cae 2056 << " as mds." << mds << dendl;
d2e6a577
FG
2057
2058 mon->clog->info() << new_info.human_name() << " assigned to "
1adf2230
AA
2059 "filesystem " << mds_map.fs_name << " as rank "
2060 << mds << " (now has " << mds_map.get_num_in_mds() + 1
d2e6a577 2061 << " ranks)";
1adf2230
AA
2062 fsmap.promote(newgid, fs, mds);
2063 return true;
7c673cae
FG
2064 }
2065
1adf2230 2066 return false;
7c673cae
FG
2067}
2068
2069
2070/**
2071 * If a daemon is laggy, and a suitable replacement
2072 * is available, fail this daemon (remove from map) and pass its
2073 * role to another daemon.
2074 */
1adf2230
AA
2075void MDSMonitor::maybe_replace_gid(FSMap &fsmap, mds_gid_t gid,
2076 const MDSMap::mds_info_t& info, bool *mds_propose, bool *osd_propose)
7c673cae
FG
2077{
2078 assert(mds_propose != nullptr);
2079 assert(osd_propose != nullptr);
2080
1adf2230 2081 const auto fscid = fsmap.mds_roles.at(gid);
7c673cae 2082
31f18b77
FG
2083 // We will only take decisive action (replacing/removing a daemon)
2084 // if we have some indicating that some other daemon(s) are successfully
2085 // getting beacons through recently.
1adf2230
AA
2086 mono_time latest_beacon = mono_clock::zero();
2087 for (const auto &p : last_beacon) {
2088 latest_beacon = std::max(p.second.stamp, latest_beacon);
31f18b77 2089 }
1adf2230
AA
2090 mono_time now = mono_clock::now();
2091 chrono::duration<double> since = now-latest_beacon;
2092 const bool may_replace = since.count() <
2093 std::max(g_conf->mds_beacon_interval, g_conf->mds_beacon_grace * 0.5);
31f18b77 2094
7c673cae
FG
2095 // are we in?
2096 // and is there a non-laggy standby that can take over for us?
2097 mds_gid_t sgid;
2098 if (info.rank >= 0 &&
2099 info.state != MDSMap::STATE_STANDBY &&
2100 info.state != MDSMap::STATE_STANDBY_REPLAY &&
31f18b77 2101 may_replace &&
1adf2230
AA
2102 !fsmap.get_filesystem(fscid)->mds_map.test_flag(CEPH_MDSMAP_DOWN) &&
2103 (sgid = fsmap.find_replacement_for({fscid, info.rank}, info.name,
7c673cae
FG
2104 g_conf->mon_force_standby_active)) != MDS_GID_NONE)
2105 {
2106
1adf2230 2107 MDSMap::mds_info_t si = fsmap.get_info_gid(sgid);
91327a77 2108 dout(1) << " replacing " << gid << " " << info.addr << " mds."
7c673cae
FG
2109 << info.rank << "." << info.inc
2110 << " " << ceph_mds_state_name(info.state)
2111 << " with " << sgid << "/" << si.name << " " << si.addr << dendl;
2112
d2e6a577 2113 mon->clog->warn() << info.human_name()
31f18b77
FG
2114 << " is not responding, replacing it "
2115 << "as rank " << info.rank
d2e6a577 2116 << " with standby " << si.human_name();
31f18b77 2117
7c673cae 2118 // Remember what NS the old one was in
1adf2230 2119 const fs_cluster_id_t fscid = fsmap.mds_roles.at(gid);
7c673cae
FG
2120
2121 // Remove the old one
1adf2230 2122 *osd_propose |= fail_mds_gid(fsmap, gid);
7c673cae
FG
2123
2124 // Promote the replacement
1adf2230
AA
2125 auto fs = fsmap.filesystems.at(fscid);
2126 fsmap.promote(sgid, fs, info.rank);
7c673cae
FG
2127
2128 *mds_propose = true;
31f18b77
FG
2129 } else if ((info.state == MDSMap::STATE_STANDBY_REPLAY ||
2130 info.state == MDSMap::STATE_STANDBY) && may_replace) {
91327a77 2131 dout(1) << " failing and removing " << gid << " " << info.addr << " mds." << info.rank
7c673cae
FG
2132 << "." << info.inc << " " << ceph_mds_state_name(info.state)
2133 << dendl;
d2e6a577
FG
2134 mon->clog->info() << "Standby " << info.human_name() << " is not "
2135 "responding, dropping it";
1adf2230 2136 fail_mds_gid(fsmap, gid);
7c673cae
FG
2137 *mds_propose = true;
2138 } else if (!info.laggy()) {
91327a77 2139 dout(1) << " marking " << gid << " " << info.addr << " mds." << info.rank << "." << info.inc
7c673cae
FG
2140 << " " << ceph_mds_state_name(info.state)
2141 << " laggy" << dendl;
1adf2230 2142 fsmap.modify_daemon(info.global_id, [](MDSMap::mds_info_t *info) {
7c673cae
FG
2143 info->laggy_since = ceph_clock_now();
2144 });
2145 *mds_propose = true;
2146 }
2147}
2148
1adf2230 2149bool MDSMonitor::maybe_promote_standby(FSMap &fsmap, std::shared_ptr<Filesystem> &fs)
7c673cae
FG
2150{
2151 assert(!fs->mds_map.test_flag(CEPH_MDSMAP_DOWN));
2152
2153 bool do_propose = false;
2154
2155 // have a standby take over?
2156 set<mds_rank_t> failed;
2157 fs->mds_map.get_failed_mds_set(failed);
2158 if (!failed.empty()) {
2159 set<mds_rank_t>::iterator p = failed.begin();
2160 while (p != failed.end()) {
2161 mds_rank_t f = *p++;
1adf2230 2162 mds_gid_t sgid = fsmap.find_replacement_for({fs->fscid, f}, {},
7c673cae
FG
2163 g_conf->mon_force_standby_active);
2164 if (sgid) {
1adf2230 2165 const MDSMap::mds_info_t si = fsmap.get_info_gid(sgid);
91327a77 2166 dout(1) << " taking over failed mds." << f << " with " << sgid
7c673cae 2167 << "/" << si.name << " " << si.addr << dendl;
d2e6a577
FG
2168 mon->clog->info() << "Standby " << si.human_name()
2169 << " assigned to filesystem " << fs->mds_map.fs_name
2170 << " as rank " << f;
2171
1adf2230 2172 fsmap.promote(sgid, fs, f);
7c673cae
FG
2173 do_propose = true;
2174 }
2175 }
2176 } else {
2177 // There were no failures to replace, so try using any available standbys
2178 // as standby-replay daemons.
2179
2180 // Take a copy of the standby GIDs so that we can iterate over
2181 // them while perhaps-modifying standby_daemons during the loop
2182 // (if we promote anyone they are removed from standby_daemons)
2183 std::vector<mds_gid_t> standby_gids;
1adf2230 2184 for (const auto &j : fsmap.standby_daemons) {
7c673cae
FG
2185 standby_gids.push_back(j.first);
2186 }
2187
2188 for (const auto &gid : standby_gids) {
1adf2230 2189 const auto &info = fsmap.standby_daemons.at(gid);
7c673cae
FG
2190 assert(info.state == MDSMap::STATE_STANDBY);
2191
2192 if (!info.standby_replay) {
2193 continue;
2194 }
2195
2196 /*
2197 * This mds is standby but has no rank assigned.
2198 * See if we can find it somebody to shadow
2199 */
2200 dout(20) << "gid " << gid << " is standby and following nobody" << dendl;
2201
2202 // standby for someone specific?
2203 if (info.standby_for_rank >= 0) {
2204 // The mds_info_t may or may not tell us exactly which filesystem
2205 // the standby_for_rank refers to: lookup via legacy_client_fscid
2206 mds_role_t target_role = {
2207 info.standby_for_fscid == FS_CLUSTER_ID_NONE ?
1adf2230 2208 fsmap.legacy_client_fscid : info.standby_for_fscid,
7c673cae
FG
2209 info.standby_for_rank};
2210
2211 // It is possible that the map contains a standby_for_fscid
2212 // that doesn't correspond to an existing filesystem, especially
2213 // if we loaded from a version with a bug (#17466)
2214 if (info.standby_for_fscid != FS_CLUSTER_ID_NONE
1adf2230 2215 && !fsmap.filesystem_exists(info.standby_for_fscid)) {
7c673cae
FG
2216 derr << "gid " << gid << " has invalid standby_for_fscid "
2217 << info.standby_for_fscid << dendl;
2218 continue;
2219 }
2220
2221 // If we managed to resolve a full target role
2222 if (target_role.fscid != FS_CLUSTER_ID_NONE) {
1adf2230 2223 const auto &fs = fsmap.get_filesystem(target_role.fscid);
7c673cae 2224 if (fs->mds_map.is_followable(target_role.rank)) {
1adf2230 2225 do_propose |= try_standby_replay(fsmap, info, *fs,
7c673cae
FG
2226 fs->mds_map.get_info(target_role.rank));
2227 }
2228 }
2229
2230 continue;
2231 }
2232
2233 // check everyone
1adf2230 2234 for (const auto &p : fsmap.filesystems) {
28e407b8
AA
2235 if (info.standby_for_fscid != FS_CLUSTER_ID_NONE &&
2236 info.standby_for_fscid != p.first)
2237 continue;
2238
2239 bool assigned = false;
2240 const auto &fs = p.second;
2241 const MDSMap &mds_map = fs->mds_map;
2242 for (const auto &mds_i : mds_map.mds_info) {
2243 const MDSMap::mds_info_t &cand_info = mds_i.second;
7c673cae
FG
2244 if (cand_info.rank >= 0 && mds_map.is_followable(cand_info.rank)) {
2245 if ((info.standby_for_name.length() && info.standby_for_name != cand_info.name) ||
2246 info.standby_for_rank != MDS_RANK_NONE) {
2247 continue; // we're supposed to follow someone else
2248 }
2249
1adf2230 2250 if (try_standby_replay(fsmap, info, *fs, cand_info)) {
28e407b8 2251 assigned = true;
7c673cae
FG
2252 break;
2253 }
7c673cae
FG
2254 }
2255 }
28e407b8
AA
2256 if (assigned) {
2257 do_propose = true;
2258 break;
2259 }
7c673cae
FG
2260 }
2261 }
2262 }
2263
2264 return do_propose;
2265}
2266
2267void MDSMonitor::tick()
2268{
2269 // make sure mds's are still alive
2270 // ...if i am an active leader
28e407b8 2271
1adf2230 2272 if (!is_active() || !is_leader()) return;
28e407b8
AA
2273
2274 auto &pending = get_pending_fsmap_writeable();
7c673cae 2275
28e407b8 2276 bool do_propose = false;
7c673cae 2277
28e407b8 2278 do_propose |= pending.check_health();
7c673cae
FG
2279
2280 // expand mds cluster (add new nodes to @in)?
28e407b8 2281 for (auto &p : pending.filesystems) {
1adf2230 2282 do_propose |= maybe_expand_cluster(pending, p.second->fscid);
7c673cae
FG
2283 }
2284
1adf2230
AA
2285 mono_time now = mono_clock::now();
2286 if (last_tick == decltype(last_tick)::min()) {
7c673cae
FG
2287 last_tick = now;
2288 }
1adf2230 2289 chrono::duration<double> since_last = now-last_tick;
7c673cae 2290
1adf2230
AA
2291 if (since_last.count() >
2292 (g_conf->mds_beacon_grace - g_conf->mds_beacon_interval)) {
7c673cae
FG
2293 // This case handles either local slowness (calls being delayed
2294 // for whatever reason) or cluster election slowness (a long gap
2295 // between calls while an election happened)
91327a77 2296 dout(1) << __func__ << ": resetting beacon timeouts due to mon delay "
7c673cae 2297 "(slow election?) of " << now - last_tick << " seconds" << dendl;
1adf2230
AA
2298 for (auto &p : last_beacon) {
2299 p.second.stamp = now;
7c673cae
FG
2300 }
2301 }
2302
2303 last_tick = now;
2304
7c673cae 2305 // make sure last_beacon is fully populated
28e407b8 2306 for (auto &p : pending.mds_roles) {
7c673cae 2307 auto &gid = p.first;
1adf2230
AA
2308 last_beacon.emplace(std::piecewise_construct,
2309 std::forward_as_tuple(gid),
2310 std::forward_as_tuple(mono_clock::now(), 0));
7c673cae
FG
2311 }
2312
1adf2230
AA
2313
2314 // check beacon timestamps
c07f9fc5
FG
2315 bool propose_osdmap = false;
2316 bool osdmap_writeable = mon->osdmon()->is_writeable();
1adf2230
AA
2317 for (auto it = last_beacon.begin(); it != last_beacon.end(); ) {
2318 mds_gid_t gid = it->first;
2319 auto beacon_info = it->second;
2320 chrono::duration<double> since_last = now-beacon_info.stamp;
7c673cae 2321
28e407b8 2322 if (!pending.gid_exists(gid)) {
c07f9fc5 2323 // clean it out
1adf2230 2324 it = last_beacon.erase(it);
c07f9fc5 2325 continue;
7c673cae
FG
2326 }
2327
1adf2230
AA
2328
2329 if (since_last.count() >= g_conf->mds_beacon_grace) {
28e407b8 2330 auto &info = pending.get_info_gid(gid);
c07f9fc5
FG
2331 dout(1) << "no beacon from mds." << info.rank << "." << info.inc
2332 << " (gid: " << gid << " addr: " << info.addr
2333 << " state: " << ceph_mds_state_name(info.state) << ")"
1adf2230 2334 << " since " << since_last.count() << "s" << dendl;
c07f9fc5
FG
2335 // If the OSDMap is writeable, we can blacklist things, so we can
2336 // try failing any laggy MDS daemons. Consider each one for failure.
2337 if (osdmap_writeable) {
1adf2230 2338 maybe_replace_gid(pending, gid, info, &do_propose, &propose_osdmap);
c07f9fc5 2339 }
7c673cae 2340 }
1adf2230
AA
2341
2342 ++it;
7c673cae 2343 }
c07f9fc5
FG
2344 if (propose_osdmap) {
2345 request_proposal(mon->osdmon());
2346 }
7c673cae 2347
28e407b8
AA
2348 for (auto &p : pending.filesystems) {
2349 auto &fs = p.second;
7c673cae 2350 if (!fs->mds_map.test_flag(CEPH_MDSMAP_DOWN)) {
1adf2230 2351 do_propose |= maybe_promote_standby(pending, fs);
7c673cae
FG
2352 }
2353 }
2354
2355 if (do_propose) {
2356 propose_pending();
2357 }
2358}
2359
2360/**
2361 * finfo: the would-be follower
2362 * leader_fs: the Filesystem containing the would-be leader
2363 * ainfo: the would-be leader
2364 */
2365bool MDSMonitor::try_standby_replay(
1adf2230 2366 FSMap &fsmap,
7c673cae
FG
2367 const MDSMap::mds_info_t& finfo,
2368 const Filesystem &leader_fs,
2369 const MDSMap::mds_info_t& ainfo)
2370{
2371 // someone else already following?
2372 if (leader_fs.has_standby_replay(ainfo.global_id)) {
2373 dout(20) << " mds." << ainfo.rank << " already has a follower" << dendl;
2374 return false;
2375 } else {
2376 // Assign the new role to the standby
2377 dout(10) << " setting to follow mds rank " << ainfo.rank << dendl;
1adf2230 2378 fsmap.assign_standby_replay(finfo.global_id, leader_fs.fscid, ainfo.rank);
7c673cae
FG
2379 return true;
2380 }
2381}
2382
2383MDSMonitor::MDSMonitor(Monitor *mn, Paxos *p, string service_name)
2384 : PaxosService(mn, p, service_name)
2385{
c07f9fc5 2386 handlers = FileSystemCommandHandler::load(p);
7c673cae
FG
2387}
2388
2389void MDSMonitor::on_restart()
2390{
2391 // Clear out the leader-specific state.
1adf2230 2392 last_tick = mono_clock::now();
7c673cae
FG
2393 last_beacon.clear();
2394}
2395