PaxosFSMap::decode(fsmap_bl);
// new map
- dout(4) << "new map" << dendl;
+ dout(0) << "new map" << dendl;
print_map(get_fsmap(), 0);
if (!g_conf->mon_mds_skip_sanity) {
get_fsmap().sanity();
mds_gid_t gid = mds_gid_t(m->get_global_id());
version_t seq = m->get_seq();
- dout(15) << "_note_beacon " << *m << " noting time" << dendl;
+ dout(5) << "_note_beacon " << *m << " noting time" << dendl;
auto &beacon = last_beacon[gid];
beacon.stamp = mono_clock::now();
beacon.seq = seq;
goto ignore;
}
- dout(12) << "preprocess_beacon " << *m
+ dout(5) << "preprocess_beacon " << *m
<< " from " << m->get_orig_source_inst()
<< " " << m->get_compat()
<< dendl;
// and return false (i.e. require proposal) if they
// do not match, to update our stored
if (!(pending_daemon_health[gid] == m->get_health())) {
- dout(20) << __func__ << " health metrics for gid " << gid << " were updated" << dendl;
+ dout(10) << __func__ << " health metrics for gid " << gid << " were updated" << dendl;
_note_beacon(m);
return false;
}
auto &pending = get_pending_fsmap_writeable();
- dout(20) << __func__ << " got health from gid " << gid << " with " << m->get_health().metrics.size() << " metrics." << dendl;
+ dout(15) << __func__ << " got health from gid " << gid << " with " << m->get_health().metrics.size() << " metrics." << dendl;
// Calculate deltas of health metrics created and removed
// Do this by type rather than MDSHealthMetric equality, because messages can
update_metadata(m->get_global_id(), m->get_sys_info());
} else {
// state update
+
+ if (!pending.gid_exists(gid)) {
+ /* gid has been removed from pending, send null map */
+ dout(5) << "mds_beacon " << *m << " is not in fsmap (state "
+ << ceph_mds_state_name(state) << ")" << dendl;
+
+ /* We can't send an MDSMap this MDS was a part of because we no longer
+ * know which FS it was part of. Nor does this matter. Sending an empty
+ * MDSMap is sufficient for getting the MDS to respawn.
+ *
+ * The reply is built inside the callback below: only epoch and compat are
+ * copied from get_fsmap(); null_map is otherwise default-constructed.
+ * A negative result re-dispatches the op instead of replying.
+ */
+ wait_for_finished_proposal(op, new FunctionContext([op, this](int r){
+ if (r >= 0) { // non-negative result: answer the beacon with the null map
+ const auto& fsmap = get_fsmap(); // fetched when the callback runs, not when it is queued
+ MDSMap null_map;
+ null_map.epoch = fsmap.epoch;
+ null_map.compat = fsmap.compat;
+ mon->send_reply(op, new MMDSMap(mon->monmap->fsid, &null_map));
+ } else {
+ dispatch(op); // try again
+ }
+ }));
+ return true; // NOTE(review): presumably signals the caller that a proposal is wanted so the callback fires — confirm
+ }
+
const MDSMap::mds_info_t &info = pending.get_info_gid(gid);
// Old MDS daemons don't mention that they're standby replay until
// after they've sent their boot beacon, so update this field.
}
if (info.laggy()) {
- dout(10) << "prepare_beacon clearing laggy flag on " << addr << dendl;
+ dout(1) << "prepare_beacon clearing laggy flag on " << addr << dendl;
pending.modify_daemon(info.global_id, [](MDSMap::mds_info_t *info)
{
info->clear_laggy();
);
}
- dout(10) << "prepare_beacon mds." << info.rank
+ dout(5) << "prepare_beacon mds." << info.rank
<< " " << ceph_mds_state_name(info.state)
<< " -> " << ceph_mds_state_name(state)
<< " standby_for_rank=" << m->get_standby_for_rank()
} else if (state == MDSMap::STATE_DAMAGED) {
if (!mon->osdmon()->is_writeable()) {
- dout(4) << __func__ << ": DAMAGED from rank " << info.rank
+ dout(1) << __func__ << ": DAMAGED from rank " << info.rank
<< " waiting for osdmon writeable to blacklist it" << dendl;
mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
return false;
// Record this MDS rank as damaged, so that other daemons
// won't try to run it.
- dout(4) << __func__ << ": marking rank "
+ dout(0) << __func__ << ": marking rank "
<< info.rank << " damaged" << dendl;
utime_t until = ceph_clock_now();
CEPH_FEATURES_SUPPORTED_DEFAULT));
} else if (state == MDSMap::STATE_DNE) {
if (!mon->osdmon()->is_writeable()) {
- dout(4) << __func__ << ": DNE from rank " << info.rank
+ dout(1) << __func__ << ": DNE from rank " << info.rank
<< " waiting for osdmon writeable to blacklist it" << dendl;
mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
return false;
}
}
- dout(7) << "prepare_beacon pending map now:" << dendl;
+ dout(5) << "prepare_beacon pending map now:" << dendl;
print_map(pending);
wait_for_finished_proposal(op, new FunctionContext([op, this](int r){
} else {
dout(10) << "prepare_offload_targets " << gid << " not in map" << dendl;
}
+ mon->no_reply(op);
return true;
}
bool MDSMonitor::fail_mds_gid(FSMap &fsmap, mds_gid_t gid)
{
const MDSMap::mds_info_t &info = fsmap.get_info_gid(gid);
- dout(10) << "fail_mds_gid " << gid << " mds." << info.name << " role " << info.rank << dendl;
+ dout(1) << "fail_mds_gid " << gid << " mds." << info.name << " role " << info.rank << dendl;
epoch_t blacklist_epoch = 0;
if (info.rank >= 0 && info.state != MDSMap::STATE_STANDBY_REPLAY) {
bool modified = fsmap.undamaged(role.fscid, role.rank);
if (modified) {
- dout(4) << "repaired: restoring rank " << role << dendl;
+ dout(1) << "repaired: restoring rank " << role << dendl;
} else {
- dout(4) << "repaired: no-op on rank " << role << dendl;
+ dout(1) << "repaired: no-op on rank " << role << dendl;
}
r = 0;
{
MDSMap::mds_info_t si = fsmap.get_info_gid(sgid);
- dout(10) << " replacing " << gid << " " << info.addr << " mds."
+ dout(1) << " replacing " << gid << " " << info.addr << " mds."
<< info.rank << "." << info.inc
<< " " << ceph_mds_state_name(info.state)
<< " with " << sgid << "/" << si.name << " " << si.addr << dendl;
*mds_propose = true;
} else if ((info.state == MDSMap::STATE_STANDBY_REPLAY ||
info.state == MDSMap::STATE_STANDBY) && may_replace) {
- dout(10) << " failing and removing " << gid << " " << info.addr << " mds." << info.rank
+ dout(1) << " failing and removing " << gid << " " << info.addr << " mds." << info.rank
<< "." << info.inc << " " << ceph_mds_state_name(info.state)
<< dendl;
mon->clog->info() << "Standby " << info.human_name() << " is not "
fail_mds_gid(fsmap, gid);
*mds_propose = true;
} else if (!info.laggy()) {
- dout(10) << " marking " << gid << " " << info.addr << " mds." << info.rank << "." << info.inc
+ dout(1) << " marking " << gid << " " << info.addr << " mds." << info.rank << "." << info.inc
<< " " << ceph_mds_state_name(info.state)
<< " laggy" << dendl;
fsmap.modify_daemon(info.global_id, [](MDSMap::mds_info_t *info) {
g_conf->mon_force_standby_active);
if (sgid) {
const MDSMap::mds_info_t si = fsmap.get_info_gid(sgid);
- dout(0) << " taking over failed mds." << f << " with " << sgid
+ dout(1) << " taking over failed mds." << f << " with " << sgid
<< "/" << si.name << " " << si.addr << dendl;
mon->clog->info() << "Standby " << si.human_name()
<< " assigned to filesystem " << fs->mds_map.fs_name
// This case handles either local slowness (calls being delayed
// for whatever reason) or cluster election slowness (a long gap
// between calls while an election happened)
- dout(4) << __func__ << ": resetting beacon timeouts due to mon delay "
+ dout(1) << __func__ << ": resetting beacon timeouts due to mon delay "
"(slow election?) of " << now - last_tick << " seconds" << dendl;
for (auto &p : last_beacon) {
p.second.stamp = now;