health.decode(bl_i);
}
for (const auto &metric : health.metrics) {
- int const rank = info.rank;
+ const int rank = info.rank;
health_check_t *check = &new_checks.get_or_add(
mds_metric_name(metric.type),
metric.sev,
mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
return false;
}
- mon->clog->info() << "MDS daemon '" << m->get_name() << "' restarted";
+ const MDSMap::mds_info_t &existing_info =
+ pending_fsmap.get_info_gid(existing);
+ mon->clog->info() << existing_info.human_name() << " restarted";
fail_mds_gid(existing);
failed_mds = true;
}
<< " standby_for_rank=" << m->get_standby_for_rank()
<< dendl;
if (state == MDSMap::STATE_STOPPED) {
+ const auto fscid = pending_fsmap.mds_roles.at(gid);
+ auto fs = pending_fsmap.get_filesystem(fscid);
+ mon->clog->info() << info.human_name() << " finished "
+ << "deactivating rank " << info.rank << " in filesystem "
+ << fs->mds_map.fs_name << " (now has "
+ << fs->mds_map.get_num_in_mds() << " ranks)";
+
auto erased = pending_fsmap.stop(gid);
erased.push_back(gid);
pending_daemon_health_rm.insert(erased_gid);
}
}
+
+
} else if (state == MDSMap::STATE_DAMAGED) {
if (!mon->osdmon()->is_writeable()) {
dout(4) << __func__ << ": DAMAGED from rank " << info.rank
info->state = state;
info->state_seq = seq;
});
+
+ if (state == MDSMap::STATE_ACTIVE) {
+ auto fscid = pending_fsmap.mds_roles.at(gid);
+ auto fs = pending_fsmap.get_filesystem(fscid);
+ mon->clog->info() << info.human_name() << " is now active in "
+ << "filesystem " << fs->mds_map.fs_name << " as rank "
+ << info.rank;
+ }
}
}
op->mark_mdsmon_event(__func__);
MMDSBeacon *m = static_cast<MMDSBeacon*>(op->get_req());
dout(10) << "_updated " << m->get_orig_source() << " " << *m << dendl;
- mon->clog->info() << m->get_orig_source_inst() << " "
+ mon->clog->debug() << m->get_orig_source_inst() << " "
<< ceph_mds_state_name(m->get_state());
if (m->get_state() == MDSMap::STATE_STOPPED) {
health.decode(bl_i);
for (const auto &metric : health.metrics) {
- int const rank = info.rank;
+ const int rank = info.rank;
std::ostringstream message;
message << "mds" << rank << ": " << metric.message;
summary.push_back(std::make_pair(metric.sev, message.str()));
return MDS_GID_NONE;
}
-int MDSMonitor::fail_mds(std::ostream &ss, const std::string &arg)
+int MDSMonitor::fail_mds(std::ostream &ss, const std::string &arg,
+ MDSMap::mds_info_t *failed_info)
{
+ assert(failed_info != nullptr);
+
mds_gid_t gid = gid_from_arg(arg, ss);
if (gid == MDS_GID_NONE) {
return 0;
if (!mon->osdmon()->is_writeable()) {
return -EAGAIN;
}
+
+ // Take a copy of the info before removing the MDS from the map,
+ // so that the caller knows which mds (if any) they ended up removing.
+ *failed_info = pending_fsmap.get_info_gid(gid);
+
fail_mds_gid(gid);
ss << "failed mds gid " << gid;
assert(mon->osdmon()->is_writeable());
} else if (prefix == "mds fail") {
string who;
cmd_getval(g_ceph_context, cmdmap, "who", who);
- r = fail_mds(ss, who);
+
+ MDSMap::mds_info_t failed_info;
+ r = fail_mds(ss, who, &failed_info);
if (r < 0 && r == -EAGAIN) {
mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
return -EAGAIN; // don't propose yet; wait for message to be retried
+ } else if (r == 0) {
+ // Only log if we really did something (not when it was already gone)
+ if (failed_info.global_id != MDS_GID_NONE) {
+ mon->clog->info() << failed_info.human_name() << " marked failed by "
+ << op->get_session()->entity_name;
+ }
}
} else if (prefix == "mds rm") {
mds_gid_t gid;
break;
}
- dout(1) << "adding standby " << pending_fsmap.get_info_gid(newgid).addr
+ const auto &new_info = pending_fsmap.get_info_gid(newgid);
+ dout(1) << "assigned standby " << new_info.addr
<< " as mds." << mds << dendl;
+
+ mon->clog->info() << new_info.human_name() << " assigned to "
+ "filesystem " << fs->mds_map.fs_name << " as rank "
+ << mds << " (now has " << fs->mds_map.get_num_in_mds()
+ << " ranks)";
pending_fsmap.promote(newgid, fs, mds);
do_propose = true;
}
<< " " << ceph_mds_state_name(info.state)
<< " with " << sgid << "/" << si.name << " " << si.addr << dendl;
- mon->clog->warn() << "MDS daemon '" << info.name << "'"
+ mon->clog->warn() << info.human_name()
<< " is not responding, replacing it "
<< "as rank " << info.rank
- << " with standby '" << si.name << "'";
+ << " with standby " << si.human_name();
// Remember what NS the old one was in
const fs_cluster_id_t fscid = pending_fsmap.mds_roles.at(gid);
dout(10) << " failing and removing " << gid << " " << info.addr << " mds." << info.rank
<< "." << info.inc << " " << ceph_mds_state_name(info.state)
<< dendl;
- mon->clog->info() << "MDS standby '" << info.name
- << "' is not responding, removing it from the set of "
- << "standbys";
+ mon->clog->info() << "Standby " << info.human_name() << " is not "
+ "responding, dropping it";
fail_mds_gid(gid);
*mds_propose = true;
} else if (!info.laggy()) {
const MDSMap::mds_info_t si = pending_fsmap.get_info_gid(sgid);
dout(0) << " taking over failed mds." << f << " with " << sgid
<< "/" << si.name << " " << si.addr << dendl;
+ mon->clog->info() << "Standby " << si.human_name()
+ << " assigned to filesystem " << fs->mds_map.fs_name
+ << " as rank " << f;
+
pending_fsmap.promote(sgid, fs, f);
do_propose = true;
}