<< ").mds e" << fsmap.get_epoch() << " ";
}
+static const string MDS_METADATA_PREFIX("mds_metadata");
+static const string MDS_HEALTH_PREFIX("mds_health");
+
+
/*
* Specialized implementation of cmd_getval to allow us to parse
* out strongly-typedef'd types
return cmd_getval(cct, cmdmap, k, (int64_t&)val);
}
-static const string MDS_METADATA_PREFIX("mds_metadata");
-
-
// my methods
void MDSMonitor::print_map(FSMap &m, int dbl)
dout(10) << "create_initial" << dendl;
}
+void MDSMonitor::get_store_prefixes(std::set<string>& s)  // Adds the store key prefixes used by this monitor to the caller's set `s`.
+{
+ s.insert(service_name);  // this service's own name
+ s.insert(MDS_METADATA_PREFIX);  // "mds_metadata"
+ s.insert(MDS_HEALTH_PREFIX);  // "mds_health"
+}
void MDSMonitor::update_from_paxos(bool *need_bootstrap)
{
pending_fsmap = fsmap;
pending_fsmap.epoch++;
+ if (mon->osdmon()->is_readable()) {
+ auto &osdmap = mon->osdmon()->osdmap;
+ pending_fsmap.sanitize([&osdmap](int64_t pool){return osdmap.have_pg_pool(pool);});
+ }
+
dout(10) << "create_pending e" << pending_fsmap.epoch << dendl;
}
health.decode(bl_i);
}
for (const auto &metric : health.metrics) {
- int const rank = info.rank;
+ const int rank = info.rank;
health_check_t *check = &new_checks.get_or_add(
mds_metric_name(metric.type),
metric.sev,
p.second.summary,
boost::regex("%isorare%"),
p.second.detail.size() > 1 ? "are" : "is");
+ p.second.summary = boost::regex_replace(
+ p.second.summary,
+ boost::regex("%hasorhave%"),
+ p.second.detail.size() > 1 ? "have" : "has");
}
encode_health(new_checks, t);
}
mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
return false;
}
- mon->clog->info() << "MDS daemon '" << m->get_name() << "' restarted";
+ const MDSMap::mds_info_t &existing_info =
+ pending_fsmap.get_info_gid(existing);
+ mon->clog->info() << existing_info.human_name() << " restarted";
fail_mds_gid(existing);
failed_mds = true;
}
if (leaderinfo && (leaderinfo->rank >= 0)) {
auto fscid = pending_fsmap.mds_roles.at(leaderinfo->global_id);
auto fs = pending_fsmap.get_filesystem(fscid);
- bool followable = fs->mds_map.is_followable(leaderinfo->rank);
- pending_fsmap.modify_daemon(gid, [fscid, leaderinfo, followable](
+ pending_fsmap.modify_daemon(gid, [fscid, leaderinfo](
MDSMap::mds_info_t *info) {
info->standby_for_rank = leaderinfo->rank;
info->standby_for_fscid = fscid;
<< " standby_for_rank=" << m->get_standby_for_rank()
<< dendl;
if (state == MDSMap::STATE_STOPPED) {
+ const auto fscid = pending_fsmap.mds_roles.at(gid);
+ auto fs = pending_fsmap.get_filesystem(fscid);
+
+ mon->clog->info() << info.human_name() << " finished "
+ << "deactivating rank " << info.rank << " in filesystem "
+ << fs->mds_map.fs_name << " (now has "
+ << fs->mds_map.get_num_in_mds() - 1 << " ranks)";
+
auto erased = pending_fsmap.stop(gid);
erased.push_back(gid);
pending_daemon_health_rm.insert(erased_gid);
}
}
+
+
} else if (state == MDSMap::STATE_DAMAGED) {
if (!mon->osdmon()->is_writeable()) {
dout(4) << __func__ << ": DAMAGED from rank " << info.rank
<< info.rank << " damaged" << dendl;
utime_t until = ceph_clock_now();
- until += g_conf->mds_blacklist_interval;
+ until += g_conf->get_val<double>("mon_mds_blacklist_interval");
const auto blacklist_epoch = mon->osdmon()->blacklist(info.addr, until);
request_proposal(mon->osdmon());
pending_fsmap.damaged(gid, blacklist_epoch);
<< ceph_mds_state_name(state) << dendl;
return true;
} else {
+ if (info.state != MDSMap::STATE_ACTIVE && state == MDSMap::STATE_ACTIVE) {
+ auto fscid = pending_fsmap.mds_roles.at(gid);
+ auto fs = pending_fsmap.get_filesystem(fscid);
+ mon->clog->info() << info.human_name() << " is now active in "
+ << "filesystem " << fs->mds_map.fs_name << " as rank "
+ << info.rank;
+ }
+
// Made it through special cases and validations, record the
// daemon's reported state to the FSMap.
pending_fsmap.modify_daemon(gid, [state, seq](MDSMap::mds_info_t *info) {
op->mark_mdsmon_event(__func__);
MMDSBeacon *m = static_cast<MMDSBeacon*>(op->get_req());
dout(10) << "_updated " << m->get_orig_source() << " " << *m << dendl;
- mon->clog->info() << m->get_orig_source_inst() << " "
+ mon->clog->debug() << m->get_orig_source_inst() << " "
<< ceph_mds_state_name(m->get_state());
if (m->get_state() == MDSMap::STATE_STOPPED) {
health.decode(bl_i);
for (const auto &metric : health.metrics) {
- int const rank = info.rank;
+ const int rank = info.rank;
std::ostringstream message;
message << "mds" << rank << ": " << metric.message;
summary.push_back(std::make_pair(metric.sev, message.str()));
} else {
mdsmap->print(ds);
r = 0;
- }
- if (r == 0) {
- rdata.append(ds);
- ss << "dumped fsmap epoch " << p->get_epoch();
}
+
+ rdata.append(ds);
+ ss << "dumped fsmap epoch " << p->get_epoch();
+
if (p != &fsmap) {
delete p;
}
} else {
p->print(ds);
r = 0;
- }
- if (r == 0) {
- rdata.append(ds);
- ss << "dumped fsmap epoch " << p->get_epoch();
}
+
+ rdata.append(ds);
+ ss << "dumped fsmap epoch " << p->get_epoch();
+
if (p != &fsmap)
delete p;
}
derr << "Unexpected error reading metadata: " << cpp_strerror(r)
<< dendl;
ss << get_err.str();
+ f->close_section();
break;
}
f->close_section();
epoch_t blacklist_epoch = 0;
if (info.rank >= 0 && info.state != MDSMap::STATE_STANDBY_REPLAY) {
utime_t until = ceph_clock_now();
- until += g_conf->mds_blacklist_interval;
+ until += g_conf->get_val<double>("mon_mds_blacklist_interval");
blacklist_epoch = mon->osdmon()->blacklist(info.addr, until);
}
return MDS_GID_NONE;
}
-int MDSMonitor::fail_mds(std::ostream &ss, const std::string &arg)
+int MDSMonitor::fail_mds(std::ostream &ss, const std::string &arg,
+ MDSMap::mds_info_t *failed_info)
{
+ assert(failed_info != nullptr);
+
mds_gid_t gid = gid_from_arg(arg, ss);
if (gid == MDS_GID_NONE) {
return 0;
if (!mon->osdmon()->is_writeable()) {
return -EAGAIN;
}
+
+ // Take a copy of the info before removing the MDS from the map,
+ // so that the caller knows which mds (if any) they ended up removing.
+ *failed_info = pending_fsmap.get_info_gid(gid);
+
fail_mds_gid(gid);
ss << "failed mds gid " << gid;
assert(mon->osdmon()->is_writeable());
return true;
}
+ bool batched_propose = false;
for (auto h : handlers) {
if (h->can_handle(prefix)) {
+ batched_propose = h->batched_propose();
+ if (batched_propose) {
+ paxos->plug();
+ }
r = h->handle(mon, pending_fsmap, op, cmdmap, ss);
+ if (batched_propose) {
+ paxos->unplug();
+ }
+
if (r == -EAGAIN) {
// message has been enqueued for retry; return.
dout(4) << __func__ << " enqueue for retry by prepare_command" << dendl;
// success.. delay reply
wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, r, rs,
get_last_committed() + 1));
+ if (batched_propose) {
+ force_immediate_propose();
+ }
return true;
} else {
// reply immediately
} else if (prefix == "mds fail") {
string who;
cmd_getval(g_ceph_context, cmdmap, "who", who);
- r = fail_mds(ss, who);
+
+ MDSMap::mds_info_t failed_info;
+ r = fail_mds(ss, who, &failed_info);
if (r < 0 && r == -EAGAIN) {
mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
return -EAGAIN; // don't propose yet; wait for message to be retried
+ } else if (r == 0) {
+ // Only log if we really did something (not when it was already gone)
+ if (failed_info.global_id != MDS_GID_NONE) {
+ mon->clog->info() << failed_info.human_name() << " marked failed by "
+ << op->get_session()->entity_name;
+ }
}
} else if (prefix == "mds rm") {
mds_gid_t gid;
return 0;
}
-void MDSMonitor::count_metadata(const string& field, Formatter *f)
+void MDSMonitor::count_metadata(const string& field, map<string,int> *out)  // Tallies, into *out, how many loaded metadata entries carry each value of `field`.
 {
- map<string,int> by_val;
 map<mds_gid_t,Metadata> meta;
 load_metadata(meta);  // fills `meta`, keyed by mds_gid_t
 for (auto& p : meta) {
 auto q = p.second.find(field);
 if (q == p.second.end()) {
- by_val["unknown"]++;
+ (*out)["unknown"]++;  // entry has no such field: count it under "unknown"
 } else {
- by_val[q->second]++;
+ (*out)[q->second]++;  // count under the field's value; existing counts in *out are incremented, not reset
 }
 }
+}
+
+void MDSMonitor::count_metadata(const string& field, Formatter *f)
+{
+ map<string,int> by_val;
+ count_metadata(field, &by_val);
f->open_object_section(field.c_str());
for (auto& p : by_val) {
f->dump_int(p.first.c_str(), p.second);
break;
}
- dout(1) << "adding standby " << pending_fsmap.get_info_gid(newgid).addr
+ const auto &new_info = pending_fsmap.get_info_gid(newgid);
+ dout(1) << "assigned standby " << new_info.addr
<< " as mds." << mds << dendl;
+
+ mon->clog->info() << new_info.human_name() << " assigned to "
+ "filesystem " << fs->mds_map.fs_name << " as rank "
+ << mds << " (now has " << fs->mds_map.get_num_in_mds() + 1
+ << " ranks)";
pending_fsmap.promote(newgid, fs, mds);
do_propose = true;
}
* is available, fail this daemon (remove from map) and pass its
* role to another daemon.
*/
-void MDSMonitor::maybe_replace_gid(mds_gid_t gid,
- const beacon_info_t &beacon,
+void MDSMonitor::maybe_replace_gid(mds_gid_t gid, const MDSMap::mds_info_t& info,
bool *mds_propose, bool *osd_propose)
{
assert(mds_propose != nullptr);
assert(osd_propose != nullptr);
- const MDSMap::mds_info_t info = pending_fsmap.get_info_gid(gid);
const auto fscid = pending_fsmap.mds_roles.at(gid);
- dout(10) << "no beacon from " << gid << " " << info.addr << " mds."
- << info.rank << "." << info.inc
- << " " << ceph_mds_state_name(info.state)
- << " since " << beacon.stamp << dendl;
-
// We will only take decisive action (replacing/removing a daemon)
// if we have some indication that some other daemon(s) are successfully
// getting beacons through recently.
<< " " << ceph_mds_state_name(info.state)
<< " with " << sgid << "/" << si.name << " " << si.addr << dendl;
- mon->clog->warn() << "MDS daemon '" << info.name << "'"
+ mon->clog->warn() << info.human_name()
<< " is not responding, replacing it "
<< "as rank " << info.rank
- << " with standby '" << si.name << "'";
+ << " with standby " << si.human_name();
// Remember what NS the old one was in
const fs_cluster_id_t fscid = pending_fsmap.mds_roles.at(gid);
dout(10) << " failing and removing " << gid << " " << info.addr << " mds." << info.rank
<< "." << info.inc << " " << ceph_mds_state_name(info.state)
<< dendl;
- mon->clog->info() << "MDS standby '" << info.name
- << "' is not responding, removing it from the set of "
- << "standbys";
+ mon->clog->info() << "Standby " << info.human_name() << " is not "
+ "responding, dropping it";
fail_mds_gid(gid);
*mds_propose = true;
} else if (!info.laggy()) {
const MDSMap::mds_info_t si = pending_fsmap.get_info_gid(sgid);
dout(0) << " taking over failed mds." << f << " with " << sgid
<< "/" << si.name << " " << si.addr << dendl;
+ mon->clog->info() << "Standby " << si.human_name()
+ << " assigned to filesystem " << fs->mds_map.fs_name
+ << " as rank " << f;
+
pending_fsmap.promote(sgid, fs, f);
do_propose = true;
}
}
}
- // If the OSDMap is writeable, we can blacklist things, so we can
- // try failing any laggy MDS daemons. Consider each one for failure.
- if (mon->osdmon()->is_writeable()) {
- bool propose_osdmap = false;
-
- map<mds_gid_t, beacon_info_t>::iterator p = last_beacon.begin();
- while (p != last_beacon.end()) {
- mds_gid_t gid = p->first;
- auto beacon_info = p->second;
- ++p;
-
- if (!pending_fsmap.gid_exists(gid)) {
- // clean it out
- last_beacon.erase(gid);
- continue;
- }
+ bool propose_osdmap = false;
+ bool osdmap_writeable = mon->osdmon()->is_writeable();
+ auto p = last_beacon.begin();
+ while (p != last_beacon.end()) {
+ mds_gid_t gid = p->first;
+ auto beacon_info = p->second;
+ ++p;
- if (beacon_info.stamp < cutoff) {
- maybe_replace_gid(gid, beacon_info, &do_propose, &propose_osdmap);
- }
+ if (!pending_fsmap.gid_exists(gid)) {
+ // clean it out
+ last_beacon.erase(gid);
+ continue;
}
- if (propose_osdmap) {
- request_proposal(mon->osdmon());
+ if (beacon_info.stamp < cutoff) {
+ auto &info = pending_fsmap.get_info_gid(gid);
+ dout(1) << "no beacon from mds." << info.rank << "." << info.inc
+ << " (gid: " << gid << " addr: " << info.addr
+ << " state: " << ceph_mds_state_name(info.state) << ")"
+ << " since " << beacon_info.stamp << dendl;
+ // If the OSDMap is writeable, we can blacklist things, so we can
+ // try failing any laggy MDS daemons. Consider each one for failure.
+ if (osdmap_writeable) {
+ maybe_replace_gid(gid, info, &do_propose, &propose_osdmap);
+ }
}
}
+ if (propose_osdmap) {
+ request_proposal(mon->osdmon());
+ }
for (auto i : pending_fsmap.filesystems) {
auto fs = i.second;
MDSMonitor::MDSMonitor(Monitor *mn, Paxos *p, string service_name)  // Constructs the monitor and loads its filesystem command handlers.
: PaxosService(mn, p, service_name)  // all three arguments are forwarded to the PaxosService base
{
- handlers = FileSystemCommandHandler::load();
+ handlers = FileSystemCommandHandler::load(p);  // the Paxos pointer is now handed to the handler loader
}
void MDSMonitor::on_restart()