t->erase(MDS_HEALTH_PREFIX, stringify(*i));
}
pending_daemon_health_rm.clear();
- remove_from_metadata(t);
+ remove_from_metadata(pending, t);
// health
health_check_map_t new_checks;
version_t seq = m->get_seq();
dout(15) << "_note_beacon " << *m << " noting time" << dendl;
- last_beacon[gid].stamp = ceph_clock_now();
- last_beacon[gid].seq = seq;
+ auto &beacon = last_beacon[gid];
+ beacon.stamp = mono_clock::now();
+ beacon.seq = seq;
}
bool MDSMonitor::preprocess_beacon(MonOpRequestRef op)
MDSMap::mds_info_t info;
epoch_t effective_epoch = 0;
- const auto &fsmap = get_working_fsmap();
+ const auto &fsmap = get_fsmap();
// check privileges, ignore if fails
MonSession *session = m->get_session();
dout(7) << "mds_beacon " << *m << " is not in fsmap (state "
<< ceph_mds_state_name(state) << ")" << dendl;
+ /* We can't send an MDSMap this MDS was a part of because we no longer
+ * know which FS it was part of. Nor does this matter. Sending an empty
+ * MDSMap is sufficient for getting the MDS to respawn.
+ */
MDSMap null_map;
null_map.epoch = fsmap.epoch;
null_map.compat = fsmap.compat;
MMDSLoadTargets *m = static_cast<MMDSLoadTargets*>(op->get_req());
dout(10) << "preprocess_offload_targets " << *m << " from " << m->get_orig_source() << dendl;
- auto &fsmap = get_working_fsmap();
+ const auto &fsmap = get_fsmap();
// check privileges, ignore message if fails
MonSession *session = m->get_session();
if (!session)
- goto done;
+ goto ignore;
if (!session->is_capable("mds", MON_CAP_X)) {
dout(0) << "preprocess_offload_targets got MMDSLoadTargets from entity with insufficient caps "
<< session->caps << dendl;
- goto done;
+ goto ignore;
}
if (fsmap.gid_exists(m->global_id) &&
m->targets == fsmap.get_info_gid(m->global_id).export_targets)
- goto done;
+ goto ignore;
return false;
- done:
+ ignore:
+ mon->no_reply(op);
return true;
}
const MDSMap::mds_info_t &existing_info =
pending.get_info_gid(existing);
mon->clog->info() << existing_info.human_name() << " restarted";
- fail_mds_gid(existing);
+ fail_mds_gid(pending, existing);
failed_mds = true;
}
if (failed_mds) {
info.standby_for_name);
if (leaderinfo && (leaderinfo->rank >= 0)) {
const auto &fscid = pending.mds_roles.at(leaderinfo->global_id);
- const auto &fs = pending.get_filesystem(fscid);
pending.modify_daemon(gid, [fscid, leaderinfo](
MDSMap::mds_info_t *info) {
}
// initialize the beacon timer
- last_beacon[gid].stamp = ceph_clock_now();
- last_beacon[gid].seq = seq;
+ auto &beacon = last_beacon[gid];
+ beacon.stamp = mono_clock::now();
+ beacon.seq = seq;
// new incompat?
if (!pending.compat.writeable(m->get_compat())) {
return false;
}
- fail_mds_gid(gid);
+ fail_mds_gid(pending, gid);
assert(mon->osdmon()->is_writeable());
request_proposal(mon->osdmon());
stringstream ss, ds;
map<string, cmd_vartype> cmdmap;
- const auto &fsmap = get_working_fsmap();
+ const auto &fsmap = get_fsmap();
if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) {
// ss has reason for failure
cmd_getval(g_ceph_context, cmdmap, "prefix", prefix);
string format;
cmd_getval(g_ceph_context, cmdmap, "format", format, string("plain"));
- boost::scoped_ptr<Formatter> f(Formatter::create(format));
+ std::unique_ptr<Formatter> f(Formatter::create(format));
MonSession *session = m->get_session();
if (!session) {
int64_t epocharg;
epoch_t epoch;
- const FSMap *fsmapp = &get_fsmap();
+ const FSMap *fsmapp = &fsmap;
FSMap dummy;
if (cmd_getval(g_ceph_context, cmdmap, "epoch", epocharg)) {
epoch = epocharg;
f->open_object_section("mds");
f->dump_string("name", info.name);
std::ostringstream get_err;
- r = dump_metadata(info.name, f.get(), get_err);
+ r = dump_metadata(fsmap, info.name, f.get(), get_err);
if (r == -EINVAL || r == -ENOENT) {
// Drop error, list what metadata we do have
dout(1) << get_err.str() << dendl;
} else {
// Dump a single daemon's metadata
f->open_object_section("mds_metadata");
- r = dump_metadata(who, f.get(), ss);
+ r = dump_metadata(fsmap, who, f.get(), ss);
f->close_section();
}
f->flush(ds);
} else if (prefix == "fs ls") {
if (f) {
f->open_array_section("filesystems");
- {
- for (const auto &p : fsmap.filesystems) {
- const auto &fs = p.second;
- f->open_object_section("filesystem");
- {
- const MDSMap &mds_map = fs->mds_map;
- f->dump_string("name", mds_map.fs_name);
- /* Output both the names and IDs of pools, for use by
- * humans and machines respectively */
- f->dump_string("metadata_pool", mon->osdmon()->osdmap.get_pool_name(
- mds_map.metadata_pool));
- f->dump_int("metadata_pool_id", mds_map.metadata_pool);
- f->open_array_section("data_pool_ids");
- {
- for (auto dpi = mds_map.data_pools.begin();
- dpi != mds_map.data_pools.end(); ++dpi) {
- f->dump_int("data_pool_id", *dpi);
- }
- }
- f->close_section();
-
- f->open_array_section("data_pools");
- {
- for (auto dpi = mds_map.data_pools.begin();
- dpi != mds_map.data_pools.end(); ++dpi) {
- const auto &name = mon->osdmon()->osdmap.get_pool_name(
- *dpi);
- f->dump_string("data_pool", name);
- }
- }
+ for (const auto &p : fsmap.filesystems) {
+ const auto &fs = p.second;
+ f->open_object_section("filesystem");
+ {
+ const MDSMap &mds_map = fs->mds_map;
+ f->dump_string("name", mds_map.fs_name);
+ /* Output both the names and IDs of pools, for use by
+ * humans and machines respectively */
+ f->dump_string("metadata_pool", mon->osdmon()->osdmap.get_pool_name(
+ mds_map.metadata_pool));
+ f->dump_int("metadata_pool_id", mds_map.metadata_pool);
+ f->open_array_section("data_pool_ids");
+ for (const auto &id : mds_map.data_pools) {
+ f->dump_int("data_pool_id", id);
+ }
+ f->close_section();
- f->close_section();
+ f->open_array_section("data_pools");
+ for (const auto &id : mds_map.data_pools) {
+ const auto &name = mon->osdmon()->osdmap.get_pool_name(id);
+ f->dump_string("data_pool", name);
}
f->close_section();
}
+ f->close_section();
}
f->close_section();
f->flush(ds);
ds << "name: " << mds_map.fs_name << ", metadata pool: "
<< md_pool_name << ", data pools: [";
- for (auto dpi : mds_map.data_pools) {
- const string &pool_name = mon->osdmon()->osdmap.get_pool_name(dpi);
+ for (const auto &id : mds_map.data_pools) {
+ const string &pool_name = mon->osdmon()->osdmap.get_pool_name(id);
ds << pool_name << " ";
}
ds << "]" << std::endl;
return false;
}
-bool MDSMonitor::fail_mds_gid(mds_gid_t gid)
+bool MDSMonitor::fail_mds_gid(FSMap &fsmap, mds_gid_t gid)
{
- auto &pending = get_pending_fsmap_writeable();
-
- const MDSMap::mds_info_t &info = pending.get_info_gid(gid);
+ const MDSMap::mds_info_t &info = fsmap.get_info_gid(gid);
dout(10) << "fail_mds_gid " << gid << " mds." << info.name << " role " << info.rank << dendl;
epoch_t blacklist_epoch = 0;
blacklist_epoch = mon->osdmon()->blacklist(info.addr, until);
}
- pending.erase(gid, blacklist_epoch);
+ fsmap.erase(gid, blacklist_epoch);
last_beacon.erase(gid);
if (pending_daemon_health.count(gid)) {
pending_daemon_health.erase(gid);
return blacklist_epoch != 0;
}
-mds_gid_t MDSMonitor::gid_from_arg(const std::string& arg, std::ostream &ss)
+mds_gid_t MDSMonitor::gid_from_arg(const FSMap &fsmap, const std::string &arg, std::ostream &ss)
{
- const auto &fsmap = get_working_fsmap();
-
// Try parsing as a role
mds_role_t role;
std::ostringstream ignore_err; // Don't spam 'ss' with parse_role errors
- int r = parse_role(arg, &role, ignore_err);
+ int r = fsmap.parse_role(arg, &role, ignore_err);
if (r == 0) {
// See if a GID is assigned to this role
const auto &fs = fsmap.get_filesystem(role.fscid);
return MDS_GID_NONE;
}
-int MDSMonitor::fail_mds(std::ostream &ss, const std::string &arg,
- MDSMap::mds_info_t *failed_info)
+int MDSMonitor::fail_mds(FSMap &fsmap, std::ostream &ss,
+ const std::string &arg, MDSMap::mds_info_t *failed_info)
{
assert(failed_info != nullptr);
- mds_gid_t gid = gid_from_arg(arg, ss);
+ mds_gid_t gid = gid_from_arg(fsmap, arg, ss);
if (gid == MDS_GID_NONE) {
return 0;
}
// Take a copy of the info before removing the MDS from the map,
// so that the caller knows which mds (if any) they ended up removing.
- *failed_info = get_pending_fsmap().get_info_gid(gid);
+ *failed_info = fsmap.get_info_gid(gid);
- fail_mds_gid(gid);
+ fail_mds_gid(fsmap, gid);
ss << "failed mds gid " << gid;
assert(mon->osdmon()->is_writeable());
request_proposal(mon->osdmon());
}
}
- r = filesystem_command(op, prefix, cmdmap, ss);
+ r = filesystem_command(pending, op, prefix, cmdmap, ss);
if (r >= 0) {
goto out;
} else if (r == -EAGAIN) {
goto out;
}
- r = legacy_filesystem_command(op, prefix, cmdmap, ss);
+ r = legacy_filesystem_command(pending, op, prefix, cmdmap, ss);
if (r == -ENOSYS && ss.str().empty()) {
ss << "unrecognized command";
}
}
-
-/**
- * Given one of the following forms:
- * <fs name>:<rank>
- * <fs id>:<rank>
- * <rank>
- *
- * Parse into a mds_role_t. The rank-only form is only valid
- * if legacy_client_ns is set.
- */
-int MDSMonitor::parse_role(
- const std::string &role_str,
- mds_role_t *role,
- std::ostream &ss)
-{
- return get_working_fsmap().parse_role(role_str, role, ss);
-}
-
int MDSMonitor::filesystem_command(
+ FSMap &fsmap,
MonOpRequestRef op,
std::string const &prefix,
map<string, cmd_vartype> &cmdmap,
string whostr;
cmd_getval(g_ceph_context, cmdmap, "who", whostr);
- auto &pending = get_pending_fsmap_writeable();
if (prefix == "mds stop" ||
prefix == "mds deactivate") {
mds_role_t role;
- r = parse_role(whostr, &role, ss);
+ r = fsmap.parse_role(whostr, &role, ss);
if (r < 0 ) {
return r;
}
- const auto &fs = pending.get_filesystem(role.fscid);
+ const auto &fs = fsmap.get_filesystem(role.fscid);
if (!fs->mds_map.is_active(role.rank)) {
r = -EEXIST;
r = 0;
mds_gid_t gid = fs->mds_map.up.at(role.rank);
ss << "telling mds." << role << " "
- << pending.get_info_gid(gid).addr << " to deactivate";
+ << fsmap.get_info_gid(gid).addr << " to deactivate";
- pending.modify_daemon(gid, [](MDSMap::mds_info_t *info) {
+ fsmap.modify_daemon(gid, [](MDSMap::mds_info_t *info) {
info->state = MDSMap::STATE_STOPPING;
});
}
<< cmd_vartype_stringify(cmdmap["state"]) << "'";
return -EINVAL;
}
- if (pending.gid_exists(gid)) {
- pending.modify_daemon(gid, [state](MDSMap::mds_info_t *info) {
+ if (fsmap.gid_exists(gid)) {
+ fsmap.modify_daemon(gid, [state](MDSMap::mds_info_t *info) {
info->state = state;
});
ss << "set mds gid " << gid << " to state " << state << " "
cmd_getval(g_ceph_context, cmdmap, "who", who);
MDSMap::mds_info_t failed_info;
- r = fail_mds(ss, who, &failed_info);
+ r = fail_mds(fsmap, ss, who, &failed_info);
if (r < 0 && r == -EAGAIN) {
mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
return -EAGAIN; // don't propose yet; wait for message to be retried
<< cmd_vartype_stringify(cmdmap["gid"]) << "'";
return -EINVAL;
}
- if (!pending.gid_exists(gid)) {
+ if (!fsmap.gid_exists(gid)) {
ss << "mds gid " << gid << " dne";
r = 0;
} else {
- const auto &info = pending.get_info_gid(gid);
+ const auto &info = fsmap.get_info_gid(gid);
MDSMap::DaemonState state = info.state;
if (state > 0) {
ss << "cannot remove active mds." << info.name
<< " rank " << info.rank;
return -EBUSY;
} else {
- pending.erase(gid, {});
+ fsmap.erase(gid, {});
ss << "removed mds gid " << gid;
return 0;
}
std::string role_str;
cmd_getval(g_ceph_context, cmdmap, "who", role_str);
mds_role_t role;
- int r = parse_role(role_str, &role, ss);
+ int r = fsmap.parse_role(role_str, &role, ss);
if (r < 0) {
ss << "invalid role '" << role_str << "'";
return -EINVAL;
}
- pending.modify_filesystem(
+ fsmap.modify_filesystem(
role.fscid,
[role](std::shared_ptr<Filesystem> fs)
{
<< cmd_vartype_stringify(cmdmap["feature"]) << "'";
return -EINVAL;
}
- if (pending.compat.compat.contains(f)) {
+ if (fsmap.compat.compat.contains(f)) {
ss << "removing compat feature " << f;
- CompatSet modified = pending.compat;
+ CompatSet modified = fsmap.compat;
modified.compat.remove(f);
- pending.update_compat(modified);
+ fsmap.update_compat(modified);
} else {
- ss << "compat feature " << f << " not present in " << pending.compat;
+ ss << "compat feature " << f << " not present in " << fsmap.compat;
}
r = 0;
} else if (prefix == "mds compat rm_incompat") {
<< cmd_vartype_stringify(cmdmap["feature"]) << "'";
return -EINVAL;
}
- if (pending.compat.incompat.contains(f)) {
+ if (fsmap.compat.incompat.contains(f)) {
ss << "removing incompat feature " << f;
- CompatSet modified = pending.compat;
+ CompatSet modified = fsmap.compat;
modified.incompat.remove(f);
- pending.update_compat(modified);
+ fsmap.update_compat(modified);
} else {
- ss << "incompat feature " << f << " not present in " << pending.compat;
+ ss << "incompat feature " << f << " not present in " << fsmap.compat;
}
r = 0;
} else if (prefix == "mds repaired") {
std::string role_str;
cmd_getval(g_ceph_context, cmdmap, "rank", role_str);
mds_role_t role;
- r = parse_role(role_str, &role, ss);
+ r = fsmap.parse_role(role_str, &role, ss);
if (r < 0) {
return r;
}
- bool modified = pending.undamaged(role.fscid, role.rank);
+ bool modified = fsmap.undamaged(role.fscid, role.rank);
if (modified) {
dout(4) << "repaired: restoring rank " << role << dendl;
} else {
/**
* Helper to legacy_filesystem_command
*/
-void MDSMonitor::modify_legacy_filesystem(
+void MDSMonitor::modify_legacy_filesystem(FSMap &fsmap,
std::function<void(std::shared_ptr<Filesystem> )> fn)
{
- auto &pending_fsmap = get_pending_fsmap_writeable();
- pending_fsmap.modify_filesystem(
- pending_fsmap.legacy_client_fscid,
+ fsmap.modify_filesystem(
+ fsmap.legacy_client_fscid,
fn
);
}
* @retval < 0 An error has occurred; **ss** may have been set.
*/
int MDSMonitor::legacy_filesystem_command(
+ FSMap &fsmap,
MonOpRequestRef op,
std::string const &prefix,
map<string, cmd_vartype> &cmdmap,
string whostr;
cmd_getval(g_ceph_context, cmdmap, "who", whostr);
- auto &pending_fsmap = get_pending_fsmap_writeable();
-
- assert (pending_fsmap.legacy_client_fscid != FS_CLUSTER_ID_NONE);
+ assert (fsmap.legacy_client_fscid != FS_CLUSTER_ID_NONE);
if (prefix == "mds set_max_mds") {
// NOTE: deprecated by "fs set max_mds"
}
const MDSMap& mdsmap =
- pending_fsmap.filesystems.at(pending_fsmap.legacy_client_fscid)->mds_map;
+ fsmap.filesystems.at(fsmap.legacy_client_fscid)->mds_map;
if (!mdsmap.allows_multimds() &&
maxmds > mdsmap.get_max_mds() &&
return -EINVAL;
}
- modify_legacy_filesystem(
+ modify_legacy_filesystem(fsmap,
[maxmds](std::shared_ptr<Filesystem> fs)
{
fs->mds_map.set_max_mds(maxmds);
ss << "max_mds = " << maxmds;
} else if (prefix == "mds cluster_down") {
// NOTE: deprecated by "fs set cluster_down"
- modify_legacy_filesystem(
+ modify_legacy_filesystem(fsmap,
[](std::shared_ptr<Filesystem> fs)
{
fs->mds_map.set_flag(CEPH_MDSMAP_DOWN);
r = 0;
} else if (prefix == "mds cluster_up") {
// NOTE: deprecated by "fs set cluster_up"
- modify_legacy_filesystem(
+ modify_legacy_filesystem(fsmap,
[](std::shared_ptr<Filesystem> fs)
{
fs->mds_map.clear_flag(CEPH_MDSMAP_DOWN);
if (is_mds) {
// What (if any) namespace are you assigned to?
auto mds_info = fsmap.get_mds_info();
- for (const auto &i : mds_info) {
- if (i.second.addr == sub->session->inst.addr) {
- mds_gid = i.first;
+ for (const auto &p : mds_info) {
+ if (p.second.addr == sub->session->inst.addr) {
+ mds_gid = p.first;
fscid = fsmap.mds_roles.at(mds_gid);
}
}
paxos->trigger_propose();
}
-void MDSMonitor::remove_from_metadata(MonitorDBStore::TransactionRef t)
+void MDSMonitor::remove_from_metadata(const FSMap &fsmap, MonitorDBStore::TransactionRef t)
{
bool update = false;
- for (map<mds_gid_t, Metadata>::iterator i = pending_metadata.begin();
- i != pending_metadata.end(); ) {
- if (!get_pending_fsmap().gid_exists(i->first)) {
- pending_metadata.erase(i++);
+ for (auto it = pending_metadata.begin(); it != pending_metadata.end(); ) {
+ if (!fsmap.gid_exists(it->first)) {
+ it = pending_metadata.erase(it);
update = true;
} else {
- ++i;
+ ++it;
}
}
if (!update)
return 0;
}
-void MDSMonitor::count_metadata(const string& field, map<string,int> *out)
+void MDSMonitor::count_metadata(const std::string &field, map<string,int> *out)
{
map<mds_gid_t,Metadata> meta;
load_metadata(meta);
}
}
-void MDSMonitor::count_metadata(const string& field, Formatter *f)
+void MDSMonitor::count_metadata(const std::string &field, Formatter *f)
{
map<string,int> by_val;
count_metadata(field, &by_val);
f->close_section();
}
-int MDSMonitor::dump_metadata(const std::string &who, Formatter *f, ostream& err)
+int MDSMonitor::dump_metadata(const FSMap& fsmap, const std::string &who,
+ Formatter *f, ostream& err)
{
assert(f);
- mds_gid_t gid = gid_from_arg(who, err);
+ mds_gid_t gid = gid_from_arg(fsmap, who, err);
if (gid == MDS_GID_NONE) {
return -EINVAL;
}
{
assert(f);
+ const auto &fsmap = get_fsmap();
+
map<mds_gid_t, Metadata> metadata;
if (int r = load_metadata(metadata)) {
return r;
}
map<string, list<int> > mdses; // hostname => rank
- for (map<mds_gid_t, Metadata>::iterator it = metadata.begin();
- it != metadata.end(); ++it) {
- const Metadata& m = it->second;
+ for (const auto &p : metadata) {
+ const mds_gid_t& gid = p.first;
+ const Metadata& m = p.second;
Metadata::const_iterator hostname = m.find("hostname");
if (hostname == m.end()) {
// not likely though
continue;
}
- const mds_gid_t gid = it->first;
- if (!get_fsmap().gid_exists(gid)) {
+ if (!fsmap.gid_exists(gid)) {
dout(5) << __func__ << ": GID " << gid << " not existent" << dendl;
continue;
}
- const MDSMap::mds_info_t& mds_info = get_fsmap().get_info_gid(gid);
+ const MDSMap::mds_info_t& mds_info = fsmap.get_info_gid(gid);
// FIXME: include filesystem name with rank here
mdses[hostname->second].push_back(mds_info.rank);
}
* If a cluster is undersized (with respect to max_mds), then
* attempt to find daemons to grow it.
*/
-bool MDSMonitor::maybe_expand_cluster(std::shared_ptr<Filesystem> &fs)
+bool MDSMonitor::maybe_expand_cluster(FSMap &fsmap, fs_cluster_id_t fscid)
{
- bool do_propose = false;
- auto &pending = get_pending_fsmap_writeable();
+ auto fs = fsmap.get_filesystem(fscid);
+ auto &mds_map = fs->mds_map;
if (fs->mds_map.test_flag(CEPH_MDSMAP_DOWN)) {
- return do_propose;
+ return false;
}
- while (fs->mds_map.get_num_in_mds() < size_t(fs->mds_map.get_max_mds()) &&
- !fs->mds_map.is_degraded()) {
+ int in = mds_map.get_num_in_mds();
+ int max = mds_map.get_max_mds();
+
+ dout(20) << __func__ << " in " << in << " max " << max << dendl;
+
+ if (in < max) {
mds_rank_t mds = mds_rank_t(0);
string name;
- while (fs->mds_map.is_in(mds)) {
+ while (mds_map.is_in(mds)) {
mds++;
}
- mds_gid_t newgid = pending.find_replacement_for({fs->fscid, mds},
+ mds_gid_t newgid = fsmap.find_replacement_for({fscid, mds},
name, g_conf->mon_force_standby_active);
if (newgid == MDS_GID_NONE) {
- break;
+ return false;
}
- const auto &new_info = pending.get_info_gid(newgid);
+ const auto &new_info = fsmap.get_info_gid(newgid);
dout(1) << "assigned standby " << new_info.addr
<< " as mds." << mds << dendl;
mon->clog->info() << new_info.human_name() << " assigned to "
- "filesystem " << fs->mds_map.fs_name << " as rank "
- << mds << " (now has " << fs->mds_map.get_num_in_mds() + 1
+ "filesystem " << mds_map.fs_name << " as rank "
+ << mds << " (now has " << mds_map.get_num_in_mds() + 1
<< " ranks)";
- pending.promote(newgid, fs, mds);
- do_propose = true;
+ fsmap.promote(newgid, fs, mds);
+ return true;
}
- return do_propose;
+ return false;
}
* is available, fail this daemon (remove from map) and pass its
* role to another daemon.
*/
-void MDSMonitor::maybe_replace_gid(mds_gid_t gid, const MDSMap::mds_info_t& info,
- bool *mds_propose, bool *osd_propose)
+void MDSMonitor::maybe_replace_gid(FSMap &fsmap, mds_gid_t gid,
+ const MDSMap::mds_info_t& info, bool *mds_propose, bool *osd_propose)
{
assert(mds_propose != nullptr);
assert(osd_propose != nullptr);
- auto &pending = get_pending_fsmap_writeable();
- const auto fscid = pending.mds_roles.at(gid);
+ const auto fscid = fsmap.mds_roles.at(gid);
// We will only take decisive action (replacing/removing a daemon)
- // if we have some indicating that some other daemon(s) are successfully
+ // if we have some indication that some other daemon(s) are successfully
// getting beacons through recently.
- utime_t latest_beacon;
- for (const auto & i : last_beacon) {
- latest_beacon = MAX(i.second.stamp, latest_beacon);
+ mono_time latest_beacon = mono_clock::zero();
+ for (const auto &p : last_beacon) {
+ latest_beacon = std::max(p.second.stamp, latest_beacon);
}
- const bool may_replace = latest_beacon >
- (ceph_clock_now() -
- MAX(g_conf->mds_beacon_interval, g_conf->mds_beacon_grace * 0.5));
+ mono_time now = mono_clock::now();
+ chrono::duration<double> since = now-latest_beacon;
+ const bool may_replace = since.count() <
+ std::max(g_conf->mds_beacon_interval, g_conf->mds_beacon_grace * 0.5);
// are we in?
// and is there a non-laggy standby that can take over for us?
info.state != MDSMap::STATE_STANDBY &&
info.state != MDSMap::STATE_STANDBY_REPLAY &&
may_replace &&
- !pending.get_filesystem(fscid)->mds_map.test_flag(CEPH_MDSMAP_DOWN) &&
- (sgid = pending.find_replacement_for({fscid, info.rank}, info.name,
+ !fsmap.get_filesystem(fscid)->mds_map.test_flag(CEPH_MDSMAP_DOWN) &&
+ (sgid = fsmap.find_replacement_for({fscid, info.rank}, info.name,
g_conf->mon_force_standby_active)) != MDS_GID_NONE)
{
- MDSMap::mds_info_t si = pending.get_info_gid(sgid);
+ MDSMap::mds_info_t si = fsmap.get_info_gid(sgid);
dout(10) << " replacing " << gid << " " << info.addr << " mds."
<< info.rank << "." << info.inc
<< " " << ceph_mds_state_name(info.state)
<< " with standby " << si.human_name();
// Remember what NS the old one was in
- const fs_cluster_id_t fscid = pending.mds_roles.at(gid);
+ const fs_cluster_id_t fscid = fsmap.mds_roles.at(gid);
// Remove the old one
- *osd_propose |= fail_mds_gid(gid);
+ *osd_propose |= fail_mds_gid(fsmap, gid);
// Promote the replacement
- auto fs = pending.filesystems.at(fscid);
- pending.promote(sgid, fs, info.rank);
+ auto fs = fsmap.filesystems.at(fscid);
+ fsmap.promote(sgid, fs, info.rank);
*mds_propose = true;
} else if ((info.state == MDSMap::STATE_STANDBY_REPLAY ||
<< dendl;
mon->clog->info() << "Standby " << info.human_name() << " is not "
"responding, dropping it";
- fail_mds_gid(gid);
+ fail_mds_gid(fsmap, gid);
*mds_propose = true;
} else if (!info.laggy()) {
dout(10) << " marking " << gid << " " << info.addr << " mds." << info.rank << "." << info.inc
<< " " << ceph_mds_state_name(info.state)
<< " laggy" << dendl;
- pending.modify_daemon(info.global_id, [](MDSMap::mds_info_t *info) {
+ fsmap.modify_daemon(info.global_id, [](MDSMap::mds_info_t *info) {
info->laggy_since = ceph_clock_now();
});
*mds_propose = true;
}
}
-bool MDSMonitor::maybe_promote_standby(std::shared_ptr<Filesystem> &fs)
+bool MDSMonitor::maybe_promote_standby(FSMap &fsmap, std::shared_ptr<Filesystem> &fs)
{
assert(!fs->mds_map.test_flag(CEPH_MDSMAP_DOWN));
- auto &pending = get_pending_fsmap_writeable();
-
bool do_propose = false;
// have a standby take over?
set<mds_rank_t>::iterator p = failed.begin();
while (p != failed.end()) {
mds_rank_t f = *p++;
- mds_gid_t sgid = pending.find_replacement_for({fs->fscid, f}, {},
+ mds_gid_t sgid = fsmap.find_replacement_for({fs->fscid, f}, {},
g_conf->mon_force_standby_active);
if (sgid) {
- const MDSMap::mds_info_t si = pending.get_info_gid(sgid);
+ const MDSMap::mds_info_t si = fsmap.get_info_gid(sgid);
dout(0) << " taking over failed mds." << f << " with " << sgid
<< "/" << si.name << " " << si.addr << dendl;
mon->clog->info() << "Standby " << si.human_name()
<< " assigned to filesystem " << fs->mds_map.fs_name
<< " as rank " << f;
- pending.promote(sgid, fs, f);
+ fsmap.promote(sgid, fs, f);
do_propose = true;
}
}
// them while perhaps-modifying standby_daemons during the loop
// (if we promote anyone they are removed from standby_daemons)
std::vector<mds_gid_t> standby_gids;
- for (const auto &j : pending.standby_daemons) {
+ for (const auto &j : fsmap.standby_daemons) {
standby_gids.push_back(j.first);
}
for (const auto &gid : standby_gids) {
- const auto &info = pending.standby_daemons.at(gid);
+ const auto &info = fsmap.standby_daemons.at(gid);
assert(info.state == MDSMap::STATE_STANDBY);
if (!info.standby_replay) {
// the standby_for_rank refers to: lookup via legacy_client_fscid
mds_role_t target_role = {
info.standby_for_fscid == FS_CLUSTER_ID_NONE ?
- pending.legacy_client_fscid : info.standby_for_fscid,
+ fsmap.legacy_client_fscid : info.standby_for_fscid,
info.standby_for_rank};
// It is possible that the map contains a standby_for_fscid
// that doesn't correspond to an existing filesystem, especially
// if we loaded from a version with a bug (#17466)
if (info.standby_for_fscid != FS_CLUSTER_ID_NONE
- && !pending.filesystem_exists(info.standby_for_fscid)) {
+ && !fsmap.filesystem_exists(info.standby_for_fscid)) {
derr << "gid " << gid << " has invalid standby_for_fscid "
<< info.standby_for_fscid << dendl;
continue;
// If we managed to resolve a full target role
if (target_role.fscid != FS_CLUSTER_ID_NONE) {
- const auto &fs = pending.get_filesystem(target_role.fscid);
+ const auto &fs = fsmap.get_filesystem(target_role.fscid);
if (fs->mds_map.is_followable(target_role.rank)) {
- do_propose |= try_standby_replay(
- info,
- *fs,
+ do_propose |= try_standby_replay(fsmap, info, *fs,
fs->mds_map.get_info(target_role.rank));
}
}
}
// check everyone
- for (const auto &p : pending.filesystems) {
+ for (const auto &p : fsmap.filesystems) {
if (info.standby_for_fscid != FS_CLUSTER_ID_NONE &&
info.standby_for_fscid != p.first)
continue;
continue; // we're supposed to follow someone else
}
- if (try_standby_replay(info, *fs, cand_info)) {
+ if (try_standby_replay(fsmap, info, *fs, cand_info)) {
assigned = true;
break;
}
// make sure mds's are still alive
// ...if i am an active leader
- if (!is_active()) return;
-
- dout(10) << get_working_fsmap() << dendl;
-
- if (!is_leader()) return;
+ if (!is_active() || !is_leader()) return;
auto &pending = get_pending_fsmap_writeable();
// expand mds cluster (add new nodes to @in)?
for (auto &p : pending.filesystems) {
- do_propose |= maybe_expand_cluster(p.second);
+ do_propose |= maybe_expand_cluster(pending, p.second->fscid);
}
- const auto now = ceph_clock_now();
- if (last_tick.is_zero()) {
+ mono_time now = mono_clock::now();
+ if (last_tick == decltype(last_tick)::min()) {
last_tick = now;
}
+ chrono::duration<double> since_last = now-last_tick;
- if (now - last_tick > (g_conf->mds_beacon_grace - g_conf->mds_beacon_interval)) {
+ if (since_last.count() >
+ (g_conf->mds_beacon_grace - g_conf->mds_beacon_interval)) {
// This case handles either local slowness (calls being delayed
// for whatever reason) or cluster election slowness (a long gap
// between calls while an election happened)
dout(4) << __func__ << ": resetting beacon timeouts due to mon delay "
"(slow election?) of " << now - last_tick << " seconds" << dendl;
- for (auto &i : last_beacon) {
- i.second.stamp = now;
+ for (auto &p : last_beacon) {
+ p.second.stamp = now;
}
}
last_tick = now;
- // check beacon timestamps
- utime_t cutoff = now;
- cutoff -= g_conf->mds_beacon_grace;
-
// make sure last_beacon is fully populated
for (auto &p : pending.mds_roles) {
auto &gid = p.first;
- if (last_beacon.count(gid) == 0) {
- last_beacon[gid].stamp = now;
- last_beacon[gid].seq = 0;
- }
+ // emplace() leaves gids that are already tracked untouched. New entries are
+ // stamped with this tick's `now` (not a fresh clock read) so that every age
+ // computed below is measured against the same reference point.
+ last_beacon.emplace(std::piecewise_construct,
+ std::forward_as_tuple(gid),
+ std::forward_as_tuple(now, 0));
}
+
+ // check beacon timestamps
bool propose_osdmap = false;
bool osdmap_writeable = mon->osdmon()->is_writeable();
- auto p = last_beacon.begin();
- while (p != last_beacon.end()) {
- mds_gid_t gid = p->first;
- auto beacon_info = p->second;
- ++p;
+ for (auto it = last_beacon.begin(); it != last_beacon.end(); ) {
+ mds_gid_t gid = it->first;
+ auto beacon_info = it->second;
+ chrono::duration<double> since_last = now-beacon_info.stamp;
if (!pending.gid_exists(gid)) {
// clean it out
- last_beacon.erase(gid);
+ it = last_beacon.erase(it);
continue;
}
- if (beacon_info.stamp < cutoff) {
+
+ // Step past this entry before it can be failed: maybe_replace_gid() may
+ // call fail_mds_gid(), which erases gid from last_beacon and would
+ // invalidate an iterator that still points at it.
+ ++it;
+
+ if (since_last.count() >= g_conf->mds_beacon_grace) {
auto &info = pending.get_info_gid(gid);
dout(1) << "no beacon from mds." << info.rank << "." << info.inc
<< " (gid: " << gid << " addr: " << info.addr
<< " state: " << ceph_mds_state_name(info.state) << ")"
- << " since " << beacon_info.stamp << dendl;
+ << " since " << since_last.count() << "s" << dendl;
// If the OSDMap is writeable, we can blacklist things, so we can
// try failing any laggy MDS daemons. Consider each one for failure.
if (osdmap_writeable) {
- maybe_replace_gid(gid, info, &do_propose, &propose_osdmap);
+ maybe_replace_gid(pending, gid, info, &do_propose, &propose_osdmap);
}
}
}
if (propose_osdmap) {
request_proposal(mon->osdmon());
for (auto &p : pending.filesystems) {
auto &fs = p.second;
if (!fs->mds_map.test_flag(CEPH_MDSMAP_DOWN)) {
- do_propose |= maybe_promote_standby(fs);
+ do_propose |= maybe_promote_standby(pending, fs);
}
}
* ainfo: the would-be leader
*/
bool MDSMonitor::try_standby_replay(
+ FSMap &fsmap,
const MDSMap::mds_info_t& finfo,
const Filesystem &leader_fs,
const MDSMap::mds_info_t& ainfo)
} else {
// Assign the new role to the standby
dout(10) << " setting to follow mds rank " << ainfo.rank << dendl;
- get_pending_fsmap_writeable().assign_standby_replay(finfo.global_id, leader_fs.fscid, ainfo.rank);
+ fsmap.assign_standby_replay(finfo.global_id, leader_fs.fscid, ainfo.rank);
return true;
}
}
void MDSMonitor::on_restart()
{
// Clear out the leader-specific state.
- last_tick = utime_t();
+ last_tick = mono_clock::now();
last_beacon.clear();
}