#include <sstream>
using std::stringstream;
+#include "mon/health_check.h"
+
void Filesystem::dump(Formatter *f) const
{
f->dump_unsigned("max", fs->mds_map.max_mds);
}
} else {
- *out << "e" << get_epoch() << ":";
- if (filesystems.size() == 1) {
- auto fs = filesystems.begin()->second;
- *out << " " << fs->mds_map.up.size() << "/" << fs->mds_map.in.size() << "/"
- << fs->mds_map.max_mds << " up";
- } else {
- for (auto i : filesystems) {
- auto fs = i.second;
- *out << " " << fs->mds_map.fs_name << "-" << fs->mds_map.up.size() << "/"
- << fs->mds_map.in.size() << "/" << fs->mds_map.max_mds << " up";
- }
+ for (auto i : filesystems) {
+ auto fs = i.second;
+ *out << fs->mds_map.fs_name << "-" << fs->mds_map.up.size() << "/"
+ << fs->mds_map.in.size() << "/" << fs->mds_map.max_mds << " up ";
}
}
}
-void FSMap::create_filesystem(const std::string &name,
+void FSMap::create_filesystem(boost::string_view name,
int64_t metadata_pool, int64_t data_pool,
uint64_t features)
{
auto fs = std::make_shared<Filesystem>();
- fs->mds_map.fs_name = name;
+ fs->mds_map.epoch = epoch;
+ fs->mds_map.fs_name = std::string(name);
fs->mds_map.max_mds = 1;
- fs->mds_map.data_pools.insert(data_pool);
+ fs->mds_map.data_pools.push_back(data_pool);
fs->mds_map.metadata_pool = metadata_pool;
fs->mds_map.cas_pool = -1;
fs->mds_map.max_file_size = g_conf->mds_max_file_size;
new_fs->mds_map.standby_count_wanted = fs->mds_map.standby_count_wanted;
new_fs->mds_map.enabled = true;
+ // Remember mds ranks that have ever started. (They should load the old
+ // inotable instead of creating a new one if they start again.)
+ new_fs->mds_map.stopped.insert(fs->mds_map.in.begin(), fs->mds_map.in.end());
+ new_fs->mds_map.stopped.insert(fs->mds_map.stopped.begin(), fs->mds_map.stopped.end());
+ new_fs->mds_map.stopped.erase(mds_rank_t(0));
+
// Persist the new FSMap
filesystems[new_fs->fscid] = new_fs;
}
return changed;
}
+/**
+ * Fill *checks with health checks derived from this FSMap.
+ *
+ * For every entry in `filesystems`:
+ *  - the checks reported by that filesystem's MDSMap are collected into a
+ *    temporary map and merged into *checks;
+ *  - failed ranks for which find_replacement_for() returns MDS_GID_NONE are
+ *    counted, and if there are any, a detail line is appended to the
+ *    FS_WITH_FAILED_MDS warning.
+ *
+ * After the loop, the largest value returned by any filesystem's
+ * get_standby_count_wanted() is reported under MDS_INSUFFICIENT_STANDBY
+ * when it is non-zero.
+ *
+ * @param checks  map that receives the checks; it is written to, never read
+ *                back here beyond get_or_add()/merge().
+ */
+void FSMap::get_health_checks(health_check_map_t *checks) const
+{
+  const auto standbys_available = standby_daemons.size();
+  mds_rank_t most_standbys_wanted = 0;
+
+  for (const auto &entry : filesystems) {
+    const auto &fs = entry.second;
+
+    health_check_map_t per_fs_checks;
+    fs->mds_map.get_health_checks(&per_fs_checks);
+
+    // Some of the failed ranks might be transient (i.e. there are standbys
+    // ready to replace them).  Only "stuck" failed ranks are reported, i.e.
+    // ranks which are failed and have no standby replacement available.
+    std::set<mds_rank_t> stuck_failed;
+    for (const auto &rank : fs->mds_map.failed) {
+      if (find_replacement_for({fs->fscid, rank}, {},
+                               g_conf->mon_force_standby_active)
+          == MDS_GID_NONE) {
+        stuck_failed.insert(rank);
+      }
+    }
+
+    // FS_WITH_FAILED_MDS
+    const auto stuck_count = stuck_failed.size();
+    if (stuck_count != 0) {
+      health_check_t &failed_check = checks->get_or_add(
+        "FS_WITH_FAILED_MDS", HEALTH_WARN,
+        "%num% filesystem%plurals% %hasorhave% a failed mds daemon");
+      std::ostringstream detail;
+      detail << "fs " << fs->mds_map.fs_name << " has " << stuck_count
+             << " failed mds" << (stuck_count > 1 ? "s" : "");
+      failed_check.detail.push_back(detail.str());
+    }
+
+    // The per-filesystem checks are merged only after the failed-mds entry
+    // has been added, as in the original ordering.
+    checks->merge(per_fs_checks);
+
+    // Keep the maximum across all filesystems.
+    const mds_rank_t wanted = fs->mds_map.get_standby_count_wanted(
+      static_cast<mds_rank_t>(standbys_available));
+    if (most_standbys_wanted < wanted) {
+      most_standbys_wanted = wanted;
+    }
+  }
+
+  // MDS_INSUFFICIENT_STANDBY
+  if (most_standbys_wanted != 0) {
+    auto &standby_check = checks->get_or_add(
+      "MDS_INSUFFICIENT_STANDBY", HEALTH_WARN,
+      "insufficient standby MDS daemons available");
+    std::ostringstream detail;
+    detail << "have " << standbys_available << "; want "
+           << most_standbys_wanted << " more";
+    standby_check.detail.push_back(detail.str());
+  }
+}
+
void FSMap::encode(bufferlist& bl, uint64_t features) const
{
if (features & CEPH_FEATURE_SERVER_JEWEL) {
void FSMap::decode(bufferlist::iterator& p)
{
- // Because the mon used to store an MDSMap where we now
- // store an FSMap, FSMap knows how to decode the legacy
- // MDSMap format (it never needs to encode it though).
- MDSMap legacy_mds_map;
-
// The highest MDSMap encoding version before we changed the
// MDSMonitor to store an FSMap instead of an MDSMap was
// 5, so anything older than 6 is decoded as an MDSMap,
// and anything newer is decoded as an FSMap.
DECODE_START_LEGACY_COMPAT_LEN_16(7, 4, 4, p);
if (struct_v < 6) {
+ // Because the mon used to store an MDSMap where we now
+ // store an FSMap, FSMap knows how to decode the legacy
+ // MDSMap format (it never needs to encode it though).
+ MDSMap legacy_mds_map;
+
// Decoding an MDSMap (upgrade)
::decode(epoch, p);
::decode(legacy_mds_map.flags, p);
while (n--) {
__u32 m;
::decode(m, p);
- legacy_mds_map.data_pools.insert(m);
+ legacy_mds_map.data_pools.push_back(m);
}
__s32 s;
::decode(s, p);
DECODE_FINISH(p);
}
+/**
+ * Call sanitize() on the mds_map of every entry in `filesystems`.
+ *
+ * @param pool_exists  callback handed unchanged to each mds_map.sanitize();
+ *                     presumably answers whether a pool id still exists
+ *                     (inferred from the name -- confirm against
+ *                     MDSMap::sanitize).
+ */
+void FSMap::sanitize(std::function<bool(int64_t pool)> pool_exists)
+{
+  for (auto it = filesystems.begin(); it != filesystems.end(); ++it) {
+    it->second->mds_map.sanitize(pool_exists);
+  }
+}
void Filesystem::encode(bufferlist& bl, uint64_t features) const
{
}
int FSMap::parse_filesystem(
- std::string const &ns_str,
+ boost::string_view ns_str,
std::shared_ptr<const Filesystem> *result
) const
{
std::string ns_err;
- fs_cluster_id_t fscid = strict_strtol(ns_str.c_str(), 10, &ns_err);
+ std::string s(ns_str);
+ fs_cluster_id_t fscid = strict_strtol(s.c_str(), 10, &ns_err);
if (!ns_err.empty() || filesystems.count(fscid) == 0) {
for (auto &fs : filesystems) {
- if (fs.second->mds_map.fs_name == ns_str) {
+ if (fs.second->mds_map.fs_name == s) {
*result = std::const_pointer_cast<const Filesystem>(fs.second);
return 0;
}
mds_map.print(out);
}
-mds_gid_t FSMap::find_standby_for(mds_role_t role, const std::string& name) const
+mds_gid_t FSMap::find_standby_for(mds_role_t role, boost::string_view name) const
{
mds_gid_t result = MDS_GID_NONE;
return result;
}
-mds_gid_t FSMap::find_unused(fs_cluster_id_t fscid,
- bool force_standby_active) const {
+mds_gid_t FSMap::find_unused_for(mds_role_t role,
+ bool force_standby_active) const {
for (const auto &i : standby_daemons) {
const auto &gid = i.first;
const auto &info = i.second;
continue;
if (info.standby_for_fscid != FS_CLUSTER_ID_NONE &&
- info.standby_for_fscid != fscid)
+ info.standby_for_fscid != role.fscid)
+ continue;
+ if (info.standby_for_rank != MDS_RANK_NONE &&
+ info.standby_for_rank != role.rank)
continue;
// To be considered 'unused' a daemon must either not
return MDS_GID_NONE;
}
-mds_gid_t FSMap::find_replacement_for(mds_role_t role, const std::string& name,
+mds_gid_t FSMap::find_replacement_for(mds_role_t role, boost::string_view name, // role and name are forwarded to find_standby_for()
bool force_standby_active) const {
const mds_gid_t standby = find_standby_for(role, name);
if (standby)
return standby;
else
- return find_unused(role.fscid, force_standby_active);
+ return find_unused_for(role, force_standby_active); // fallback when find_standby_for() gave a gid that tests false; now passes the whole role
}
void FSMap::sanity() const
* if legacy_client_ns is set.
*/
int FSMap::parse_role(
- const std::string &role_str,
+ boost::string_view role_str,
mds_role_t *role,
std::ostream &ss) const
{
mds_rank_t rank;
std::string err;
- std::string rank_str = role_str.substr(rank_pos);
+ std::string rank_str(role_str.substr(rank_pos));
long rank_i = strict_strtol(rank_str.c_str(), 10, &err);
if (rank_i < 0 || !err.empty()) {
ss << "Invalid rank '" << rank_str << "'";