#undef dout_prefix
#define dout_prefix *_dout << "mds.beacon." << name << ' '
+using std::map;
+using std::string;
+
using namespace std::chrono_literals;
Beacon::Beacon(CephContext *cct, MonClient *monc, std::string_view name)
Dispatcher(cct),
beacon_interval(g_conf()->mds_beacon_interval),
monc(monc),
- name(name)
+ name(name),
+ compat(MDSMap::get_compat_set_all())
{
}
});
}
-bool Beacon::ms_can_fast_dispatch2(const Message::const_ref& m) const
+bool Beacon::ms_can_fast_dispatch2(const cref_t<Message>& m) const
{
return m->get_type() == MSG_MDS_BEACON;
}
-void Beacon::ms_fast_dispatch2(const Message::ref& m)
+void Beacon::ms_fast_dispatch2(const ref_t<Message>& m)
{
bool handled = ms_dispatch2(m);
ceph_assert(handled);
}
-bool Beacon::ms_dispatch2(const Message::ref& m)
+bool Beacon::ms_dispatch2(const ref_t<Message>& m)
{
if (m->get_type() == MSG_MDS_BEACON) {
if (m->get_connection()->get_peer_type() == CEPH_ENTITY_TYPE_MON) {
- handle_mds_beacon(MMDSBeacon::msgref_cast(m));
+ handle_mds_beacon(ref_cast<MMDSBeacon>(m));
}
return true;
}
*
* This function puts the passed message before returning
*/
-void Beacon::handle_mds_beacon(const MMDSBeacon::const_ref &m)
+void Beacon::handle_mds_beacon(const cref_t<MMDSBeacon> &m)
{
std::unique_lock lock(mutex);
ceph_assert(want_state != MDSMap::STATE_NULL);
- auto beacon = MMDSBeacon::create(
+ auto beacon = make_message<MMDSBeacon>(
monc->get_fsid(), mds_gid_t(monc->get_global_id()),
name,
epoch,
want_state,
last_seq,
CEPH_FEATURES_SUPPORTED_DEFAULT);
-
beacon->set_health(health);
beacon->set_compat(compat);
+ beacon->set_fs(g_conf().get_val<std::string>("mds_join_fs"));
// piggyback the sys info on beacon msg
if (want_state == MDSMap::STATE_BOOT) {
map<string, string> sys_info;
{
ceph_assert(mdsmap.get_epoch() >= epoch);
- if (mdsmap.get_epoch() != epoch) {
+ if (mdsmap.get_epoch() >= epoch) {
epoch = mdsmap.get_epoch();
- compat = MDSMap::get_compat_set_default();
- compat.merge(mdsmap.compat);
}
}
return false;
}
-void Beacon::set_want_state(const MDSMap &mdsmap, MDSMap::DaemonState const newstate)
+void Beacon::set_want_state(const MDSMap &mdsmap, MDSMap::DaemonState newstate)
{
std::unique_lock lock(mutex);
}
// I'm going to touch this MDS, so it must be locked
- ceph_assert(mds->mds_lock.is_locked_by_me());
+ ceph_assert(ceph_mutex_is_locked_by_me(mds->mds_lock));
health.metrics.clear();
}
// Detect MDS_HEALTH_TRIM condition
- // Arbitrary factor of 2, indicates MDS is not trimming promptly
+ // Indicates MDS is not trimming promptly
{
- if (mds->mdlog->get_num_segments() > (size_t)(g_conf()->mds_log_max_segments * 2)) {
- std::ostringstream oss;
- oss << "Behind on trimming (" << mds->mdlog->get_num_segments()
+ if (mds->mdlog->get_num_segments() > (size_t)(g_conf()->mds_log_max_segments * g_conf().get_val<double>("mds_log_warn_factor"))) {
+ CachedStackStringStream css;
+ *css << "Behind on trimming (" << mds->mdlog->get_num_segments()
<< "/" << g_conf()->mds_log_max_segments << ")";
- MDSHealthMetric m(MDS_HEALTH_TRIM, HEALTH_WARN, oss.str());
+ MDSHealthMetric m(MDS_HEALTH_TRIM, HEALTH_WARN, css->strv());
m.metadata["num_segments"] = stringify(mds->mdlog->get_num_segments());
m.metadata["max_segments"] = stringify(g_conf()->mds_log_max_segments);
health.metrics.push_back(m);
// Detect clients failing to respond to modifications to capabilities in
// CLIENT_CAPS messages.
{
- std::list<client_t> late_clients;
- mds->locker->get_late_revoking_clients(&late_clients,
- mds->mdsmap->get_session_timeout());
- std::list<MDSHealthMetric> late_cap_metrics;
-
- for (std::list<client_t>::iterator i = late_clients.begin(); i != late_clients.end(); ++i) {
+ auto&& late_clients = mds->locker->get_late_revoking_clients(mds->mdsmap->get_session_timeout());
+ std::vector<MDSHealthMetric> late_cap_metrics;
+ for (const auto& client : late_clients) {
// client_t is equivalent to session.info.inst.name.num
// Construct an entity_name_t to lookup into SessionMap
- entity_name_t ename(CEPH_ENTITY_TYPE_CLIENT, i->v);
+ entity_name_t ename(CEPH_ENTITY_TYPE_CLIENT, client.v);
Session const *s = mds->sessionmap.get_session(ename);
if (s == NULL) {
// Shouldn't happen, but not worth crashing if it does as this is
// just health-reporting code.
- derr << "Client ID without session: " << i->v << dendl;
+ derr << "Client ID without session: " << client.v << dendl;
continue;
}
- std::ostringstream oss;
- oss << "Client " << s->get_human_name() << " failing to respond to capability release";
- MDSHealthMetric m(MDS_HEALTH_CLIENT_LATE_RELEASE, HEALTH_WARN, oss.str());
- m.metadata["client_id"] = stringify(i->v);
- late_cap_metrics.push_back(m);
+ CachedStackStringStream css;
+ *css << "Client " << s->get_human_name() << " failing to respond to capability release";
+ MDSHealthMetric m(MDS_HEALTH_CLIENT_LATE_RELEASE, HEALTH_WARN, css->strv());
+ m.metadata["client_id"] = stringify(client.v);
+ late_cap_metrics.emplace_back(std::move(m));
}
if (late_cap_metrics.size() <= (size_t)g_conf()->mds_health_summarize_threshold) {
- health.metrics.splice(health.metrics.end(), late_cap_metrics);
+ auto&& m = late_cap_metrics;
+ health.metrics.insert(std::end(health.metrics), std::cbegin(m), std::cend(m));
} else {
- std::ostringstream oss;
- oss << "Many clients (" << late_cap_metrics.size()
+ CachedStackStringStream css;
+ *css << "Many clients (" << late_cap_metrics.size()
<< ") failing to respond to capability release";
- MDSHealthMetric m(MDS_HEALTH_CLIENT_LATE_RELEASE_MANY, HEALTH_WARN, oss.str());
+ MDSHealthMetric m(MDS_HEALTH_CLIENT_LATE_RELEASE_MANY, HEALTH_WARN, css->strv());
m.metadata["client_count"] = stringify(late_cap_metrics.size());
- health.metrics.push_back(m);
- late_cap_metrics.clear();
+ health.metrics.push_back(std::move(m));
}
}
//
// Detect clients failing to advance their old_client_tid
{
- set<Session*> sessions;
+ std::set<Session*> sessions;
mds->sessionmap.get_client_session_set(sessions);
+ const auto min_caps_working_set = g_conf().get_val<uint64_t>("mds_min_caps_working_set");
const auto recall_warning_threshold = g_conf().get_val<Option::size_t>("mds_recall_warning_threshold");
const auto max_completed_requests = g_conf()->mds_max_completed_requests;
const auto max_completed_flushes = g_conf()->mds_max_completed_flushes;
- std::list<MDSHealthMetric> late_recall_metrics;
- std::list<MDSHealthMetric> large_completed_requests_metrics;
+ std::vector<MDSHealthMetric> late_recall_metrics;
+ std::vector<MDSHealthMetric> large_completed_requests_metrics;
for (auto& session : sessions) {
+ const uint64_t num_caps = session->get_num_caps();
const uint64_t recall_caps = session->get_recall_caps();
- if (recall_caps > recall_warning_threshold) {
+ if (recall_caps > recall_warning_threshold && num_caps > min_caps_working_set) {
dout(2) << "Session " << *session <<
" is not releasing caps fast enough. Recalled caps at " << recall_caps
<< " > " << recall_warning_threshold << " (mds_recall_warning_threshold)." << dendl;
- std::ostringstream oss;
- oss << "Client " << session->get_human_name() << " failing to respond to cache pressure";
- MDSHealthMetric m(MDS_HEALTH_CLIENT_RECALL, HEALTH_WARN, oss.str());
+ CachedStackStringStream css;
+ *css << "Client " << session->get_human_name() << " failing to respond to cache pressure";
+ MDSHealthMetric m(MDS_HEALTH_CLIENT_RECALL, HEALTH_WARN, css->strv());
m.metadata["client_id"] = stringify(session->get_client());
- late_recall_metrics.push_back(m);
+ late_recall_metrics.emplace_back(std::move(m));
}
if ((session->get_num_trim_requests_warnings() > 0 &&
session->get_num_completed_requests() >= max_completed_requests) ||
(session->get_num_trim_flushes_warnings() > 0 &&
session->get_num_completed_flushes() >= max_completed_flushes)) {
- std::ostringstream oss;
- oss << "Client " << session->get_human_name() << " failing to advance its oldest client/flush tid. ";
- MDSHealthMetric m(MDS_HEALTH_CLIENT_OLDEST_TID, HEALTH_WARN, oss.str());
+ CachedStackStringStream css;
+ *css << "Client " << session->get_human_name() << " failing to advance its oldest client/flush tid. ";
+ MDSHealthMetric m(MDS_HEALTH_CLIENT_OLDEST_TID, HEALTH_WARN, css->strv());
m.metadata["client_id"] = stringify(session->get_client());
- large_completed_requests_metrics.push_back(m);
+ large_completed_requests_metrics.emplace_back(std::move(m));
}
}
if (late_recall_metrics.size() <= (size_t)g_conf()->mds_health_summarize_threshold) {
- health.metrics.splice(health.metrics.end(), late_recall_metrics);
+ auto&& m = late_recall_metrics;
+ health.metrics.insert(std::end(health.metrics), std::cbegin(m), std::cend(m));
} else {
- std::ostringstream oss;
- oss << "Many clients (" << late_recall_metrics.size()
+ CachedStackStringStream css;
+ *css << "Many clients (" << late_recall_metrics.size()
<< ") failing to respond to cache pressure";
- MDSHealthMetric m(MDS_HEALTH_CLIENT_RECALL_MANY, HEALTH_WARN, oss.str());
+ MDSHealthMetric m(MDS_HEALTH_CLIENT_RECALL_MANY, HEALTH_WARN, css->strv());
m.metadata["client_count"] = stringify(late_recall_metrics.size());
health.metrics.push_back(m);
late_recall_metrics.clear();
}
if (large_completed_requests_metrics.size() <= (size_t)g_conf()->mds_health_summarize_threshold) {
- health.metrics.splice(health.metrics.end(), large_completed_requests_metrics);
+ auto&& m = large_completed_requests_metrics;
+ health.metrics.insert(std::end(health.metrics), std::cbegin(m), std::cend(m));
} else {
- std::ostringstream oss;
- oss << "Many clients (" << large_completed_requests_metrics.size()
+ CachedStackStringStream css;
+ *css << "Many clients (" << large_completed_requests_metrics.size()
<< ") failing to advance their oldest client/flush tid";
- MDSHealthMetric m(MDS_HEALTH_CLIENT_OLDEST_TID_MANY, HEALTH_WARN, oss.str());
+ MDSHealthMetric m(MDS_HEALTH_CLIENT_OLDEST_TID_MANY, HEALTH_WARN, css->strv());
m.metadata["client_count"] = stringify(large_completed_requests_metrics.size());
health.metrics.push_back(m);
large_completed_requests_metrics.clear();
int slow = mds->get_mds_slow_req_count();
if (slow) {
dout(20) << slow << " slow request found" << dendl;
- std::ostringstream oss;
- oss << slow << " slow requests are blocked > " << g_conf()->mds_op_complaint_time << " secs";
+ CachedStackStringStream css;
+ *css << slow << " slow requests are blocked > " << g_conf()->mds_op_complaint_time << " secs";
- MDSHealthMetric m(MDS_HEALTH_SLOW_REQUEST, HEALTH_WARN, oss.str());
+ MDSHealthMetric m(MDS_HEALTH_SLOW_REQUEST, HEALTH_WARN, css->strv());
health.metrics.push_back(m);
}
}
dout(20) << count << " slow metadata IOs found" << dendl;
auto oldest_secs = std::chrono::duration<double>(now - oldest).count();
- std::ostringstream oss;
- oss << count << " slow metadata IOs are blocked > " << complaint_time
+ CachedStackStringStream css;
+ *css << count << " slow metadata IOs are blocked > " << complaint_time
<< " secs, oldest blocked for " << (int64_t)oldest_secs << " secs";
- MDSHealthMetric m(MDS_HEALTH_SLOW_METADATA_IO, HEALTH_WARN, oss.str());
+ MDSHealthMetric m(MDS_HEALTH_SLOW_METADATA_IO, HEALTH_WARN, css->strv());
health.metrics.push_back(m);
}
}
// Report if we have significantly exceeded our cache size limit
if (mds->mdcache->cache_overfull()) {
- std::ostringstream oss;
- oss << "MDS cache is too large (" << bytes2str(mds->mdcache->cache_size())
+ CachedStackStringStream css;
+ *css << "MDS cache is too large (" << bytes2str(mds->mdcache->cache_size())
<< "/" << bytes2str(mds->mdcache->cache_limit_memory()) << "); "
<< mds->mdcache->num_inodes_with_caps << " inodes in use by clients, "
<< mds->mdcache->get_num_strays() << " stray files";
- MDSHealthMetric m(MDS_HEALTH_CACHE_OVERSIZED, HEALTH_WARN, oss.str());
+ MDSHealthMetric m(MDS_HEALTH_CACHE_OVERSIZED, HEALTH_WARN, css->strv());
health.metrics.push_back(m);
}
}