#define dout_context g_ceph_context
+using std::list;
+using std::make_pair;
+using std::map;
+using std::pair;
+using std::ostream;
+using std::ostringstream;
+using std::set;
+using std::string;
+using std::stringstream;
+using std::vector;
+
+using ceph::bufferlist;
+using TOPNSPC::common::cmd_getval;
+
MEMPOOL_DEFINE_OBJECT_FACTORY(PGMapDigest, pgmap_digest, pgmap);
MEMPOOL_DEFINE_OBJECT_FACTORY(PGMap, pgmap, pgmap);
MEMPOOL_DEFINE_OBJECT_FACTORY(PGMap::Incremental, pgmap_inc, pgmap);
uint32_t n = num_pg_by_state.size();
encode(n, bl);
for (auto p : num_pg_by_state) {
- encode((uint32_t)p.first, bl);
+ encode((int32_t)p.first, bl);
encode(p.second, bl);
}
}
DECODE_FINISH(p);
}
-void PGMapDigest::dump(Formatter *f) const
+void PGMapDigest::dump(ceph::Formatter *f) const
{
f->dump_unsigned("num_pg", num_pg);
f->dump_unsigned("num_pg_active", num_pg_active);
return ss.str();
}
-void PGMapDigest::print_summary(Formatter *f, ostream *out) const
+void PGMapDigest::print_summary(ceph::Formatter *f, ostream *out) const
{
if (f)
f->open_array_section("pgs_by_state");
// list is descending numeric order (by count)
- multimap<int,int> state_by_count; // count -> state
+ std::multimap<int,uint64_t> state_by_count; // count -> state
for (auto p = num_pg_by_state.begin();
p != num_pg_by_state.end();
++p) {
if (!f) {
unsigned max_width = 1;
- for (multimap<int,int>::reverse_iterator p = state_by_count.rbegin();
- p != state_by_count.rend();
- ++p)
+ for (auto p = state_by_count.rbegin(); p != state_by_count.rend(); ++p)
{
std::stringstream ss;
ss << p->first;
max_width = std::max<size_t>(ss.str().size(), max_width);
}
- for (multimap<int,int>::reverse_iterator p = state_by_count.rbegin();
- p != state_by_count.rend();
- ++p)
+ for (auto p = state_by_count.rbegin(); p != state_by_count.rend(); ++p)
{
if (pad) {
*out << " ";
*out << " cache: " << ss_cache_io.str() << "\n";
}
-void PGMapDigest::print_oneline_summary(Formatter *f, ostream *out) const
+void PGMapDigest::print_oneline_summary(ceph::Formatter *f, ostream *out) const
{
std::stringstream ss;
}
}
-void PGMapDigest::recovery_summary(Formatter *f, list<string> *psl,
+void PGMapDigest::recovery_summary(ceph::Formatter *f, list<string> *psl,
const pool_stat_t& pool_sum) const
{
if (pool_sum.stats.sum.num_objects_degraded && pool_sum.stats.sum.num_object_copies > 0) {
}
}
-void PGMapDigest::recovery_rate_summary(Formatter *f, ostream *out,
+void PGMapDigest::recovery_rate_summary(ceph::Formatter *f, ostream *out,
const pool_stat_t& delta_sum,
utime_t delta_stamp) const
{
}
}
-void PGMapDigest::overall_recovery_rate_summary(Formatter *f, ostream *out) const
+void PGMapDigest::overall_recovery_rate_summary(ceph::Formatter *f, ostream *out) const
{
recovery_rate_summary(f, out, pg_sum_delta, stamp_delta);
}
-void PGMapDigest::overall_recovery_summary(Formatter *f, list<string> *psl) const
+void PGMapDigest::overall_recovery_summary(ceph::Formatter *f, list<string> *psl) const
{
recovery_summary(f, psl, pg_sum);
}
-void PGMapDigest::pool_recovery_rate_summary(Formatter *f, ostream *out,
+void PGMapDigest::pool_recovery_rate_summary(ceph::Formatter *f, ostream *out,
uint64_t poolid) const
{
auto p = per_pool_sum_delta.find(poolid);
recovery_rate_summary(f, out, p->second.first, ts->second);
}
-void PGMapDigest::pool_recovery_summary(Formatter *f, list<string> *psl,
+void PGMapDigest::pool_recovery_summary(ceph::Formatter *f, list<string> *psl,
uint64_t poolid) const
{
auto p = pg_pool_sum.find(poolid);
recovery_summary(f, psl, p->second);
}
-void PGMapDigest::client_io_rate_summary(Formatter *f, ostream *out,
+void PGMapDigest::client_io_rate_summary(ceph::Formatter *f, ostream *out,
const pool_stat_t& delta_sum,
utime_t delta_stamp) const
{
}
}
-void PGMapDigest::overall_client_io_rate_summary(Formatter *f, ostream *out) const
+void PGMapDigest::overall_client_io_rate_summary(ceph::Formatter *f, ostream *out) const
{
client_io_rate_summary(f, out, pg_sum_delta, stamp_delta);
}
-void PGMapDigest::pool_client_io_rate_summary(Formatter *f, ostream *out,
+void PGMapDigest::pool_client_io_rate_summary(ceph::Formatter *f, ostream *out,
uint64_t poolid) const
{
auto p = per_pool_sum_delta.find(poolid);
client_io_rate_summary(f, out, p->second.first, ts->second);
}
-void PGMapDigest::cache_io_rate_summary(Formatter *f, ostream *out,
+void PGMapDigest::cache_io_rate_summary(ceph::Formatter *f, ostream *out,
const pool_stat_t& delta_sum,
utime_t delta_stamp) const
{
}
}
-void PGMapDigest::overall_cache_io_rate_summary(Formatter *f, ostream *out) const
+void PGMapDigest::overall_cache_io_rate_summary(ceph::Formatter *f, ostream *out) const
{
cache_io_rate_summary(f, out, pg_sum_delta, stamp_delta);
}
-void PGMapDigest::pool_cache_io_rate_summary(Formatter *f, ostream *out,
+void PGMapDigest::pool_cache_io_rate_summary(ceph::Formatter *f, ostream *out,
uint64_t poolid) const
{
auto p = per_pool_sum_delta.find(poolid);
void PGMapDigest::dump_pool_stats_full(
const OSDMap &osd_map,
stringstream *ss,
- Formatter *f,
+ ceph::Formatter *f,
bool verbose) const
{
TextTable tbl;
tbl.define_column("POOL", TextTable::LEFT, TextTable::LEFT);
tbl.define_column("ID", TextTable::LEFT, TextTable::RIGHT);
tbl.define_column("STORED", TextTable::LEFT, TextTable::RIGHT);
+ if (verbose) {
+ tbl.define_column("(DATA)", TextTable::LEFT, TextTable::RIGHT);
+ tbl.define_column("(OMAP)", TextTable::LEFT, TextTable::RIGHT);
+ }
tbl.define_column("OBJECTS", TextTable::LEFT, TextTable::RIGHT);
tbl.define_column("USED", TextTable::LEFT, TextTable::RIGHT);
+ if (verbose) {
+ tbl.define_column("(DATA)", TextTable::LEFT, TextTable::RIGHT);
+ tbl.define_column("(OMAP)", TextTable::LEFT, TextTable::RIGHT);
+ }
tbl.define_column("%USED", TextTable::LEFT, TextTable::RIGHT);
tbl.define_column("MAX AVAIL", TextTable::LEFT, TextTable::RIGHT);
}
float raw_used_rate = osd_map.pool_raw_used_rate(pool_id);
bool per_pool = use_per_pool_stats();
+ bool per_pool_omap = use_per_pool_omap_stats();
dump_object_stat_sum(tbl, f, stat, avail, raw_used_rate, verbose, per_pool,
- pool);
+ per_pool_omap, pool);
if (f) {
f->close_section(); // stats
f->close_section(); // pool
f->close_section();
else {
ceph_assert(ss != nullptr);
- *ss << "POOLS:\n";
- tbl.set_indent(4);
+ *ss << "--- POOLS ---\n";
*ss << tbl;
}
}
void PGMapDigest::dump_cluster_stats(stringstream *ss,
- Formatter *f,
+ ceph::Formatter *f,
bool verbose) const
{
if (f) {
f->dump_float("total_used_raw_ratio", osd_sum.statfs.get_used_raw_ratio());
f->dump_unsigned("num_osds", osd_sum.num_osds);
f->dump_unsigned("num_per_pool_osds", osd_sum.num_per_pool_osds);
+ f->dump_unsigned("num_per_pool_omap_osds", osd_sum.num_per_pool_omap_osds);
f->close_section();
f->open_object_section("stats_by_class");
for (auto& i : osd_sum_by_class) {
<< percentify(osd_sum.statfs.get_used_raw_ratio()*100.0)
<< TextTable::endrow;
- *ss << "RAW STORAGE:\n";
- tbl.set_indent(4);
+ *ss << "--- RAW STORAGE ---\n";
*ss << tbl;
}
}
void PGMapDigest::dump_object_stat_sum(
- TextTable &tbl, Formatter *f,
+ TextTable &tbl, ceph::Formatter *f,
const pool_stat_t &pool_stat, uint64_t avail,
- float raw_used_rate, bool verbose, bool per_pool,
+ float raw_used_rate, bool verbose, bool per_pool, bool per_pool_omap,
const pg_pool_t *pool)
{
const object_stat_sum_t &sum = pool_stat.stats.sum;
raw_used_rate *= (float)(sum.num_object_copies - sum.num_objects_degraded) / sum.num_object_copies;
}
- uint64_t used_bytes = pool_stat.get_allocated_bytes(per_pool);
+ uint64_t used_data_bytes = pool_stat.get_allocated_data_bytes(per_pool);
+ uint64_t used_omap_bytes = pool_stat.get_allocated_omap_bytes(per_pool_omap);
+ uint64_t used_bytes = used_data_bytes + used_omap_bytes;
float used = 0.0;
// note avail passed in is raw_avail, calc raw_used here.
}
auto avail_res = raw_used_rate ? avail / raw_used_rate : 0;
// an approximation for actually stored user data
- auto stored_normalized = pool_stat.get_user_bytes(raw_used_rate, per_pool);
+ auto stored_data_normalized = pool_stat.get_user_data_bytes(
+ raw_used_rate, per_pool);
+ auto stored_omap_normalized = pool_stat.get_user_omap_bytes(
+ raw_used_rate, per_pool_omap);
+ auto stored_normalized = stored_data_normalized + stored_omap_normalized;
+ // same, amplified by replication or EC
+ auto stored_raw = stored_normalized * raw_used_rate;
if (f) {
f->dump_int("stored", stored_normalized);
+ if (verbose) {
+ f->dump_int("stored_data", stored_data_normalized);
+ f->dump_int("stored_omap", stored_omap_normalized);
+ }
f->dump_int("objects", sum.num_objects);
f->dump_int("kb_used", shift_round_up(used_bytes, 10));
f->dump_int("bytes_used", used_bytes);
+ if (verbose) {
+ f->dump_int("data_bytes_used", used_data_bytes);
+ f->dump_int("omap_bytes_used", used_omap_bytes);
+ }
f->dump_float("percent_used", used);
f->dump_unsigned("max_avail", avail_res);
if (verbose) {
f->dump_int("compress_bytes_used", statfs.data_compressed_allocated);
f->dump_int("compress_under_bytes", statfs.data_compressed_original);
// Stored by user amplified by replication
- f->dump_int("stored_raw", pool_stat.get_user_bytes(1.0, per_pool));
+ f->dump_int("stored_raw", stored_raw);
}
} else {
tbl << stringify(byte_u_t(stored_normalized));
+ if (verbose) {
+ tbl << stringify(byte_u_t(stored_data_normalized));
+ tbl << stringify(byte_u_t(stored_omap_normalized));
+ }
tbl << stringify(si_u_t(sum.num_objects));
tbl << stringify(byte_u_t(used_bytes));
+ if (verbose) {
+ tbl << stringify(byte_u_t(used_data_bytes));
+ tbl << stringify(byte_u_t(used_omap_bytes));
+ }
tbl << percentify(used*100);
tbl << stringify(byte_u_t(avail_res));
if (verbose) {
// ---------------------
// PGMap
-void PGMap::Incremental::dump(Formatter *f) const
+void PGMap::Incremental::dump(ceph::Formatter *f) const
{
f->dump_unsigned("version", version);
f->dump_stream("stamp") << stamp;
calc_stats();
}
-void PGMap::dump(Formatter *f) const
+void PGMap::dump(ceph::Formatter *f, bool with_net) const
{
dump_basic(f);
dump_pg_stats(f, false);
dump_pool_stats(f);
- dump_osd_stats(f);
+ dump_osd_stats(f, with_net);
}
-void PGMap::dump_basic(Formatter *f) const
+void PGMap::dump_basic(ceph::Formatter *f) const
{
f->dump_unsigned("version", version);
f->dump_stream("stamp") << stamp;
dump_delta(f);
}
-void PGMap::dump_delta(Formatter *f) const
+void PGMap::dump_delta(ceph::Formatter *f) const
{
f->open_object_section("pg_stats_delta");
pg_sum_delta.dump(f);
f->close_section();
}
-void PGMap::dump_pg_stats(Formatter *f, bool brief) const
+void PGMap::dump_pg_stats(ceph::Formatter *f, bool brief) const
{
f->open_array_section("pg_stats");
for (auto i = pg_stat.begin();
f->close_section();
}
-void PGMap::dump_pool_stats(Formatter *f) const
+void PGMap::dump_pool_stats(ceph::Formatter *f) const
{
f->open_array_section("pool_stats");
for (auto p = pg_pool_sum.begin();
f->close_section();
}
-void PGMap::dump_osd_stats(Formatter *f, bool with_net) const
+void PGMap::dump_osd_stats(ceph::Formatter *f, bool with_net) const
{
f->open_array_section("osd_stats");
for (auto q = osd_stat.begin();
f->close_section();
}
+// Dump per-OSD heartbeat ping-time statistics as a JSON/Formatter array.
+// For each entry in osd_stat, emits the osd id plus whatever
+// osd_stat_t::dump_ping_time() reports for that OSD.
+void PGMap::dump_osd_ping_times(ceph::Formatter *f) const
+{
+  f->open_array_section("osd_ping_times");
+  for (auto& [osd, stat] : osd_stat) {
+    f->open_object_section("osd_ping_time");
+    f->dump_int("osd", osd);
+    stat.dump_ping_time(f);
+    f->close_section();
+  }
+  f->close_section();
+}
+
+
void PGMap::dump_pg_stats_plain(
ostream& ss,
const mempool::pgmap::unordered_map<pg_t, pg_stat_t>& pg_stats,
return inactive || unclean || undersized || degraded || stale;
}
-void PGMap::dump_stuck(Formatter *f, int types, utime_t cutoff) const
+void PGMap::dump_stuck(ceph::Formatter *f, int types, utime_t cutoff) const
{
mempool::pgmap::unordered_map<pg_t, pg_stat_t> stuck_pg_stats;
get_stuck_stats(types, cutoff, stuck_pg_stats);
int PGMap::dump_stuck_pg_stats(
stringstream &ds,
- Formatter *f,
+ ceph::Formatter *f,
int threshold,
vector<string>& args) const
{
return 0;
}
-void PGMap::dump_osd_perf_stats(Formatter *f) const
+void PGMap::dump_osd_perf_stats(ceph::Formatter *f) const
{
f->open_array_section("osd_perf_infos");
for (auto i = osd_stat.begin();
(*ss) << tab;
}
-void PGMap::dump_osd_blocked_by_stats(Formatter *f) const
+void PGMap::dump_osd_blocked_by_stats(ceph::Formatter *f) const
{
f->open_array_section("osd_blocked_by_infos");
for (auto i = blocked_by_sum.begin();
}
}
-void PGMap::dump_filtered_pg_stats(Formatter *f, set<pg_t>& pgs) const
+void PGMap::dump_filtered_pg_stats(ceph::Formatter *f, set<pg_t>& pgs) const
{
f->open_array_section("pg_stats");
for (auto i = pgs.begin(); i != pgs.end(); ++i) {
reported << st.reported_epoch << ":" << st.reported_seq;
ostringstream upstr, actingstr;
- upstr << st.up << 'p' << st.up_primary;
- actingstr << st.acting << 'p' << st.acting_primary;
+ upstr << pg_vector_string(st.up) << 'p' << st.up_primary;
+ actingstr << pg_vector_string(st.acting) << 'p' << st.acting_primary;
tab << *i
<< st.stats.sum.num_objects
<< st.stats.sum.num_objects_degraded
}
void PGMap::dump_pool_stats_and_io_rate(int64_t poolid, const OSDMap &osd_map,
- Formatter *f,
+ ceph::Formatter *f,
stringstream *rs) const {
string pool_name = osd_map.get_pool_name(poolid);
if (f) {
}
}
+// Get crush parentage for an osd (skip root)
+// Returns the set of CRUSH bucket names above osd.<id>, omitting both the
+// "root" bucket and the bucket at the configured
+// mon_osd_reporter_subtree_level.  Used below to annotate the slow
+// OSD heartbeat health messages with each OSD's location.
+set<std::string> PGMap::osd_parentage(const OSDMap& osdmap, int id) const
+{
+  set<std::string> reporters_by_subtree;
+  auto reporter_subtree_level = g_conf().get_val<string>("mon_osd_reporter_subtree_level");
+
+  auto loc = osdmap.crush->get_full_location(id);
+  for (auto& [parent_bucket_type, parent_id] : loc) {
+    // Should we show the root? Might not be too informative like "default"
+    if (parent_bucket_type != "root" &&
+        parent_bucket_type != reporter_subtree_level) {
+      reporters_by_subtree.insert(parent_id);
+    }
+  }
+  return reporters_by_subtree;
+}
+
void PGMap::get_health_checks(
CephContext *cct,
const OSDMap& osdmap,
ss << " since forever";
} else {
utime_t dur = now - since;
- ss << " for " << dur;
+ ss << " for " << utimespan_str(dur);
}
ss << ", current state " << pg_state_string(pg_info.state)
<< ", last acting " << pg_info.acting;
// Compose summary message saying how many PGs in what states led
// to this health check failing
std::vector<std::string> pg_msgs;
+ int64_t count = 0;
for (const auto &j : i.second.states) {
std::ostringstream msg;
msg << j.second << (j.second > 1 ? " pgs " : " pg ") << state_name(j.first);
pg_msgs.push_back(msg.str());
+ count += j.second;
}
summary += joinify(pg_msgs.begin(), pg_msgs.end(), std::string(", "));
-
-
health_check_t *check = &checks->add(
health_code,
sev,
- summary);
+ summary,
+ count);
// Compose list of PGs contributing to this health check failing
for (const auto &j : i.second.pg_messages) {
if (pg_sum.stats.sum.num_scrub_errors) {
ostringstream ss;
ss << pg_sum.stats.sum.num_scrub_errors << " scrub errors";
- checks->add("OSD_SCRUB_ERRORS", HEALTH_ERR, ss.str());
+ checks->add("OSD_SCRUB_ERRORS", HEALTH_ERR, ss.str(),
+ pg_sum.stats.sum.num_scrub_errors);
}
// LARGE_OMAP_OBJECTS
if (!detail.empty()) {
ostringstream ss;
ss << pg_sum.stats.sum.num_large_omap_objects << " large omap objects";
- auto& d = checks->add("LARGE_OMAP_OBJECTS", HEALTH_WARN, ss.str());
+ auto& d = checks->add("LARGE_OMAP_OBJECTS", HEALTH_WARN, ss.str(),
+ pg_sum.stats.sum.num_large_omap_objects);
stringstream tip;
tip << "Search the cluster log for 'Large omap object found' for more "
<< "details.";
if (!detail.empty()) {
ostringstream ss;
ss << num_pools << " cache pools at or near target size";
- auto& d = checks->add("CACHE_POOL_NEAR_FULL", HEALTH_WARN, ss.str());
+ auto& d = checks->add("CACHE_POOL_NEAR_FULL", HEALTH_WARN, ss.str(),
+ num_pools);
d.detail.swap(detail);
}
}
ostringstream ss;
ss << "too few PGs per OSD (" << per
<< " < min " << min_pg_per_osd << ")";
- checks->add("TOO_FEW_PGS", HEALTH_WARN, ss.str());
+ checks->add("TOO_FEW_PGS", HEALTH_WARN, ss.str(),
+ min_pg_per_osd - per);
}
}
ostringstream ss;
ss << "too many PGs per OSD (" << per
<< " > max " << max_pg_per_osd << ")";
- checks->add("TOO_MANY_PGS", HEALTH_WARN, ss.str());
+ checks->add("TOO_MANY_PGS", HEALTH_WARN, ss.str(),
+ per - max_pg_per_osd);
}
}
ostringstream ss;
ss << "OSD count " << osdmap.get_num_osds()
<< " < osd_pool_default_size " << osd_pool_default_size;
- checks->add("TOO_FEW_OSDS", HEALTH_WARN, ss.str());
+ checks->add("TOO_FEW_OSDS", HEALTH_WARN, ss.str(),
+ osd_pool_default_size - osdmap.get_num_osds());
}
// SLOW_PING_TIME
break;
}
max_detail--;
- ss << "Slow heartbeat ping on back interface from osd." << sback.from
+ ss << "Slow OSD heartbeats on back from osd." << sback.from
+ << " [" << osd_parentage(osdmap, sback.from) << "]"
<< (osdmap.is_down(sback.from) ? " (down)" : "")
<< " to osd." << sback.to
+ << " [" << osd_parentage(osdmap, sback.to) << "]"
<< (osdmap.is_down(sback.to) ? " (down)" : "")
<< " " << fixed_u_to_string(sback.pingtime, 3) << " msec"
<< (sback.improving ? " possibly improving" : "");
break;
}
max_detail--;
- ss << "Slow heartbeat ping on front interface from osd." << sfront.from
+ // Get crush parentage for each osd
+ ss << "Slow OSD heartbeats on front from osd." << sfront.from
+ << " [" << osd_parentage(osdmap, sfront.from) << "]"
<< (osdmap.is_down(sfront.from) ? " (down)" : "")
<< " to osd." << sfront.to
+ << " [" << osd_parentage(osdmap, sfront.to) << "]"
<< (osdmap.is_down(sfront.to) ? " (down)" : "")
<< " " << fixed_u_to_string(sfront.pingtime, 3) << " msec"
<< (sfront.improving ? " possibly improving" : "");
}
if (detail_back.size() != 0) {
ostringstream ss;
- ss << "Long heartbeat ping times on back interface seen, longest is "
- << fixed_u_to_string(back_sorted.rbegin()->pingtime, 3) << " msec";
- auto& d = checks->add("OSD_SLOW_PING_TIME_BACK", HEALTH_WARN, ss.str());
+ ss << "Slow OSD heartbeats on back (longest "
+ << fixed_u_to_string(back_sorted.rbegin()->pingtime, 3) << "ms)";
+ auto& d = checks->add("OSD_SLOW_PING_TIME_BACK", HEALTH_WARN, ss.str(),
+ back_sorted.size());
d.detail.swap(detail_back);
}
if (detail_front.size() != 0) {
ostringstream ss;
- ss << "Long heartbeat ping times on front interface seen, longest is "
- << fixed_u_to_string(front_sorted.rbegin()->pingtime, 3) << " msec";
- auto& d = checks->add("OSD_SLOW_PING_TIME_FRONT", HEALTH_WARN, ss.str());
+ ss << "Slow OSD heartbeats on front (longest "
+ << fixed_u_to_string(front_sorted.rbegin()->pingtime, 3) << "ms)";
+ auto& d = checks->add("OSD_SLOW_PING_TIME_FRONT", HEALTH_WARN, ss.str(),
+ front_sorted.size());
d.detail.swap(detail_front);
}
}
if (!pgp_detail.empty()) {
ostringstream ss;
ss << pgp_detail.size() << " pools have pg_num > pgp_num";
- auto& d = checks->add("SMALLER_PGP_NUM", HEALTH_WARN, ss.str());
+ auto& d = checks->add("SMALLER_PGP_NUM", HEALTH_WARN, ss.str(),
+ pgp_detail.size());
d.detail.swap(pgp_detail);
}
if (!many_detail.empty()) {
ostringstream ss;
ss << many_detail.size() << " pools have many more objects per pg than"
<< " average";
- auto& d = checks->add("MANY_OBJECTS_PER_PG", HEALTH_WARN, ss.str());
+ auto& d = checks->add("MANY_OBJECTS_PER_PG", HEALTH_WARN, ss.str(),
+ many_detail.size());
d.detail.swap(many_detail);
}
}
if (full_pools) {
ostringstream ss;
ss << full_pools << " pools full";
- auto& d = checks->add("POOL_FULL", HEALTH_ERR, ss.str());
+ auto& d = checks->add("POOL_FULL", HEALTH_ERR, ss.str(), full_pools);
d.detail.swap(full_detail);
}
if (nearfull_pools) {
ostringstream ss;
ss << nearfull_pools << " pools nearfull";
- auto& d = checks->add("POOL_NEAR_FULL", HEALTH_WARN, ss.str());
+ auto& d = checks->add("POOL_NEAR_FULL", HEALTH_WARN, ss.str(), nearfull_pools);
d.detail.swap(nearfull_detail);
}
}
ss << pg_sum.stats.sum.num_objects_misplaced
<< "/" << pg_sum.stats.sum.num_object_copies << " objects misplaced ("
<< b << "%)";
- checks->add("OBJECT_MISPLACED", HEALTH_WARN, ss.str());
+ checks->add("OBJECT_MISPLACED", HEALTH_WARN, ss.str(),
+ pg_sum.stats.sum.num_objects_misplaced);
}
// OBJECT_UNFOUND
ostringstream ss;
ss << pg_sum.stats.sum.num_objects_unfound
<< "/" << pg_sum.stats.sum.num_objects << " objects unfound (" << b << "%)";
- auto& d = checks->add("OBJECT_UNFOUND", HEALTH_WARN, ss.str());
+ auto& d = checks->add("OBJECT_UNFOUND", HEALTH_WARN, ss.str(),
+ pg_sum.stats.sum.num_objects_unfound);
for (auto& p : pg_stat) {
if (p.second.stats.sum.num_objects_unfound) {
// REQUEST_SLOW
// REQUEST_STUCK
// SLOW_OPS unifies them in mimic.
- if (osdmap.require_osd_release < CEPH_RELEASE_MIMIC &&
+ if (osdmap.require_osd_release < ceph_release_t::mimic &&
cct->_conf->mon_osd_warn_op_age > 0 &&
!osd_sum.op_queue_age_hist.h.empty() &&
osd_sum.op_queue_age_hist.upper_bound() / 1000.0 >
ostringstream ss;
ss << warn << " slow requests are blocked > "
<< cct->_conf->mon_osd_warn_op_age << " sec";
- auto& d = checks->add("REQUEST_SLOW", HEALTH_WARN, ss.str());
+ auto& d = checks->add("REQUEST_SLOW", HEALTH_WARN, ss.str(), warn);
d.detail.swap(warn_detail);
int left = max;
for (auto& p : warn_osd_by_max) {
ostringstream ss;
ss << error << " stuck requests are blocked > "
<< err_age << " sec";
- auto& d = checks->add("REQUEST_STUCK", HEALTH_ERR, ss.str());
+ auto& d = checks->add("REQUEST_STUCK", HEALTH_ERR, ss.str(), error);
d.detail.swap(error_detail);
int left = max;
for (auto& p : error_osd_by_max) {
}
for (auto& asum : os_alerts_sum) {
- string summary;
+ string summary = stringify(asum.second.first) + " OSD(s)";
if (asum.first == "BLUEFS_SPILLOVER") {
- summary = "BlueFS spillover detected";
+ summary += " experiencing BlueFS spillover";
} else if (asum.first == "BLUESTORE_NO_COMPRESSION") {
- summary = "BlueStore compression broken";
+ summary += " have broken BlueStore compression";
} else if (asum.first == "BLUESTORE_LEGACY_STATFS") {
- summary = "Legacy BlueStore stats reporting detected";
+ summary += " reporting legacy (not per-pool) BlueStore stats";
} else if (asum.first == "BLUESTORE_DISK_SIZE_MISMATCH") {
- summary = "BlueStore has dangerous mismatch between block device and free list sizes";
+ summary += " have dangerous mismatch between BlueStore block device and free list sizes";
+ } else if (asum.first == "BLUESTORE_NO_PER_POOL_OMAP") {
+ summary += " reporting legacy (not per-pool) BlueStore omap usage stats";
}
- summary += " on ";
- summary += stringify(asum.second.first);
- summary += " OSD(s)";
- auto& d = checks->add(asum.first, HEALTH_WARN, summary);
+ auto& d = checks->add(asum.first, HEALTH_WARN, summary, asum.second.first);
for (auto& s : asum.second.second) {
d.detail.push_back(s);
}
if (detail_total) {
ostringstream ss;
ss << detail_total << " pgs not scrubbed in time";
- auto& d = checks->add("PG_NOT_SCRUBBED", HEALTH_WARN, ss.str());
+ auto& d = checks->add("PG_NOT_SCRUBBED", HEALTH_WARN, ss.str(), detail_total);
if (!detail.empty()) {
d.detail.swap(detail);
if (deep_detail_total) {
ostringstream ss;
ss << deep_detail_total << " pgs not deep-scrubbed in time";
- auto& d = checks->add("PG_NOT_DEEP_SCRUBBED", HEALTH_WARN, ss.str());
+ auto& d = checks->add("PG_NOT_DEEP_SCRUBBED", HEALTH_WARN, ss.str(),
+ deep_detail_total);
if (!deep_detail.empty()) {
d.detail.swap(deep_detail);
}
if (!detail.empty()) {
ostringstream ss;
- ss << "application not enabled on " << detail.size() << " pool(s)";
- auto& d = checks->add("POOL_APP_NOT_ENABLED", HEALTH_WARN, ss.str());
+ ss << detail.size() << " pool(s) do not have an application enabled";
+ auto& d = checks->add("POOL_APP_NOT_ENABLED", HEALTH_WARN, ss.str(),
+ detail.size());
stringstream tip;
tip << "use 'ceph osd pool application enable <pool-name> "
<< "<app-name>', where <app-name> is 'cephfs', 'rbd', 'rgw', "
stringstream ss;
ss << "snap trim queue for " << snaptrimq_exceeded << " pg(s) >= " << snapthreshold << " (mon_osd_snap_trim_queue_warn_on)";
- auto& d = checks->add("PG_SLOW_SNAP_TRIMMING", HEALTH_WARN, ss.str());
+ auto& d = checks->add("PG_SLOW_SNAP_TRIMMING", HEALTH_WARN, ss.str(),
+ snaptrimq_exceeded);
detail.push_back("try decreasing \"osd snap trim sleep\" and/or increasing \"osd pg max concurrent snap trims\".");
d.detail.swap(detail);
}
}
}
+// Formatter-aware summary override: when a Formatter is supplied, first emit
+// a per-pool breakdown of pg state counts (pool_id -> [state_name, count]),
+// then delegate to PGMapDigest::print_summary for the rest of the output
+// (and for the plain-ostream path, where only the digest output is produced).
+void PGMap::print_summary(ceph::Formatter *f, ostream *out) const
+{
+  if (f) {
+    f->open_array_section("pgs_by_pool_state");
+    for (auto& i: num_pg_by_pool_state) {
+      f->open_object_section("per_pool_pgs_by_state");
+      f->dump_int("pool_id", i.first);
+      f->open_array_section("pg_state_counts");
+      for (auto& j : i.second) {
+        f->open_object_section("pg_state_count");
+        f->dump_string("state_name", pg_state_string(j.first));
+        f->dump_int("count", j.second);
+        f->close_section();
+      }
+      f->close_section();
+      f->close_section();
+    }
+    f->close_section();
+  }
+  PGMapDigest::print_summary(f, out);
+}
+
+
int process_pg_map_command(
const string& orig_prefix,
const cmdmap_t& orig_cmdmap,
const PGMap& pg_map,
const OSDMap& osdmap,
- Formatter *f,
+ ceph::Formatter *f,
stringstream *ss,
bufferlist *odata)
{
string omap_stats_note =
"\n* NOTE: Omap statistics are gathered during deep scrub and "
- "may be inaccurate soon afterwards depending on utilisation. See "
+ "may be inaccurate soon afterwards depending on utilization. See "
"http://docs.ceph.com/docs/master/dev/placement-group/#omap-statistics "
"for further details.\n";
bool omap_stats_note_required = false;
} else if (prefix == "pg ls-by-pool") {
prefix = "pg ls";
string poolstr;
- cmd_getval(g_ceph_context, cmdmap, "poolstr", poolstr);
+ cmd_getval(cmdmap, "poolstr", poolstr);
int64_t pool = osdmap.lookup_pg_pool_name(poolstr.c_str());
if (pool < 0) {
*ss << "pool " << poolstr << " does not exist";
string val;
vector<string> dumpcontents;
set<string> what;
- if (cmd_getval(g_ceph_context, cmdmap, "dumpcontents", dumpcontents)) {
+ if (cmd_getval(cmdmap, "dumpcontents", dumpcontents)) {
copy(dumpcontents.begin(), dumpcontents.end(),
inserter(what, what.end()));
}
int64_t pool = -1;
vector<string>states;
set<pg_t> pgs;
- cmd_getval(g_ceph_context, cmdmap, "pool", pool);
- cmd_getval(g_ceph_context, cmdmap, "osd", osd);
- cmd_getval(g_ceph_context, cmdmap, "states", states);
+ cmd_getval(cmdmap, "pool", pool);
+ cmd_getval(cmdmap, "osd", osd);
+ cmd_getval(cmdmap, "states", states);
if (pool >= 0 && !osdmap.have_pg_pool(pool)) {
*ss << "pool " << pool << " does not exist";
return -ENOENT;
if (prefix == "pg dump_stuck") {
vector<string> stuckop_vec;
- cmd_getval(g_ceph_context, cmdmap, "stuckops", stuckop_vec);
+ cmd_getval(cmdmap, "stuckops", stuckop_vec);
if (stuckop_vec.empty())
stuckop_vec.push_back("unclean");
int64_t threshold;
- cmd_getval(g_ceph_context, cmdmap, "threshold", threshold,
+ cmd_getval(cmdmap, "threshold", threshold,
g_conf().get_val<int64_t>("mon_pg_stuck_threshold"));
if (pg_map.dump_stuck_pg_stats(ds, f, (int)threshold, stuckop_vec) < 0) {
if (prefix == "pg debug") {
string debugop;
- cmd_getval(g_ceph_context, cmdmap, "debugop", debugop,
+ cmd_getval(cmdmap, "debugop", debugop,
string("unfound_objects_exist"));
if (debugop == "unfound_objects_exist") {
bool unfound_objects_exist = false;
mempool::osdmap::map<int32_t, uint32_t>* new_weights,
std::stringstream *ss,
std::string *out_str,
- Formatter *f)
+ ceph::Formatter *f)
{
if (oload <= 100) {
*ss << "You must give a percentage higher than 100. "