#include "Monitor.h"
#include "OSDMonitor.h"
#include "MonitorDBStore.h"
+#include "PGStatService.h"
#include "messages/MPGStats.h"
#include "messages/MPGStatsAck.h"
-#include "messages/MGetPoolStats.h"
-#include "messages/MGetPoolStatsReply.h"
-#include "messages/MStatfs.h"
-#include "messages/MStatfsReply.h"
#include "messages/MOSDPGCreate.h"
#include "messages/MMonCommand.h"
#include "messages/MOSDScrub.h"
update_logger();
- if (mon->is_leader())
+ if (mon->is_leader() &&
+ mon->osdmon()->osdmap.require_osd_release < CEPH_RELEASE_LUMINOUS) {
mon->clog->info() << "pgmap " << pg_map;
+ }
}
void PGMonitor::update_logger()
{
+ if (mon->osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS) {
+ return;
+ }
dout(10) << "update_logger" << dendl;
mon->cluster_logger->set(l_cluster_osd_bytes, pg_map.osd_sum.kb * 1024ull);
void PGMonitor::tick()
{
if (!is_active()) return;
+ if (mon->osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS) {
+ return;
+ }
handle_osd_timeouts();
void PGMonitor::update_from_paxos(bool *need_bootstrap)
{
+ if (did_delete)
+ return;
+
+ if (get_value("deleted")) {
+ did_delete = true;
+ dout(10) << __func__ << " deleted, clearing in-memory PGMap" << dendl;
+ pg_map = PGMap();
+ pending_inc = PGMap::Incremental();
+ pgservice.reset();
+ last_osd_report.clear();
+ return;
+ }
+
version_t version = get_last_committed();
if (version == pg_map.version)
return;
void PGMonitor::upgrade_format()
{
unsigned current = 1;
- assert(format_version <= current);
- if (format_version == current)
- return;
-
- dout(1) << __func__ << " to " << current << dendl;
-
- // upgrade by dirtying it all
- pg_map.dirty_all(pending_inc);
-
- format_version = current;
- propose_pending();
+ assert(format_version == current); // the dirty-everything upgrade path is gone: any format other than `current` now trips this assert
}
void PGMonitor::post_paxos_update()
{
+ if (did_delete)
+ return;
dout(10) << __func__ << dendl;
OSDMap& osdmap = mon->osdmon()->osdmap;
if (mon->monmap->get_required_features().contains_all(
{
if (!mon->is_leader())
return;
+ if (did_delete)
+ return;
utime_t now(ceph_clock_now());
utime_t timeo(g_conf->mon_osd_report_timeout, 0);
void PGMonitor::create_pending()
{
+ if (did_delete)
+ return;
+ do_delete = false;
pending_inc = PGMap::Incremental();
pending_inc.version = pg_map.version + 1;
if (pg_map.version == 0) {
}
pool_stat_t pg_sum_old = pg_map.pg_sum;
- ceph::unordered_map<uint64_t, pool_stat_t> pg_pool_sum_old;
+ mempool::pgmap::unordered_map<uint64_t, pool_stat_t> pg_pool_sum_old;
// pgs
set<int64_t> deleted_pools;
void PGMonitor::encode_pending(MonitorDBStore::TransactionRef t)
{
+ if (did_delete)
+ return;
+
+ string prefix = pgmap_meta_prefix;
+ if (do_delete) {
+ dout(1) << __func__ << " clearing pgmap data at v" << pending_inc.version
+ << dendl;
+ do_delete = false;
+ for (auto key : { "version", "stamp", "last_osdmap_epoch",
+ "last_pg_scan", "full_ratio", "nearfull_ratio" }) {
+ t->erase(prefix, key);
+ }
+ for (auto& p : pg_map.pg_stat) {
+ t->erase(prefix, stringify(p.first));
+ }
+ for (auto& p : pg_map.osd_stat) {
+ t->erase(prefix, stringify(p.first));
+ }
+ put_last_committed(t, pending_inc.version);
+ put_value(t, "deleted", 1);
+ return;
+ }
+
+ assert(mon->osdmon()->osdmap.require_osd_release < CEPH_RELEASE_LUMINOUS ||
+ pending_inc.version == 1 /* rebuild-mondb.yaml case */);
+
version_t version = pending_inc.version;
dout(10) << __func__ << " v " << version << dendl;
assert(get_last_committed() + 1 == version);
uint64_t features = mon->get_quorum_con_features();
- string prefix = pgmap_meta_prefix;
-
t->put(prefix, "version", pending_inc.version);
{
bufferlist bl;
bool PGMonitor::preprocess_query(MonOpRequestRef op)
{
+ if (mon->osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS) {
+ return false;
+ }
+
op->mark_pgmon_event(__func__);
PaxosServiceMessage *m = static_cast<PaxosServiceMessage*>(op->get_req());
dout(10) << "preprocess_query " << *m << " from " << m->get_orig_source_inst() << dendl;
switch (m->get_type()) {
- case CEPH_MSG_STATFS:
- handle_statfs(op);
- return true;
-
- case MSG_GETPOOLSTATS:
- return preprocess_getpoolstats(op);
-
case MSG_PGSTATS:
return preprocess_pg_stats(op);
bool PGMonitor::prepare_update(MonOpRequestRef op)
{
+ if (mon->osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS) {
+ return false;
+ }
+
op->mark_pgmon_event(__func__);
PaxosServiceMessage *m = static_cast<PaxosServiceMessage*>(op->get_req());
dout(10) << "prepare_update " << *m << " from " << m->get_orig_source_inst() << dendl;
}
}
-void PGMonitor::handle_statfs(MonOpRequestRef op)
-{
- op->mark_pgmon_event(__func__);
- MStatfs *statfs = static_cast<MStatfs*>(op->get_req());
- // check caps
- MonSession *session = statfs->get_session();
- if (!session)
- return;
-
- if (!session->is_capable("pg", MON_CAP_R)) {
- dout(0) << "MStatfs received from entity with insufficient privileges "
- << session->caps << dendl;
- return;
- }
-
- if (statfs->fsid != mon->monmap->fsid) {
- dout(0) << "handle_statfs on fsid " << statfs->fsid
- << " != " << mon->monmap->fsid << dendl;
- return;
- }
-
-
- dout(10) << "handle_statfs " << *statfs
- << " from " << statfs->get_orig_source() << dendl;
-
- // fill out stfs
- MStatfsReply *reply = new MStatfsReply(mon->monmap->fsid, statfs->get_tid(),
- get_last_committed());
-
- // these are in KB.
- reply->h.st.kb = pg_map.osd_sum.kb;
- reply->h.st.kb_used = pg_map.osd_sum.kb_used;
- reply->h.st.kb_avail = pg_map.osd_sum.kb_avail;
- reply->h.st.num_objects = pg_map.pg_sum.stats.sum.num_objects;
-
- // reply
- mon->send_reply(op, reply);
-}
-
-bool PGMonitor::preprocess_getpoolstats(MonOpRequestRef op)
-{
- op->mark_pgmon_event(__func__);
- MGetPoolStats *m = static_cast<MGetPoolStats*>(op->get_req());
- MGetPoolStatsReply *reply;
-
- MonSession *session = m->get_session();
- if (!session)
- goto out;
- if (!session->is_capable("pg", MON_CAP_R)) {
- dout(0) << "MGetPoolStats received from entity with insufficient caps "
- << session->caps << dendl;
- goto out;
- }
-
- if (m->fsid != mon->monmap->fsid) {
- dout(0) << "preprocess_getpoolstats on fsid " << m->fsid << " != " << mon->monmap->fsid << dendl;
- goto out;
- }
-
- reply = new MGetPoolStatsReply(m->fsid, m->get_tid(), get_last_committed());
-
- for (list<string>::iterator p = m->pools.begin();
- p != m->pools.end();
- ++p) {
- int64_t poolid = mon->osdmon()->osdmap.lookup_pg_pool_name(p->c_str());
- if (poolid < 0)
- continue;
- if (pg_map.pg_pool_sum.count(poolid) == 0)
- continue;
- reply->pool_stats[*p] = pg_map.pg_pool_sum[poolid];
- }
-
- mon->send_reply(op, reply);
-
-out:
- return true;
-}
-
-
bool PGMonitor::preprocess_pg_stats(MonOpRequestRef op)
{
op->mark_pgmon_event(__func__);
// osd stat
if (mon->osdmon()->osdmap.is_in(from)) {
- pending_inc.update_stat(from, stats->epoch, stats->osd_stat);
+ pending_inc.update_stat(from, stats->epoch, std::move(stats->osd_stat));
} else {
pending_inc.update_stat(from, stats->epoch, osd_stat_t());
}
if (mon->is_peon())
return; // whatever.
+ if (did_delete)
+ return;
+
if (pg_map.last_osdmap_epoch >= epoch) {
dout(10) << __func__ << " already seen " << pg_map.last_osdmap_epoch
<< " >= " << epoch << dendl;
return;
}
+ const OSDMap& osdmap = mon->osdmon()->osdmap;
+ if (!did_delete && osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS) {
+ // delete all my data
+ dout(1) << __func__ << " will clear pg_map data" << dendl;
+ do_delete = true;
+ propose_pending();
+ return;
+ }
+
// osds that went up or down
set<int> need_check_down_pg_osds;
// apply latest map(s)
- const OSDMap& osdmap = mon->osdmon()->osdmap;
epoch = std::max(epoch, osdmap.get_epoch());
for (epoch_t e = pg_map.last_osdmap_epoch+1;
e <= epoch;
return last + 1;
}
-void PGMonitor::dump_info(Formatter *f) const
-{
- f->open_object_section("pgmap");
- pg_map.dump(f);
- f->close_section();
-
- f->dump_unsigned("pgmap_first_committed", get_first_committed());
- f->dump_unsigned("pgmap_last_committed", get_last_committed());
-}
-
bool PGMonitor::preprocess_command(MonOpRequestRef op)
{
op->mark_pgmon_event(__func__);
r = -ENOENT;
goto reply;
}
- if (pg_map.pg_stat[pgid].acting_primary == -1) {
+ int osd = pg_map.pg_stat[pgid].acting_primary;
+ if (osd == -1) {
ss << "pg " << pgid << " has no primary osd";
r = -EAGAIN;
goto reply;
}
- int osd = pg_map.pg_stat[pgid].acting_primary;
if (!mon->osdmon()->osdmap.is_up(osd)) {
ss << "pg " << pgid << " primary osd." << osd << " not up";
r = -EAGAIN;
goto update;
} else if (prefix == "pg set_full_ratio" ||
prefix == "pg set_nearfull_ratio") {
- if (mon->osdmon()->osdmap.test_flag(CEPH_OSDMAP_REQUIRE_LUMINOUS)) {
+ if (mon->osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS) {
ss << "please use the new luminous interfaces"
<< " ('osd set-full-ratio' and 'osd set-nearfull-ratio')";
r = -EPERM;
return true;
}
-// Only called with a single bit set in "what"
-static void note_stuck_detail(int what,
- ceph::unordered_map<pg_t,pg_stat_t>& stuck_pgs,
- list<pair<health_status_t,string> > *detail)
-{
- for (ceph::unordered_map<pg_t,pg_stat_t>::iterator p = stuck_pgs.begin();
- p != stuck_pgs.end();
- ++p) {
- ostringstream ss;
- utime_t since;
- const char *whatname = 0;
- switch (what) {
- case PGMap::STUCK_INACTIVE:
- since = p->second.last_active;
- whatname = "inactive";
- break;
- case PGMap::STUCK_UNCLEAN:
- since = p->second.last_clean;
- whatname = "unclean";
- break;
- case PGMap::STUCK_DEGRADED:
- since = p->second.last_undegraded;
- whatname = "degraded";
- break;
- case PGMap::STUCK_UNDERSIZED:
- since = p->second.last_fullsized;
- whatname = "undersized";
- break;
- case PGMap::STUCK_STALE:
- since = p->second.last_unstale;
- whatname = "stale";
- break;
- default:
- ceph_abort();
- }
- ss << "pg " << p->first << " is stuck " << whatname;
- if (since == utime_t()) {
- ss << " since forever";
- } else {
- utime_t dur = ceph_clock_now() - since;
- ss << " for " << dur;
- }
- ss << ", current state " << pg_state_string(p->second.state)
- << ", last acting " << p->second.acting;
- detail->push_back(make_pair(HEALTH_WARN, ss.str()));
- }
-}
-
-int PGMonitor::_warn_slow_request_histogram(const pow2_hist_t& h, string suffix,
- list<pair<health_status_t,string> >& summary,
- list<pair<health_status_t,string> > *detail) const
-{
- if (h.h.empty())
- return 0;
-
- unsigned sum = 0;
- for (unsigned i = h.h.size() - 1; i > 0; --i) {
- float ub = (float)(1 << i) / 1000.0;
- if (ub < g_conf->mon_osd_max_op_age)
- break;
- ostringstream ss;
- if (h.h[i]) {
- ss << h.h[i] << " ops are blocked > " << ub << " sec" << suffix;
- if (detail)
- detail->push_back(make_pair(HEALTH_WARN, ss.str()));
- sum += h.h[i];
- }
- }
- return sum;
-}
-
-namespace {
- enum class scrubbed_or_deepscrubbed_t { SCRUBBED, DEEPSCRUBBED };
-
- void print_unscrubbed_detailed(const std::pair<const pg_t,pg_stat_t> &pg_entry,
- list<pair<health_status_t,string> > *detail,
- scrubbed_or_deepscrubbed_t how_scrubbed) {
-
- std::stringstream ss;
- const auto& pg_stat(pg_entry.second);
-
- ss << "pg " << pg_entry.first << " is not ";
- if (how_scrubbed == scrubbed_or_deepscrubbed_t::SCRUBBED) {
- ss << "scrubbed, last_scrub_stamp "
- << pg_stat.last_scrub_stamp;
- } else if (how_scrubbed == scrubbed_or_deepscrubbed_t::DEEPSCRUBBED) {
- ss << "deep-scrubbed, last_deep_scrub_stamp "
- << pg_stat.last_deep_scrub_stamp;
- }
-
- detail->push_back(make_pair(HEALTH_WARN, ss.str()));
- }
-
-
- using pg_stat_map_t = const ceph::unordered_map<pg_t,pg_stat_t>;
-
- void print_unscrubbed_pgs(pg_stat_map_t& pg_stats,
- list<pair<health_status_t,string> > &summary,
- list<pair<health_status_t,string> > *detail,
- const CephContext* cct) {
- if (cct->_conf->mon_warn_not_scrubbed == 0 &&
- cct->_conf->mon_warn_not_deep_scrubbed == 0)
- return;
-
- int pgs_count = 0;
- const utime_t now = ceph_clock_now();
- for (const auto& pg_entry : pg_stats) {
- const auto& pg_stat(pg_entry.second);
- const utime_t time_since_ls = now - pg_stat.last_scrub_stamp;
- const utime_t time_since_lds = now - pg_stat.last_deep_scrub_stamp;
-
- const int mon_warn_not_scrubbed =
- cct->_conf->mon_warn_not_scrubbed + cct->_conf->mon_scrub_interval;
-
- const int mon_warn_not_deep_scrubbed =
- cct->_conf->mon_warn_not_deep_scrubbed + cct->_conf->osd_deep_scrub_interval;
-
- bool not_scrubbed = (time_since_ls >= mon_warn_not_scrubbed &&
- cct->_conf->mon_warn_not_scrubbed != 0);
-
- bool not_deep_scrubbed = (time_since_lds >= mon_warn_not_deep_scrubbed &&
- cct->_conf->mon_warn_not_deep_scrubbed != 0);
-
- if (detail != nullptr) {
- if (not_scrubbed) {
- print_unscrubbed_detailed(pg_entry,
- detail,
- scrubbed_or_deepscrubbed_t::SCRUBBED);
- }
- if (not_deep_scrubbed) {
- print_unscrubbed_detailed(pg_entry,
- detail,
- scrubbed_or_deepscrubbed_t::DEEPSCRUBBED);
- }
- }
- if (not_scrubbed || not_deep_scrubbed) {
- ++pgs_count;
- }
- }
-
- if (pgs_count > 0) {
- std::stringstream ss;
- ss << pgs_count << " unscrubbed pgs";
- summary.push_back(make_pair(HEALTH_WARN, ss.str()));
- }
-
- }
-}
-
void PGMonitor::get_health(list<pair<health_status_t,string> >& summary,
list<pair<health_status_t,string> > *detail,
CephContext *cct) const
{
- map<string,int> note;
- ceph::unordered_map<int,int>::const_iterator p = pg_map.num_pg_by_state.begin();
- ceph::unordered_map<int,int>::const_iterator p_end = pg_map.num_pg_by_state.end();
- for (; p != p_end; ++p) {
- if (p->first & PG_STATE_STALE)
- note["stale"] += p->second;
- if (p->first & PG_STATE_DOWN)
- note["down"] += p->second;
- if (p->first & PG_STATE_UNDERSIZED)
- note["undersized"] += p->second;
- if (p->first & PG_STATE_DEGRADED)
- note["degraded"] += p->second;
- if (p->first & PG_STATE_INCONSISTENT)
- note["inconsistent"] += p->second;
- if (p->first & PG_STATE_PEERING)
- note["peering"] += p->second;
- if (p->first & PG_STATE_REPAIR)
- note["repair"] += p->second;
- if (p->first & PG_STATE_RECOVERING)
- note["recovering"] += p->second;
- if (p->first & PG_STATE_RECOVERY_WAIT)
- note["recovery_wait"] += p->second;
- if (p->first & PG_STATE_INCOMPLETE)
- note["incomplete"] += p->second;
- if (p->first & PG_STATE_BACKFILL_WAIT)
- note["backfill_wait"] += p->second;
- if (p->first & PG_STATE_BACKFILL)
- note["backfilling"] += p->second;
- if (p->first & PG_STATE_BACKFILL_TOOFULL)
- note["backfill_toofull"] += p->second;
- if (p->first & PG_STATE_RECOVERY_TOOFULL)
- note["recovery_toofull"] += p->second;
- }
-
- ceph::unordered_map<pg_t, pg_stat_t> stuck_pgs;
- utime_t now(ceph_clock_now());
- utime_t cutoff = now - utime_t(g_conf->mon_pg_stuck_threshold, 0);
- uint64_t num_inactive_pgs = 0;
-
- if (detail) {
-
- // we need to collect details of stuck pgs, first do a quick check
- // whether this will yield any results
- if (pg_map.get_stuck_counts(cutoff, note)) {
-
- // there are stuck pgs. gather details for specified statuses
- // only if we know that there are pgs stuck in that status
-
- if (note.find("stuck inactive") != note.end()) {
- pg_map.get_stuck_stats(PGMap::STUCK_INACTIVE, cutoff, stuck_pgs);
- note["stuck inactive"] = stuck_pgs.size();
- num_inactive_pgs += stuck_pgs.size();
- note_stuck_detail(PGMap::STUCK_INACTIVE, stuck_pgs, detail);
- stuck_pgs.clear();
- }
-
- if (note.find("stuck unclean") != note.end()) {
- pg_map.get_stuck_stats(PGMap::STUCK_UNCLEAN, cutoff, stuck_pgs);
- note["stuck unclean"] = stuck_pgs.size();
- note_stuck_detail(PGMap::STUCK_UNCLEAN, stuck_pgs, detail);
- stuck_pgs.clear();
- }
-
- if (note.find("stuck undersized") != note.end()) {
- pg_map.get_stuck_stats(PGMap::STUCK_UNDERSIZED, cutoff, stuck_pgs);
- note["stuck undersized"] = stuck_pgs.size();
- note_stuck_detail(PGMap::STUCK_UNDERSIZED, stuck_pgs, detail);
- stuck_pgs.clear();
- }
-
- if (note.find("stuck degraded") != note.end()) {
- pg_map.get_stuck_stats(PGMap::STUCK_DEGRADED, cutoff, stuck_pgs);
- note["stuck degraded"] = stuck_pgs.size();
- note_stuck_detail(PGMap::STUCK_DEGRADED, stuck_pgs, detail);
- stuck_pgs.clear();
- }
-
- if (note.find("stuck stale") != note.end()) {
- pg_map.get_stuck_stats(PGMap::STUCK_STALE, cutoff, stuck_pgs);
- note["stuck stale"] = stuck_pgs.size();
- num_inactive_pgs += stuck_pgs.size();
- note_stuck_detail(PGMap::STUCK_STALE, stuck_pgs, detail);
- }
- }
- } else {
- pg_map.get_stuck_counts(cutoff, note);
- map<string,int>::const_iterator p = note.find("stuck inactive");
- if (p != note.end())
- num_inactive_pgs += p->second;
- p = note.find("stuck stale");
- if (p != note.end())
- num_inactive_pgs += p->second;
- }
-
- if (g_conf->mon_pg_min_inactive > 0 && num_inactive_pgs >= g_conf->mon_pg_min_inactive) {
- ostringstream ss;
- ss << num_inactive_pgs << " pgs are stuck inactive for more than " << g_conf->mon_pg_stuck_threshold << " seconds";
- summary.push_back(make_pair(HEALTH_ERR, ss.str()));
- }
-
- if (!note.empty()) {
- for (map<string,int>::iterator p = note.begin(); p != note.end(); ++p) {
- ostringstream ss;
- ss << p->second << " pgs " << p->first;
- summary.push_back(make_pair(HEALTH_WARN, ss.str()));
- }
- if (detail) {
- for (ceph::unordered_map<pg_t,pg_stat_t>::const_iterator p = pg_map.pg_stat.begin();
- p != pg_map.pg_stat.end();
- ++p) {
- if ((p->second.state & (PG_STATE_STALE |
- PG_STATE_DOWN |
- PG_STATE_UNDERSIZED |
- PG_STATE_DEGRADED |
- PG_STATE_INCONSISTENT |
- PG_STATE_PEERING |
- PG_STATE_REPAIR |
- PG_STATE_RECOVERING |
- PG_STATE_RECOVERY_WAIT |
- PG_STATE_RECOVERY_TOOFULL |
- PG_STATE_INCOMPLETE |
- PG_STATE_BACKFILL_WAIT |
- PG_STATE_BACKFILL |
- PG_STATE_BACKFILL_TOOFULL)) &&
- stuck_pgs.count(p->first) == 0) {
- ostringstream ss;
- ss << "pg " << p->first << " is " << pg_state_string(p->second.state);
- ss << ", acting " << p->second.acting;
- if (p->second.stats.sum.num_objects_unfound)
- ss << ", " << p->second.stats.sum.num_objects_unfound << " unfound";
- if (p->second.state & PG_STATE_INCOMPLETE) {
- const pg_pool_t *pi = mon->osdmon()->osdmap.get_pg_pool(p->first.pool());
- if (pi && pi->min_size > 1) {
- ss << " (reducing pool " << mon->osdmon()->osdmap.get_pool_name(p->first.pool())
- << " min_size from " << (int)pi->min_size << " may help; search ceph.com/docs for 'incomplete')";
- }
- }
- detail->push_back(make_pair(HEALTH_WARN, ss.str()));
- }
- }
- }
- }
-
- // slow requests
- if (g_conf->mon_osd_max_op_age > 0 &&
- pg_map.osd_sum.op_queue_age_hist.upper_bound() > g_conf->mon_osd_max_op_age) {
- unsigned sum = _warn_slow_request_histogram(pg_map.osd_sum.op_queue_age_hist, "", summary, NULL);
- if (sum > 0) {
- ostringstream ss;
- ss << sum << " requests are blocked > " << g_conf->mon_osd_max_op_age << " sec";
- summary.push_back(make_pair(HEALTH_WARN, ss.str()));
-
- if (detail) {
- unsigned num_slow_osds = 0;
- // do per-osd warnings
- for (ceph::unordered_map<int32_t,osd_stat_t>::const_iterator p = pg_map.osd_stat.begin();
- p != pg_map.osd_stat.end();
- ++p) {
- if (_warn_slow_request_histogram(p->second.op_queue_age_hist,
- string(" on osd.") + stringify(p->first),
- summary, detail))
- ++num_slow_osds;
- }
- ostringstream ss2;
- ss2 << num_slow_osds << " osds have slow requests";
- summary.push_back(make_pair(HEALTH_WARN, ss2.str()));
- detail->push_back(make_pair(HEALTH_WARN, ss2.str()));
- }
- }
- }
-
- if (g_conf->mon_warn_osd_usage_min_max_delta) {
- float max_osd_usage = 0.0, min_osd_usage = 1.0;
- for (auto p = pg_map.osd_stat.begin(); p != pg_map.osd_stat.end(); ++p) {
- // kb should never be 0, but avoid divide by zero in case of corruption
- if (p->second.kb <= 0)
- continue;
- float usage = ((float)p->second.kb_used) / ((float)p->second.kb);
- if (usage > max_osd_usage)
- max_osd_usage = usage;
- if (usage < min_osd_usage)
- min_osd_usage = usage;
- }
- float diff = max_osd_usage - min_osd_usage;
- if (diff > g_conf->mon_warn_osd_usage_min_max_delta) {
- ostringstream ss;
- ss << "difference between min (" << roundf(min_osd_usage*1000.0)/100.0
- << "%) and max (" << roundf(max_osd_usage*1000.0)/100.0
- << "%) osd usage " << roundf(diff*1000.0)/100.0 << "% > "
- << roundf(g_conf->mon_warn_osd_usage_min_max_delta*1000.0)/100.0
- << " (mon_warn_osd_usage_min_max_delta)";
- summary.push_back(make_pair(HEALTH_WARN, ss.str()));
- if (detail)
- detail->push_back(make_pair(HEALTH_WARN, ss.str()));
- }
- }
-
- // recovery
- list<string> sl;
- pg_map.overall_recovery_summary(NULL, &sl);
- for (list<string>::iterator p = sl.begin(); p != sl.end(); ++p) {
- summary.push_back(make_pair(HEALTH_WARN, "recovery " + *p));
- if (detail)
- detail->push_back(make_pair(HEALTH_WARN, "recovery " + *p));
- }
-
- // full/nearfull
- if (!mon->osdmon()->osdmap.test_flag(CEPH_OSDMAP_REQUIRE_LUMINOUS)) {
+ // legacy pre-luminous full/nearfull; the PGMap health call below is likewise made only while require_osd_release is older than luminous
+ if (mon->osdmon()->osdmap.require_osd_release < CEPH_RELEASE_LUMINOUS) {
check_full_osd_health(summary, detail, pg_map.full_osds, "full",
HEALTH_ERR);
check_full_osd_health(summary, detail, pg_map.nearfull_osds, "near full",
HEALTH_WARN);
+ pg_map.get_health(cct, mon->osdmon()->osdmap, summary, detail); // NOTE(review): presumably covers the checks that used to be inline in this function - confirm in PGMap::get_health
}
-
- // near-target max pools
- auto& pools = mon->osdmon()->osdmap.get_pools();
- for (auto p = pools.begin();
- p != pools.end(); ++p) {
- if ((!p->second.target_max_objects && !p->second.target_max_bytes) ||
- !pg_map.pg_pool_sum.count(p->first))
- continue;
- bool nearfull = false;
- const string& name = mon->osdmon()->osdmap.get_pool_name(p->first);
- const pool_stat_t& st = pg_map.get_pg_pool_sum_stat(p->first);
- uint64_t ratio = p->second.cache_target_full_ratio_micro +
- ((1000000 - p->second.cache_target_full_ratio_micro) *
- g_conf->mon_cache_target_full_warn_ratio);
- if (p->second.target_max_objects && (uint64_t)(st.stats.sum.num_objects - st.stats.sum.num_objects_hit_set_archive) >
- p->second.target_max_objects * (ratio / 1000000.0)) {
- nearfull = true;
- if (detail) {
- ostringstream ss;
- ss << "cache pool '" << name << "' with "
- << si_t(st.stats.sum.num_objects)
- << " objects at/near target max "
- << si_t(p->second.target_max_objects) << " objects";
- detail->push_back(make_pair(HEALTH_WARN, ss.str()));
- }
- }
- if (p->second.target_max_bytes && (uint64_t)(st.stats.sum.num_bytes - st.stats.sum.num_bytes_hit_set_archive) >
- p->second.target_max_bytes * (ratio / 1000000.0)) {
- nearfull = true;
- if (detail) {
- ostringstream ss;
- ss << "cache pool '" << name
- << "' with " << si_t(st.stats.sum.num_bytes)
- << "B at/near target max "
- << si_t(p->second.target_max_bytes) << "B";
- detail->push_back(make_pair(HEALTH_WARN, ss.str()));
- }
- }
- if (nearfull) {
- ostringstream ss;
- ss << "'" << name << "' at/near target max";
- summary.push_back(make_pair(HEALTH_WARN, ss.str()));
- }
- }
-
- // scrub
- if (pg_map.pg_sum.stats.sum.num_scrub_errors) {
- ostringstream ss;
- ss << pg_map.pg_sum.stats.sum.num_scrub_errors << " scrub errors";
- summary.push_back(make_pair(HEALTH_ERR, ss.str()));
- if (detail) {
- detail->push_back(make_pair(HEALTH_ERR, ss.str()));
- }
- }
-
- // pg skew
- int num_in = mon->osdmon()->osdmap.get_num_in_osds();
- int sum_pg_up = MAX(pg_map.pg_sum.up, static_cast<int32_t>(pg_map.pg_stat.size()));
- if (num_in && g_conf->mon_pg_warn_min_per_osd > 0) {
- int per = sum_pg_up / num_in;
- if (per < g_conf->mon_pg_warn_min_per_osd && per) {
- ostringstream ss;
- ss << "too few PGs per OSD (" << per << " < min " << g_conf->mon_pg_warn_min_per_osd << ")";
- summary.push_back(make_pair(HEALTH_WARN, ss.str()));
- if (detail)
- detail->push_back(make_pair(HEALTH_WARN, ss.str()));
- }
- }
- if (num_in && g_conf->mon_pg_warn_max_per_osd > 0) {
- int per = sum_pg_up / num_in;
- if (per > g_conf->mon_pg_warn_max_per_osd) {
- ostringstream ss;
- ss << "too many PGs per OSD (" << per << " > max " << g_conf->mon_pg_warn_max_per_osd << ")";
- summary.push_back(make_pair(HEALTH_WARN, ss.str()));
- if (detail)
- detail->push_back(make_pair(HEALTH_WARN, ss.str()));
- }
- }
- if (!pg_map.pg_stat.empty()) {
- for (ceph::unordered_map<int,pool_stat_t>::const_iterator p = pg_map.pg_pool_sum.begin();
- p != pg_map.pg_pool_sum.end();
- ++p) {
- const pg_pool_t *pi = mon->osdmon()->osdmap.get_pg_pool(p->first);
- if (!pi)
- continue; // in case osdmap changes haven't propagated to PGMap yet
- const string& name = mon->osdmon()->osdmap.get_pool_name(p->first);
- if (pi->get_pg_num() > pi->get_pgp_num() &&
- !(name.find(".DELETED") != string::npos &&
- g_conf->mon_fake_pool_delete)) {
- ostringstream ss;
- ss << "pool " << name << " pg_num "
- << pi->get_pg_num() << " > pgp_num " << pi->get_pgp_num();
- summary.push_back(make_pair(HEALTH_WARN, ss.str()));
- if (detail)
- detail->push_back(make_pair(HEALTH_WARN, ss.str()));
- }
- int average_objects_per_pg = pg_map.pg_sum.stats.sum.num_objects / pg_map.pg_stat.size();
- if (average_objects_per_pg > 0 &&
- pg_map.pg_sum.stats.sum.num_objects >= g_conf->mon_pg_warn_min_objects &&
- p->second.stats.sum.num_objects >= g_conf->mon_pg_warn_min_pool_objects) {
- int objects_per_pg = p->second.stats.sum.num_objects / pi->get_pg_num();
- float ratio = (float)objects_per_pg / (float)average_objects_per_pg;
- if (g_conf->mon_pg_warn_max_object_skew > 0 &&
- ratio > g_conf->mon_pg_warn_max_object_skew) {
- ostringstream ss;
- ss << "pool " << name << " has many more objects per pg than average (too few pgs?)";
- summary.push_back(make_pair(HEALTH_WARN, ss.str()));
- if (detail) {
- ostringstream ss;
- ss << "pool " << name << " objects per pg ("
- << objects_per_pg << ") is more than " << ratio << " times cluster average ("
- << average_objects_per_pg << ")";
- detail->push_back(make_pair(HEALTH_WARN, ss.str()));
- }
- }
- }
- }
- }
-
- print_unscrubbed_pgs(pg_map.pg_stat, summary, detail, cct);
-
}
void PGMonitor::check_full_osd_health(list<pair<health_status_t,string> >& summary,
list<pair<health_status_t,string> > *detail,
- const set<int>& s, const char *desc,
+ const mempool::pgmap::set<int>& s, const char *desc,
health_status_t sev) const
{
if (!s.empty()) {
void PGMonitor::check_subs()
{
+ if (mon->osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS) {
+ return;
+ }
+
dout(10) << __func__ << dendl;
const string type = "osd_pg_creates";
}
return true;
}
+
+// Stat-service adapter handed out by PGMonitor::get_pg_stat_service():
+// it answers the MonPGStatService interface from a PGMap while asking
+// the owning PGMonitor for readability and committed-version numbers.
+class PGMonStatService : public MonPGStatService, public PGMapStatService {
+  PGMonitor *pgmon;  // owning monitor service; not owned by this object
+public:
+  PGMonStatService(const PGMap& o, PGMonitor *pgm)
+    : MonPGStatService(),
+      PGMapStatService(o),
+      pgmon(pgm)
+  {}
+
+  // Readable exactly when the owning PGMonitor reports itself readable.
+  bool is_readable() const override {
+    return pgmon->is_readable();
+  }
+
+  // Copy creating pgs from the pgmap into pending_creates->pgs, skipping
+  // pgs whose pool is not in `pools`.  Returns how many entries were newly
+  // inserted; nothing is added (and 0 is returned) when the pgmap's
+  // last_pg_scan is older than scan_epoch.
+  unsigned maybe_add_creating_pgs(epoch_t scan_epoch,
+      const mempool::osdmap::map<int64_t,pg_pool_t>& pools,
+      creating_pgs_t *pending_creates) const override
+  {
+    unsigned added = 0;
+    if (pgmap.last_pg_scan >= scan_epoch) {
+      for (auto& pgid : pgmap.creating_pgs) {
+        if (pools.count(pgid.pool()) == 0) {
+          continue;
+        }
+        auto stat_it = pgmap.pg_stat.find(pgid);
+        assert(stat_it != pgmap.pg_stat.end());
+        auto created = make_pair(stat_it->second.created,
+                                 stat_it->second.last_scrub_stamp);
+        // emplace() leaves an existing entry alone, so only genuinely new
+        // pgs are counted
+        const bool inserted = pending_creates->pgs.emplace(pgid, created).second;
+        if (inserted) {
+          ++added;
+        }
+      }
+    }
+    return added;
+  }
+
+  // Drop from creates->pgs every pg that the pgmap knows about and that no
+  // longer carries PG_STATE_CREATING, recording its pool in created_pools.
+  void maybe_trim_creating_pgs(creating_pgs_t *creates) const override {
+    for (auto p = creates->pgs.begin(); p != creates->pgs.end(); ) {
+      auto q = pgmap.pg_stat.find(p->first);
+      const bool keep = (q == pgmap.pg_stat.end()) ||
+                        (q->second.state & PG_STATE_CREATING);
+      if (keep) {
+        ++p;
+        continue;
+      }
+      p = creates->pgs.erase(p);
+      creates->created_pools.insert(q->first.pool());
+    }
+  }
+
+  // Dump the pgmap plus the PGMonitor's first/last committed versions.
+  void dump_info(Formatter *f) const override {
+    f->dump_object("pgmap", pgmap);
+    f->dump_unsigned("pgmap_first_committed", pgmon->get_first_committed());
+    f->dump_unsigned("pgmap_last_committed", pgmon->get_last_committed());
+  }
+
+  // Forward a pg command to process_pg_map_command() against our pgmap.
+  int process_pg_command(const string& prefix,
+                         const map<string,cmd_vartype>& cmdmap,
+                         const OSDMap& osdmap,
+                         Formatter *f,
+                         stringstream *ss,
+                         bufferlist *odata) const override {
+    int r = process_pg_map_command(prefix, cmdmap, pgmap, osdmap, f, ss, odata);
+    return r;
+  }
+
+  // Forward to reweight::by_utilization() using our pgmap.
+  int reweight_by_utilization(const OSDMap &osd_map,
+                              int oload,
+                              double max_changef,
+                              int max_osds,
+                              bool by_pg, const set<int64_t> *pools,
+                              bool no_increasing,
+                              mempool::osdmap::map<int32_t, uint32_t>* new_weights,
+                              std::stringstream *ss,
+                              std::string *out_str,
+                              Formatter *f) const override {
+    int r = reweight::by_utilization(osd_map, pgmap, oload, max_changef,
+                                     max_osds, by_pg, pools, no_increasing,
+                                     new_weights, ss, out_str, f);
+    return r;
+  }
+};
+
+// Return the stat service backed by pg_map, creating it on first use.
+// The returned pointer stays owned by `pgservice`.
+MonPGStatService *PGMonitor::get_pg_stat_service()
+{
+  if (pgservice) {
+    return pgservice.get();
+  }
+  pgservice.reset(new PGMonStatService(pg_map, this));
+  return pgservice.get();
+}
+
+// Construct the pg monitor service and fix the three store prefixes it
+// uses ("pgmap_meta", "pgmap_pg", "pgmap_osd").
+PGMonitor::PGMonitor(Monitor *mn, Paxos *p, const string& service_name)
+  : PaxosService(mn, p, service_name),
+    pgmap_meta_prefix("pgmap_meta"),
+    pgmap_pg_prefix("pgmap_pg"),
+    pgmap_osd_prefix("pgmap_osd")
+{
+}
+
+PGMonitor::~PGMonitor() = default; // NOTE(review): presumably defined here, after PGMonStatService is complete, so the pgservice member can be destroyed - confirm against the header