]> git.proxmox.com Git - ceph.git/blame - ceph/src/mgr/DaemonHealthMetricCollector.cc
import quincy beta 17.1.0
[ceph.git] / ceph / src / mgr / DaemonHealthMetricCollector.cc
CommitLineData
f67539c2 1#include <fmt/format.h>
11fdf7f2
TL
2
3#include "include/health.h"
4#include "include/types.h"
5#include "DaemonHealthMetricCollector.h"
6
11fdf7f2
TL
7namespace {
8
20effc67
TL
9using std::unique_ptr;
10using std::vector;
11using std::ostringstream;
12
11fdf7f2
TL
13class SlowOps final : public DaemonHealthMetricCollector {
14 bool _is_relevant(daemon_metric type) const override {
15 return type == daemon_metric::SLOW_OPS;
16 }
17 health_check_t& _get_check(health_check_map_t& cm) const override {
9f95a23c 18 return cm.get_or_add("SLOW_OPS", HEALTH_WARN, "", 1);
11fdf7f2
TL
19 }
20 bool _update(const DaemonKey& daemon,
21 const DaemonHealthMetric& metric) override {
22 auto num_slow = metric.get_n1();
23 auto blocked_time = metric.get_n2();
24 value.n1 += num_slow;
25 value.n2 = std::max(value.n2, blocked_time);
26 if (num_slow || blocked_time) {
27 daemons.push_back(daemon);
28 return true;
29 } else {
30 return false;
31 }
32 }
33 void _summarize(health_check_t& check) const override {
34 if (daemons.empty()) {
35 return;
36 }
f67539c2
TL
37 // Note this message format is used in mgr/prometheus, so any change in format
38 // requires a corresponding change in the mgr/prometheus module.
11fdf7f2
TL
39 ostringstream ss;
40 if (daemons.size() > 1) {
41 if (daemons.size() > 10) {
42 ss << "daemons " << vector<DaemonKey>(daemons.begin(), daemons.begin()+10)
43 << "..." << " have slow ops.";
44 } else {
45 ss << "daemons " << daemons << " have slow ops.";
46 }
47 } else {
48 ss << daemons.front() << " has slow ops";
49 }
f67539c2
TL
50 check.summary =
51 fmt::format("{} slow ops, oldest one blocked for {} sec, {}",
52 value.n1, value.n2, ss.str());
11fdf7f2
TL
53 // No detail
54 }
55 vector<DaemonKey> daemons;
56};
57
58
59class PendingPGs final : public DaemonHealthMetricCollector {
60 bool _is_relevant(daemon_metric type) const override {
61 return type == daemon_metric::PENDING_CREATING_PGS;
62 }
63 health_check_t& _get_check(health_check_map_t& cm) const override {
9f95a23c 64 return cm.get_or_add("PENDING_CREATING_PGS", HEALTH_WARN, "", 1);
11fdf7f2
TL
65 }
66 bool _update(const DaemonKey& osd,
67 const DaemonHealthMetric& metric) override {
68 value.n += metric.get_n();
69 if (metric.get_n()) {
70 osds.push_back(osd);
71 return true;
72 } else {
73 return false;
74 }
75 }
76 void _summarize(health_check_t& check) const override {
77 if (osds.empty()) {
78 return;
79 }
f67539c2 80 check.summary = fmt::format("{} PGs pending on creation", value.n);
11fdf7f2
TL
81 ostringstream ss;
82 if (osds.size() > 1) {
83 ss << "osds " << osds << " have pending PGs.";
84 } else {
85 ss << osds.front() << " has pending PGs";
86 }
87 check.detail.push_back(ss.str());
88 }
89 vector<DaemonKey> osds;
90};
91
92} // anonymous namespace
93
94unique_ptr<DaemonHealthMetricCollector>
95DaemonHealthMetricCollector::create(daemon_metric m)
96{
97 switch (m) {
98 case daemon_metric::SLOW_OPS:
f67539c2 99 return std::make_unique<SlowOps>();
11fdf7f2 100 case daemon_metric::PENDING_CREATING_PGS:
f67539c2 101 return std::make_unique<PendingPGs>();
11fdf7f2 102 default:
f67539c2 103 return {};
11fdf7f2
TL
104 }
105}