1 #include "DaemonMetricCollector.h"
2 #include "common/admin_socket_client.h"
3 #include "common/debug.h"
4 #include "common/hostname.h"
5 #include "common/perf_counters.h"
6 #include "global/global_init.h"
7 #include "global/global_context.h"
8 #include "common/split.h"
9 #include "include/common_fwd.h"
12 #include <boost/json/src.hpp>
22 #define dout_context g_ceph_context
23 #define dout_subsys ceph_subsys_ceph_exporter
// Short local names for the Boost.JSON object, value and array types.
25 using json_object
= boost::json::object
;
26 using json_value
= boost::json::value
;
27 using json_array
= boost::json::array
;
29 void DaemonMetricCollector::request_loop(boost::asio::steady_timer
&timer
) {
30 timer
.async_wait([&](const boost::system::error_code
&e
) {
31 std::cerr
<< e
<< std::endl
;
34 auto stats_period
= g_conf().get_val
<int64_t>("exporter_stats_period");
35 // time to wait before sending requests again
36 timer
.expires_from_now(std::chrono::seconds(stats_period
));
41 void DaemonMetricCollector::main() {
42 // time to wait before sending requests again
44 boost::asio::io_service io
;
45 boost::asio::steady_timer timer
{io
, std::chrono::seconds(0)};
50 std::string
DaemonMetricCollector::get_metrics() {
51 const std::lock_guard
<std::mutex
> lock(metrics_mutex
);
56 void add_metric(std::unique_ptr
<MetricsBuilder
> &builder
, T value
,
57 std::string name
, std::string description
, std::string mtype
,
59 builder
->add(std::to_string(value
), name
, description
, mtype
, labels
);
62 void add_double_or_int_metric(std::unique_ptr
<MetricsBuilder
> &builder
,
63 json_value value
, std::string name
,
64 std::string description
, std::string mtype
,
66 if (value
.is_int64()) {
67 int64_t v
= value
.as_int64();
68 add_metric(builder
, v
, name
, description
, mtype
, labels
);
69 } else if (value
.is_double()) {
70 double v
= value
.as_double();
71 add_metric(builder
, v
, name
, description
, mtype
, labels
);
75 std::string
boost_string_to_std(boost::json::string js
) {
76 std::string
res(js
.data());
// Wrap `value` in double quotes so it can be emitted as a label value.
//
// Backslash, double quote and line feed inside `value` are escaped
// (`\\`, `\"`, `\n`) as the Prometheus text exposition format requires;
// without this a label value containing one of those characters would
// produce an unparsable metrics line. Values without such characters
// come back exactly as `"` + value + `"`.
std::string quote(std::string value) {
  std::string quoted;
  quoted.reserve(value.size() + 2);
  quoted.push_back('"');
  for (const char ch : value) {
    switch (ch) {
    case '\\':
      quoted += "\\\\";
      break;
    case '"':
      quoted += "\\\"";
      break;
    case '\n':
      quoted += "\\n";
      break;
    default:
      quoted.push_back(ch);
      break;
    }
  }
  quoted.push_back('"');
  return quoted;
}
// Character predicate: true only for '-'. Passed to std::replace_if when
// hyphens in a metric name are rewritten to underscores.
bool is_hyphen(char ch) {
  constexpr char hyphen = '-';
  return hyphen == ch;
}
84 void DaemonMetricCollector::dump_asok_metrics() {
85 BlockTimer
timer(__FILE__
, __FUNCTION__
);
87 std::vector
<std::pair
<std::string
, int>> daemon_pids
;
89 bool sort
= g_conf().get_val
<bool>("exporter_sort_metrics");
91 builder
= std::unique_ptr
<OrderedMetricsBuilder
>(new OrderedMetricsBuilder());
93 builder
= std::unique_ptr
<UnorderedMetricsBuilder
>(new UnorderedMetricsBuilder());
95 for (auto &[daemon_name
, sock_client
] : clients
) {
97 sock_client
.ping(&ok
);
101 std::string perf_dump_response
= asok_request(sock_client
, "perf dump", daemon_name
);
102 if (perf_dump_response
.size() == 0) {
105 std::string perf_schema_response
= asok_request(sock_client
, "perf schema", daemon_name
);
106 if (perf_schema_response
.size() == 0) {
109 std::string config_show
= asok_request(sock_client
, "config show", daemon_name
);
110 json_object pid_file_json
= boost::json::parse(config_show
).as_object();
111 std::string pid_path
=
112 boost_string_to_std(pid_file_json
["pid_file"].as_string());
113 std::string pid_str
= read_file_to_string(pid_path
);
114 if (!pid_path
.size()) {
117 daemon_pids
.push_back({daemon_name
, std::stoi(pid_str
)});
118 json_object dump
= boost::json::parse(perf_dump_response
).as_object();
119 json_object schema
= boost::json::parse(perf_schema_response
).as_object();
120 for (auto &perf
: schema
) {
121 auto sv
= perf
.key();
122 std::string perf_group
= {sv
.begin(), sv
.end()};
123 json_object perf_group_object
= perf
.value().as_object();
124 for (auto &perf_counter
: perf_group_object
) {
125 auto sv1
= perf_counter
.key();
126 std::string perf_name
= {sv1
.begin(), sv1
.end()};
127 json_object perf_info
= perf_counter
.value().as_object();
128 auto prio_limit
= g_conf().get_val
<int64_t>("exporter_prio_limit");
129 if (perf_info
["priority"].as_int64() <
133 std::string name
= "ceph_" + perf_group
+ "_" + perf_name
;
134 std::replace_if(name
.begin(), name
.end(), is_hyphen
, '_');
136 // FIXME: test this, based on mgr_module perfpath_to_path_labels
137 auto labels_and_name
= get_labels_and_metric_name(daemon_name
, name
);
138 labels_t labels
= labels_and_name
.first
;
139 name
= labels_and_name
.second
;
141 json_value perf_values
= dump
[perf_group
].as_object()[perf_name
];
142 dump_asok_metric(perf_info
, perf_values
, name
, labels
);
146 dout(10) << "Perf counters retrieved for " << clients
.size() << " daemons." << dendl
;
147 // get time spent on this function
149 std::string
scrap_desc("Time spent scraping and transforming perfcounters to metrics");
150 labels_t scrap_labels
;
151 scrap_labels
["host"] = quote(ceph_get_hostname());
152 scrap_labels
["function"] = quote(__FUNCTION__
);
153 add_metric(builder
, timer
.get_ms(), "ceph_exporter_scrape_time", scrap_desc
,
154 "gauge", scrap_labels
);
156 const std::lock_guard
<std::mutex
> lock(metrics_mutex
);
157 get_process_metrics(daemon_pids
);
158 metrics
= builder
->dump();
161 std::vector
<std::string
> read_proc_stat_file(std::string path
) {
162 std::string stat
= read_file_to_string(path
);
163 std::vector
<std::string
> strings
;
164 auto parts
= ceph::split(stat
);
165 strings
.assign(parts
.begin(), parts
.end());
169 struct pstat
read_pid_stat(int pid
) {
170 std::string
stat_path("/proc/" + std::to_string(pid
) + "/stat");
171 std::vector
<std::string
> stats
= read_proc_stat_file(stat_path
);
173 stat
.minflt
= std::stoul(stats
[9]);
174 stat
.majflt
= std::stoul(stats
[11]);
175 stat
.utime
= std::stoul(stats
[13]);
176 stat
.stime
= std::stoul(stats
[14]);
177 stat
.num_threads
= std::stoul(stats
[19]);
178 stat
.start_time
= std::stoul(stats
[21]);
179 stat
.vm_size
= std::stoul(stats
[22]);
180 stat
.resident_size
= std::stoi(stats
[23]);
184 void DaemonMetricCollector::get_process_metrics(std::vector
<std::pair
<std::string
, int>> daemon_pids
) {
185 std::string
path("/proc");
186 std::stringstream ss
;
187 for (auto &[daemon_name
, pid
] : daemon_pids
) {
188 std::vector
<std::string
> uptimes
= read_proc_stat_file("/proc/uptime");
189 struct pstat stat
= read_pid_stat(pid
);
190 int clk_tck
= sysconf(_SC_CLK_TCK
);
191 double start_time_seconds
= stat
.start_time
/ (double)clk_tck
;
192 double user_time
= stat
.utime
/ (double)clk_tck
;
193 double kernel_time
= stat
.stime
/ (double)clk_tck
;
194 double total_time_seconds
= user_time
+ kernel_time
;
195 double uptime
= std::stod(uptimes
[0]);
196 double elapsed_time
= uptime
- start_time_seconds
;
197 double idle_time
= elapsed_time
- total_time_seconds
;
198 double usage
= total_time_seconds
* 100 / elapsed_time
;
201 labels
["ceph_daemon"] = quote(daemon_name
);
202 add_metric(builder
, stat
.minflt
, "ceph_exporter_minflt_total",
203 "Number of minor page faults of daemon", "counter", labels
);
204 add_metric(builder
, stat
.majflt
, "ceph_exporter_majflt_total",
205 "Number of major page faults of daemon", "counter", labels
);
206 add_metric(builder
, stat
.num_threads
, "ceph_exporter_num_threads",
207 "Number of threads used by daemon", "gauge", labels
);
208 add_metric(builder
, usage
, "ceph_exporter_cpu_usage", "CPU usage of a daemon",
211 std::string cpu_time_desc
= "Process time in kernel/user/idle mode";
212 labels_t cpu_total_labels
;
213 cpu_total_labels
["ceph_daemon"] = quote(daemon_name
);
214 cpu_total_labels
["mode"] = quote("kernel");
215 add_metric(builder
, kernel_time
, "ceph_exporter_cpu_total", cpu_time_desc
,
216 "counter", cpu_total_labels
);
217 cpu_total_labels
["mode"] = quote("user");
218 add_metric(builder
, user_time
, "ceph_exporter_cpu_total", cpu_time_desc
,
219 "counter", cpu_total_labels
);
220 cpu_total_labels
["mode"] = quote("idle");
221 add_metric(builder
, idle_time
, "ceph_exporter_cpu_total", cpu_time_desc
,
222 "counter", cpu_total_labels
);
223 add_metric(builder
, stat
.vm_size
, "ceph_exporter_vm_size", "Virtual memory used in a daemon",
225 add_metric(builder
, stat
.resident_size
, "ceph_exporter_resident_size",
226 "Resident memory in a daemon", "gauge", labels
);
230 std::string
DaemonMetricCollector::asok_request(AdminSocketClient
&asok
,
231 std::string command
, std::string daemon_name
) {
232 std::string
request("{\"prefix\": \"" + command
+ "\"}");
233 std::string response
;
234 std::string err
= asok
.do_request(request
, &response
);
235 if (err
.length() > 0 || response
.substr(0, 5) == "ERROR") {
236 dout(1) << "command " << command
<< "failed for daemon " << daemon_name
237 << "with error: " << err
<< dendl
;
243 std::pair
<labels_t
, std::string
>
244 DaemonMetricCollector::get_labels_and_metric_name(std::string daemon_name
,
245 std::string metric_name
) {
246 std::string new_metric_name
;
248 new_metric_name
= metric_name
;
249 if (daemon_name
.find("rgw") != std::string::npos
) {
250 std::string tmp
= daemon_name
.substr(16, std::string::npos
);
251 std::string::size_type pos
= tmp
.find('.');
252 labels
["instance_id"] = quote("rgw." + tmp
.substr(0, pos
));
254 labels
["ceph_daemon"] = quote(daemon_name
);
255 if (daemon_name
.find("rbd-mirror") != std::string::npos
) {
256 std::regex
re("^rbd_mirror_image_([^/]+)/(?:(?:([^/]+)/"
257 ")?)(.*)\\.(replay(?:_bytes|_latency)?)$");
259 if (std::regex_search(daemon_name
, match
, re
) == true) {
260 new_metric_name
= "ceph_rbd_mirror_image_" + match
.str(4);
261 labels
["pool"] = quote(match
.str(1));
262 labels
["namespace"] = quote(match
.str(2));
263 labels
["image"] = quote(match
.str(3));
267 return {labels
, new_metric_name
};
271 perf_values can be either an int/double or a json_object. Since
272 json_value is a wrapper of both, we use that class.
274 void DaemonMetricCollector::dump_asok_metric(json_object perf_info
,
275 json_value perf_values
,
278 int64_t type
= perf_info
["type"].as_int64();
279 std::string metric_type
=
280 boost_string_to_std(perf_info
["metric_type"].as_string());
281 std::string description
=
282 boost_string_to_std(perf_info
["description"].as_string());
284 if (type
& PERFCOUNTER_LONGRUNAVG
) {
285 int64_t count
= perf_values
.as_object()["avgcount"].as_int64();
286 add_metric(builder
, count
, name
+ "_count", description
, metric_type
,
288 json_value sum_value
= perf_values
.as_object()["sum"];
289 add_double_or_int_metric(builder
, sum_value
, name
+ "_sum", description
,
290 metric_type
, labels
);
291 } else if (type
& PERFCOUNTER_TIME
) {
292 if (perf_values
.is_int64()) {
293 double value
= perf_values
.as_int64() / 1000000000.0f
;
294 add_metric(builder
, value
, name
, description
, metric_type
, labels
);
295 } else if (perf_values
.is_double()) {
296 double value
= perf_values
.as_double() / 1000000000.0f
;
297 add_metric(builder
, value
, name
, description
, metric_type
, labels
);
300 add_double_or_int_metric(builder
, perf_values
, name
, description
,
301 metric_type
, labels
);
305 void DaemonMetricCollector::update_sockets() {
306 std::string sock_dir
= g_conf().get_val
<std::string
>("exporter_sock_dir");
308 std::filesystem::path sock_path
= sock_dir
;
309 if(!std::filesystem::is_directory(sock_path
.parent_path())) {
310 dout(1) << "ERROR: No such directory exist" << sock_dir
<< dendl
;
313 for (const auto &entry
:
314 std::filesystem::directory_iterator(sock_dir
)) {
315 if (entry
.path().extension() == ".asok") {
316 std::string daemon_socket_name
= entry
.path().filename().string();
317 std::string daemon_name
=
318 daemon_socket_name
.substr(0, daemon_socket_name
.size() - 5);
319 if (clients
.find(daemon_name
) == clients
.end() &&
320 !(daemon_name
.find("mgr") != std::string::npos
) &&
321 !(daemon_name
.find("ceph-exporter") != std::string::npos
)) {
322 AdminSocketClient
sock(entry
.path().string());
323 clients
.insert({daemon_name
, std::move(sock
)});
329 void OrderedMetricsBuilder::add(std::string value
, std::string name
,
330 std::string description
, std::string mtype
,
333 if (metrics
.find(name
) == metrics
.end()) {
334 Metric
metric(name
, mtype
, description
);
335 metrics
[name
] = std::move(metric
);
337 Metric
&metric
= metrics
[name
];
338 metric
.add(labels
, value
);
341 std::string
OrderedMetricsBuilder::dump() {
342 for (auto &[name
, metric
] : metrics
) {
343 out
+= metric
.dump() + "\n";
348 void UnorderedMetricsBuilder::add(std::string value
, std::string name
,
349 std::string description
, std::string mtype
,
352 Metric
metric(name
, mtype
, description
);
353 metric
.add(labels
, value
);
354 out
+= metric
.dump() + "\n\n";
357 std::string
UnorderedMetricsBuilder::dump() { return out
; }
359 void Metric::add(labels_t labels
, std::string value
) {
361 entry
.labels
= labels
;
363 entries
.push_back(entry
);
366 std::string
Metric::dump() {
367 std::stringstream metric_ss
;
368 metric_ss
<< "# HELP " << name
<< " " << description
<< "\n";
369 metric_ss
<< "# TYPE " << name
<< " " << mtype
<< "\n";
370 for (auto &entry
: entries
) {
371 std::stringstream labels_ss
;
373 for (auto &[label_name
, label_value
] : entry
.labels
) {
374 labels_ss
<< label_name
<< "=" << label_value
;
375 if (i
< entry
.labels
.size() - 1) {
380 metric_ss
<< name
<< "{" << labels_ss
.str() << "} " << entry
.value
;
381 if (&entry
!= &entries
.back()) {
385 return metric_ss
.str();
388 DaemonMetricCollector
&collector_instance() {
389 static DaemonMetricCollector instance
;