/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/ceph/ceph_debug.h>

#include <linux/types.h>
#include <linux/percpu_counter.h>
#include <linux/math64.h>
#include <linux/ktime.h>

#include "mds_client.h"
11 static bool ceph_mdsc_send_metrics(struct ceph_mds_client
*mdsc
,
12 struct ceph_mds_session
*s
)
14 struct ceph_metric_head
*head
;
15 struct ceph_metric_cap
*cap
;
16 struct ceph_metric_read_latency
*read
;
17 struct ceph_metric_write_latency
*write
;
18 struct ceph_metric_metadata_latency
*meta
;
19 struct ceph_metric_dlease
*dlease
;
20 struct ceph_opened_files
*files
;
21 struct ceph_pinned_icaps
*icaps
;
22 struct ceph_opened_inodes
*inodes
;
23 struct ceph_read_io_size
*rsize
;
24 struct ceph_write_io_size
*wsize
;
25 struct ceph_client_metric
*m
= &mdsc
->metric
;
26 u64 nr_caps
= atomic64_read(&m
->total_caps
);
27 u32 header_len
= sizeof(struct ceph_metric_header
);
34 len
= sizeof(*head
) + sizeof(*cap
) + sizeof(*read
) + sizeof(*write
)
35 + sizeof(*meta
) + sizeof(*dlease
) + sizeof(*files
)
36 + sizeof(*icaps
) + sizeof(*inodes
) + sizeof(*rsize
)
39 msg
= ceph_msg_new(CEPH_MSG_CLIENT_METRICS
, len
, GFP_NOFS
, true);
41 pr_err("send metrics to mds%d, failed to allocate message\n",
46 head
= msg
->front
.iov_base
;
48 /* encode the cap metric */
49 cap
= (struct ceph_metric_cap
*)(head
+ 1);
50 cap
->header
.type
= cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO
);
52 cap
->header
.compat
= 1;
53 cap
->header
.data_len
= cpu_to_le32(sizeof(*cap
) - header_len
);
54 cap
->hit
= cpu_to_le64(percpu_counter_sum(&m
->i_caps_hit
));
55 cap
->mis
= cpu_to_le64(percpu_counter_sum(&m
->i_caps_mis
));
56 cap
->total
= cpu_to_le64(nr_caps
);
59 /* encode the read latency metric */
60 read
= (struct ceph_metric_read_latency
*)(cap
+ 1);
61 read
->header
.type
= cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY
);
63 read
->header
.compat
= 1;
64 read
->header
.data_len
= cpu_to_le32(sizeof(*read
) - header_len
);
65 sum
= m
->read_latency_sum
;
66 jiffies_to_timespec64(sum
, &ts
);
67 read
->sec
= cpu_to_le32(ts
.tv_sec
);
68 read
->nsec
= cpu_to_le32(ts
.tv_nsec
);
71 /* encode the write latency metric */
72 write
= (struct ceph_metric_write_latency
*)(read
+ 1);
73 write
->header
.type
= cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY
);
74 write
->header
.ver
= 1;
75 write
->header
.compat
= 1;
76 write
->header
.data_len
= cpu_to_le32(sizeof(*write
) - header_len
);
77 sum
= m
->write_latency_sum
;
78 jiffies_to_timespec64(sum
, &ts
);
79 write
->sec
= cpu_to_le32(ts
.tv_sec
);
80 write
->nsec
= cpu_to_le32(ts
.tv_nsec
);
83 /* encode the metadata latency metric */
84 meta
= (struct ceph_metric_metadata_latency
*)(write
+ 1);
85 meta
->header
.type
= cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY
);
87 meta
->header
.compat
= 1;
88 meta
->header
.data_len
= cpu_to_le32(sizeof(*meta
) - header_len
);
89 sum
= m
->metadata_latency_sum
;
90 jiffies_to_timespec64(sum
, &ts
);
91 meta
->sec
= cpu_to_le32(ts
.tv_sec
);
92 meta
->nsec
= cpu_to_le32(ts
.tv_nsec
);
95 /* encode the dentry lease metric */
96 dlease
= (struct ceph_metric_dlease
*)(meta
+ 1);
97 dlease
->header
.type
= cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE
);
98 dlease
->header
.ver
= 1;
99 dlease
->header
.compat
= 1;
100 dlease
->header
.data_len
= cpu_to_le32(sizeof(*dlease
) - header_len
);
101 dlease
->hit
= cpu_to_le64(percpu_counter_sum(&m
->d_lease_hit
));
102 dlease
->mis
= cpu_to_le64(percpu_counter_sum(&m
->d_lease_mis
));
103 dlease
->total
= cpu_to_le64(atomic64_read(&m
->total_dentries
));
106 sum
= percpu_counter_sum(&m
->total_inodes
);
108 /* encode the opened files metric */
109 files
= (struct ceph_opened_files
*)(dlease
+ 1);
110 files
->header
.type
= cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_FILES
);
111 files
->header
.ver
= 1;
112 files
->header
.compat
= 1;
113 files
->header
.data_len
= cpu_to_le32(sizeof(*files
) - header_len
);
114 files
->opened_files
= cpu_to_le64(atomic64_read(&m
->opened_files
));
115 files
->total
= cpu_to_le64(sum
);
118 /* encode the pinned icaps metric */
119 icaps
= (struct ceph_pinned_icaps
*)(files
+ 1);
120 icaps
->header
.type
= cpu_to_le32(CLIENT_METRIC_TYPE_PINNED_ICAPS
);
121 icaps
->header
.ver
= 1;
122 icaps
->header
.compat
= 1;
123 icaps
->header
.data_len
= cpu_to_le32(sizeof(*icaps
) - header_len
);
124 icaps
->pinned_icaps
= cpu_to_le64(nr_caps
);
125 icaps
->total
= cpu_to_le64(sum
);
128 /* encode the opened inodes metric */
129 inodes
= (struct ceph_opened_inodes
*)(icaps
+ 1);
130 inodes
->header
.type
= cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_INODES
);
131 inodes
->header
.ver
= 1;
132 inodes
->header
.compat
= 1;
133 inodes
->header
.data_len
= cpu_to_le32(sizeof(*inodes
) - header_len
);
134 inodes
->opened_inodes
= cpu_to_le64(percpu_counter_sum(&m
->opened_inodes
));
135 inodes
->total
= cpu_to_le64(sum
);
138 /* encode the read io size metric */
139 rsize
= (struct ceph_read_io_size
*)(inodes
+ 1);
140 rsize
->header
.type
= cpu_to_le32(CLIENT_METRIC_TYPE_READ_IO_SIZES
);
141 rsize
->header
.ver
= 1;
142 rsize
->header
.compat
= 1;
143 rsize
->header
.data_len
= cpu_to_le32(sizeof(*rsize
) - header_len
);
144 rsize
->total_ops
= cpu_to_le64(m
->total_reads
);
145 rsize
->total_size
= cpu_to_le64(m
->read_size_sum
);
148 /* encode the write io size metric */
149 wsize
= (struct ceph_write_io_size
*)(rsize
+ 1);
150 wsize
->header
.type
= cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_IO_SIZES
);
151 wsize
->header
.ver
= 1;
152 wsize
->header
.compat
= 1;
153 wsize
->header
.data_len
= cpu_to_le32(sizeof(*wsize
) - header_len
);
154 wsize
->total_ops
= cpu_to_le64(m
->total_writes
);
155 wsize
->total_size
= cpu_to_le64(m
->write_size_sum
);
158 put_unaligned_le32(items
, &head
->num
);
159 msg
->front
.iov_len
= len
;
160 msg
->hdr
.version
= cpu_to_le16(1);
161 msg
->hdr
.compat_version
= cpu_to_le16(1);
162 msg
->hdr
.front_len
= cpu_to_le32(msg
->front
.iov_len
);
163 dout("client%llu send metrics to mds%d\n",
164 ceph_client_gid(mdsc
->fsc
->client
), s
->s_mds
);
165 ceph_con_send(&s
->s_con
, msg
);
171 static void metric_get_session(struct ceph_mds_client
*mdsc
)
173 struct ceph_mds_session
*s
;
176 mutex_lock(&mdsc
->mutex
);
177 for (i
= 0; i
< mdsc
->max_sessions
; i
++) {
178 s
= __ceph_lookup_mds_session(mdsc
, i
);
183 * Skip it if MDS doesn't support the metric collection,
184 * or the MDS will close the session's socket connection
185 * directly when it get this message.
187 if (check_session_state(s
) &&
188 test_bit(CEPHFS_FEATURE_METRIC_COLLECT
, &s
->s_features
)) {
189 mdsc
->metric
.session
= s
;
193 ceph_put_mds_session(s
);
195 mutex_unlock(&mdsc
->mutex
);
198 static void metric_delayed_work(struct work_struct
*work
)
200 struct ceph_client_metric
*m
=
201 container_of(work
, struct ceph_client_metric
, delayed_work
.work
);
202 struct ceph_mds_client
*mdsc
=
203 container_of(m
, struct ceph_mds_client
, metric
);
208 if (!m
->session
|| !check_session_state(m
->session
)) {
210 ceph_put_mds_session(m
->session
);
213 metric_get_session(mdsc
);
216 ceph_mdsc_send_metrics(mdsc
, m
->session
);
217 metric_schedule_delayed(m
);
221 int ceph_metric_init(struct ceph_client_metric
*m
)
228 atomic64_set(&m
->total_dentries
, 0);
229 ret
= percpu_counter_init(&m
->d_lease_hit
, 0, GFP_KERNEL
);
233 ret
= percpu_counter_init(&m
->d_lease_mis
, 0, GFP_KERNEL
);
235 goto err_d_lease_mis
;
237 atomic64_set(&m
->total_caps
, 0);
238 ret
= percpu_counter_init(&m
->i_caps_hit
, 0, GFP_KERNEL
);
242 ret
= percpu_counter_init(&m
->i_caps_mis
, 0, GFP_KERNEL
);
246 spin_lock_init(&m
->read_metric_lock
);
247 m
->read_latency_sq_sum
= 0;
248 m
->read_latency_min
= KTIME_MAX
;
249 m
->read_latency_max
= 0;
251 m
->read_latency_sum
= 0;
252 m
->read_size_min
= U64_MAX
;
253 m
->read_size_max
= 0;
254 m
->read_size_sum
= 0;
256 spin_lock_init(&m
->write_metric_lock
);
257 m
->write_latency_sq_sum
= 0;
258 m
->write_latency_min
= KTIME_MAX
;
259 m
->write_latency_max
= 0;
261 m
->write_latency_sum
= 0;
262 m
->write_size_min
= U64_MAX
;
263 m
->write_size_max
= 0;
264 m
->write_size_sum
= 0;
266 spin_lock_init(&m
->metadata_metric_lock
);
267 m
->metadata_latency_sq_sum
= 0;
268 m
->metadata_latency_min
= KTIME_MAX
;
269 m
->metadata_latency_max
= 0;
270 m
->total_metadatas
= 0;
271 m
->metadata_latency_sum
= 0;
273 atomic64_set(&m
->opened_files
, 0);
274 ret
= percpu_counter_init(&m
->opened_inodes
, 0, GFP_KERNEL
);
276 goto err_opened_inodes
;
277 ret
= percpu_counter_init(&m
->total_inodes
, 0, GFP_KERNEL
);
279 goto err_total_inodes
;
282 INIT_DELAYED_WORK(&m
->delayed_work
, metric_delayed_work
);
287 percpu_counter_destroy(&m
->opened_inodes
);
289 percpu_counter_destroy(&m
->i_caps_mis
);
291 percpu_counter_destroy(&m
->i_caps_hit
);
293 percpu_counter_destroy(&m
->d_lease_mis
);
295 percpu_counter_destroy(&m
->d_lease_hit
);
300 void ceph_metric_destroy(struct ceph_client_metric
*m
)
305 percpu_counter_destroy(&m
->total_inodes
);
306 percpu_counter_destroy(&m
->opened_inodes
);
307 percpu_counter_destroy(&m
->i_caps_mis
);
308 percpu_counter_destroy(&m
->i_caps_hit
);
309 percpu_counter_destroy(&m
->d_lease_mis
);
310 percpu_counter_destroy(&m
->d_lease_hit
);
312 cancel_delayed_work_sync(&m
->delayed_work
);
314 ceph_put_mds_session(m
->session
);
/*
 * Fold a new sample into a running minimum/maximum pair: lower @min when
 * @new is smaller, raise @max when @new is larger.  @min and @max must be
 * lvalues.  @new is evaluated more than once, so it must not have side
 * effects.  Wrapped in do { } while (0) so that it behaves as a single
 * statement, including in an unbraced if/else.
 */
#define METRIC_UPDATE_MIN_MAX(min, max, new)	\
do {						\
	if (unlikely((new) < (min)))		\
		(min) = (new);			\
	if (unlikely((new) > (max)))		\
		(max) = (new);			\
} while (0)
325 static inline void __update_stdev(ktime_t total
, ktime_t lsum
,
326 ktime_t
*sq_sump
, ktime_t lat
)
330 if (unlikely(total
== 1))
333 /* the sq is (lat - old_avg) * (lat - new_avg) */
334 avg
= DIV64_U64_ROUND_CLOSEST((lsum
- lat
), (total
- 1));
336 avg
= DIV64_U64_ROUND_CLOSEST(lsum
, total
);
337 sq
= sq
* (lat
- avg
);
341 void ceph_update_read_metrics(struct ceph_client_metric
*m
,
342 ktime_t r_start
, ktime_t r_end
,
343 unsigned int size
, int rc
)
345 ktime_t lat
= ktime_sub(r_end
, r_start
);
348 if (unlikely(rc
< 0 && rc
!= -ENOENT
&& rc
!= -ETIMEDOUT
))
351 spin_lock(&m
->read_metric_lock
);
352 total
= ++m
->total_reads
;
353 m
->read_size_sum
+= size
;
354 m
->read_latency_sum
+= lat
;
355 METRIC_UPDATE_MIN_MAX(m
->read_size_min
,
358 METRIC_UPDATE_MIN_MAX(m
->read_latency_min
,
361 __update_stdev(total
, m
->read_latency_sum
,
362 &m
->read_latency_sq_sum
, lat
);
363 spin_unlock(&m
->read_metric_lock
);
366 void ceph_update_write_metrics(struct ceph_client_metric
*m
,
367 ktime_t r_start
, ktime_t r_end
,
368 unsigned int size
, int rc
)
370 ktime_t lat
= ktime_sub(r_end
, r_start
);
373 if (unlikely(rc
&& rc
!= -ETIMEDOUT
))
376 spin_lock(&m
->write_metric_lock
);
377 total
= ++m
->total_writes
;
378 m
->write_size_sum
+= size
;
379 m
->write_latency_sum
+= lat
;
380 METRIC_UPDATE_MIN_MAX(m
->write_size_min
,
383 METRIC_UPDATE_MIN_MAX(m
->write_latency_min
,
384 m
->write_latency_max
,
386 __update_stdev(total
, m
->write_latency_sum
,
387 &m
->write_latency_sq_sum
, lat
);
388 spin_unlock(&m
->write_metric_lock
);
391 void ceph_update_metadata_metrics(struct ceph_client_metric
*m
,
392 ktime_t r_start
, ktime_t r_end
,
395 ktime_t lat
= ktime_sub(r_end
, r_start
);
398 if (unlikely(rc
&& rc
!= -ENOENT
))
401 spin_lock(&m
->metadata_metric_lock
);
402 total
= ++m
->total_metadatas
;
403 m
->metadata_latency_sum
+= lat
;
404 METRIC_UPDATE_MIN_MAX(m
->metadata_latency_min
,
405 m
->metadata_latency_max
,
407 __update_stdev(total
, m
->metadata_latency_sum
,
408 &m
->metadata_latency_sq_sum
, lat
);
409 spin_unlock(&m
->metadata_metric_lock
);