// ceph/monitoring/ceph-mixin/dashboards/host.libsonnet
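// Grafana dashboard definitions for host monitoring in the ceph-mixin:
// builds 'hosts-overview.json' and 'host-details.json' on top of the
// helpers provided by utils.libsonnet.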
local g = import 'grafonnet/grafana.libsonnet';

(import 'utils.libsonnet') {
  'hosts-overview.json':
    $.dashboardSchema(
      'Host Overview',
      '',
      'y0KGL0iZz',
      'now-1h',
      '30s',
      16,
      $._config.dashboardTags,
      '',
    )
    .addRequired(
      type='grafana', id='grafana', name='Grafana', version='5.3.2'
    )
    .addRequired(
      type='panel', id='graph', name='Graph', version='5.0.0'
    )
    .addRequired(
      type='panel', id='singlestat', name='Singlestat', version='5.0.0'
    )
    .addAnnotation(
      $.addAnnotationSchema(
        1,
        '-- Grafana --',
        true,
        true,
        'rgba(0, 211, 255, 1)',
        'Annotations & Alerts',
        'dashboard'
      )
    )
    .addTemplate(
      g.template.datasource('datasource',
                            'prometheus',
                            'default',
                            label='Data Source')
    )
    .addTemplate(
      $.addClusterTemplate()
    )
    .addTemplate(
      $.addJobTemplate()
    )
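    // Template variables: capture the hostnames backing each daemon type
    // (OSD, MON, MDS, RGW) so the panels below can scope node_exporter
    // metrics to hosts that actually belong to this cluster.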
    .addTemplate(
      $.addTemplateSchema('osd_hosts',
                          '$datasource',
                          'label_values(ceph_disk_occupation{%(matchers)s}, exported_instance)' % $.matchers(),
                          1,
                          true,
                          1,
                          null,
                          '([^.]*).*')
    )
    .addTemplate(
      $.addTemplateSchema('mon_hosts',
                          '$datasource',
                          'label_values(ceph_mon_metadata{%(matchers)s}, ceph_daemon)' % $.matchers(),
                          1,
                          true,
                          1,
                          null,
                          'mon.(.*)')
    )
    .addTemplate(
      $.addTemplateSchema('mds_hosts',
                          '$datasource',
                          'label_values(ceph_mds_inodes{%(matchers)s}, ceph_daemon)' % $.matchers(),
                          1,
                          true,
                          1,
                          null,
                          'mds.(.*)')
    )
    .addTemplate(
      $.addTemplateSchema('rgw_hosts',
                          '$datasource',
                          'label_values(ceph_rgw_metadata{%(matchers)s}, ceph_daemon)' % $.matchers(),
                          1,
                          true,
                          1,
                          null,
                          'rgw.(.*)')
    )
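    // Top row: single-stat summary panels (host count, CPU, RAM, IOPS,
    // disk utilization, network load), laid out left to right.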
    .addPanels([
      $.simpleSingleStatPanel(
        'none',
        'OSD Hosts',
        '',
        'current',
        'count(sum by (hostname) (ceph_osd_metadata{%(matchers)s}))' % $.matchers(),
        true,
        'time_series',
        0,
        0,
        4,
        5
      ),
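      // The node_exporter queries below OR the old metric name with its
      // renamed (v0.16+) successor, e.g. node_cpu vs node_cpu_seconds_total,
      // so the dashboard works against either exporter generation.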
      $.simpleSingleStatPanel(
        'percentunit',
        'AVG CPU Busy',
        'Average CPU busy across all hosts (OSD, RGW, MON, etc.) within the cluster',
        'current',
        |||
          avg(1 - (
            avg by(instance) (
              rate(node_cpu_seconds_total{mode='idle',instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}[$__rate_interval]) or
              rate(node_cpu{mode='idle',instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}[$__rate_interval])
            )
          ))
        |||,
        true,
        'time_series',
        4,
        0,
        4,
        5
      ),
      $.simpleSingleStatPanel(
        'percentunit',
        'AVG RAM Utilization',
        'Average Memory Usage across all hosts in the cluster (excludes buffer/cache usage)',
        'current',
        |||
          avg ((
            (
              node_memory_MemTotal{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or
              node_memory_MemTotal_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}
            ) - ((
              node_memory_MemFree{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or
              node_memory_MemFree_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}
            ) + (
              node_memory_Cached{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or
              node_memory_Cached_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}
            ) + (
              node_memory_Buffers{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or
              node_memory_Buffers_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}
            ) + (
              node_memory_Slab{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or
              node_memory_Slab_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}
            ))
          ) / (
            node_memory_MemTotal{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or
            node_memory_MemTotal_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}
          ))
        |||,
        true,
        'time_series',
        8,
        0,
        4,
        5
      ),
      $.simpleSingleStatPanel(
        'none',
        'Physical IOPS',
        'IOPS Load at the device as reported by the OS on all OSD hosts',
        'current',
        |||
          sum ((
            rate(node_disk_reads_completed{instance=~"($osd_hosts).*"}[$__rate_interval]) or
            rate(node_disk_reads_completed_total{instance=~"($osd_hosts).*"}[$__rate_interval])
          ) + (
            rate(node_disk_writes_completed{instance=~"($osd_hosts).*"}[$__rate_interval]) or
            rate(node_disk_writes_completed_total{instance=~"($osd_hosts).*"}[$__rate_interval])
          ))
        |||,
        true,
        'time_series',
        12,
        0,
        4,
        5
      ),
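      // Device utilization comes from node_exporter, so it is joined to
      // ceph_disk_occupation_human on (instance, device) after normalizing
      // both labels: the instance is stripped of domain/port and the device
      // of its /dev/ prefix, letting each drive be matched to its OSD.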
      $.simpleSingleStatPanel(
        'percent',
        'AVG Disk Utilization',
        'Average Disk utilization for all OSD data devices (i.e. excludes journal/WAL)',
        'current',
        |||
          avg (
            label_replace(
              (rate(node_disk_io_time_ms[$__rate_interval]) / 10) or
              (rate(node_disk_io_time_seconds_total[$__rate_interval]) * 100),
              "instance", "$1", "instance", "([^.:]*).*"
            ) * on(instance, device) group_left(ceph_daemon) label_replace(
              label_replace(
                ceph_disk_occupation_human{%(matchers)s, instance=~"($osd_hosts).*"},
                "device", "$1", "device", "/dev/(.*)"
              ), "instance", "$1", "instance", "([^.:]*).*"
            )
          )
        ||| % $.matchers(),
        true,
        'time_series',
        16,
        0,
        4,
        5
      ),
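      // The network totals drop any interface that bonding_slaves reports
      // as a bond master ('unless on (device, instance)'), so traffic
      // carried over bonded links is not counted twice.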
      $.simpleSingleStatPanel(
        'bytes',
        'Network Load',
        'Total send/receive network load across all hosts in the Ceph cluster',
        'current',
        |||
          sum (
            (
              rate(node_network_receive_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval]) or
              rate(node_network_receive_bytes_total{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval])
            ) unless on (device, instance)
            label_replace((bonding_slaves > 0), "device", "$1", "master", "(.+)")
          ) +
          sum (
            (
              rate(node_network_transmit_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval]) or
              rate(node_network_transmit_bytes_total{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval])
            ) unless on (device, instance)
            label_replace((bonding_slaves > 0), "device", "$1", "master", "(.+)")
          )
        |||,
        true,
        'time_series',
        20,
        0,
        4,
        5
      ),
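      // Second row: top-10 graph panels for CPU busy and network load.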
      $.simpleGraphPanel(
        {},
        'CPU Busy - Top 10 Hosts',
        'Show the top 10 busiest hosts by CPU',
        'percent',
        null,
        0,
        |||
          topk(10,
            100 * (
              1 - (
                avg by(instance) (
                  rate(node_cpu_seconds_total{mode='idle',instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}[$__rate_interval]) or
                  rate(node_cpu{mode='idle',instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}[$__rate_interval])
                )
              )
            )
          )
        |||,
        '{{instance}}',
        0,
        5,
        12,
        9
      ),
      $.simpleGraphPanel(
        {},
        'Network Load - Top 10 Hosts',
        'Top 10 hosts by network load',
        'Bps',
        null,
        0,
        |||
          topk(10, (sum by(instance) (
            (
              rate(node_network_receive_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval]) or
              rate(node_network_receive_bytes_total{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval])
            ) +
            (
              rate(node_network_transmit_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval]) or
              rate(node_network_transmit_bytes_total{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval])
            ) unless on (device, instance)
            label_replace((bonding_slaves > 0), "device", "$1", "master", "(.+)"))
          ))
        |||,
        '{{instance}}',
        12,
        5,
        12,
        9
      ),
    ]),
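  // 'host-details.json': per-host drill-down dashboard; the $ceph_hosts
  // template variable selects which host's metrics are shown.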
  'host-details.json':
    $.dashboardSchema(
      'Host Details',
      '',
      'rtOg0AiWz',
      'now-1h',
      '30s',
      16,
      $._config.dashboardTags + ['overview'],
      ''
    )
    .addRequired(
      type='grafana', id='grafana', name='Grafana', version='5.3.2'
    )
    .addRequired(
      type='panel', id='graph', name='Graph', version='5.0.0'
    )
    .addRequired(
      type='panel', id='singlestat', name='Singlestat', version='5.0.0'
    )
    .addAnnotation(
      $.addAnnotationSchema(
        1, '-- Grafana --', true, true, 'rgba(0, 211, 255, 1)', 'Annotations & Alerts', 'dashboard'
      )
    )
    .addTemplate(
      g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
    )
    .addTemplate(
      $.addClusterTemplate()
    )
    .addTemplate(
      $.addJobTemplate()
    )
    .addTemplate(
      $.addTemplateSchema('ceph_hosts',
                          '$datasource',
                          'label_values({%(clusterMatcher)s}, instance)' % $.matchers(),
                          1,
                          false,
                          3,
                          'Hostname',
                          '([^.:]*).*')
    )
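    // Panels: a '$ceph_hosts System Overview' row (OSD count, CPU, RAM,
    // network) followed by an 'OSD Disk Performance Statistics' row.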
    .addPanels([
      $.addRowSchema(false, true, '$ceph_hosts System Overview') + { gridPos: { x: 0, y: 0, w: 24, h: 1 } },
      $.simpleSingleStatPanel(
        'none',
        'OSDs',
        '',
        'current',
        "count(sum by (ceph_daemon) (ceph_osd_metadata{%(matchers)s, hostname='$ceph_hosts'}))" % $.matchers(),
        null,
        'time_series',
        0,
        1,
        3,
        5
      ),
      $.simpleGraphPanel(
        {
          interrupt: '#447EBC',
          steal: '#6D1F62',
          system: '#890F02',
          user: '#3F6833',
          wait: '#C15C17',
        },
        'CPU Utilization',
        "Shows the CPU breakdown. When multiple servers are selected, only the first host's CPU data is shown",
        'percent',
        '% Utilization',
        null,
        |||
          sum by (mode) (
            rate(node_cpu{instance=~"($ceph_hosts)([\\\\.:].*)?", mode=~"(irq|nice|softirq|steal|system|user|iowait)"}[$__rate_interval]) or
            rate(node_cpu_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?", mode=~"(irq|nice|softirq|steal|system|user|iowait)"}[$__rate_interval])
          ) / (
            scalar(
              sum(rate(node_cpu{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]) or
                  rate(node_cpu_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]))
            ) * 100
          )
        |||,
        '{{mode}}',
        3,
        1,
        6,
        10
      ),
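      // RAM Usage plots free, total, buffers/cache and used series; the
      // series override un-stacks 'total' and draws it as a 2px line so it
      // reads as a ceiling rather than part of the stack.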
      $.simpleGraphPanel(
        {
          Available: '#508642',
          Free: '#508642',
          Total: '#bf1b00',
          Used: '#bf1b00',
          total: '#bf1b00',
          used: '#0a50a1',
        },
        'RAM Usage',
        '',
        'bytes',
        'RAM used',
        null,
        |||
          node_memory_MemFree{instance=~"$ceph_hosts([\\\\.:].*)?"} or
          node_memory_MemFree_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}
        |||,
        'Free',
        9,
        1,
        6,
        10
      )
      .addTargets(
        [
          $.addTargetSchema(
            |||
              node_memory_MemTotal{instance=~"$ceph_hosts([\\\\.:].*)?"} or
              node_memory_MemTotal_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}
            |||,
            'total'
          ),
          $.addTargetSchema(
            |||
              (
                node_memory_Cached{instance=~"$ceph_hosts([\\\\.:].*)?"} or
                node_memory_Cached_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}
              ) + (
                node_memory_Buffers{instance=~"$ceph_hosts([\\\\.:].*)?"} or
                node_memory_Buffers_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}
              ) + (
                node_memory_Slab{instance=~"$ceph_hosts([\\\\.:].*)?"} or
                node_memory_Slab_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}
              )
            |||,
            'buffers/cache'
          ),
          $.addTargetSchema(
            |||
              (
                node_memory_MemTotal{instance=~"$ceph_hosts([\\\\.:].*)?"} or
                node_memory_MemTotal_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}
              ) - (
                (
                  node_memory_MemFree{instance=~"$ceph_hosts([\\\\.:].*)?"} or
                  node_memory_MemFree_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}
                ) + (
                  node_memory_Cached{instance=~"$ceph_hosts([\\\\.:].*)?"} or
                  node_memory_Cached_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}
                ) + (
                  node_memory_Buffers{instance=~"$ceph_hosts([\\\\.:].*)?"} or
                  node_memory_Buffers_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}
                ) + (
                  node_memory_Slab{instance=~"$ceph_hosts([\\\\.:].*)?"} or
                  node_memory_Slab_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}
                )
              )
            |||,
            'used'
          ),
        ]
      )
      .addSeriesOverride(
        {
          alias: 'total',
          color: '#bf1b00',
          fill: 0,
          linewidth: 2,
          stack: false,
        }
      ),
      $.simpleGraphPanel(
        {},
        'Network Load',
        "Show the network load (rx,tx) across all interfaces (excluding loopback 'lo')",
        'decbytes',
        'Send (-) / Receive (+)',
        null,
        |||
          sum by (device) (
            rate(node_network_receive_bytes{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[$__rate_interval]) or
            rate(node_network_receive_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[$__rate_interval])
          )
        |||,
        '{{device}}.rx',
        15,
        1,
        6,
        10
      )
      .addTargets(
        [
          $.addTargetSchema(
            |||
              sum by (device) (
                rate(node_network_transmit_bytes{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[$__rate_interval]) or
                rate(node_network_transmit_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[$__rate_interval])
              )
            |||,
            '{{device}}.tx'
          ),
        ]
      )
      .addSeriesOverride(
        { alias: '/.*tx/', transform: 'negative-Y' }
      ),
      $.simpleGraphPanel(
        {},
        'Network drop rate',
        '',
        'pps',
        'Send (-) / Receive (+)',
        null,
        |||
          rate(node_network_receive_drop{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval]) or
          rate(node_network_receive_drop_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval])
        |||,
        '{{device}}.rx',
        21,
        1,
        3,
        5
      )
      .addTargets(
        [
          $.addTargetSchema(
            |||
              rate(node_network_transmit_drop{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval]) or
              rate(node_network_transmit_drop_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval])
            |||,
            '{{device}}.tx'
          ),
        ]
      )
      .addSeriesOverride(
        {
          alias: '/.*tx/',
          transform: 'negative-Y',
        }
      ),
      $.simpleSingleStatPanel(
        'bytes',
        'Raw Capacity',
        'Each OSD consists of a Journal/WAL partition and a data partition. The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.',
        'current',
        |||
          sum(
            ceph_osd_stat_bytes{%(matchers)s} and
            on (ceph_daemon) ceph_disk_occupation{%(matchers)s, instance=~"($ceph_hosts)([\\\\.:].*)?"}
          )
        ||| % $.matchers(),
        null,
        'time_series',
        0,
        6,
        3,
        5
      ),
      $.simpleGraphPanel(
        {},
        'Network error rate',
        '',
        'pps',
        'Send (-) / Receive (+)',
        null,
        |||
          rate(node_network_receive_errs{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval]) or
          rate(node_network_receive_errs_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval])
        |||,
        '{{device}}.rx',
        21,
        6,
        3,
        5
      )
      .addTargets(
        [$.addTargetSchema(
          |||
            rate(node_network_transmit_errs{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval]) or
            rate(node_network_transmit_errs_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval])
          |||,
          '{{device}}.tx'
        )]
      )
      .addSeriesOverride(
        {
          alias: '/.*tx/',
          transform: 'negative-Y',
        }
      ),
      $.addRowSchema(false,
                     true,
                     'OSD Disk Performance Statistics') + { gridPos: { x: 0, y: 11, w: 24, h: 1 } },
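      // The disk panels below join node_exporter device metrics to
      // ceph_disk_occupation_human on (instance, device), so each series is
      // labelled with both the device name and its OSD id.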
      $.simpleGraphPanel(
        {},
        '$ceph_hosts Disk IOPS',
        'For any OSD devices on the host, this chart shows the IOPS per physical device. Each device is shown by its name and corresponding OSD id value',
        'ops',
        'Read (-) / Write (+)',
        null,
        |||
          label_replace(
            (
              rate(node_disk_writes_completed{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]) or
              rate(node_disk_writes_completed_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval])
            ), "instance", "$1", "instance", "([^:.]*).*"
          ) * on(instance, device) group_left(ceph_daemon) label_replace(
            label_replace(
              ceph_disk_occupation_human{%(matchers)s}, "device", "$1", "device", "/dev/(.*)"
            ), "instance", "$1", "instance", "([^:.]*).*"
          )
        ||| % $.matchers(),
        '{{device}}({{ceph_daemon}}) writes',
        0,
        12,
        11,
        9
      )
      .addTargets(
        [
          $.addTargetSchema(
            |||
              label_replace(
                (
                  rate(node_disk_reads_completed{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]) or
                  rate(node_disk_reads_completed_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval])
                ), "instance", "$1", "instance", "([^:.]*).*"
              ) * on(instance, device) group_left(ceph_daemon) label_replace(
                label_replace(
                  ceph_disk_occupation_human{%(matchers)s}, "device", "$1", "device", "/dev/(.*)"
                ), "instance", "$1", "instance", "([^:.]*).*"
              )
            ||| % $.matchers(),
            '{{device}}({{ceph_daemon}}) reads'
          ),
        ]
      )
      .addSeriesOverride(
        { alias: '/.*reads/', transform: 'negative-Y' }
      ),
      $.simpleGraphPanel(
        {},
        '$ceph_hosts Throughput by Disk',
        'For OSD hosts, this chart shows the disk bandwidth (read bytes/sec + write bytes/sec) of the physical OSD device. Each device is shown by device name, and corresponding OSD id',
        'Bps',
        'Read (-) / Write (+)',
        null,
        |||
          label_replace(
            (
              rate(node_disk_bytes_written{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]) or
              rate(node_disk_written_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval])
            ), "instance", "$1", "instance", "([^:.]*).*"
          ) * on(instance, device) group_left(ceph_daemon) label_replace(
            label_replace(ceph_disk_occupation_human{%(matchers)s}, "device", "$1", "device", "/dev/(.*)"),
            "instance", "$1", "instance", "([^:.]*).*"
          )
        ||| % $.matchers(),
        '{{device}}({{ceph_daemon}}) write',
        12,
        12,
        11,
        9
      )
      .addTargets(
        [$.addTargetSchema(
          |||
            label_replace(
              (
                rate(node_disk_bytes_read{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]) or
                rate(node_disk_read_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval])
              ), "instance", "$1", "instance", "([^:.]*).*"
            ) * on(instance, device) group_left(ceph_daemon) label_replace(
              label_replace(ceph_disk_occupation_human{%(matchers)s}, "device", "$1", "device", "/dev/(.*)"),
              "instance", "$1", "instance", "([^:.]*).*"
            )
          ||| % $.matchers(),
          '{{device}}({{ceph_daemon}}) read'
        )]
      )
      .addSeriesOverride(
        { alias: '/.*read/', transform: 'negative-Y' }
      ),
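      // Latency is derived as io_time / ios_completed per device;
      // clamp_min(..., 0.001) keeps the denominator non-zero so idle
      // devices do not produce divide-by-zero gaps.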
      $.simpleGraphPanel(
        {},
        '$ceph_hosts Disk Latency',
        'For OSD hosts, this chart shows the latency at the physical drive. Each drive is shown by device name, with its corresponding OSD id',
        's',
        '',
        null,
        |||
          max by(instance, device) (label_replace(
            (rate(node_disk_write_time_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval])) /
            clamp_min(rate(node_disk_writes_completed_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]), 0.001) or
            (rate(node_disk_read_time_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval])) /
            clamp_min(rate(node_disk_reads_completed_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]), 0.001),
            "instance", "$1", "instance", "([^:.]*).*"
          )) * on(instance, device) group_left(ceph_daemon) label_replace(
            label_replace(
              ceph_disk_occupation_human{%(matchers)s, instance=~"($ceph_hosts)([\\\\.:].*)?"},
              "device", "$1", "device", "/dev/(.*)"
            ), "instance", "$1", "instance", "([^:.]*).*"
          )
        ||| % $.matchers(),
        '{{device}}({{ceph_daemon}})',
        0,
        21,
        11,
        9
      ),
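      // Older node_exporter reports io_time in milliseconds (rate / 10
      // yields percent of wall clock); newer versions report seconds
      // (rate * 100). Both forms are ORed so either works.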
      $.simpleGraphPanel(
        {},
        '$ceph_hosts Disk utilization',
        'Show disk utilization % (util) of any OSD devices on the host by the physical device name and associated OSD id.',
        'percent',
        '%Util',
        null,
        |||
          label_replace(
            (
              (rate(node_disk_io_time_ms{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]) / 10) or
              rate(node_disk_io_time_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]) * 100
            ), "instance", "$1", "instance", "([^:.]*).*"
          ) * on(instance, device) group_left(ceph_daemon) label_replace(
            label_replace(ceph_disk_occupation_human{%(matchers)s, instance=~"($ceph_hosts)([\\\\.:].*)?"},
                          "device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^:.]*).*"
          )
        ||| % $.matchers(),
        '{{device}}({{ceph_daemon}})',
        12,
        21,
        11,
        9
      ),
    ]),
}