11 "id": "grafana-piechart-panel",
33 "datasource": "-- Grafana --",
36 "iconColor": "rgba(0, 211, 255, 1)",
37 "name": "Annotations & Alerts",
48 "hideControls": false,
59 "datasource": "$datasource",
71 "alignAsTable": false,
85 "nullPointMode": "null",
91 "seriesOverrides": [ ],
97 "expr": "avg (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval]) * 1000\n)\n",
98 "format": "time_series",
100 "legendFormat": "AVG read",
104 "expr": "max(\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval]) * 1000\n)\n",
105 "format": "time_series",
107 "legendFormat": "MAX read",
111 "expr": "quantile(0.95,\n (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval])\n * 1000\n )\n)\n",
112 "format": "time_series",
114 "legendFormat": "@95%ile",
121 "title": "OSD Read Latencies",
125 "value_type": "individual"
156 "datasource": "$datasource",
157 "description": "This table shows the OSDs that are delivering the 10 highest read latencies within the cluster",
175 "rgba(245, 54, 54, 0.9)",
176 "rgba(237, 129, 40, 0.89)",
177 "rgba(50, 172, 45, 0.97)"
179 "dateFormat": "YYYY-MM-DD HH:mm:ss",
182 "pattern": "ceph_daemon",
189 "alias": "Latency (ms)",
192 "rgba(245, 54, 54, 0.9)",
193 "rgba(237, 129, 40, 0.89)",
194 "rgba(50, 172, 45, 0.97)"
196 "dateFormat": "YYYY-MM-DD HH:mm:ss",
209 "rgba(245, 54, 54, 0.9)",
210 "rgba(237, 129, 40, 0.89)",
211 "rgba(50, 172, 45, 0.97)"
213 "dateFormat": "YYYY-MM-DD HH:mm:ss",
225 "expr": "topk(10,\n (sort(\n (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000\n )\n ))\n)\n",
235 "title": "Highest READ Latencies",
236 "transform": "table",
241 "@95%ile write": "#e0752d"
246 "datasource": "$datasource",
258 "alignAsTable": false,
272 "nullPointMode": "null",
278 "seriesOverrides": [ ],
281 "steppedLine": false,
284 "expr": "avg(\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval])\n * 1000\n)\n",
285 "format": "time_series",
287 "legendFormat": "AVG write",
291 "expr": "max(\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000\n)\n",
292 "format": "time_series",
294 "legendFormat": "MAX write",
298 "expr": "quantile(0.95, (\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000\n))\n",
299 "format": "time_series",
301 "legendFormat": "@95%ile write",
308 "title": "OSD Write Latencies",
312 "value_type": "individual"
343 "datasource": "$datasource",
344 "description": "This table shows the OSDs that are delivering the 10 highest write latencies within the cluster",
362 "rgba(245, 54, 54, 0.9)",
363 "rgba(237, 129, 40, 0.89)",
364 "rgba(50, 172, 45, 0.97)"
366 "dateFormat": "YYYY-MM-DD HH:mm:ss",
369 "pattern": "ceph_daemon",
376 "alias": "Latency (ms)",
379 "rgba(245, 54, 54, 0.9)",
380 "rgba(237, 129, 40, 0.89)",
381 "rgba(50, 172, 45, 0.97)"
383 "dateFormat": "YYYY-MM-DD HH:mm:ss",
396 "rgba(245, 54, 54, 0.9)",
397 "rgba(237, 129, 40, 0.89)",
398 "rgba(50, 172, 45, 0.97)"
400 "dateFormat": "YYYY-MM-DD HH:mm:ss",
412 "expr": "topk(10,\n (sort(\n (rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000)\n ))\n)\n",
422 "title": "Highest WRITE Latencies",
423 "transform": "table",
428 "datasource": "$datasource",
442 "legendType": "Under graph",
446 "expr": "count by (device_class) (ceph_osd_metadata{job=~\"$job\"})",
447 "format": "time_series",
449 "legendFormat": "{{device_class}}",
453 "title": "OSD Types Summary",
454 "type": "grafana-piechart-panel",
455 "valueName": "current"
459 "Non-Encrypted": "#E5AC0E"
461 "datasource": "$datasource",
475 "legendType": "Under graph",
479 "expr": "count(ceph_bluefs_wal_total_bytes{job=~\"$job\"})",
480 "format": "time_series",
482 "legendFormat": "bluestore",
486 "expr": "absent(ceph_bluefs_wal_total_bytes{job=~\"$job\"}) * count(ceph_osd_metadata{job=~\"$job\"})",
487 "format": "time_series",
489 "legendFormat": "filestore",
493 "title": "OSD Objectstore Types",
494 "type": "grafana-piechart-panel",
495 "valueName": "current"
499 "datasource": "$datasource",
500 "description": "The pie chart shows the various OSD sizes used within the cluster",
513 "legendType": "Under graph",
517 "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} < 1099511627776)",
518 "format": "time_series",
520 "legendFormat": "<1TB",
524 "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 1099511627776 < 2199023255552)",
525 "format": "time_series",
527 "legendFormat": "<2TB",
531 "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 2199023255552 < 3298534883328)",
532 "format": "time_series",
534 "legendFormat": "<3TB",
538 "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 3298534883328 < 4398046511104)",
539 "format": "time_series",
541 "legendFormat": "<4TB",
545 "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 4398046511104 < 6597069766656)",
546 "format": "time_series",
548 "legendFormat": "<6TB",
552 "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 6597069766656 < 8796093022208)",
553 "format": "time_series",
555 "legendFormat": "<8TB",
559 "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 8796093022208 < 10995116277760)",
560 "format": "time_series",
562 "legendFormat": "<10TB",
566 "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 10995116277760 < 13194139533312)",
567 "format": "time_series",
569 "legendFormat": "<12TB",
573 "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 13194139533312)",
574 "format": "time_series",
576 "legendFormat": "12TB+",
580 "title": "OSD Size Summary",
581 "type": "grafana-piechart-panel",
582 "valueName": "current"
589 "datasource": "$datasource",
600 "alignAsTable": false,
614 "nullPointMode": "null",
620 "seriesOverrides": [ ],
623 "steppedLine": false,
626 "expr": "ceph_osd_numpg{job=~\"$job\"}",
627 "format": "time_series",
630 "legendFormat": "PGs per OSD",
637 "title": "Distribution of PGs per OSD",
641 "value_type": "individual"
654 "label": "# of OSDs",
671 "cacheTimeout": null,
672 "colorBackground": false,
676 "rgba(237, 129, 40, 0.89)",
679 "datasource": "$datasource",
680 "description": "This gauge panel shows onode Hits ratio to help determine if increasing RAM per OSD could help improve the performance of the cluster",
681 "format": "percentunit",
686 "thresholdLabels": false,
687 "thresholdMarkers": true
701 "name": "value to text",
705 "name": "range to text",
709 "maxDataPoints": 100,
710 "nullPointMode": "connected",
713 "postfixFontSize": "50%",
715 "prefixFontSize": "50%",
724 "fillColor": "rgba(31, 118, 189, 0.18)",
726 "lineColor": "rgb(31, 120, 193)",
732 "expr": "sum(ceph_bluestore_onode_hits{job=~\"$job\"}) / (\n sum(ceph_bluestore_onode_hits{job=~\"$job\"}) +\n sum(ceph_bluestore_onode_misses{job=~\"$job\"})\n)\n",
733 "format": "time_series",
740 "title": "OSD onode Hits Ratio",
741 "type": "singlestat",
742 "valueFontSize": "80%",
750 "valueName": "current"
764 "repeatIteration": null,
767 "title": "R/W Profile",
776 "datasource": "$datasource",
777 "description": "Shows the read/write workload profile over time",
788 "alignAsTable": false,
802 "nullPointMode": "null",
808 "seriesOverrides": [ ],
811 "steppedLine": false,
814 "expr": "round(sum(rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval])))",
815 "format": "time_series",
817 "legendFormat": "Reads",
821 "expr": "round(sum(rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval])))",
822 "format": "time_series",
824 "legendFormat": "Writes",
831 "title": "Read/Write Profile",
835 "value_type": "individual"
866 "datasource": "$datasource",
867 "description": "This table shows the 10 OSDs with the highest number of slow ops",
885 "rgba(245, 54, 54, 0.9)",
886 "rgba(237, 129, 40, 0.89)",
887 "rgba(50, 172, 45, 0.97)"
889 "dateFormat": "YYYY-MM-DD HH:mm:ss",
892 "pattern": "ceph_daemon",
902 "rgba(245, 54, 54, 0.9)",
903 "rgba(237, 129, 40, 0.89)",
904 "rgba(50, 172, 45, 0.97)"
906 "dateFormat": "YYYY-MM-DD HH:mm:ss",
919 "rgba(245, 54, 54, 0.9)",
920 "rgba(237, 129, 40, 0.89)",
921 "rgba(50, 172, 45, 0.97)"
923 "dateFormat": "YYYY-MM-DD HH:mm:ss",
935 "expr": "topk(10,\n (ceph_daemon_health_metrics{type=\"SLOW_OPS\", ceph_daemon=~\"osd.*\", job=~\"$job\"})\n)\n",
945 "title": "Top Slow Ops",
946 "transform": "table",
965 "label": "Data Source",
966 "name": "datasource",
968 "query": "prometheus",
976 "datasource": "$datasource",
983 "query": "label_values(ceph_osd_metadata, cluster)",
987 "tagValuesQuery": "",
996 "datasource": "$datasource",
1003 "query": "label_values(ceph_osd_metadata{}, job)",
1007 "tagValuesQuery": "",
1020 "refresh_intervals": [
1045 "title": "OSD Overview",