27 "datasource": "-- Grafana --",
30 "iconColor": "rgba(0, 211, 255, 1)",
31 "name": "Annotations & Alerts",
40 "iteration": 1557393917915,
45 "colorBackground": false,
49 "rgba(237, 129, 40, 0.89)",
52 "datasource": "$datasource",
58 "thresholdLabels": false,
59 "thresholdMarkers": true
73 "name": "value to text",
77 "name": "range to text",
82 "nullPointMode": "connected",
85 "postfixFontSize": "50%",
87 "prefixFontSize": "50%",
96 "fillColor": "rgba(31, 118, 189, 0.18)",
98 "lineColor": "rgb(31, 120, 193)",
104 "expr": "count(sum by (hostname) (ceph_osd_metadata))",
105 "format": "time_series",
112 "title": "OSD Hosts",
113 "type": "singlestat",
114 "valueFontSize": "80%",
122 "valueName": "current"
125 "cacheTimeout": null,
126 "colorBackground": false,
130 "rgba(237, 129, 40, 0.89)",
133 "datasource": "$datasource",
134 "description": "Average CPU busy across all hosts (OSD, RGW, MON etc) within the cluster",
136 "format": "percentunit",
141 "thresholdLabels": false,
142 "thresholdMarkers": true
156 "name": "value to text",
160 "name": "range to text",
164 "maxDataPoints": 100,
165 "nullPointMode": "connected",
168 "postfixFontSize": "50%",
170 "prefixFontSize": "50%",
179 "fillColor": "rgba(31, 118, 189, 0.18)",
181 "lineColor": "rgb(31, 120, 193)",
187 "expr": "avg(\n 1 - (\n avg by(instance) \n (irate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or\n irate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]))\n )\n )",
188 "format": "time_series",
195 "title": "AVG CPU Busy",
196 "type": "singlestat",
197 "valueFontSize": "80%",
205 "valueName": "current"
208 "cacheTimeout": null,
209 "colorBackground": false,
213 "rgba(237, 129, 40, 0.89)",
216 "datasource": "$datasource",
217 "description": "Average Memory Usage across all hosts in the cluster (excludes buffer/cache usage)",
219 "format": "percentunit",
224 "thresholdLabels": false,
225 "thresholdMarkers": true
239 "name": "value to text",
243 "name": "range to text",
247 "maxDataPoints": 100,
248 "nullPointMode": "connected",
251 "postfixFontSize": "50%",
253 "prefixFontSize": "50%",
262 "fillColor": "rgba(31, 118, 189, 0.18)",
264 "lineColor": "rgb(31, 120, 193)",
270 "expr": "avg (((node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"})- (\n (node_memory_MemFree{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemFree_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) + \n (node_memory_Cached{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Cached_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) + \n (node_memory_Buffers{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Buffers_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) +\n (node_memory_Slab{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Slab_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"})\n )) /\n (node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$rgw_hosts|$mon_hosts|$mds_hosts).*\"} ))",
271 "format": "time_series",
278 "title": "AVG RAM Utilization",
279 "type": "singlestat",
280 "valueFontSize": "80%",
288 "valueName": "current"
291 "cacheTimeout": null,
292 "colorBackground": false,
296 "rgba(237, 129, 40, 0.89)",
299 "datasource": "$datasource",
300 "description": "IOPS Load at the device as reported by the OS on all OSD hosts",
306 "thresholdLabels": false,
307 "thresholdMarkers": true
321 "name": "value to text",
325 "name": "range to text",
329 "maxDataPoints": 100,
330 "nullPointMode": "connected",
333 "postfixFontSize": "50%",
335 "prefixFontSize": "50%",
344 "fillColor": "rgba(31, 118, 189, 0.18)",
346 "lineColor": "rgb(31, 120, 193)",
352 "expr": "sum ((irate(node_disk_reads_completed{instance=~\"($osd_hosts).*\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($osd_hosts).*\"}[5m]) ) + \n(irate(node_disk_writes_completed{instance=~\"($osd_hosts).*\"}[5m]) or irate(node_disk_writes_completed_total{instance=~\"($osd_hosts).*\"}[5m])))",
353 "format": "time_series",
360 "title": "Physical IOPS",
361 "type": "singlestat",
362 "valueFontSize": "80%",
370 "valueName": "current"
373 "cacheTimeout": null,
374 "colorBackground": false,
378 "rgba(237, 129, 40, 0.89)",
381 "datasource": "$datasource",
382 "description": "Average Disk utilization for all OSD data devices (i.e. excludes journal/WAL)",
388 "thresholdLabels": false,
389 "thresholdMarkers": true
403 "name": "value to text",
407 "name": "range to text",
411 "maxDataPoints": 100,
412 "nullPointMode": "connected",
415 "postfixFontSize": "50%",
417 "prefixFontSize": "50%",
426 "fillColor": "rgba(31, 118, 189, 0.18)",
428 "lineColor": "rgb(31, 120, 193)",
434 "expr": "avg (\n label_replace((irate(node_disk_io_time_ms[5m]) / 10 ) or\n (irate(node_disk_io_time_seconds_total[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) *\n on(instance, device, ceph_daemon) label_replace(label_replace(ceph_disk_occupation{instance=~\"($osd_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\")\n)",
435 "format": "time_series",
442 "title": "AVG Disk Utilization",
443 "type": "singlestat",
444 "valueFontSize": "80%",
452 "valueName": "current"
455 "cacheTimeout": null,
456 "colorBackground": false,
460 "rgba(237, 129, 40, 0.89)",
463 "datasource": "$datasource",
465 "description": "Total send/receive network load across all hosts in the ceph cluster",
471 "thresholdLabels": false,
472 "thresholdMarkers": true
486 "name": "value to text",
490 "name": "range to text",
494 "maxDataPoints": 100,
495 "nullPointMode": "connected",
498 "postfixFontSize": "50%",
500 "prefixFontSize": "50%",
509 "fillColor": "rgba(31, 118, 189, 0.18)",
511 "lineColor": "rgb(31, 120, 193)",
517 "expr": "sum (\n irate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m])\n ) +\nsum (\n irate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m])\n )",
518 "format": "time_series",
525 "title": "Network Load",
526 "type": "singlestat",
527 "valueFontSize": "80%",
535 "valueName": "current"
542 "datasource": "$datasource",
543 "description": "Show the top 10 busiest hosts by cpu",
564 "nullPointMode": "null",
569 "seriesOverrides": [],
572 "steppedLine": false,
575 "expr": "topk(10,100 * ( 1 - (\n avg by(instance) \n (irate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or\n irate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]))\n )\n )\n)",
576 "format": "time_series",
578 "legendFormat": "{{instance}}",
586 "title": "CPU Busy - Top 10 Hosts",
590 "value_type": "individual"
629 "datasource": "$datasource",
630 "description": "Top 10 hosts by network load",
651 "nullPointMode": "null",
656 "seriesOverrides": [],
659 "steppedLine": false,
662 "expr": "topk(10, (sum by(instance) (\n (\n irate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m])\n ) +\n (\n irate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m])\n ))\n )\n)",
663 "format": "time_series",
665 "legendFormat": "{{instance}}",
673 "title": "Network Load - Top 10 Hosts",
677 "value_type": "individual"
724 "label": "Data Source",
725 "name": "datasource",
727 "query": "prometheus",
730 "skipUrlSync": false,
736 "datasource": "$datasource",
743 "query": "label_values(ceph_disk_occupation, exported_instance)",
745 "regex": "([^.]*).*",
746 "skipUrlSync": false,
748 "tagValuesQuery": "",
757 "datasource": "$datasource",
764 "query": "label_values(ceph_mon_metadata, ceph_daemon)",
767 "skipUrlSync": false,
769 "tagValuesQuery": "",
778 "datasource": "$datasource",
785 "query": "label_values(ceph_mds_inodes, ceph_daemon)",
788 "skipUrlSync": false,
790 "tagValuesQuery": "",
799 "datasource": "$datasource",
806 "query": "label_values(ceph_rgw_qlen, ceph_daemon)",
809 "skipUrlSync": false,
811 "tagValuesQuery": "",
824 "refresh_intervals": [
849 "title": "Host Overview",