27 "datasource": "-- Grafana --",
30 "iconColor": "rgba(0, 211, 255, 1)",
31 "name": "Annotations & Alerts",
40 "iteration": 1557393917915,
45 "colorBackground": false,
49 "rgba(237, 129, 40, 0.89)",
52 "datasource": "$datasource",
58 "thresholdLabels": false,
59 "thresholdMarkers": true
73 "name": "value to text",
77 "name": "range to text",
82 "nullPointMode": "connected",
85 "postfixFontSize": "50%",
87 "prefixFontSize": "50%",
96 "fillColor": "rgba(31, 118, 189, 0.18)",
98 "lineColor": "rgb(31, 120, 193)",
104 "expr": "count(sum by (hostname) (ceph_osd_metadata))",
105 "format": "time_series",
112 "title": "OSD Hosts",
113 "type": "singlestat",
114 "valueFontSize": "80%",
122 "valueName": "current"
125 "cacheTimeout": null,
126 "colorBackground": false,
130 "rgba(237, 129, 40, 0.89)",
133 "datasource": "$datasource",
135 "description": "Average CPU busy across all hosts (OSD, RGW, MON etc) within the cluster",
137 "format": "percentunit",
142 "thresholdLabels": false,
143 "thresholdMarkers": true
157 "name": "value to text",
161 "name": "range to text",
165 "maxDataPoints": 100,
166 "nullPointMode": "connected",
169 "postfixFontSize": "50%",
171 "prefixFontSize": "50%",
180 "fillColor": "rgba(31, 118, 189, 0.18)",
182 "lineColor": "rgb(31, 120, 193)",
188 "expr": "avg(\n 1 - (\n avg by(instance) \n (irate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or\n irate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]))\n )\n )",
189 "format": "time_series",
196 "title": "AVG CPU Busy",
197 "type": "singlestat",
198 "valueFontSize": "80%",
206 "valueName": "current"
209 "cacheTimeout": null,
210 "colorBackground": false,
214 "rgba(237, 129, 40, 0.89)",
217 "datasource": "$datasource",
219 "description": "Average Memory Usage across all hosts in the cluster (excludes buffer/cache usage)",
221 "format": "percentunit",
226 "thresholdLabels": false,
227 "thresholdMarkers": true
241 "name": "value to text",
245 "name": "range to text",
249 "maxDataPoints": 100,
250 "nullPointMode": "connected",
253 "postfixFontSize": "50%",
255 "prefixFontSize": "50%",
264 "fillColor": "rgba(31, 118, 189, 0.18)",
266 "lineColor": "rgb(31, 120, 193)",
272 "expr": "avg (((node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"})- (\n (node_memory_MemFree{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemFree_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) + \n (node_memory_Cached{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Cached_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) + \n (node_memory_Buffers{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Buffers_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) +\n (node_memory_Slab{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Slab_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"})\n )) /\n (node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$rgw_hosts|$mon_hosts|$mds_hosts).*\"} ))",
273 "format": "time_series",
280 "title": "AVG RAM Utilization",
281 "type": "singlestat",
282 "valueFontSize": "80%",
290 "valueName": "current"
293 "cacheTimeout": null,
294 "colorBackground": false,
298 "rgba(237, 129, 40, 0.89)",
301 "datasource": "$datasource",
302 "description": "IOPS Load at the device as reported by the OS on all OSD hosts",
308 "thresholdLabels": false,
309 "thresholdMarkers": true
323 "name": "value to text",
327 "name": "range to text",
331 "maxDataPoints": 100,
332 "nullPointMode": "connected",
335 "postfixFontSize": "50%",
337 "prefixFontSize": "50%",
346 "fillColor": "rgba(31, 118, 189, 0.18)",
348 "lineColor": "rgb(31, 120, 193)",
354 "expr": "sum ((irate(node_disk_reads_completed{instance=~\"($osd_hosts).*\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($osd_hosts).*\"}[5m]) ) + \n(irate(node_disk_writes_completed{instance=~\"($osd_hosts).*\"}[5m]) or irate(node_disk_writes_completed_total{instance=~\"($osd_hosts).*\"}[5m])))",
355 "format": "time_series",
362 "title": "Physical IOPS",
363 "type": "singlestat",
364 "valueFontSize": "80%",
372 "valueName": "current"
375 "cacheTimeout": null,
376 "colorBackground": false,
380 "rgba(237, 129, 40, 0.89)",
383 "datasource": "$datasource",
384 "description": "Average Disk utilization for all OSD data devices (i.e. excludes journal/WAL)",
390 "thresholdLabels": false,
391 "thresholdMarkers": true
405 "name": "value to text",
409 "name": "range to text",
413 "maxDataPoints": 100,
414 "nullPointMode": "connected",
417 "postfixFontSize": "50%",
419 "prefixFontSize": "50%",
428 "fillColor": "rgba(31, 118, 189, 0.18)",
430 "lineColor": "rgb(31, 120, 193)",
436 "expr" : "avg (\n label_replace((irate(node_disk_io_time_ms[5m]) / 10 ) or\n (irate(node_disk_io_time_seconds_total[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) *\n on(instance, device) label_replace(label_replace(ceph_disk_occupation{instance=~\"($osd_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\")\n)",
437 "format": "time_series",
444 "title": "AVG Disk Utilization",
445 "type": "singlestat",
446 "valueFontSize": "80%",
454 "valueName": "current"
457 "cacheTimeout": null,
458 "colorBackground": false,
462 "rgba(237, 129, 40, 0.89)",
465 "datasource": "$datasource",
467 "description": "Total send/receive network load across all hosts in the ceph cluster",
473 "thresholdLabels": false,
474 "thresholdMarkers": true
488 "name": "value to text",
492 "name": "range to text",
496 "maxDataPoints": 100,
497 "nullPointMode": "connected",
500 "postfixFontSize": "50%",
502 "prefixFontSize": "50%",
511 "fillColor": "rgba(31, 118, 189, 0.18)",
513 "lineColor": "rgb(31, 120, 193)",
519 "expr": "sum (\n irate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m])\n ) +\nsum (\n irate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m])\n )",
520 "format": "time_series",
527 "title": "Network Load",
528 "type": "singlestat",
529 "valueFontSize": "80%",
537 "valueName": "current"
544 "datasource": "$datasource",
545 "description": "Show the top 10 busiest hosts by CPU",
566 "nullPointMode": "null",
571 "seriesOverrides": [],
574 "steppedLine": false,
577 "expr": "topk(10,( 1 - (\n avg by(instance) \n (irate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or\n irate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]))\n )\n )\n)",
578 "format": "time_series",
580 "legendFormat": "{{instance}}",
588 "title": "CPU Busy - Top 10 Hosts",
592 "value_type": "individual"
631 "datasource": "$datasource",
632 "description": "Top 10 hosts by network load",
653 "nullPointMode": "null",
658 "seriesOverrides": [],
661 "steppedLine": false,
664 "expr": "topk(10, (sum by(instance) (\n (\n irate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m])\n ) +\n (\n irate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m])\n ))\n )\n)",
665 "format": "time_series",
667 "legendFormat": "{{instance}}",
675 "title": "Network Load - Top 10 Hosts",
679 "value_type": "individual"
726 "label": "Data Source",
727 "name": "datasource",
729 "query": "prometheus",
732 "skipUrlSync": false,
738 "datasource": "$datasource",
745 "query": "label_values(ceph_disk_occupation, exported_instance)",
747 "regex": "([^.]*).*",
748 "skipUrlSync": false,
750 "tagValuesQuery": "",
759 "datasource": "$datasource",
766 "query": "label_values(ceph_mon_metadata, ceph_daemon)",
769 "skipUrlSync": false,
771 "tagValuesQuery": "",
780 "datasource": "$datasource",
787 "query": "label_values(ceph_mds_inodes, ceph_daemon)",
790 "skipUrlSync": false,
792 "tagValuesQuery": "",
801 "datasource": "$datasource",
808 "query": "label_values(ceph_rgw_qlen, ceph_daemon)",
811 "skipUrlSync": false,
813 "tagValuesQuery": "",
826 "refresh_intervals": [
851 "title": "Host Overview",