27 "datasource": "-- Grafana --",
30 "iconColor": "rgba(0, 211, 255, 1)",
31 "name": "Annotations & Alerts",
42 "hideControls": false,
58 "repeatIteration": null,
61 "title": "$ceph_hosts System Overview",
67 "colorBackground": false,
71 "rgba(237, 129, 40, 0.89)",
74 "datasource": "$datasource",
80 "thresholdLabels": false,
81 "thresholdMarkers": true
95 "name": "value to text",
99 "name": "range to text",
103 "maxDataPoints": 100,
104 "nullPointMode": "connected",
107 "postfixFontSize": "50%",
109 "prefixFontSize": "50%",
118 "fillColor": "rgba(31, 118, 189, 0.18)",
120 "lineColor": "rgb(31, 120, 193)",
126 "expr": "count(sum by (ceph_daemon) (ceph_osd_metadata{hostname='$ceph_hosts'}))",
127 "format": "time_series",
135 "type": "singlestat",
136 "valueFontSize": "80%",
144 "valueName": "current"
148 "interrupt": "#447EBC",
157 "datasource": "$datasource",
158 "description": "Shows the CPU breakdown. When multiple servers are selected, only the first host's CPU data is shown",
168 "alignAsTable": false,
182 "nullPointMode": "null",
188 "seriesOverrides": [ ],
191 "steppedLine": false,
194 "expr": "sum by (mode) (\n irate(node_cpu{instance=~\"($ceph_hosts)([\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m])\n) / scalar(\n sum(irate(node_cpu{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[1m]))\n) * 100",
195 "format": "time_series",
197 "legendFormat": "{{mode}}",
204 "title": "CPU Utilization",
208 "value_type": "individual"
221 "label": "% Utilization",
239 "Available": "#508642",
249 "datasource": "$datasource",
260 "alignAsTable": false,
274 "nullPointMode": "null",
291 "steppedLine": false,
294 "expr": "node_memory_MemFree{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemFree_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"} ",
295 "format": "time_series",
297 "legendFormat": "Free",
301 "expr": "node_memory_MemTotal{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemTotal_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"} ",
302 "format": "time_series",
304 "legendFormat": "total",
308 "expr": "(node_memory_Cached{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Cached_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) + \n(node_memory_Buffers{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Buffers_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) +\n(node_memory_Slab{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Slab_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) \n",
309 "format": "time_series",
311 "legendFormat": "buffers/cache",
315 "expr": "(node_memory_MemTotal{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemTotal_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"})- (\n (node_memory_MemFree{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemFree_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) + \n (node_memory_Cached{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Cached_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) + \n (node_memory_Buffers{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Buffers_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) +\n (node_memory_Slab{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Slab_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"})\n )\n \n",
316 "format": "time_series",
318 "legendFormat": "used",
325 "title": "RAM Usage",
329 "value_type": "individual"
363 "datasource": "$datasource",
364 "description": "Show the network load (rx,tx) across all interfaces (excluding loopback 'lo')",
374 "alignAsTable": false,
388 "nullPointMode": "null",
397 "transform": "negative-Y"
402 "steppedLine": false,
405 "expr": "sum by (device) (\n irate(node_network_receive_bytes{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m]) or \n irate(node_network_receive_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m])\n)",
406 "format": "time_series",
408 "legendFormat": "{{device}}.rx",
412 "expr": "sum by (device) (\n irate(node_network_transmit_bytes{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m])\n)",
413 "format": "time_series",
415 "legendFormat": "{{device}}.tx",
422 "title": "Network Load",
426 "value_type": "individual"
438 "format": "decbytes",
439 "label": "Send (-) / Receive (+)",
460 "datasource": "$datasource",
471 "alignAsTable": false,
485 "nullPointMode": "null",
494 "transform": "negative-Y"
499 "steppedLine": false,
502 "expr": "irate(node_network_receive_drop{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_receive_drop_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])",
503 "format": "time_series",
505 "legendFormat": "{{device}}.rx",
509 "expr": "irate(node_network_transmit_drop{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_transmit_drop_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])",
510 "format": "time_series",
512 "legendFormat": "{{device}}.tx",
519 "title": "Network drop rate",
523 "value_type": "individual"
536 "label": "Send (-) / Receive (+)",
553 "cacheTimeout": null,
554 "colorBackground": false,
558 "rgba(237, 129, 40, 0.89)",
561 "datasource": "$datasource",
562 "description": "Each OSD consists of a Journal/WAL partition and a data partition. The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.",
568 "thresholdLabels": false,
569 "thresholdMarkers": true
583 "name": "value to text",
587 "name": "range to text",
591 "maxDataPoints": 100,
592 "nullPointMode": "connected",
595 "postfixFontSize": "50%",
597 "prefixFontSize": "50%",
606 "fillColor": "rgba(31, 118, 189, 0.18)",
608 "lineColor": "rgb(31, 120, 193)",
614 "expr": "sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\"})",
615 "format": "time_series",
622 "title": "Raw Capacity",
623 "type": "singlestat",
624 "valueFontSize": "80%",
632 "valueName": "current"
639 "datasource": "$datasource",
650 "alignAsTable": false,
664 "nullPointMode": "null",
673 "transform": "negative-Y"
678 "steppedLine": false,
681 "expr": "irate(node_network_receive_errs{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_receive_errs_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])",
682 "format": "time_series",
684 "legendFormat": "{{device}}.rx",
688 "expr": "irate(node_network_transmit_errs{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_transmit_errs_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])",
689 "format": "time_series",
691 "legendFormat": "{{device}}.tx",
698 "title": "Network error rate",
702 "value_type": "individual"
715 "label": "Send (-) / Receive (+)",
743 "repeatIteration": null,
746 "title": "OSD Disk Performance Statistics",
755 "datasource": "$datasource",
756 "description": "For any OSD devices on the host, this chart shows the iops per physical device. Each device is shown by its name and corresponding OSD id value",
766 "alignAsTable": false,
780 "nullPointMode": "connected",
788 "alias": "/.*reads/",
789 "transform": "negative-Y"
794 "steppedLine": false,
797 "expr": "label_replace(\n (\n irate(node_disk_writes_completed{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or\n irate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n)\n* on(instance, device, ceph_daemon) group_left\n label_replace(\n label_replace(\n ceph_disk_occupation,\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n )",
798 "format": "time_series",
800 "legendFormat": "{{device}}({{ceph_daemon}}) writes",
804 "expr": "label_replace(\n (irate(node_disk_reads_completed{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n)\n* on(instance, device, ceph_daemon) group_left\n label_replace(\n label_replace(\n ceph_disk_occupation,\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n )",
805 "format": "time_series",
807 "legendFormat": "{{device}}({{ceph_daemon}}) reads",
814 "title": "$ceph_hosts Disk IOPS",
818 "value_type": "individual"
831 "label": "Read (-) / Write (+)",
852 "datasource": "$datasource",
853 "description": "For OSD hosts, this chart shows the disk bandwidth (read bytes/sec + write bytes/sec) of the physical OSD device. Each device is shown by device name, and corresponding OSD id",
863 "alignAsTable": false,
877 "nullPointMode": "connected",
886 "transform": "negative-Y"
891 "steppedLine": false,
894 "expr": "label_replace((irate(node_disk_bytes_written{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_written_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
895 "format": "time_series",
897 "legendFormat": "{{device}}({{ceph_daemon}}) write",
901 "expr": "label_replace((irate(node_disk_bytes_read{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_read_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
902 "format": "time_series",
904 "legendFormat": "{{device}}({{ceph_daemon}}) read",
911 "title": "$ceph_hosts Throughput by Disk",
915 "value_type": "individual"
928 "label": "Read (-) / Write (+)",
949 "datasource": "$datasource",
950 "description": "For OSD hosts, this chart shows the latency at the physical drive. Each drive is shown by device name, with its corresponding OSD id",
960 "alignAsTable": false,
974 "nullPointMode": "null as zero",
980 "seriesOverrides": [ ],
983 "steppedLine": false,
986 "expr": "max by(instance,device) (label_replace((irate(node_disk_write_time_seconds_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) ) / clamp_min(irate(node_disk_writes_completed_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]), 0.001) or (irate(node_disk_read_time_seconds_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) ) / clamp_min(irate(node_disk_reads_completed_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]), 0.001), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")) * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
987 "format": "time_series",
989 "legendFormat": "{{device}}({{ceph_daemon}})",
996 "title": "$ceph_hosts Disk Latency",
1000 "value_type": "individual"
1034 "datasource": "$datasource",
1035 "description": "Show disk utilization % (util) of any OSD devices on the host by the physical device name and associated OSD id.",
1045 "alignAsTable": false,
1059 "nullPointMode": "connected",
1060 "percentage": false,
1065 "seriesOverrides": [ ],
1068 "steppedLine": false,
1071 "expr": "label_replace(((irate(node_disk_io_time_ms{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) / 10 ) or irate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
1072 "format": "time_series",
1073 "intervalFactor": 1,
1074 "legendFormat": "{{device}}({{ceph_daemon}})",
1081 "title": "$ceph_hosts Disk utilization",
1085 "value_type": "individual"
1097 "format": "percent",
1117 "schemaVersion": 16,
1130 "label": "Data Source",
1131 "name": "datasource",
1133 "query": "prometheus",
1136 "type": "datasource"
1141 "datasource": "$datasource",
1143 "includeAll": false,
1144 "label": "Hostname",
1146 "name": "ceph_hosts",
1148 "query": "label_values(node_scrape_collector_success, instance) ",
1150 "regex": "([^.:]*).*",
1152 "tagValuesQuery": "",
1165 "refresh_intervals": [
1190 "title": "Host Details",