27 "datasource": "-- Grafana --",
30 "iconColor": "rgba(0, 211, 255, 1)",
31 "name": "Annotations & Alerts",
42 "hideControls": false,
58 "repeatIteration": null,
61 "title": "$ceph_hosts System Overview",
67 "colorBackground": false,
71 "rgba(237, 129, 40, 0.89)",
74 "datasource": "$datasource",
80 "thresholdLabels": false,
81 "thresholdMarkers": true
95 "name": "value to text",
99 "name": "range to text",
103 "maxDataPoints": 100,
104 "nullPointMode": "connected",
107 "postfixFontSize": "50%",
109 "prefixFontSize": "50%",
118 "fillColor": "rgba(31, 118, 189, 0.18)",
120 "lineColor": "rgb(31, 120, 193)",
126 "expr": "count(sum by (ceph_daemon) (ceph_osd_metadata{hostname='$ceph_hosts'}))",
127 "format": "time_series",
135 "type": "singlestat",
136 "valueFontSize": "80%",
144 "valueName": "current"
148 "interrupt": "#447EBC",
157 "datasource": "$datasource",
158 "description": "Shows the CPU breakdown. When multiple servers are selected, only the first host's CPU data is shown",
169 "alignAsTable": false,
183 "nullPointMode": "null",
189 "seriesOverrides": [ ],
192 "steppedLine": false,
195 "expr": "sum by (mode) (\n irate(node_cpu{instance=~\"($ceph_hosts)([\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m])\n) / scalar(\n sum(irate(node_cpu{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[1m]))\n) * 100",
196 "format": "time_series",
198 "legendFormat": "{{mode}}",
205 "title": "CPU Utilization",
209 "value_type": "individual"
222 "label": "% Utilization",
240 "Available": "#508642",
250 "datasource": "$datasource",
262 "alignAsTable": false,
276 "nullPointMode": "null",
293 "steppedLine": false,
296 "expr": "node_memory_MemFree{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemFree_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"} ",
297 "format": "time_series",
299 "legendFormat": "Free",
303 "expr": "node_memory_MemTotal{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemTotal_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"} ",
304 "format": "time_series",
306 "legendFormat": "total",
310 "expr": "(node_memory_Cached{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Cached_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) + \n(node_memory_Buffers{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Buffers_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) +\n(node_memory_Slab{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Slab_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) \n",
311 "format": "time_series",
313 "legendFormat": "buffers/cache",
317 "expr": "(node_memory_MemTotal{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemTotal_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"})- (\n (node_memory_MemFree{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemFree_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) + \n (node_memory_Cached{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Cached_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) + \n (node_memory_Buffers{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Buffers_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) +\n (node_memory_Slab{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Slab_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"})\n )\n \n",
318 "format": "time_series",
320 "legendFormat": "used",
327 "title": "RAM Usage",
331 "value_type": "individual"
365 "datasource": "$datasource",
366 "description": "Show the network load (rx,tx) across all interfaces (excluding loopback 'lo')",
377 "alignAsTable": false,
391 "nullPointMode": "null",
400 "transform": "negative-Y"
405 "steppedLine": false,
408 "expr": "sum by (device) (\n irate(node_network_receive_bytes{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m]) or \n irate(node_network_receive_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m])\n)",
409 "format": "time_series",
411 "legendFormat": "{{device}}.rx",
415 "expr": "sum by (device) (\n irate(node_network_transmit_bytes{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m])\n)",
416 "format": "time_series",
418 "legendFormat": "{{device}}.tx",
425 "title": "Network Load",
429 "value_type": "individual"
441 "format": "decbytes",
442 "label": "Send (-) / Receive (+)",
463 "datasource": "$datasource",
475 "alignAsTable": false,
489 "nullPointMode": "null",
498 "transform": "negative-Y"
503 "steppedLine": false,
506 "expr": "irate(node_network_receive_drop{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_receive_drop_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])",
507 "format": "time_series",
509 "legendFormat": "{{device}}.rx",
513 "expr": "irate(node_network_transmit_drop{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_transmit_drop_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])",
514 "format": "time_series",
516 "legendFormat": "{{device}}.tx",
523 "title": "Network drop rate",
527 "value_type": "individual"
540 "label": "Send (-) / Receive (+)",
557 "cacheTimeout": null,
558 "colorBackground": false,
562 "rgba(237, 129, 40, 0.89)",
565 "datasource": "$datasource",
566 "description": "Each OSD consists of a Journal/WAL partition and a data partition. The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.",
572 "thresholdLabels": false,
573 "thresholdMarkers": true
587 "name": "value to text",
591 "name": "range to text",
595 "maxDataPoints": 100,
596 "nullPointMode": "connected",
599 "postfixFontSize": "50%",
601 "prefixFontSize": "50%",
610 "fillColor": "rgba(31, 118, 189, 0.18)",
612 "lineColor": "rgb(31, 120, 193)",
618 "expr": "sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\"})",
619 "format": "time_series",
626 "title": "Raw Capacity",
627 "type": "singlestat",
628 "valueFontSize": "80%",
636 "valueName": "current"
643 "datasource": "$datasource",
655 "alignAsTable": false,
669 "nullPointMode": "null",
678 "transform": "negative-Y"
683 "steppedLine": false,
686 "expr": "irate(node_network_receive_errs{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_receive_errs_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])",
687 "format": "time_series",
689 "legendFormat": "{{device}}.rx",
693 "expr": "irate(node_network_transmit_errs{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_transmit_errs_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])",
694 "format": "time_series",
696 "legendFormat": "{{device}}.tx",
703 "title": "Network error rate",
707 "value_type": "individual"
720 "label": "Send (-) / Receive (+)",
748 "repeatIteration": null,
751 "title": "OSD Disk Performance Statistics",
760 "datasource": "$datasource",
761 "description": "For any OSD devices on the host, this chart shows the IOPS per physical device. Each device is shown by its name and corresponding OSD id value",
772 "alignAsTable": false,
786 "nullPointMode": "connected",
794 "alias": "/.*reads/",
795 "transform": "negative-Y"
800 "steppedLine": false,
803 "expr": "label_replace(\n (\n irate(node_disk_writes_completed{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or\n irate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n)\n* on(instance, device) group_left(ceph_daemon)\n label_replace(\n label_replace(\n ceph_disk_occupation_human,\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n )",
804 "format": "time_series",
806 "legendFormat": "{{device}}({{ceph_daemon}}) writes",
810 "expr": "label_replace(\n (irate(node_disk_reads_completed{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n)\n* on(instance, device) group_left(ceph_daemon)\n label_replace(\n label_replace(\n ceph_disk_occupation_human,\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n )",
811 "format": "time_series",
813 "legendFormat": "{{device}}({{ceph_daemon}}) reads",
820 "title": "$ceph_hosts Disk IOPS",
824 "value_type": "individual"
837 "label": "Read (-) / Write (+)",
858 "datasource": "$datasource",
859 "description": "For OSD hosts, this chart shows the disk bandwidth (read bytes/sec + write bytes/sec) of the physical OSD device. Each device is shown by device name, and corresponding OSD id",
870 "alignAsTable": false,
884 "nullPointMode": "connected",
893 "transform": "negative-Y"
898 "steppedLine": false,
901 "expr": "label_replace((irate(node_disk_bytes_written{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_written_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation_human, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
902 "format": "time_series",
904 "legendFormat": "{{device}}({{ceph_daemon}}) write",
908 "expr": "label_replace((irate(node_disk_bytes_read{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_read_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation_human, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
909 "format": "time_series",
911 "legendFormat": "{{device}}({{ceph_daemon}}) read",
918 "title": "$ceph_hosts Throughput by Disk",
922 "value_type": "individual"
935 "label": "Read (-) / Write (+)",
956 "datasource": "$datasource",
957 "description": "For OSD hosts, this chart shows the latency at the physical drive. Each drive is shown by device name, with its corresponding OSD id",
968 "alignAsTable": false,
982 "nullPointMode": "null as zero",
988 "seriesOverrides": [ ],
991 "steppedLine": false,
994 "expr": "max by(instance,device) (label_replace((irate(node_disk_write_time_seconds_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) ) / clamp_min(irate(node_disk_writes_completed_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]), 0.001) or (irate(node_disk_read_time_seconds_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) ) / clamp_min(irate(node_disk_reads_completed_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]), 0.001), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")) * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation_human{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
995 "format": "time_series",
997 "legendFormat": "{{device}}({{ceph_daemon}})",
1004 "title": "$ceph_hosts Disk Latency",
1008 "value_type": "individual"
1042 "datasource": "$datasource",
1043 "description": "Show disk utilization % (util) of any OSD devices on the host by the physical device name and associated OSD id.",
1054 "alignAsTable": false,
1068 "nullPointMode": "connected",
1069 "percentage": false,
1074 "seriesOverrides": [ ],
1077 "steppedLine": false,
1080 "expr": "label_replace(((irate(node_disk_io_time_ms{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) / 10 ) or irate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation_human{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
1081 "format": "time_series",
1082 "intervalFactor": 1,
1083 "legendFormat": "{{device}}({{ceph_daemon}})",
1090 "title": "$ceph_hosts Disk utilization",
1094 "value_type": "individual"
1106 "format": "percent",
1126 "schemaVersion": 16,
1139 "label": "Data Source",
1140 "name": "datasource",
1142 "query": "prometheus",
1145 "type": "datasource"
1150 "datasource": "$datasource",
1152 "includeAll": false,
1153 "label": "Hostname",
1155 "name": "ceph_hosts",
1157 "query": "label_values(node_scrape_collector_success, instance) ",
1159 "regex": "([^.:]*).*",
1161 "tagValuesQuery": "",
1174 "refresh_intervals": [
1199 "title": "Host Details",