27 "datasource": "-- Grafana --",
30 "iconColor": "rgba(0, 211, 255, 1)",
31 "name": "Annotations & Alerts",
40 "iteration": 1557386759572,
44 "icon": "external link",
61 "title": "$ceph_hosts System Overview",
66 "colorBackground": false,
69 "rgba(245, 54, 54, 0.9)",
70 "rgba(237, 129, 40, 0.89)",
71 "rgba(50, 172, 45, 0.97)"
73 "datasource": "$datasource",
79 "thresholdLabels": false,
80 "thresholdMarkers": true
95 "name": "value to text",
99 "name": "range to text",
105 "nullPointMode": "connected",
108 "postfixFontSize": "50%",
110 "prefixFontSize": "50%",
119 "fillColor": "rgba(31, 118, 189, 0.18)",
121 "lineColor": "rgb(31, 120, 193)",
127 "expr": "count(sum by (ceph_daemon) (ceph_osd_metadata{hostname='$ceph_hosts'}))",
128 "format": "time_series",
137 "type": "singlestat",
138 "valueFontSize": "80%",
146 "valueName": "current"
150 "interrupt": "#447EBC",
159 "datasource": "$datasource",
160 "description": "Shows the CPU breakdown. When multiple servers are selected, only the first host's CPU data is shown",
170 "alignAsTable": false,
183 "nullPointMode": "null",
189 "seriesOverrides": [],
192 "steppedLine": false,
195 "expr": "sum by (mode) (\n irate(node_cpu{instance=~\"($ceph_hosts).*\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts).*\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m])\n) / scalar(\n sum(irate(node_cpu{instance=~\"($ceph_hosts).*\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts).*\"}[1m]))\n) * 100",
196 "format": "time_series",
198 "legendFormat": "{{mode}}",
207 "title": "CPU Utilisation",
211 "value_type": "individual"
224 "label": "% Utilization",
246 "Available": "#508642",
256 "datasource": "$datasource",
277 "nullPointMode": "null",
293 "steppedLine": false,
296 "expr": "(node_memory_MemTotal{instance=~\"[[ceph_hosts]].*\"} or node_memory_MemTotal_bytes{instance=~\"[[ceph_hosts]].*\"})- (\n (node_memory_MemFree{instance=~\"[[ceph_hosts]].*\"} or node_memory_MemFree_bytes{instance=~\"[[ceph_hosts]].*\"}) + \n (node_memory_Cached{instance=~\"[[ceph_hosts]].*\"} or node_memory_Cached_bytes{instance=~\"[[ceph_hosts]].*\"}) + \n (node_memory_Buffers{instance=~\"[[ceph_hosts]].*\"} or node_memory_Buffers_bytes{instance=~\"[[ceph_hosts]].*\"}) +\n (node_memory_Slab{instance=~\"[[ceph_hosts]].*\"} or node_memory_Slab_bytes{instance=~\"[[ceph_hosts]].*\"})\n )\n \n",
297 "format": "time_series",
299 "legendFormat": "used",
303 "expr": "node_memory_MemFree{instance=~\"[[ceph_hosts]].*\"} or node_memory_MemFree_bytes{instance=~\"[[ceph_hosts]].*\"} ",
304 "format": "time_series",
307 "legendFormat": "Free",
311 "expr": "(node_memory_Cached{instance=~\"[[ceph_hosts]].*\"} or node_memory_Cached_bytes{instance=~\"[[ceph_hosts]].*\"}) + \n(node_memory_Buffers{instance=~\"[[ceph_hosts]].*\"} or node_memory_Buffers_bytes{instance=~\"[[ceph_hosts]].*\"}) +\n(node_memory_Slab{instance=~\"[[ceph_hosts]].*\"} or node_memory_Slab_bytes{instance=~\"[[ceph_hosts]].*\"}) \n",
312 "format": "time_series",
315 "legendFormat": "buffers/cache",
319 "expr": "node_memory_MemTotal{instance=~\"[[ceph_hosts]].*\"} or node_memory_MemTotal_bytes{instance=~\"[[ceph_hosts]].*\"} ",
320 "format": "time_series",
323 "legendFormat": "total",
330 "title": "RAM Usage",
334 "value_type": "individual"
372 "datasource": "$datasource",
373 "description": "Show the network load (rx,tx) across all interfaces (excluding loopback 'lo')",
383 "alignAsTable": false,
398 "nullPointMode": "null",
406 "transform": "negative-Y"
411 "steppedLine": false,
414 "expr": "sum by (device) (\n irate(node_network_receive_bytes{instance=~\"($ceph_hosts).*\",device!=\"lo\"}[1m]) or \n irate(node_network_receive_bytes_total{instance=~\"($ceph_hosts).*\",device!=\"lo\"}[1m])\n)",
415 "format": "time_series",
417 "legendFormat": "{{device}}.rx",
423 "expr": "sum by (device) (\n irate(node_network_transmit_bytes{instance=~\"($ceph_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($ceph_hosts).*\",device!=\"lo\"}[1m])\n)",
424 "format": "time_series",
426 "legendFormat": "{{device}}.tx",
434 "title": "Network Load",
438 "value_type": "individual"
450 "format": "decbytes",
451 "label": "Send (-) / Receive (+)",
476 "datasource": "$datasource",
484 "hideTimeOverride": true,
498 "nullPointMode": "null",
506 "transform": "negative-Y"
511 "steppedLine": false,
514 "expr": "irate(node_network_receive_drop{instance=~\"[[ceph_hosts]].*\"}[1m]) or irate(node_network_receive_drop_total{instance=~\"[[ceph_hosts]].*\"}[1m])",
515 "format": "time_series",
518 "legendFormat": "{{device}}.rx",
522 "expr": "irate(node_network_transmit_drop{instance=~\"[[ceph_hosts]].*\"}[1m]) or irate(node_network_transmit_drop_total{instance=~\"[[ceph_hosts]].*\"}[1m])",
523 "format": "time_series",
525 "legendFormat": "{{device}}.tx",
532 "title": "Network drop rate",
536 "value_type": "individual"
549 "label": "Send (-) / Receive (+)",
570 "cacheTimeout": null,
571 "colorBackground": false,
574 "rgba(245, 54, 54, 0.9)",
575 "rgba(237, 129, 40, 0.89)",
576 "rgba(50, 172, 45, 0.97)"
578 "datasource": "$datasource",
580 "description": "Each OSD consists of a Journal/WAL partition and a data partition. The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.",
586 "thresholdLabels": false,
587 "thresholdMarkers": true
602 "name": "value to text",
606 "name": "range to text",
612 "nullPointMode": "connected",
615 "postfixFontSize": "50%",
617 "prefixFontSize": "50%",
626 "fillColor": "rgba(31, 118, 189, 0.18)",
628 "lineColor": "rgb(31, 120, 193)",
634 "expr": "sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{instance=~\"($ceph_hosts).*\"})",
635 "format": "time_series",
643 "title": "Raw Capacity",
644 "type": "singlestat",
645 "valueFontSize": "80%",
653 "valueName": "current"
660 "datasource": "$datasource",
668 "hideTimeOverride": true,
682 "nullPointMode": "null",
690 "transform": "negative-Y"
695 "steppedLine": false,
698 "expr": "irate(node_network_receive_errs{instance=~\"[[ceph_hosts]].*\"}[1m]) or irate(node_network_receive_errs_total{instance=~\"[[ceph_hosts]].*\"}[1m])",
699 "format": "time_series",
702 "legendFormat": "{{device}}.rx",
706 "expr": "irate(node_network_transmit_errs{instance=~\"[[ceph_hosts]].*\"}[1m]) or irate(node_network_transmit_errs_total{instance=~\"[[ceph_hosts]].*\"}[1m])",
707 "format": "time_series",
709 "legendFormat": "{{device}}.tx",
716 "title": "Network error rate",
720 "value_type": "individual"
733 "label": "Send (-) / Receive (+)",
764 "title": "OSD Disk Performance Statistics",
772 "datasource": "$datasource",
773 "description": "For any OSD devices on the host, this chart shows the IOPS per physical device. Each device is shown by its name and corresponding OSD id value",
795 "nullPointMode": "connected",
802 "alias": "/.*reads/",
803 "transform": "negative-Y"
808 "steppedLine": false,
811 "expr": "(irate(node_disk_writes_completed{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_writes_completed_total{instance=~\"($ceph_hosts).*\"}[5m])) * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"(.*)\")",
812 "format": "time_series",
814 "legendFormat": "{{device}}({{ceph_daemon}}) writes",
820 "expr": "(irate(node_disk_reads_completed{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($ceph_hosts).*\"}[5m])) * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"(.*)\")",
821 "format": "time_series",
824 "legendFormat": "{{device}}({{ceph_daemon}}) reads",
831 "title": "$ceph_hosts Disk IOPS",
835 "value_type": "individual"
848 "label": "Read (-) / Write (+)",
873 "datasource": "$datasource",
874 "description": "For OSD hosts, this chart shows the disk bandwidth (read bytes/sec + write bytes/sec) of the physical OSD device. Each device is shown by device name, and corresponding OSD id",
896 "nullPointMode": "connected",
904 "transform": "negative-Y"
909 "steppedLine": false,
912 "expr": "(irate(node_disk_bytes_written{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_written_bytes_total{instance=~\"($ceph_hosts).*\"}[5m])) * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"(.*)\")",
913 "format": "time_series",
915 "legendFormat": "{{device}}({{ceph_daemon}}) write",
919 "expr": "(irate(node_disk_bytes_read{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_read_bytes_total{instance=~\"($ceph_hosts).*\"}[5m])) * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"(.*)\")",
920 "format": "time_series",
922 "legendFormat": "{{device}}({{ceph_daemon}}) read",
929 "title": "$ceph_hosts Throughput by Disk",
933 "value_type": "individual"
946 "label": "Read (-) / Write (+)",
971 "datasource": "$datasource",
972 "description": "For OSD hosts, this chart shows the latency at the physical drive. Each drive is shown by device name, with its corresponding OSD id",
994 "nullPointMode": "null as zero",
999 "seriesOverrides": [],
1002 "steppedLine": false,
1005 "expr": "max by(instance,device) ((irate(node_disk_write_time_seconds_total{ instance=~\"($ceph_hosts).*\"}[5m]) ) / clamp_min(irate(node_disk_writes_completed_total{ instance=~\"($ceph_hosts).*\"}[5m]), 0.001) or (irate(node_disk_read_time_seconds_total{ instance=~\"($ceph_hosts).*\"}[5m]) ) / clamp_min(irate(node_disk_reads_completed_total{ instance=~\"($ceph_hosts).*\"}[5m]), 0.001) ) * on(instance,device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"(.*)\")",
1006 "format": "time_series",
1008 "intervalFactor": 1,
1009 "legendFormat": "{{device}}({{ceph_daemon}})",
1016 "title": "$ceph_hosts Disk Latency",
1020 "value_type": "individual"
1058 "datasource": "$datasource",
1059 "description": "Show disk utilization % (util) of any OSD devices on the host by the physical device name and associated OSD id.",
1082 "nullPointMode": "connected",
1086 "percentage": false,
1090 "seriesOverrides": [],
1093 "steppedLine": false,
1096 "expr": "((irate(node_disk_io_time_ms{instance=~\"($ceph_hosts).*\"}[5m]) / 10 ) or irate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts).*\"}[5m]) * 100)* on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"(.*)\")",
1097 "format": "time_series",
1099 "intervalFactor": 1,
1100 "legendFormat": "{{device}}({{ceph_daemon}})",
1108 "title": "$ceph_hosts Disk utilisation",
1112 "value_type": "individual"
1124 "format": "percent",
1147 "schemaVersion": 16,
1160 "label": "Data Source",
1161 "name": "datasource",
1163 "query": "prometheus",
1166 "skipUrlSync": false,
1167 "type": "datasource"
1172 "datasource": "$datasource",
1174 "includeAll": false,
1175 "label": "Hostname",
1177 "name": "ceph_hosts",
1179 "query": "label_values(node_scrape_collector_success, instance) ",
1181 "regex": "([^:]*).*",
1182 "skipUrlSync": false,
1184 "tagValuesQuery": "",
1197 "refresh_intervals": [
1221 "timezone": "browser",
1222 "title": "Host Details",